Merge "Update to the BitVector Implementation"
diff --git a/Android.mk b/Android.mk
index 612fc40..62d40bb 100644
--- a/Android.mk
+++ b/Android.mk
@@ -55,12 +55,15 @@
 	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.odex
 	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.oat
 	rm -f $(TARGET_OUT_JAVA_LIBRARIES)/*.art
+	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.oat
+	rm -f $(DEXPREOPT_PRODUCT_DIR_FULL_PATH)/$(DEXPREOPT_BOOT_JAR_DIR)/*.art
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.odex
 	rm -f $(TARGET_OUT_UNSTRIPPED)/system/framework/*.oat
 	rm -f $(TARGET_OUT_APPS)/*.odex
 	rm -f $(TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
 	rm -rf /tmp/test-*/dalvik-cache/*@classes.dex
+	rm -rf /tmp/android-data/dalvik-cache/*@classes.dex
 
 .PHONY: clean-oat-target
 clean-oat-target:
@@ -71,9 +74,9 @@
 	adb shell rm $(ART_TEST_DIR)/*.odex
 	adb shell rm $(ART_TEST_DIR)/*.oat
 	adb shell rm $(ART_TEST_DIR)/*.art
-	adb shell rm $(DALVIK_CACHE_DIR)/*.dex
-	adb shell rm $(DALVIK_CACHE_DIR)/*.oat
-	adb shell rm $(DALVIK_CACHE_DIR)/*.art
+	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.dex
+	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.oat
+	adb shell rm $(ART_DALVIK_CACHE_DIR)/*.art
 	adb shell rm $(DEXPREOPT_BOOT_JAR_DIR)/*.oat
 	adb shell rm $(DEXPREOPT_BOOT_JAR_DIR)/*.art
 	adb shell rm system/app/*.odex
@@ -89,7 +92,6 @@
 include $(art_path)/disassembler/Android.mk
 include $(art_path)/oatdump/Android.mk
 include $(art_path)/dalvikvm/Android.mk
-include $(art_path)/jdwpspy/Android.mk
 include $(art_build_path)/Android.oat.mk
 
 # ART_HOST_DEPENDENCIES depends on Android.executable.mk above for ART_HOST_EXECUTABLES
@@ -331,7 +333,7 @@
 .PHONY: dump-oat-boot
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
 dump-oat-boot: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(OATDUMP)
-	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt
+	$(OATDUMP) --image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --output=$(ART_DUMP_OAT_PATH)/boot.oatdump.txt --host-prefix=$(DEXPREOPT_PRODUCT_DIR_FULL_PATH)
 	@echo Output in $(ART_DUMP_OAT_PATH)/boot.oatdump.txt
 endif
 
@@ -353,19 +355,19 @@
 .PHONY: use-art
 use-art:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libart.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libart.so
 	adb reboot
 
 .PHONY: use-artd
 use-artd:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libartd.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libartd.so
 	adb reboot
 
 .PHONY: use-dalvik
 use-dalvik:
 	adb root && sleep 3
-	adb shell setprop persist.sys.dalvik.vm.lib libdvm.so
+	adb shell setprop persist.sys.dalvik.vm.lib.1 libdvm.so
 	adb reboot
 
 ########################################################################
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 30d7dcb..415d810 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -87,7 +87,12 @@
 
 # Clang build.
 # ART_TARGET_CLANG := true
-# ART_HOST_CLANG := true
+ifeq ($(HOST_OS),darwin)
+ART_HOST_CLANG := true
+endif
+
+# directory used for dalvik-cache on device
+ART_DALVIK_CACHE_DIR := /data/dalvik-cache
 
 # directory used for gtests on device
 ART_NATIVETEST_DIR := /data/nativetest/art
@@ -116,7 +121,7 @@
 	-Wall \
 	-Werror \
 	-Wextra \
-	-Wstrict-aliasing=3 \
+	-Wstrict-aliasing \
 	-fstrict-aliasing
 
 ifeq ($(ART_SMALL_MODE),true)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index bed48ba..acaa0f8 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -30,6 +30,7 @@
 	compiler/utils/x86/managed_register_x86_test.cc \
 	runtime/barrier_test.cc \
 	runtime/base/bit_vector_test.cc \
+	runtime/base/hex_dump_test.cc \
 	runtime/base/histogram_test.cc \
 	runtime/base/mutex_test.cc \
 	runtime/base/timing_logger_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index ec6efbc..10dc2d3 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -68,7 +68,7 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.oat.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += $(HOST_CORE_IMG_OUT)
 include $(BUILD_PHONY_PACKAGE)
-endif
+endif # ART_BUILD_HOST
 
 # If we aren't building the host toolchain, skip building the target core.art.
 ifeq ($(WITH_HOST_DALVIK),true)
@@ -80,15 +80,5 @@
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.oat.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += $(TARGET_CORE_IMG_OUT)
 include $(BUILD_PHONY_PACKAGE)
-endif
-
-ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-include $(CLEAR_VARS)
-LOCAL_MODULE := boot.art
-LOCAL_MODULE_TAGS := optional
-LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
-LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.oat.mk
-LOCAL_ADDITIONAL_DEPENDENCIES += $(DEFAULT_DEX_PREOPT_INSTALLED_IMAGE)
-include $(BUILD_PHONY_PACKAGE)
-endif
-endif
+endif # ART_BUILD_TARGET
+endif # WITH_HOST_DALVIK
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 4340929..c6662c2 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -59,7 +59,8 @@
 	dex/frontend.cc \
 	dex/mir_graph.cc \
 	dex/mir_analysis.cc \
-	dex/verified_methods_data.cc \
+	dex/verified_method.cc \
+	dex/verification_results.cc \
 	dex/vreg_analysis.cc \
 	dex/ssa_transformation.cc \
 	driver/compiler_driver.cc \
@@ -196,6 +197,34 @@
   LOCAL_SHARED_LIBRARIES += libbcc libbcinfo libLLVM
   ifeq ($(ART_USE_PORTABLE_COMPILER),true)
     LOCAL_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
+    ifeq ($$(art_target_or_host),target)
+      ifeq ($(TARGET_ARCH),arm)
+        LOCAL_STATIC_LIBRARIES += libmcldARMInfo libmcldARMTarget
+      else # TARGET_ARCH != arm
+      ifeq ($(TARGET_ARCH),x86)
+        LOCAL_STATIC_LIBRARIES += libmcldX86Info libmcldX86Target
+      else # TARGET_ARCH != x86
+      ifeq ($(TARGET_ARCH),x86_64)
+        LOCAL_STATIC_LIBRARIES += libmcldX86Info libmcldX86Target
+      else # TARGET_ARCH != x86_64
+      ifeq ($(TARGET_ARCH),mips)
+        LOCAL_STATIC_LIBRARIES += libmcldMipsInfo libmcldMipsTarget
+      else # TARGET_ARCH != mips
+      ifeq ($(TARGET_ARCH),aarch64)
+         $$(info TODOAArch64: $$(LOCAL_PATH)/Android.mk Add AArch64 specific MCLinker libraries)
+      else # TARGET_ARCH != aarch64
+        $$(error unsupported TARGET_ARCH=$(TARGET_ARCH))
+      endif # TARGET_ARCH != aarch64
+      endif # TARGET_ARCH != mips
+      endif # TARGET_ARCH != x86_64
+      endif # TARGET_ARCH != x86
+      endif # TARGET_ARCH != arm
+    else # host
+      LOCAL_STATIC_LIBRARIES += libmcldARMInfo libmcldARMTarget
+      LOCAL_STATIC_LIBRARIES += libmcldX86Info libmcldX86Target
+      LOCAL_STATIC_LIBRARIES += libmcldMipsInfo libmcldMipsTarget
+    endif
+    LOCAL_STATIC_LIBRARIES += libmcldCore libmcldObject libmcldADT libmcldFragment libmcldTarget libmcldCodeGen libmcldLDVariant libmcldMC libmcldSupport libmcldLD
   endif
 
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
index 81a58f6..0940a80 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/buffered_output_stream.cc
@@ -23,7 +23,7 @@
 BufferedOutputStream::BufferedOutputStream(OutputStream* out)
     : OutputStream(out->GetLocation()), out_(out), used_(0) {}
 
-bool BufferedOutputStream::WriteFully(const void* buffer, int64_t byte_count) {
+bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
     Flush();
     return out_->WriteFully(buffer, byte_count);
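
The matching header change appears in the next file. As a minimal sketch of what the new signature means for callers, assuming a BufferedOutputStream has already been constructed around some concrete OutputStream (the WriteBlob helper and the byte-vector use case are illustrative, not part of this patch):

    #include <stdint.h>
    #include <vector>
    #include "buffered_output_stream.h"  // include path assumed relative to the compiler/ directory

    // Illustrative only: write one blob through the buffering layer.
    static bool WriteBlob(art::BufferedOutputStream& buffered, const std::vector<uint8_t>& blob) {
      // byte_count is now size_t, so vector::size() matches the parameter with no signedness mismatch.
      return blob.empty() || buffered.WriteFully(&blob[0], blob.size());
    }
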
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
index 7d874fb..75a3f24 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/buffered_output_stream.h
@@ -31,7 +31,7 @@
     delete out_;
   }
 
-  virtual bool WriteFully(const void* buffer, int64_t byte_count);
+  virtual bool WriteFully(const void* buffer, size_t byte_count);
 
   virtual off_t Seek(off_t offset, Whence whence);
 
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 29ff390..f6d724a 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -20,14 +20,16 @@
 namespace art {
 
 CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-                           const std::vector<uint8_t>& code)
-    : compiler_driver_(compiler_driver), instruction_set_(instruction_set), code_(nullptr) {
-  SetCode(code);
+                           const std::vector<uint8_t>& quick_code)
+    : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
+      portable_code_(nullptr), quick_code_(nullptr) {
+  SetCode(&quick_code, nullptr);
 }
 
 CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
                            const std::string& elf_object, const std::string& symbol)
-    : compiler_driver_(compiler_driver), instruction_set_(instruction_set), symbol_(symbol) {
+    : compiler_driver_(compiler_driver), instruction_set_(instruction_set),
+      portable_code_(nullptr), quick_code_(nullptr), symbol_(symbol) {
   CHECK_NE(elf_object.size(), 0U);
   CHECK_NE(symbol.size(), 0U);
   std::vector<uint8_t> temp_code(elf_object.size());
@@ -38,12 +40,41 @@
   // change to have different kinds of compiled methods.  This is
   // being deferred until we work on hybrid execution or at least
   // until we work on batch compilation.
-  SetCode(temp_code);
+  SetCode(nullptr, &temp_code);
 }
 
-void CompiledCode::SetCode(const std::vector<uint8_t>& code) {
-  CHECK(!code.empty());
-  code_ = compiler_driver_->DeduplicateCode(code);
+void CompiledCode::SetCode(const std::vector<uint8_t>* quick_code,
+                           const std::vector<uint8_t>* portable_code) {
+  if (portable_code != nullptr) {
+    CHECK(!portable_code->empty());
+    portable_code_ = compiler_driver_->DeduplicateCode(*portable_code);
+  }
+  if (quick_code != nullptr) {
+    CHECK(!quick_code->empty());
+    quick_code_ = compiler_driver_->DeduplicateCode(*quick_code);
+  }
+}
+
+bool CompiledCode::operator==(const CompiledCode& rhs) const {
+  if (quick_code_ != nullptr) {
+    if (rhs.quick_code_ == nullptr) {
+      return false;
+    } else if (quick_code_->size() != rhs.quick_code_->size()) {
+      return false;
+    } else {
+      return std::equal(quick_code_->begin(), quick_code_->end(), rhs.quick_code_->begin());
+    }
+  } else if (portable_code_ != nullptr) {
+    if (rhs.portable_code_ == nullptr) {
+      return false;
+    } else if (portable_code_->size() != rhs.portable_code_->size()) {
+      return false;
+    } else {
+      return std::equal(portable_code_->begin(), portable_code_->end(),
+                        rhs.portable_code_->begin());
+    }
+  }
+  return (rhs.quick_code_ == nullptr) && (rhs.portable_code_ == nullptr);
 }
 
 uint32_t CompiledCode::AlignCode(uint32_t offset) const {
@@ -100,7 +131,6 @@
   }
 }
 
-#if defined(ART_USE_PORTABLE_COMPILER)
 const std::string& CompiledCode::GetSymbol() const {
   CHECK_NE(0U, symbol_.size());
   return symbol_;
@@ -114,18 +144,17 @@
 void CompiledCode::AddOatdataOffsetToCompliledCodeOffset(uint32_t offset) {
   oatdata_offsets_to_compiled_code_offset_.push_back(offset);
 }
-#endif
 
 CompiledMethod::CompiledMethod(CompilerDriver& driver,
                                InstructionSet instruction_set,
-                               const std::vector<uint8_t>& code,
+                               const std::vector<uint8_t>& quick_code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
                                const std::vector<uint8_t>& mapping_table,
                                const std::vector<uint8_t>& vmap_table,
                                const std::vector<uint8_t>& native_gc_map)
-    : CompiledCode(&driver, instruction_set, code), frame_size_in_bytes_(frame_size_in_bytes),
+    : CompiledCode(&driver, instruction_set, quick_code), frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
   mapping_table_(driver.DeduplicateMappingTable(mapping_table)),
   vmap_table_(driver.DeduplicateVMapTable(vmap_table)),
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index e4fedf1..6112305 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -36,7 +36,7 @@
  public:
   // For Quick to supply an code blob
   CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
-               const std::vector<uint8_t>& code);
+               const std::vector<uint8_t>& quick_code);
 
   // For Portable to supply an ELF object
   CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
@@ -46,16 +46,18 @@
     return instruction_set_;
   }
 
-  const std::vector<uint8_t>& GetCode() const {
-    return *code_;
+  const std::vector<uint8_t>* GetPortableCode() const {
+    return portable_code_;
   }
 
-  void SetCode(const std::vector<uint8_t>& code);
-
-  bool operator==(const CompiledCode& rhs) const {
-    return (code_ == rhs.code_);
+  const std::vector<uint8_t>* GetQuickCode() const {
+    return quick_code_;
   }
 
+  void SetCode(const std::vector<uint8_t>* quick_code, const std::vector<uint8_t>* portable_code);
+
+  bool operator==(const CompiledCode& rhs) const;
+
   // To align an offset from a page-aligned value to make it suitable
   // for code storage. For example on ARM, to ensure that PC relative
   // valu computations work out as expected.
@@ -72,19 +74,20 @@
   static const void* CodePointer(const void* code_pointer,
                                  InstructionSet instruction_set);
 
-#if defined(ART_USE_PORTABLE_COMPILER)
   const std::string& GetSymbol() const;
   const std::vector<uint32_t>& GetOatdataOffsetsToCompliledCodeOffset() const;
   void AddOatdataOffsetToCompliledCodeOffset(uint32_t offset);
-#endif
 
  private:
-  CompilerDriver* compiler_driver_;
+  CompilerDriver* const compiler_driver_;
 
   const InstructionSet instruction_set_;
 
-  // Used to store the PIC code for Quick and an ELF image for portable.
-  std::vector<uint8_t>* code_;
+  // The ELF image for portable.
+  std::vector<uint8_t>* portable_code_;
+
+  // Used to store the PIC code for Quick.
+  std::vector<uint8_t>* quick_code_;
 
   // Used for the Portable ELF symbol name.
   const std::string symbol_;
@@ -101,7 +104,7 @@
   // Constructs a CompiledMethod for the non-LLVM compilers.
   CompiledMethod(CompilerDriver& driver,
                  InstructionSet instruction_set,
-                 const std::vector<uint8_t>& code,
+                 const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
@@ -109,10 +112,10 @@
                  const std::vector<uint8_t>& vmap_table,
                  const std::vector<uint8_t>& native_gc_map);
 
-  // Constructs a CompiledMethod for the JniCompiler.
+  // Constructs a CompiledMethod for the QuickJniCompiler.
   CompiledMethod(CompilerDriver& driver,
                  InstructionSet instruction_set,
-                 const std::vector<uint8_t>& code,
+                 const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask);
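
Because GetCode() is replaced by the GetQuickCode()/GetPortableCode() pair, either of which may now be null, consumers must pick whichever blob is actually present. A rough sketch of that null-checking; the CompiledCodeSize helper and the size-reporting use case are illustrative only:

    #include <stddef.h>
    #include <stdint.h>
    #include <vector>
    #include "compiled_method.h"  // include path assumed relative to the compiler/ directory

    // Illustrative only: size of whichever code blob this CompiledCode carries.
    static size_t CompiledCodeSize(const art::CompiledCode& compiled) {
      const std::vector<uint8_t>* quick = compiled.GetQuickCode();
      if (quick != nullptr) {
        return quick->size();      // Quick backend: deduplicated PIC machine code.
      }
      const std::vector<uint8_t>* portable = compiled.GetPortableCode();
      if (portable != nullptr) {
        return portable->size();   // Portable backend: a deduplicated ELF object.
      }
      return 0u;                   // Neither backend attached code.
    }
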
diff --git a/compiler/dex/arena_allocator.cc b/compiler/dex/arena_allocator.cc
index 132831c..8d24439 100644
--- a/compiler/dex/arena_allocator.cc
+++ b/compiler/dex/arena_allocator.cc
@@ -52,7 +52,8 @@
       next_(nullptr) {
   if (kUseMemMap) {
     std::string error_msg;
-    map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, &error_msg);
+    map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, false,
+                                &error_msg);
     CHECK(map_ != nullptr) << error_msg;
     memory_ = map_->Begin();
     size_ = map_->Size();
diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc
index f013067..2ab6252 100644
--- a/compiler/dex/bb_optimizations.cc
+++ b/compiler/dex/bb_optimizations.cc
@@ -23,7 +23,7 @@
 /*
  * Code Layout pass implementation start.
  */
-bool CodeLayout::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool CodeLayout::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->LayoutBlocks(bb);
   // No need of repeating, so just return false.
   return false;
@@ -32,13 +32,13 @@
 /*
  * SSATransformation pass implementation start.
  */
-bool SSATransformation::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool SSATransformation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->InsertPhiNodeOperands(bb);
   // No need of repeating, so just return false.
   return false;
 }
 
-void SSATransformation::End(CompilationUnit *cUnit) const {
+void SSATransformation::End(CompilationUnit* cUnit) const {
   // Verify the dataflow information after the pass.
   if (cUnit->enable_debug & (1 << kDebugVerifyDataflow)) {
     cUnit->mir_graph->VerifyDataflow();
@@ -48,7 +48,7 @@
 /*
  * ConstantPropagation pass implementation start
  */
-bool ConstantPropagation::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool ConstantPropagation::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->DoConstantPropagation(bb);
   // No need of repeating, so just return false.
   return false;
@@ -57,7 +57,7 @@
 /*
  * MethodUseCount pass implementation start.
  */
-bool MethodUseCount::Gate(const CompilationUnit *cUnit) const {
+bool MethodUseCount::Gate(const CompilationUnit* cUnit) const {
   // First initialize the data.
   cUnit->mir_graph->InitializeMethodUses();
 
@@ -67,7 +67,7 @@
   return res;
 }
 
-bool MethodUseCount::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool MethodUseCount::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->CountUses(bb);
   // No need of repeating, so just return false.
   return false;
@@ -77,7 +77,7 @@
  * Null Check Elimination and Type Inference Initialization pass implementation start.
  */
 
-bool NullCheckEliminationAndTypeInferenceInit::Gate(const CompilationUnit *cUnit) const {
+bool NullCheckEliminationAndTypeInferenceInit::Gate(const CompilationUnit* cUnit) const {
   // First check the ssa register vector
   cUnit->mir_graph->CheckSSARegisterVector();
 
@@ -87,7 +87,8 @@
   return performInit;
 }
 
-bool NullCheckEliminationAndTypeInferenceInit::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool NullCheckEliminationAndTypeInferenceInit::WalkBasicBlocks(CompilationUnit* cUnit,
+                                                               BasicBlock* bb) const {
   cUnit->mir_graph->NullCheckEliminationInit(bb);
   // No need of repeating, so just return false.
   return false;
@@ -96,7 +97,7 @@
 /*
  * BasicBlock Combine pass implementation start.
  */
-bool BBCombine::WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+bool BBCombine::WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
   cUnit->mir_graph->CombineBlocks(bb);
 
   // No need of repeating, so just return false.
@@ -106,9 +107,7 @@
 /*
  * BasicBlock Optimization pass implementation start.
  */
-void BBOptimizations::Start(CompilationUnit *cUnit) const {
-  DCHECK_EQ(cUnit->num_compiler_temps, 0);
-
+void BBOptimizations::Start(CompilationUnit* cUnit) const {
   /*
    * This pass has a different ordering depEnding on the suppress exception,
    * so do the pass here for now:
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 768b273..1286a8e 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -28,14 +28,14 @@
  */
 class CodeLayout : public Pass {
  public:
-  CodeLayout():Pass("CodeLayout", "2_post_layout_cfg") {
+  CodeLayout() : Pass("CodeLayout", "2_post_layout_cfg") {
   }
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->VerifyDataflow();
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 };
 
 /**
@@ -44,16 +44,16 @@
  */
 class SSATransformation : public Pass {
  public:
-  SSATransformation():Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
+  SSATransformation() : Pass("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitializeSSATransformation();
   }
 
-  void End(CompilationUnit *cUnit) const;
+  void End(CompilationUnit* cUnit) const;
 };
 
 /**
@@ -62,12 +62,12 @@
  */
 class ConstantPropagation : public Pass {
  public:
-  ConstantPropagation():Pass("ConstantPropagation") {
+  ConstantPropagation() : Pass("ConstantPropagation") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitializeConstantPropagation();
   }
 };
@@ -78,10 +78,10 @@
  */
 class InitRegLocations : public Pass {
  public:
-  InitRegLocations():Pass("InitRegLocation") {
+  InitRegLocations() : Pass("InitRegLocation", kNoNodes) {
   }
 
-  void Start(CompilationUnit *cUnit) const {
+  void Start(CompilationUnit* cUnit) const {
     cUnit->mir_graph->InitRegLocations();
   }
 };
@@ -92,12 +92,12 @@
  */
 class MethodUseCount : public Pass {
  public:
-  MethodUseCount():Pass("UseCount") {
+  MethodUseCount() : Pass("UseCount") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  bool Gate(const CompilationUnit *cUnit) const;
+  bool Gate(const CompilationUnit* cUnit) const;
 };
 
 /**
@@ -106,12 +106,12 @@
  */
 class NullCheckEliminationAndTypeInferenceInit : public Pass {
  public:
-  NullCheckEliminationAndTypeInferenceInit():Pass("NCE_TypeInferenceInit") {
+  NullCheckEliminationAndTypeInferenceInit() : Pass("NCE_TypeInferenceInit") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 
-  bool Gate(const CompilationUnit *cUnit) const;
+  bool Gate(const CompilationUnit* cUnit) const;
 };
 
 /**
@@ -120,10 +120,11 @@
  */
 class NullCheckEliminationAndTypeInference : public Pass {
  public:
-  NullCheckEliminationAndTypeInference():Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
+  NullCheckEliminationAndTypeInference()
+    : Pass("NCE_TypeInference", kRepeatingPreOrderDFSTraversal, "4_post_nce_cfg") {
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const {
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const {
     return cUnit->mir_graph->EliminateNullChecksAndInferTypes(bb);
   }
 };
@@ -134,14 +135,14 @@
  */
 class BBCombine : public Pass {
  public:
-  BBCombine():Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
+  BBCombine() : Pass("BBCombine", kPreOrderDFSTraversal, "5_post_bbcombine_cfg") {
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const CompilationUnit* cUnit) const {
     return ((cUnit->disable_opt & (1 << kSuppressExceptionEdges)) != 0);
   }
 
-  bool WalkBasicBlocks(CompilationUnit *cUnit, BasicBlock *bb) const;
+  bool WalkBasicBlocks(CompilationUnit* cUnit, BasicBlock* bb) const;
 };
 
 /**
@@ -150,14 +151,14 @@
  */
 class BBOptimizations : public Pass {
  public:
-  BBOptimizations():Pass("BBOptimizations", "5_post_bbo_cfg") {
+  BBOptimizations() : Pass("BBOptimizations", kNoNodes, "5_post_bbo_cfg") {
   }
 
-  bool Gate(const CompilationUnit *cUnit) const {
+  bool Gate(const CompilationUnit* cUnit) const {
     return ((cUnit->disable_opt & (1 << kBBOpt)) == 0);
   }
 
-  void Start(CompilationUnit *cUnit) const;
+  void Start(CompilationUnit* cUnit) const;
 };
 
 }  // namespace art
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 18122b3..2bc36a5 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -210,6 +210,22 @@
   kOpInvalid,
 };
 
+enum MoveType {
+  kMov8GP,      // Move 8-bit general purpose register.
+  kMov16GP,     // Move 16-bit general purpose register.
+  kMov32GP,     // Move 32-bit general purpose register.
+  kMov64GP,     // Move 64-bit general purpose register.
+  kMov32FP,     // Move 32-bit FP register.
+  kMov64FP,     // Move 64-bit FP register.
+  kMovLo64FP,   // Move low 32-bits of 64-bit FP register.
+  kMovHi64FP,   // Move high 32-bits of 64-bit FP register.
+  kMovU128FP,   // Move 128-bit FP register to/from possibly unaligned region.
+  kMov128FP = kMovU128FP,
+  kMovA128FP,   // Move 128-bit FP register to/from region surely aligned to 16-bytes.
+  kMovLo128FP,  // Move low 64-bits of 128-bit FP register.
+  kMovHi128FP,  // Move high 64-bits of 128-bit FP register.
+};
+
 std::ostream& operator<<(std::ostream& os, const OpKind& kind);
 
 enum ConditionCode {
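
One subtlety in the new enum: kMov128FP is an alias for kMovU128FP, so the two names denote the same value and cannot appear as separate switch cases. A small sketch of a backend dispatching on the new values; the MoveTypeName helper and its strings are hypothetical, only showing the shape of such a switch:

    #include "dex/compiler_enums.h"  // include path assumed relative to the compiler/ directory

    // Hypothetical helper: a human-readable label for a MoveType.
    static const char* MoveTypeName(art::MoveType move_type) {
      switch (move_type) {
        case art::kMov32GP:   return "32-bit GP move";
        case art::kMov64FP:   return "64-bit FP move";
        case art::kMovU128FP: return "unaligned 128-bit FP move";  // kMov128FP names this same value.
        case art::kMovA128FP: return "16-byte-aligned 128-bit FP move";
        default:              return "other move";
      }
    }
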
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 3798b45..32fd79b 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -81,9 +81,6 @@
   uint16_t num_outs;
   uint16_t num_regs;            // Unlike num_dalvik_registers, does not include ins.
 
-  // TODO: may want to move this to MIRGraph.
-  uint16_t num_compiler_temps;
-
   // If non-empty, apply optimizer/debug flags only to matching methods.
   std::string compiler_method_match;
   // Flips sense of compiler_method_match - apply flags if doesn't match.
diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h
index 0ca1a47..f8b9c1a 100644
--- a/compiler/dex/dataflow_iterator-inl.h
+++ b/compiler/dex/dataflow_iterator-inl.h
@@ -107,7 +107,7 @@
   // Find the next BasicBlock.
   while (keep_looking == true) {
     // Get next BasicBlock.
-    res = all_nodes_iterator_->Next();
+    res = all_nodes_iterator_.Next();
 
     // Are we done or is the BasicBlock not hidden?
     if ((res == NULL) || (res->hidden == false)) {
diff --git a/compiler/dex/dataflow_iterator.h b/compiler/dex/dataflow_iterator.h
index 658a9b1..b45d6a4 100644
--- a/compiler/dex/dataflow_iterator.h
+++ b/compiler/dex/dataflow_iterator.h
@@ -138,21 +138,6 @@
 
         return ForwardSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(PreOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -184,21 +169,6 @@
 
         return ForwardRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingPreOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -230,21 +200,6 @@
 
         return ForwardRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingPostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -275,21 +230,6 @@
 
         return ReverseSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(ReversePostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -321,21 +261,6 @@
 
         return ReverseRepeatNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(RepeatingReversePostOrderDfsIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -366,21 +291,6 @@
 
         return ForwardSingleNext();
       }
-
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(PostOrderDOMIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
   };
 
   /**
@@ -394,16 +304,15 @@
        * @param mir_graph The MIRGraph considered.
        */
       explicit AllNodesIterator(MIRGraph* mir_graph)
-          : DataflowIterator(mir_graph, 0, 0) {
-        all_nodes_iterator_ = new
-            (mir_graph->GetArena()) GrowableArray<BasicBlock*>::Iterator(mir_graph->GetBlockList());
+          : DataflowIterator(mir_graph, 0, 0),
+            all_nodes_iterator_(mir_graph->GetBlockList()) {
       }
 
       /**
        * @brief Resetting the iterator.
        */
       void Reset() {
-        all_nodes_iterator_->Reset();
+        all_nodes_iterator_.Reset();
       }
 
       /**
@@ -413,23 +322,8 @@
        */
       virtual BasicBlock* Next(bool had_change = false) ALWAYS_INLINE;
 
-      /**
-       * @brief Redefine the new operator to use the arena
-       * @param size actually unused, we use our own class size
-       * @param arena the arena to perform the actual allocation
-       * @return the pointer to the newly allocated object
-       */
-      static void* operator new(size_t size, ArenaAllocator* arena) {
-        return arena->Alloc(sizeof(AllNodesIterator), ArenaAllocator::kAllocGrowableBitMap);
-      }
-
-      /**
-       * @brief Redefine delete to not actually delete anything since we are using the arena
-       */
-      static void operator delete(void* p) {}
-
     private:
-      GrowableArray<BasicBlock*>::Iterator* all_nodes_iterator_;    /**< @brief The list of all the nodes */
+      GrowableArray<BasicBlock*>::Iterator all_nodes_iterator_;    /**< @brief The list of all the nodes */
   };
 
 }  // namespace art
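
With the arena placement-new overloads removed and AllNodesIterator embedding its GrowableArray iterator by value, the dataflow iterators are now intended to be plain stack objects. A minimal sketch of the usual walk, assuming a valid MIRGraph; the VisitAllBlocks wrapper is illustrative only:

    #include "dex/dataflow_iterator-inl.h"  // include path assumed relative to the compiler/ directory

    // Illustrative only: visit every non-hidden BasicBlock once.
    static void VisitAllBlocks(art::MIRGraph* mir_graph) {
      art::AllNodesIterator iter(mir_graph);  // Plain stack object; no arena allocation needed.
      for (art::BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
        // Per-block work goes here; Next() already skips hidden blocks.
      }
    }
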
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 3368132..ff8fea0 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -176,8 +176,7 @@
   if (!kEnableCheckCastEllision || !PerformOptimizations()) {
     return inst;
   }
-  MethodReference referrer(&GetDexFile(), unit_.GetDexMethodIndex());
-  if (!driver_.IsSafeCast(referrer, dex_pc)) {
+  if (!driver_.IsSafeCast(&unit_, dex_pc)) {
     return inst;
   }
   // Ok, this is a safe cast. Since the "check-cast" instruction size is 2 code
@@ -272,15 +271,16 @@
 }  // namespace optimizer
 }  // namespace art
 
-extern "C" void ArtCompileDEX(art::CompilerDriver& compiler, const art::DexFile::CodeItem* code_item,
+extern "C" void ArtCompileDEX(art::CompilerDriver& driver, const art::DexFile::CodeItem* code_item,
                   uint32_t access_flags, art::InvokeType invoke_type,
                   uint16_t class_def_idx, uint32_t method_idx, jobject class_loader,
                   const art::DexFile& dex_file,
                   art::DexToDexCompilationLevel dex_to_dex_compilation_level) {
   if (dex_to_dex_compilation_level != art::kDontDexToDexCompile) {
     art::DexCompilationUnit unit(NULL, class_loader, art::Runtime::Current()->GetClassLinker(),
-                                 dex_file, code_item, class_def_idx, method_idx, access_flags);
-    art::optimizer::DexCompiler dex_compiler(compiler, unit, dex_to_dex_compilation_level);
+                                 dex_file, code_item, class_def_idx, method_idx, access_flags,
+                                 driver.GetVerifiedMethod(&dex_file, method_idx));
+    art::optimizer::DexCompiler dex_compiler(driver, unit, dex_to_dex_compilation_level);
     dex_compiler.Compile();
   }
 }
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 364a8bc..c2016d0 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -130,7 +130,6 @@
     num_ins(0),
     num_outs(0),
     num_regs(0),
-    num_compiler_temps(0),
     compiler_flip_match(false),
     arena(pool),
     mir_graph(NULL),
@@ -141,25 +140,24 @@
 CompilationUnit::~CompilationUnit() {
 }
 
+// TODO: Add a cumulative version of logging, and combine with dex2oat --dump-timing
 void CompilationUnit::StartTimingSplit(const char* label) {
-  if (compiler_driver->GetDumpPasses()) {
+  if (enable_debug & (1 << kDebugTimings)) {
     timings.StartSplit(label);
   }
 }
 
 void CompilationUnit::NewTimingSplit(const char* label) {
-  if (compiler_driver->GetDumpPasses()) {
+  if (enable_debug & (1 << kDebugTimings)) {
     timings.NewSplit(label);
   }
 }
 
 void CompilationUnit::EndTiming() {
-  if (compiler_driver->GetDumpPasses()) {
+  if (enable_debug & (1 << kDebugTimings)) {
     timings.EndSplit();
-    if (enable_debug & (1 << kDebugTimings)) {
-      LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
-      LOG(INFO) << Dumpable<TimingLogger>(timings);
-    }
+    LOG(INFO) << "TIMINGS " << PrettyMethod(method_idx, *dex_file);
+    LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
 }
 
@@ -237,6 +235,43 @@
   cu.StartTimingSplit("BuildMIRGraph");
   cu.mir_graph.reset(new MIRGraph(&cu, &cu.arena));
 
+  /*
+   * After creation of the MIR graph, also create the code generator.
+   * The reason we do this is that optimizations on the MIR graph may need to get information
+   * that is only available if a CG exists.
+   */
+#if defined(ART_USE_PORTABLE_COMPILER)
+  if (compiler_backend == kPortable) {
+    cu.cg.reset(PortableCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena, llvm_compilation_unit));
+  } else {
+#endif
+    Mir2Lir* mir_to_lir = nullptr;
+    switch (compiler.GetInstructionSet()) {
+      case kThumb2:
+        mir_to_lir = ArmCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      case kMips:
+        mir_to_lir = MipsCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      case kX86:
+        mir_to_lir = X86CodeGenerator(&cu, cu.mir_graph.get(), &cu.arena);
+        break;
+      default:
+        LOG(FATAL) << "Unexpected instruction set: " << compiler.GetInstructionSet();
+    }
+
+    cu.cg.reset(mir_to_lir);
+
+    /* The number of compiler temporaries depends on backend so set it up now if possible */
+    if (mir_to_lir) {
+      size_t max_temps = mir_to_lir->GetMaxPossibleCompilerTemps();
+      bool set_max = cu.mir_graph->SetMaxAvailableNonSpecialCompilerTemps(max_temps);
+      CHECK(set_max);
+    }
+#if defined(ART_USE_PORTABLE_COMPILER)
+  }
+#endif
+
   /* Gathering opcode stats? */
   if (kCompilerDebugFlags & (1 << kDebugCountOpcodes)) {
     cu.mir_graph->EnableOpcodeCounting();
@@ -270,28 +305,6 @@
 
   CompiledMethod* result = NULL;
 
-#if defined(ART_USE_PORTABLE_COMPILER)
-  if (compiler_backend == kPortable) {
-    cu.cg.reset(PortableCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena, llvm_compilation_unit));
-  } else {
-#endif
-    switch (compiler.GetInstructionSet()) {
-      case kThumb2:
-        cu.cg.reset(ArmCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      case kMips:
-        cu.cg.reset(MipsCodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      case kX86:
-        cu.cg.reset(X86CodeGenerator(&cu, cu.mir_graph.get(), &cu.arena));
-        break;
-      default:
-        LOG(FATAL) << "Unexpected instruction set: " << compiler.GetInstructionSet();
-    }
-#if defined(ART_USE_PORTABLE_COMPILER)
-  }
-#endif
-
   cu.cg->Materialize();
 
   cu.NewTimingSplit("Dedupe");  /* deduping takes up the vast majority of time in GetCompiledMethod(). */
@@ -317,9 +330,6 @@
   }
 
   cu.EndTiming();
-  compiler.GetTimingsLogger().Start();
-  compiler.GetTimingsLogger().AddLogger(cu.timings);
-  compiler.GetTimingsLogger().End();
   return result;
 }
 
diff --git a/compiler/dex/frontend.h b/compiler/dex/frontend.h
index 8eb6684..8ce1206 100644
--- a/compiler/dex/frontend.h
+++ b/compiler/dex/frontend.h
@@ -18,12 +18,7 @@
 #define ART_COMPILER_DEX_FRONTEND_H_
 
 #include "dex_file.h"
-#include "dex_instruction.h"
-
-
-
-
-
+#include "invoke_type.h"
 
 namespace llvm {
   class Module;
@@ -82,9 +77,6 @@
   kDebugTimings
 };
 
-class DexFileToMethodInlinerMap;
-class CompilerDriver;
-
 class LLVMInfo {
   public:
     LLVMInfo();
@@ -113,8 +105,8 @@
     UniquePtr<art::llvm::IRBuilder> ir_builder_;
 };
 
-struct CompilationUnit;
-struct BasicBlock;
+struct CompiledMethod;
+class CompilerDriver;
 
 }  // namespace art
 
diff --git a/compiler/dex/growable_array.h b/compiler/dex/growable_array.h
index 639120a..6ed207c 100644
--- a/compiler/dex/growable_array.h
+++ b/compiler/dex/growable_array.h
@@ -40,6 +40,7 @@
   kGrowableArrayFillArrayData,
   kGrowableArraySuccessorBlocks,
   kGrowableArrayPredecessors,
+  kGrowableArraySlowPaths,
   kGNumListKinds
 };
 
@@ -66,11 +67,6 @@
           idx_ = 0;
         }
 
-        static void* operator new(size_t size, ArenaAllocator* arena) {
-          return arena->Alloc(sizeof(GrowableArray::Iterator), ArenaAllocator::kAllocGrowableArray);
-        };
-        static void operator delete(void* p) {}  // Nop.
-
       private:
         size_t idx_;
         GrowableArray* const g_list_;
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 75883b7..9e83210 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -380,9 +380,6 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Establish value number for loaded register. Note use of memory version.
@@ -421,9 +418,6 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Rev the memory version
@@ -447,9 +441,6 @@
         } else {
           null_checked_.insert(base);
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         uint16_t memory_version = GetMemoryVersion(base, field_ref);
         if (opcode == Instruction::IGET_WIDE) {
@@ -479,9 +470,6 @@
         } else {
           null_checked_.insert(base);
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         AdvanceMemoryVersion(base, field_ref);
       }
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index c235448..9680450 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1158,8 +1158,8 @@
     ssa_last_defs_[i] = 0;
   }
 
-  /* Add ssa reg for Method* */
-  method_sreg_ = AddNewSReg(SSA_METHOD_BASEREG);
+  // Create a compiler temporary for Method*. This is done after SSA initialization.
+  GetNewCompilerTemp(kCompilerTempSpecialMethodPtr, false);
 
   /*
    * Allocate the BasicBlockDataFlow structure for the entry and code blocks
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 8d1653f..10bcdb9 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -46,7 +46,6 @@
 
 MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
     : reg_location_(NULL),
-      compiler_temps_(arena, 6, kGrowableArrayMisc),
       cu_(cu),
       ssa_base_vregs_(NULL),
       ssa_subscripts_(NULL),
@@ -82,8 +81,13 @@
       checkstats_(NULL),
       arena_(arena),
       backward_branches_(0),
-      forward_branches_(0) {
+      forward_branches_(0),
+      compiler_temps_(arena, 6, kGrowableArrayMisc),
+      num_non_special_compiler_temps_(0),
+      max_available_non_special_compiler_temps_(0) {
   try_block_addr_ = new (arena_) ArenaBitVector(arena_, 0, true /* expandable */);
+  max_available_special_compiler_temps_ = std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg))
+      - std::abs(static_cast<int>(kVRegTempBaseReg));
 }
 
 MIRGraph::~MIRGraph() {
@@ -126,9 +130,6 @@
   bottom_block->terminated_by_return = orig_block->terminated_by_return;
   orig_block->terminated_by_return = false;
 
-  /* Add it to the quick lookup cache */
-  dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
-
   /* Handle the taken path */
   bottom_block->taken = orig_block->taken;
   if (bottom_block->taken != NullBasicBlockId) {
@@ -177,19 +178,29 @@
   }
 
   // Associate dex instructions in the bottom block with the new container.
-  MIR* p = bottom_block->first_mir_insn;
-  while (p != NULL) {
+  DCHECK(insn != nullptr);
+  DCHECK(insn != orig_block->first_mir_insn);
+  DCHECK(insn == bottom_block->first_mir_insn);
+  DCHECK_EQ(insn->offset, bottom_block->start_offset);
+  DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
+         !IsPseudoMirOp(insn->dalvikInsn.opcode));
+  DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), orig_block->id);
+  MIR* p = insn;
+  dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+  while (p != bottom_block->last_mir_insn) {
+    p = p->next;
+    DCHECK(p != nullptr);
     int opcode = p->dalvikInsn.opcode;
     /*
      * Some messiness here to ensure that we only enter real opcodes and only the
      * first half of a potentially throwing instruction that has been split into
-     * CHECK and work portions.  The 2nd half of a split operation will have a non-null
-     * throw_insn pointer that refers to the 1st half.
+     * CHECK and work portions. Since the 2nd half of a split operation is always
+     * the first in a BasicBlock, we can't hit it here.
      */
-    if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+    if ((opcode == kMirOpCheck) || !IsPseudoMirOp(opcode)) {
+      DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id);
       dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
     }
-    p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
   }
 
   return bottom_block;
@@ -508,7 +519,6 @@
       static_cast<Instruction::Code>(kMirOpCheck);
   // Associate the two halves
   insn->meta.throw_insn = new_insn;
-  new_insn->meta.throw_insn = insn;
   AppendMIR(new_block, new_insn);
   return new_block;
 }
@@ -523,7 +533,8 @@
   current_offset_ = 0;
   // TODO: will need to snapshot stack image and use that as the mir context identification.
   m_units_.push_back(new DexCompilationUnit(cu_, class_loader, Runtime::Current()->GetClassLinker(),
-                     dex_file, current_code_item_, class_def_idx, method_idx, access_flags));
+                     dex_file, current_code_item_, class_def_idx, method_idx, access_flags,
+                     cu_->compiler_driver->GetVerifiedMethod(&dex_file, method_idx)));
   const uint16_t* code_ptr = current_code_item_->insns_;
   const uint16_t* code_end =
       current_code_item_->insns_ + current_code_item_->insns_size_in_code_units_;
@@ -973,7 +984,7 @@
         str.append(StringPrintf(", #%d", insn.vB));
         break;
       case Instruction::k51l:  // Add one wide immediate
-        str.append(StringPrintf(", #%lld", insn.vB_wide));
+        str.append(StringPrintf(", #%" PRId64, insn.vB_wide));
         break;
       case Instruction::k21c:  // One register, one string/type/method index
       case Instruction::k31c:
@@ -1026,7 +1037,7 @@
   }
   if (IsConst(reg_location_[ssa_reg])) {
     if (!singles_only && reg_location_[ssa_reg].wide) {
-      return StringPrintf("v%d_%d#0x%llx", SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
+      return StringPrintf("v%d_%d#0x%" PRIx64, SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
                           ConstantValueWide(reg_location_[ssa_reg]));
     } else {
       return StringPrintf("v%d_%d#0x%x", SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index b68e699..f8706c4 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -168,11 +168,6 @@
 #define INVALID_REG (0xFF)
 #define INVALID_OFFSET (0xDEADF00FU)
 
-/* SSA encodings for special registers */
-#define SSA_METHOD_BASEREG (-2)
-/* First compiler temp basereg, grows smaller */
-#define SSA_CTEMP_BASEREG (SSA_METHOD_BASEREG - 1)
-
 #define MIR_IGNORE_NULL_CHECK           (1 << kMIRIgnoreNullCheck)
 #define MIR_NULL_CHECK_ONLY             (1 << kMIRNullCheckOnly)
 #define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
@@ -195,7 +190,13 @@
  * name of compiler-introduced temporaries.
  */
 struct CompilerTemp {
-  int32_t s_reg;
+  int32_t v_reg;      // Virtual register number for temporary.
+  int32_t s_reg_low;  // SSA name for low Dalvik word.
+};
+
+enum CompilerTempType {
+  kCompilerTempVR,                // A virtual register temporary.
+  kCompilerTempSpecialMethodPtr,  // Temporary that keeps track of current method pointer.
 };
 
 // When debug option enabled, records effectiveness of null and range check elimination.
@@ -253,8 +254,10 @@
   union {
     // Incoming edges for phi node.
     BasicBlockId* phi_incoming;
-    // Establish link between two halves of throwing instructions.
+    // Establish link from check instruction (kMirOpCheck) to the actual throwing instruction.
     MIR* throw_insn;
+    // Fused cmp branch condition.
+    ConditionCode ccode;
   } meta;
 };
 
@@ -569,16 +572,82 @@
     return bad_loc;
   }
 
-  int GetMethodSReg() {
+  int GetMethodSReg() const {
     return method_sreg_;
   }
 
+  /**
+   * @brief Used to obtain the number of compiler temporaries being used.
+   * @return Returns the number of compiler temporaries.
+   */
+  size_t GetNumUsedCompilerTemps() const {
+    size_t total_num_temps = compiler_temps_.Size();
+    DCHECK_LE(num_non_special_compiler_temps_, total_num_temps);
+    return total_num_temps;
+  }
+
+  /**
+   * @brief Used to obtain the number of non-special compiler temporaries being used.
+   * @return Returns the number of non-special compiler temporaries.
+   */
+  size_t GetNumNonSpecialCompilerTemps() const {
+    return num_non_special_compiler_temps_;
+  }
+
+  /**
+   * @brief Used to set the total number of available non-special compiler temporaries.
+   * @details Can fail setting the new max if there are more temps being used than the new_max.
+   * @param new_max The new maximum number of non-special compiler temporaries.
+   * @return Returns true if the max was set and false if failed to set.
+   */
+  bool SetMaxAvailableNonSpecialCompilerTemps(size_t new_max) {
+    if (new_max < GetNumNonSpecialCompilerTemps()) {
+      return false;
+    } else {
+      max_available_non_special_compiler_temps_ = new_max;
+      return true;
+    }
+  }
+
+  /**
+   * @brief Provides the number of non-special compiler temps available.
+   * @details Even if this returns zero, special compiler temps are guaranteed to be available.
+   * @return Returns the number of available temps.
+   */
+  size_t GetNumAvailableNonSpecialCompilerTemps();
+
+  /**
+   * @brief Used to obtain an existing compiler temporary.
+   * @param index The index of the temporary which must be strictly less than the
+   * number of temporaries.
+   * @return Returns the temporary that was asked for.
+   */
+  CompilerTemp* GetCompilerTemp(size_t index) const {
+    return compiler_temps_.Get(index);
+  }
+
+  /**
+   * @brief Used to obtain the maximum number of compiler temporaries that can be requested.
+   * @return Returns the maximum number of compiler temporaries, whether used or not.
+   */
+  size_t GetMaxPossibleCompilerTemps() const {
+    return max_available_special_compiler_temps_ + max_available_non_special_compiler_temps_;
+  }
+
+  /**
+   * @brief Used to obtain a new unique compiler temporary.
+   * @param ct_type Type of compiler temporary requested.
+   * @param wide Whether we should allocate a wide temporary.
+   * @return Returns the newly created compiler temporary.
+   */
+  CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
+
   bool MethodIsLeaf() {
     return attributes_ & METHOD_IS_LEAF;
   }
 
   RegLocation GetRegLocation(int index) {
-    DCHECK((index >= 0) && (index > num_ssa_regs_));
+    DCHECK((index >= 0) && (index < num_ssa_regs_));
     return reg_location_[index];
   }
 
@@ -725,7 +794,6 @@
 
   // TODO: make these private.
   RegLocation* reg_location_;                         // Map SSA names to location.
-  GrowableArray<CompilerTemp*> compiler_temps_;
   SafeMap<unsigned int, unsigned int> block_id_map_;  // Block collapse lookup cache.
 
   static const uint64_t oat_data_flow_attributes_[kMirOpLast];
@@ -834,6 +902,10 @@
   ArenaAllocator* arena_;
   int backward_branches_;
   int forward_branches_;
+  GrowableArray<CompilerTemp*> compiler_temps_;
+  size_t num_non_special_compiler_temps_;
+  size_t max_available_non_special_compiler_temps_;
+  size_t max_available_special_compiler_temps_;
 };
 
 }  // namespace art
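
Taken together, the new members let optimizations reserve temporaries directly from the MIR graph instead of going through the removed CompilationUnit::num_compiler_temps counter. A sketch of the intended flow, assuming the backend's capacity is already known; the ReserveWideTemp helper is illustrative, not part of this patch:

    #include "dex/mir_graph.h"  // include path assumed relative to the compiler/ directory

    // Illustrative only: install the backend limit, then request one wide (two-vreg) temp.
    static bool ReserveWideTemp(art::MIRGraph* mir_graph, size_t backend_max_temps) {
      // frontend.cc now does this once, right after the code generator is created.
      if (!mir_graph->SetMaxAvailableNonSpecialCompilerTemps(backend_max_temps)) {
        return false;  // More non-special temps already in use than the proposed maximum.
      }
      // A wide temp occupies two consecutive virtual registers, hence two free slots are needed.
      if (mir_graph->GetNumAvailableNonSpecialCompilerTemps() < 2u) {
        return false;
      }
      art::CompilerTemp* temp = mir_graph->GetNewCompilerTemp(art::kCompilerTempVR, true /* wide */);
      return temp != nullptr;  // GetNewCompilerTemp returns null when no temp can be handed out.
    }
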
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index ee9f28e..209ed3d 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -199,13 +199,94 @@
   return raw_use_counts_.Get(s_reg);
 }
 
+size_t MIRGraph::GetNumAvailableNonSpecialCompilerTemps() {
+  if (num_non_special_compiler_temps_ >= max_available_non_special_compiler_temps_) {
+    return 0;
+  } else {
+    return max_available_non_special_compiler_temps_ - num_non_special_compiler_temps_;
+  }
+}
+
+static const RegLocation temp_loc = {kLocCompilerTemp,
+                                     0, 1 /*defined*/, 0, 0, 0, 0, 0, 1 /*home*/,
+                                     kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG,
+                                     INVALID_SREG};
+
+CompilerTemp* MIRGraph::GetNewCompilerTemp(CompilerTempType ct_type, bool wide) {
+  // There is a limit to the number of non-special temps so check to make sure it wasn't exceeded.
+  if (ct_type == kCompilerTempVR) {
+    size_t available_temps = GetNumAvailableNonSpecialCompilerTemps();
+    if (available_temps <= 0 || (available_temps <= 1 && wide)) {
+      return 0;
+    }
+  }
+
+  CompilerTemp *compiler_temp = static_cast<CompilerTemp *>(arena_->Alloc(sizeof(CompilerTemp),
+                                                            ArenaAllocator::kAllocRegAlloc));
+
+  // Create the type of temp requested. Special temps need special handling because
+  // they have a specific virtual register assignment.
+  if (ct_type == kCompilerTempSpecialMethodPtr) {
+    DCHECK_EQ(wide, false);
+    compiler_temp->v_reg = static_cast<int>(kVRegMethodPtrBaseReg);
+    compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+
+    // The MIR graph keeps track of the sreg for method pointer specially, so record that now.
+    method_sreg_ = compiler_temp->s_reg_low;
+  } else {
+    DCHECK_EQ(ct_type, kCompilerTempVR);
+
+    // The new non-special compiler temp must receive a unique v_reg with a negative value.
+    compiler_temp->v_reg = static_cast<int>(kVRegNonSpecialTempBaseReg) - num_non_special_compiler_temps_;
+    compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+    num_non_special_compiler_temps_++;
+
+    if (wide) {
+      // Ensure that the two registers are consecutive. Since the virtual registers used for temps grow in a
+      // negative fashion, we need the smaller to refer to the low part. Thus, we redefine the v_reg and s_reg_low.
+      compiler_temp->v_reg--;
+      int ssa_reg_high = compiler_temp->s_reg_low;
+      compiler_temp->s_reg_low = AddNewSReg(compiler_temp->v_reg);
+      int ssa_reg_low = compiler_temp->s_reg_low;
+
+      // If needed initialize the register location for the high part.
+      // The low part is handled later in this method on a common path.
+      if (reg_location_ != nullptr) {
+        reg_location_[ssa_reg_high] = temp_loc;
+        reg_location_[ssa_reg_high].high_word = 1;
+        reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
+        reg_location_[ssa_reg_high].wide = true;
+
+        // A new SSA needs new use counts.
+        use_counts_.Insert(0);
+        raw_use_counts_.Insert(0);
+      }
+
+      num_non_special_compiler_temps_++;
+    }
+  }
+
+  // Have we already allocated the register locations?
+  if (reg_location_ != nullptr) {
+    int ssa_reg_low = compiler_temp->s_reg_low;
+    reg_location_[ssa_reg_low] = temp_loc;
+    reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
+    reg_location_[ssa_reg_low].wide = wide;
+
+    // A new SSA needs new use counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
+
+  compiler_temps_.Insert(compiler_temp);
+  return compiler_temp;
+}
 
 /* Do some MIR-level extended basic block optimizations */
 bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
   if (bb->block_type == kDead) {
     return true;
   }
-  int num_temps = 0;
   bool use_lvn = bb->use_lvn;
   UniquePtr<LocalValueNumbering> local_valnum;
   if (use_lvn) {
@@ -259,7 +340,7 @@
             if ((ccode != kCondNv) &&
                 (mir->ssa_rep->defs[0] == mir_next->ssa_rep->uses[0]) &&
                 (GetSSAUseCount(mir->ssa_rep->defs[0]) == 1)) {
-              mir_next->dalvikInsn.arg[0] = ccode;
+              mir_next->meta.ccode = ccode;
               switch (opcode) {
                 case Instruction::CMPL_FLOAT:
                   mir_next->dalvikInsn.opcode =
@@ -323,9 +404,10 @@
           break;
       }
       // Is this the select pattern?
-      // TODO: flesh out support for Mips and X86.  NOTE: llvm's select op doesn't quite work here.
+      // TODO: flesh out support for Mips.  NOTE: llvm's select op doesn't quite work here.
       // TUNING: expand to support IF_xx compare & branches
-      if (!(cu_->compiler_backend == kPortable) && (cu_->instruction_set == kThumb2) &&
+      if ((cu_->compiler_backend != kPortable) &&
+          (cu_->instruction_set == kThumb2 || cu_->instruction_set == kX86) &&
           ((mir->dalvikInsn.opcode == Instruction::IF_EQZ) ||
           (mir->dalvikInsn.opcode == Instruction::IF_NEZ))) {
         BasicBlock* ft = GetBasicBlock(bb->fall_through);
@@ -391,6 +473,11 @@
                 }
               }
               if (const_form) {
+                /*
+                 * TODO: If both constants are the same value, then instead of generating
+                 * a select, we should simply generate a const bytecode. This should be
+                 * considered after inlining which can lead to CFG of this form.
+                 */
                 // "true" set val in vB
                 mir->dalvikInsn.vB = if_true->dalvikInsn.vB;
                 // "false" set val in vC
@@ -462,9 +549,6 @@
     bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? NextDominatedBlock(bb) : NULL;
   }
 
-  if (num_temps > cu_->num_compiler_temps) {
-    cu_->num_compiler_temps = num_temps;
-  }
   return true;
 }
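
For context on the wide-temp handling above: GetNewCompilerTemp() hands out virtual registers that grow downward from a base, and a wide temp occupies two consecutive v_regs with the low half on the more negative number. A minimal standalone sketch of that numbering scheme; kBase, AllocWideTemp and main are illustrative names only, not part of this change:

    #include <cstdio>
    #include <utility>

    // Illustrative model only: non-special temps grow downward from a base v_reg.
    static const int kBase = -2;  // stand-in for kVRegNonSpecialTempBaseReg

    // Returns {v_reg_low, v_reg_high} for a wide temp when 'used' non-special
    // temp v_regs have already been handed out.
    static std::pair<int, int> AllocWideTemp(int used) {
      int high = kBase - used;  // first v_reg assigned, becomes the high half
      int low = high - 1;       // decremented so the pair stays consecutive
      return std::make_pair(low, high);
    }

    int main() {
      std::pair<int, int> p = AllocWideTemp(0);
      std::printf("low=%d high=%d\n", p.first, p.second);  // low=-3 high=-2
      return 0;
    }
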
 
diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h
index c52ddf5..255892e 100644
--- a/compiler/dex/pass.h
+++ b/compiler/dex/pass.h
@@ -41,6 +41,7 @@
   kRepeatingPostOrderDFSTraversal,         /**< @brief Depth-First-Search / Repeating Post-Order. */
   kRepeatingReversePostOrderDFSTraversal,  /**< @brief Depth-First-Search / Repeating Reverse Post-Order. */
   kPostOrderDOMTraversal,                  /**< @brief Dominator tree / Post-Order. */
+  kNoNodes,                                /**< @brief Skip BasicBlock traversal. */
 };
 
 /**
@@ -50,20 +51,22 @@
  */
 class Pass {
  public:
-  Pass(const char *name, DataFlowAnalysisMode type, bool freed, const unsigned int f, const char *dump): pass_name_(name), traversal_type_(type), flags_(f), dump_cfg_folder_(dump) {
+  explicit Pass(const char* name, DataFlowAnalysisMode type = kAllNodes,
+                unsigned int flags = 0u, const char* dump = "")
+    : pass_name_(name), traversal_type_(type), flags_(flags), dump_cfg_folder_(dump) {
   }
 
-  Pass(const char *name, const char *dump): pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
+  Pass(const char* name, DataFlowAnalysisMode type, const char* dump)
+    : pass_name_(name), traversal_type_(type), flags_(0), dump_cfg_folder_(dump) {
   }
 
-  explicit Pass(const char *name):pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_("") {
+  Pass(const char* name, const char* dump)
+    : pass_name_(name), traversal_type_(kAllNodes), flags_(0), dump_cfg_folder_(dump) {
   }
 
-  Pass(const char *name, DataFlowAnalysisMode type, const char *dump):pass_name_(name), traversal_type_(type), flags_(false), dump_cfg_folder_(dump) {
+  virtual ~Pass() {
   }
 
-  virtual ~Pass() {}
-
   virtual const char* GetName() const {
     return pass_name_;
   }
@@ -76,14 +79,16 @@
     return (flags_ & flag);
   }
 
-  const char* GetDumpCFGFolder() const {return dump_cfg_folder_;}
+  const char* GetDumpCFGFolder() const {
+    return dump_cfg_folder_;
+  }
 
   /**
    * @brief Gate for the pass: determines whether to execute the pass or not considering a CompilationUnit
    * @param c_unit the CompilationUnit.
    * @return whether or not to execute the pass
    */
-  virtual bool Gate(const CompilationUnit *c_unit) const {
+  virtual bool Gate(const CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
 
@@ -95,7 +100,7 @@
    * @brief Start of the pass: called before the WalkBasicBlocks function
    * @param c_unit the considered CompilationUnit.
    */
-  virtual void Start(CompilationUnit *c_unit) const {
+  virtual void Start(CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
   }
@@ -104,7 +109,7 @@
    * @brief End of the pass: called after the WalkBasicBlocks function
    * @param c_unit the considered CompilationUnit.
    */
-  virtual void End(CompilationUnit *c_unit) const {
+  virtual void End(CompilationUnit* c_unit) const {
     // Unused parameter.
     UNUSED(c_unit);
   }
@@ -115,7 +120,7 @@
    * @param bb the BasicBlock.
    * @return whether or not there is a change when walking the BasicBlock
    */
-  virtual bool WalkBasicBlocks(CompilationUnit *c_unit, BasicBlock *bb) const {
+  virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
     // Unused parameters.
     UNUSED(c_unit);
     UNUSED(bb);
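
With the simplified constructors above, a pass only needs to name itself and override the hooks it cares about. A minimal sketch of a hypothetical subclass; CountBlocksPass is illustrative and not part of this change:

    // Hypothetical pass: visits every BasicBlock and reports no change.
    class CountBlocksPass : public Pass {
     public:
      CountBlocksPass() : Pass("CountBlocks") {}  // defaults: kAllNodes, no flags, no dump folder

      virtual bool WalkBasicBlocks(CompilationUnit* c_unit, BasicBlock* bb) const {
        UNUSED(c_unit);
        UNUSED(bb);
        return false;  // "no change", so repeating iterators will not re-run
      }
    };
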
diff --git a/compiler/dex/pass_driver.cc b/compiler/dex/pass_driver.cc
index 820dc5a..4f8739a 100644
--- a/compiler/dex/pass_driver.cc
+++ b/compiler/dex/pass_driver.cc
@@ -16,6 +16,8 @@
 
 #include <dlfcn.h>
 
+#include "base/logging.h"
+#include "base/macros.h"
 #include "bb_optimizations.h"
 #include "compiler_internals.h"
 #include "dataflow_iterator.h"
@@ -28,7 +30,8 @@
 namespace {  // anonymous namespace
 
 /**
- * @brief Helper function to create a single instance of a given Pass and can be shared across the threads
+ * @brief Helper function to create a single instance of a given Pass that can be shared across
+ * the threads.
  */
 template <typename PassType>
 const Pass* GetPassInstance() {
@@ -36,55 +39,58 @@
   return &pass;
 }
 
+void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass, DataflowIterator* iterator) {
+  // Paranoid: Check the iterator before walking the BasicBlocks.
+  DCHECK(iterator != nullptr);
+
+  bool change = false;
+  for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) {
+    change = pass->WalkBasicBlocks(c_unit, bb);
+  }
+}
+
+template <typename Iterator>
+inline void DoWalkBasicBlocks(CompilationUnit* c_unit, const Pass* pass) {
+  Iterator iterator(c_unit->mir_graph.get());
+  DoWalkBasicBlocks(c_unit, pass, &iterator);
+}
+
 }  // anonymous namespace
 
-PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes) : cu_(cu) {
-  dump_cfg_folder_ = "/sdcard/";
+PassDriver::PassDriver(CompilationUnit* cu, bool create_default_passes)
+    : cu_(cu), dump_cfg_folder_("/sdcard/") {
+  DCHECK(cu != nullptr);
 
   // If need be, create the default passes.
-  if (create_default_passes == true) {
+  if (create_default_passes) {
     CreatePasses();
   }
 }
 
 PassDriver::~PassDriver() {
-  // Clear the map: done to remove any chance of having a pointer after freeing below
-  pass_map_.clear();
 }
 
-void PassDriver::InsertPass(const Pass* new_pass, bool warn_override) {
-  assert(new_pass != 0);
+void PassDriver::InsertPass(const Pass* new_pass) {
+  DCHECK(new_pass != nullptr);
+  DCHECK(new_pass->GetName() != nullptr && new_pass->GetName()[0] != 0);
 
-  // Get name here to not do it all over the method.
-  const std::string& name = new_pass->GetName();
+  // It is an error to override an existing pass.
+  DCHECK(GetPass(new_pass->GetName()) == nullptr)
+      << "Pass name " << new_pass->GetName() << " already used.";
 
-  // Do we want to warn the user about squashing a pass?
-  if (warn_override == false) {
-    auto it = pass_map_.find(name);
-
-    if (it != pass_map_.end()) {
-      LOG(INFO) << "Pass name " << name << " already used, overwriting pass";
-    }
-  }
-
-  // Now add to map and list.
-  pass_map_.Put(name, new_pass);
+  // Now add to the list.
   pass_list_.push_back(new_pass);
 }
 
 void PassDriver::CreatePasses() {
   /*
-   * Create the pass list:
-   *   - These passes are immutable and are shared across the threads:
-   *    - This is achieved via:
-   *     - The UniquePtr used here.
-   *     - DISALLOW_COPY_AND_ASSIGN in the base Pass class.
+   * Create the pass list. These passes are immutable and are shared across the threads.
    *
    * Advantage is that there will be no race conditions here.
    * Disadvantage is the passes can't change their internal states depending on CompilationUnit:
    *   - This is not yet an issue: no current pass would require it.
    */
-  static const Pass* passes[] = {
+  static const Pass* const passes[] = {
       GetPassInstance<CodeLayout>(),
       GetPassInstance<SSATransformation>(),
       GetPassInstance<ConstantPropagation>(),
@@ -96,14 +102,10 @@
       GetPassInstance<BBOptimizations>(),
   };
 
-  // Get number of elements in the array.
-  unsigned int nbr = (sizeof(passes) / sizeof(passes[0]));
-
-  // Insert each pass into the map and into the list via the InsertPass method:
-  //   - Map is used for the lookup
-  //   - List is used for the pass walk
-  for (unsigned int i = 0; i < nbr; i++) {
-    InsertPass(passes[i]);
+  // Insert each pass into the list via the InsertPass method.
+  pass_list_.reserve(arraysize(passes));
+  for (const Pass* pass : passes) {
+    InsertPass(pass);
   }
 }
 
@@ -114,49 +116,37 @@
 }
 
 void PassDriver::DispatchPass(CompilationUnit* c_unit, const Pass* curPass) {
-  DataflowIterator* iterator = 0;
-
   LOG(DEBUG) << "Dispatching " << curPass->GetName();
 
-  MIRGraph* mir_graph = c_unit->mir_graph.get();
-  ArenaAllocator *arena = &(c_unit->arena);
-
-  // Let us start by getting the right iterator.
   DataFlowAnalysisMode mode = curPass->GetTraversal();
 
   switch (mode) {
     case kPreOrderDFSTraversal:
-      iterator = new (arena) PreOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<PreOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingPreOrderDFSTraversal:
-      iterator = new (arena) RepeatingPreOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingPostOrderDFSTraversal:
-      iterator = new (arena) RepeatingPostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(c_unit, curPass);
       break;
     case kReversePostOrderDFSTraversal:
-      iterator = new (arena) ReversePostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<ReversePostOrderDfsIterator>(c_unit, curPass);
       break;
     case kRepeatingReversePostOrderDFSTraversal:
-      iterator = new (arena) RepeatingReversePostOrderDfsIterator(mir_graph);
+      DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(c_unit, curPass);
       break;
     case kPostOrderDOMTraversal:
-      iterator = new (arena) PostOrderDOMIterator(mir_graph);
+      DoWalkBasicBlocks<PostOrderDOMIterator>(c_unit, curPass);
       break;
     case kAllNodes:
-      iterator = new (arena) AllNodesIterator(mir_graph);
+      DoWalkBasicBlocks<AllNodesIterator>(c_unit, curPass);
+      break;
+    case kNoNodes:
       break;
     default:
       LOG(DEBUG) << "Iterator mode not handled in dispatcher: " << mode;
-      return;
-  }
-
-  // Paranoid: Check the iterator before walking the BasicBlocks.
-  assert(iterator != 0);
-
-  bool change = false;
-  for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) {
-    change = curPass->WalkBasicBlocks(c_unit, bb);
+      break;
   }
 }
 
@@ -166,33 +156,34 @@
   curPass->End(c_unit);
 }
 
-bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* curPass, bool time_split) {
-  // Paranoid: c_unit or curPass cannot be 0, and the pass should have a name.
-  if (c_unit == 0 || curPass == 0 || (strcmp(curPass->GetName(), "") == 0)) {
-    return false;
-  }
+bool PassDriver::RunPass(CompilationUnit* c_unit, const Pass* pass, bool time_split) {
+  // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name.
+  DCHECK(c_unit != nullptr);
+  DCHECK(pass != nullptr);
+  DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0);
 
   // Do we perform a time split
-  if (time_split == true) {
-    c_unit->NewTimingSplit(curPass->GetName());
+  if (time_split) {
+    c_unit->NewTimingSplit(pass->GetName());
   }
 
   // Check the pass gate first.
-  bool shouldApplyPass = curPass->Gate(c_unit);
+  bool should_apply_pass = pass->Gate(c_unit);
 
-  if (shouldApplyPass == true) {
+  if (should_apply_pass) {
     // Applying the pass: first start, doWork, and end calls.
-    ApplyPass(c_unit, curPass);
+    ApplyPass(c_unit, pass);
 
     // Clean up if need be.
-    HandlePassFlag(c_unit, curPass);
+    HandlePassFlag(c_unit, pass);
 
     // Do we want to log it?
     if ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0) {
       // Do we have a pass folder?
-      const std::string& passFolder = curPass->GetDumpCFGFolder();
+      const char* passFolder = pass->GetDumpCFGFolder();
+      DCHECK(passFolder != nullptr);
 
-      if (passFolder != "") {
+      if (passFolder[0] != 0) {
         // Create directory prefix.
         std::string prefix = GetDumpCFGFolder();
         prefix += passFolder;
@@ -204,19 +195,18 @@
   }
 
   // If the pass gate passed, we can declare success.
-  return shouldApplyPass;
+  return should_apply_pass;
 }
 
-bool PassDriver::RunPass(CompilationUnit* c_unit, const std::string& pass_name) {
-  // Paranoid: c_unit cannot be 0 and we need a pass name.
-  if (c_unit == 0 || pass_name == "") {
-    return false;
-  }
+bool PassDriver::RunPass(CompilationUnit* c_unit, const char* pass_name) {
+  // Paranoid: c_unit cannot be nullptr and we need a pass name.
+  DCHECK(c_unit != nullptr);
+  DCHECK(pass_name != nullptr && pass_name[0] != 0);
 
-  const Pass* curPass = GetPass(pass_name);
+  const Pass* cur_pass = GetPass(pass_name);
 
-  if (curPass != 0) {
-    return RunPass(c_unit, curPass);
+  if (cur_pass != nullptr) {
+    return RunPass(c_unit, cur_pass);
   }
 
   // Return false, we did not find the pass.
@@ -224,27 +214,26 @@
 }
 
 void PassDriver::Launch() {
-  for (const Pass *curPass : pass_list_) {
-    RunPass(cu_, curPass, true);
+  for (const Pass* cur_pass : pass_list_) {
+    RunPass(cu_, cur_pass, true);
   }
 }
 
 void PassDriver::PrintPassNames() const {
   LOG(INFO) << "Loop Passes are:";
 
-  for (const Pass *curPass : pass_list_) {
-    LOG(INFO) << "\t-" << curPass->GetName();
+  for (const Pass* cur_pass : pass_list_) {
+    LOG(INFO) << "\t-" << cur_pass->GetName();
   }
 }
 
-const Pass* PassDriver::GetPass(const std::string& name) const {
-  auto it = pass_map_.find(name);
-
-  if (it != pass_map_.end()) {
-    return it->second;
+const Pass* PassDriver::GetPass(const char* name) const {
+  for (const Pass* cur_pass : pass_list_) {
+    if (strcmp(name, cur_pass->GetName()) == 0) {
+      return cur_pass;
+    }
   }
-
-  return 0;
+  return nullptr;
 }
 
 }  // namespace art
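
The GetPassInstance<>() helper above relies on a function-local static to share one immutable Pass object per type across compilation threads. The same idiom in isolation; GetSharedInstance and MyPass are illustrative names only:

    // Same idiom as GetPassInstance<PassType>() above: the function-local
    // static is constructed once and every caller gets the same object back.
    template <typename PassType>
    const PassType* GetSharedInstance() {
      static const PassType instance;
      return &instance;
    }

    // Usage sketch, assuming some default-constructible pass type MyPass:
    //   const MyPass* a = GetSharedInstance<MyPass>();
    //   const MyPass* b = GetSharedInstance<MyPass>();
    //   DCHECK(a == b);  // one shared instance
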
diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h
index d580460..c734d3e 100644
--- a/compiler/dex/pass_driver.h
+++ b/compiler/dex/pass_driver.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_DEX_PASS_DRIVER_H_
 #define ART_COMPILER_DEX_PASS_DRIVER_H_
 
-#include <list>
+#include <vector>
 #include "pass.h"
 #include "safe_map.h"
 
@@ -42,7 +42,7 @@
    * @param new_pass the new Pass to insert in the map and list.
    * @param warn_override warn if the name of the Pass is already used.
    */
-  void InsertPass(const Pass* new_pass, bool warn_override = true);
+  void InsertPass(const Pass* new_pass);
 
   /**
    * @brief Run a pass using the name as key.
@@ -50,7 +50,7 @@
    * @param pass_name the Pass name.
    * @return whether the pass was applied.
    */
-  bool RunPass(CompilationUnit* c_unit, const std::string& pass_name);
+  bool RunPass(CompilationUnit* c_unit, const char* pass_name);
 
   /**
    * @brief Run a pass using the Pass itself.
@@ -75,20 +75,17 @@
 
   void PrintPassNames() const;
 
-  const Pass* GetPass(const std::string& name) const;
+  const Pass* GetPass(const char* name) const;
 
-  const char *GetDumpCFGFolder() const {
+  const char* GetDumpCFGFolder() const {
     return dump_cfg_folder_;
   }
 
  protected:
   void CreatePasses();
 
-  /** @brief The Pass Map: contains name -> pass for quick lookup. */
-  SafeMap<std::string, const Pass*> pass_map_;
-
   /** @brief List of passes: provides the order to execute the passes. */
-  std::list<const Pass*> pass_list_;
+  std::vector<const Pass*> pass_list_;
 
   /** @brief The CompilationUnit on which to execute the passes on. */
   CompilationUnit* const cu_;
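
Putting the interface above together, driving the default pass list is a two-step affair. A hedged usage sketch; RunDefaultPasses is an illustrative wrapper, not part of this change, and the pass name string shown is only an example of what GetName() might return:

    // Hypothetical call site: build the default pass list and run it in order.
    void RunDefaultPasses(CompilationUnit* cu) {
      PassDriver driver(cu, /* create_default_passes */ true);
      driver.Launch();  // runs every pass in pass_list_ order via RunPass()
      // An individual pass can also be re-run by name:
      //   driver.RunPass(cu, "BBOptimizations");  // name string is illustrative
    }
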
diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc
index 70b660b..e6cc2de 100644
--- a/compiler/dex/portable/mir_to_gbc.cc
+++ b/compiler/dex/portable/mir_to_gbc.cc
@@ -1522,7 +1522,7 @@
   reg_info.push_back(irb_->getInt32(cu_->num_ins));
   reg_info.push_back(irb_->getInt32(cu_->num_regs));
   reg_info.push_back(irb_->getInt32(cu_->num_outs));
-  reg_info.push_back(irb_->getInt32(cu_->num_compiler_temps));
+  reg_info.push_back(irb_->getInt32(mir_graph_->GetNumUsedCompilerTemps()));
   reg_info.push_back(irb_->getInt32(mir_graph_->GetNumSSARegs()));
   ::llvm::MDNode* reg_info_node = ::llvm::MDNode::get(*context_, reg_info);
   inst->setMetadata("RegInfo", reg_info_node);
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 661050f..fdc609a 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -127,15 +127,19 @@
   }
 }
 
-MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir,
-                             OpSize size, bool long_or_double, bool is_object) {
-  int32_t field_offset;
-  bool is_volatile;
-  uint32_t field_idx = mir->dalvikInsn.vC;
-  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
-  if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) {
-    return NULL;
+MIR* ArmMir2Lir::SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special) {
+  // FastInstance() already checked by DexFileMethodInliner.
+  const InlineIGetIPutData& data = special.d.ifield_data;
+  if (data.method_is_static || data.object_arg != 0) {
+    return NULL;  // The object is not "this" and has to be null-checked.
   }
+
+  OpSize size = static_cast<OpSize>(data.op_size);
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool long_or_double = (data.op_size == kLong);
+  bool is_object = data.is_object;
+
+  // TODO: Generate the method using only the data in special.
   RegLocation rl_obj = mir_graph_->GetSrc(mir, 0);
   LockLiveArgs(mir);
   rl_obj = ArmMir2Lir::ArgLoc(rl_obj);
@@ -148,19 +152,24 @@
   // Point of no return - no aborts after this
   ArmMir2Lir::GenPrintLabel(mir);
   rl_obj = LoadArg(rl_obj);
+  uint32_t field_idx = mir->dalvikInsn.vC;
   GenIGet(field_idx, mir->optimization_flags, size, rl_dest, rl_obj, long_or_double, is_object);
   return GetNextMir(bb, mir);
 }
 
-MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir,
-                             OpSize size, bool long_or_double, bool is_object) {
-  int32_t field_offset;
-  bool is_volatile;
-  uint32_t field_idx = mir->dalvikInsn.vC;
-  bool fast_path = FastInstance(field_idx, false, &field_offset, &is_volatile);
-  if (!fast_path || !(mir->optimization_flags & MIR_IGNORE_NULL_CHECK)) {
-    return NULL;
+MIR* ArmMir2Lir::SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special) {
+  // FastInstance() already checked by DexFileMethodInliner.
+  const InlineIGetIPutData& data = special.d.ifield_data;
+  if (data.method_is_static || data.object_arg != 0) {
+    return NULL;  // The object is not "this" and has to be null-checked.
   }
+
+  OpSize size = static_cast<OpSize>(data.op_size);
+  DCHECK_NE(data.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
+  bool long_or_double = (data.op_size == kLong);
+  bool is_object = data.is_object;
+
+  // TODO: Generate the method using only the data in special.
   RegLocation rl_src;
   RegLocation rl_obj;
   LockLiveArgs(mir);
@@ -174,7 +183,7 @@
   rl_src = ArmMir2Lir::ArgLoc(rl_src);
   rl_obj = ArmMir2Lir::ArgLoc(rl_obj);
   // Reject if source is split across registers & frame
-  if (rl_obj.location == kLocInvalid) {
+  if (rl_src.location == kLocInvalid) {
     ResetRegPool();
     return NULL;
   }
@@ -182,6 +191,7 @@
   ArmMir2Lir::GenPrintLabel(mir);
   rl_obj = LoadArg(rl_obj);
   rl_src = LoadArg(rl_src);
+  uint32_t field_idx = mir->dalvikInsn.vC;
   GenIPut(field_idx, mir->optimization_flags, size, rl_src, rl_obj, long_or_double, is_object);
   return GetNextMir(bb, mir);
 }
@@ -219,8 +229,6 @@
  */
 void ArmMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir,
                                 const InlineMethod& special) {
-  // TODO: Generate the method using only the data in special. (Requires FastInstance() field
-  // validation in DexFileMethodInliner::AnalyseIGetMethod()/AnalyseIPutMethod().)
   DCHECK(special.flags & kInlineSpecial);
   current_dalvik_offset_ = mir->offset;
   MIR* next_mir = NULL;
@@ -231,30 +239,17 @@
       break;
     case kInlineOpConst:
       ArmMir2Lir::GenPrintLabel(mir);
-      LoadConstant(rARM_RET0, special.data);
+      LoadConstant(rARM_RET0, static_cast<int>(special.d.data));
       next_mir = GetNextMir(&bb, mir);
       break;
-    case kInlineOpIGet: {
-      InlineIGetIPutData data;
-      data.data = special.data;
-      OpSize op_size = static_cast<OpSize>(data.d.op_size);
-      DCHECK_NE(data.d.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-      bool long_or_double = (data.d.op_size == kLong);
-      bool is_object = data.d.is_object;
-      next_mir = SpecialIGet(&bb, mir, op_size, long_or_double, is_object);
+    case kInlineOpIGet:
+      next_mir = SpecialIGet(&bb, mir, special);
       break;
-    }
-    case kInlineOpIPut: {
-      InlineIGetIPutData data;
-      data.data = special.data;
-      OpSize op_size = static_cast<OpSize>(data.d.op_size);
-      DCHECK_NE(data.d.op_size, kDouble);  // The inliner doesn't distinguish kDouble, uses kLong.
-      bool long_or_double = (data.d.op_size == kLong);
-      bool is_object = data.d.is_object;
-      next_mir = SpecialIPut(&bb, mir, op_size, long_or_double, is_object);
+    case kInlineOpIPut:
+      next_mir = SpecialIPut(&bb, mir, special);
       break;
-    }
     case kInlineOpReturnArg:
+      // TODO: Generate the method using only the data in special.
       next_mir = SpecialIdentity(mir);
       break;
     default:
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 2bc579a..598da89 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 #define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 
+#include "arm_lir.h"
 #include "dex/compiler_internals.h"
 
 namespace art {
@@ -94,9 +95,9 @@
                      RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +111,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
@@ -153,6 +154,8 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
@@ -187,14 +190,16 @@
     RegLocation LoadArg(RegLocation loc);
     void LockLiveArgs(MIR* mir);
     MIR* GetNextMir(BasicBlock** p_bb, MIR* mir);
-    MIR* SpecialIGet(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
-    MIR* SpecialIPut(BasicBlock** bb, MIR* mir, OpSize size, bool long_or_double, bool is_object);
+    MIR* SpecialIGet(BasicBlock** bb, MIR* mir, const InlineMethod& special);
+    MIR* SpecialIPut(BasicBlock** bb, MIR* mir, const InlineMethod& special);
     MIR* SpecialIdentity(MIR* mir);
     LIR* LoadFPConstantValue(int r_dest, int value);
-    bool BadOverlap(RegLocation rl_src, RegLocation rl_dest);
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2, bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 1a9d9c5..46542e1 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -209,7 +209,7 @@
     NewLIR2(kThumb2Vcmps, rl_src1.low_reg, rl_src2.low_reg);
   }
   NewLIR0(kThumb2Fmstat);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
     case kCondNe:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 86ae75e..150794e 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -228,7 +228,7 @@
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
   // Normalize such that if either operand is constant, src2 will be constant.
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
     ccode = FlipComparisonOrder(ccode);
@@ -444,6 +444,17 @@
   return NULL;
 }
 
+RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
+  return rl_dest;
+}
+
+RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
+  return rl_dest;
+}
+
 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
                                      bool is_div) {
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -783,20 +794,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-
- /*
-  * Check to see if a result pair has a misaligned overlap with an operand pair.  This
-  * is not usual for dx to generate, but it is legal (for now).  In a future rev of
-  * dex, we'll want to make this case illegal.
-  */
-bool ArmMir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
-  DCHECK(rl_src.wide);
-  DCHECK(rl_dest.wide);
-  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
-}
-
-void ArmMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
     /*
      * To pull off inline multiply, we have a worst-case requirement of 8 temporary
      * registers.  Normally for Arm, we get 5.  We can get to 6 by including
@@ -868,27 +867,27 @@
     UnmarkTemp(rARM_LR);
 }
 
-void ArmMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
 }
 
-void ArmMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
 }
 
-void ArmMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
 }
 
-void ArmMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
 }
 
-void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenXorLong for Arm";
 }
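
Regarding the operand normalization earlier in this file (a constant operand is swapped into src2 and the condition flipped): the flip mirrors the comparison rather than negating it. A small illustrative sketch, not the real FlipComparisonOrder():

    // Illustrative only: when the operands of a compare are swapped, the
    // condition must be mirrored, e.g. (5 < x) becomes (x > 5); eq/ne are unchanged.
    enum SketchCond { kSketchEq, kSketchNe, kSketchLt, kSketchGe, kSketchGt, kSketchLe };

    static SketchCond MirrorCondition(SketchCond c) {
      switch (c) {
        case kSketchLt: return kSketchGt;
        case kSketchGt: return kSketchLt;
        case kSketchLe: return kSketchGe;
        case kSketchGe: return kSketchLe;
        default:        return c;  // kSketchEq / kSketchNe are symmetric
      }
    }

    int main() {
      // (5 < x) swapped to (x ? 5) must use the mirrored condition kSketchGt.
      return MirrorCondition(kSketchLt) == kSketchGt ? 0 : 1;
    }
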
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 7591041..ceec7d5 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -14,10 +14,12 @@
  * limitations under the License.
  */
 
+#include "codegen_arm.h"
+
+#include <inttypes.h>
+
 #include <string>
 
-#include "arm_lir.h"
-#include "codegen_arm.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
@@ -407,9 +409,8 @@
              strcpy(tbuf, cc_names[operand]);
              break;
            case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 +
-                 (operand << 1),
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
                  lir->target);
              break;
            case 'u': {
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 07fc6c7..9d3968b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -367,6 +367,16 @@
   return OpRegRegShift(op, r_dest_src1, r_src2, 0);
 }
 
+LIR* ArmMir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* ArmMir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
 LIR* ArmMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm";
   return NULL;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 29554c0..5e0fed7 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -21,7 +21,8 @@
 #include "mir_to_lir-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "dex/quick/dex_file_to_method_inliner_map.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
 #include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 
@@ -34,7 +35,7 @@
 void DumpMappingTable(const char* table_name, const char* descriptor, const char* name,
                       const Signature& signature, uint32_t size, It first) {
   if (size != 0) {
-    std::string line(StringPrintf("\n  %s %s%s_%s_table[%zu] = {", table_name,
+    std::string line(StringPrintf("\n  %s %s%s_%s_table[%u] = {", table_name,
                      descriptor, name, signature.ToString().c_str(), size));
     std::replace(line.begin(), line.end(), ';', '_');
     LOG(INFO) << line;
@@ -234,8 +235,8 @@
                                                lir, base_addr));
         std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode),
                                                     lir, base_addr));
-        LOG(INFO) << StringPrintf("%05x: %-9s%s%s",
-                                  reinterpret_cast<unsigned int>(base_addr + offset),
+        LOG(INFO) << StringPrintf("%5p: %-9s%s%s",
+                                  base_addr + offset,
                                   op_name.c_str(), op_operands.c_str(),
                                   lir->flags.is_nop ? "(nop)" : "");
       }
@@ -251,7 +252,7 @@
 }
 
 void Mir2Lir::DumpPromotionMap() {
-  int num_regs = cu_->num_dalvik_registers + cu_->num_compiler_temps + 1;
+  int num_regs = cu_->num_dalvik_registers + mir_graph_->GetNumUsedCompilerTemps();
   for (int i = 0; i < num_regs; i++) {
     PromotionMap v_reg_map = promotion_map_[i];
     std::string buf;
@@ -288,7 +289,7 @@
   LOG(INFO) << "Outs         : " << cu_->num_outs;
   LOG(INFO) << "CoreSpills       : " << num_core_spills_;
   LOG(INFO) << "FPSpills       : " << num_fp_spills_;
-  LOG(INFO) << "CompilerTemps    : " << cu_->num_compiler_temps;
+  LOG(INFO) << "CompilerTemps    : " << mir_graph_->GetNumUsedCompilerTemps();
   LOG(INFO) << "Frame size       : " << frame_size_;
   LOG(INFO) << "code size is " << total_size_ <<
     " bytes, Dalvik size is " << insns_size * 2;
@@ -440,6 +441,20 @@
     PushPointer(code_buffer_, &id);
     data_lir = NEXT_LIR(data_lir);
   }
+  // Push class literals.
+  data_lir = class_literal_list_;
+  while (data_lir != NULL) {
+    uint32_t target = data_lir->operands[0];
+    cu_->compiler_driver->AddClassPatch(cu_->dex_file,
+                                        cu_->class_def_idx,
+                                        cu_->method_idx,
+                                        target,
+                                        code_buffer_.size());
+    const DexFile::TypeId& id = cu_->dex_file->GetTypeId(target);
+    // unique value based on target to ensure code deduplication works
+    PushPointer(code_buffer_, &id);
+    data_lir = NEXT_LIR(data_lir);
+  }
 }
 
 /* Write the switch tables to the output stream */
@@ -749,10 +764,10 @@
     }
   }
   MethodReference method_ref(cu_->dex_file, cu_->method_idx);
-  const std::vector<uint8_t>* gc_map_raw =
-      cu_->compiler_driver->GetVerifiedMethodsData()->GetDexGcMap(method_ref);
-  verifier::DexPcToReferenceMap dex_gc_map(&(*gc_map_raw)[0]);
-  DCHECK_EQ(gc_map_raw->size(), dex_gc_map.RawSize());
+  const std::vector<uint8_t>& gc_map_raw =
+      mir_graph_->GetCurrentDexCompilationUnit()->GetVerifiedMethod()->GetDexGcMap();
+  verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
+  DCHECK_EQ(gc_map_raw.size(), dex_gc_map.RawSize());
   // Compute native offset to references size.
   NativePcToReferenceMapBuilder native_gc_map_builder(&native_gc_map_,
                                                       mapping_table.PcToDexSize(),
@@ -772,6 +787,7 @@
   offset = AssignLiteralOffsetCommon(literal_list_, offset);
   offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset);
   offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset);
+  offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset);
   return offset;
 }
 
@@ -960,6 +976,7 @@
     : Backend(arena),
       literal_list_(NULL),
       method_literal_list_(NULL),
+      class_literal_list_(NULL),
       code_literal_list_(NULL),
       first_fixup_(NULL),
       cu_(cu),
@@ -975,6 +992,7 @@
       data_offset_(0),
       total_size_(0),
       block_label_list_(NULL),
+      promotion_map_(NULL),
       current_dalvik_offset_(0),
       estimated_native_code_size_(0),
       reg_pool_(NULL),
@@ -985,10 +1003,8 @@
       core_spill_mask_(0),
       fp_spill_mask_(0),
       first_lir_insn_(NULL),
-      last_lir_insn_(NULL) {
-  promotion_map_ = static_cast<PromotionMap*>
-      (arena_->Alloc((cu_->num_dalvik_registers  + cu_->num_compiler_temps + 1) *
-                      sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
+      last_lir_insn_(NULL),
+      slow_paths_(arena, 32, kGrowableArraySlowPaths) {
   // Reserve pointer id 0 for NULL.
   size_t null_idx = WrapPointer(NULL);
   DCHECK_EQ(null_idx, 0U);
@@ -1064,13 +1080,27 @@
   return result;
 }
 
+size_t Mir2Lir::GetMaxPossibleCompilerTemps() const {
+  // Choose a reasonably small value in order to contain stack growth.
+  // Backends that are smarter about the spill region can return larger values.
+  const size_t max_compiler_temps = 10;
+  return max_compiler_temps;
+}
+
+size_t Mir2Lir::GetNumBytesForCompilerTempSpillRegion() {
+  // By default, assume that the Mir2Lir will need one slot for each temporary.
+  // If the backend can better determine which temps have non-overlapping ranges and
+  // which temps do not need to be spilled, it can provide a smaller region.
+  return (mir_graph_->GetNumUsedCompilerTemps() * sizeof(uint32_t));
+}
+
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
   static const uint32_t kAlignMask = kStackAlignment - 1;
-  uint32_t size = (num_core_spills_ + num_fp_spills_ +
-                   1 /* filler word */ + cu_->num_regs + cu_->num_outs +
-                   cu_->num_compiler_temps + 1 /* cur_method* */)
-                   * sizeof(uint32_t);
+  uint32_t size = ((num_core_spills_ + num_fp_spills_ +
+                   1 /* filler word */ + cu_->num_regs + cu_->num_outs)
+                   * sizeof(uint32_t)) +
+                   GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
   return (size + kAlignMask) & ~(kAlignMask);
 }
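
ComputeFrameSize() above now folds the compiler-temp spill region into the frame instead of one word per temp plus a cur_method slot. A worked example of the arithmetic with made-up counts; the 16-byte alignment used here is an assumption of the sketch, the real value is kStackAlignment:

    #include <cassert>
    #include <cstdint>

    // Illustrative re-statement of the frame-size arithmetic above.
    static uint32_t ComputeFrameSizeSketch(uint32_t core_spills, uint32_t fp_spills,
                                           uint32_t num_regs, uint32_t num_outs,
                                           uint32_t temp_spill_bytes) {
      const uint32_t kStackAlignmentSketch = 16;  // assumption for this sketch
      const uint32_t kAlignMask = kStackAlignmentSketch - 1;
      uint32_t size = (core_spills + fp_spills + 1u /* filler */ + num_regs + num_outs) *
                          static_cast<uint32_t>(sizeof(uint32_t)) +
                      temp_spill_bytes;
      return (size + kAlignMask) & ~kAlignMask;
    }

    int main() {
      // 2 core spills, 0 fp spills, 5 dalvik regs, 3 outs, 2 temps * 4 bytes = 8.
      // Raw size = (2 + 0 + 1 + 5 + 3) * 4 + 8 = 52, rounded up to 64.
      assert(ComputeFrameSizeSketch(2, 0, 5, 3, 8) == 64);
      return 0;
    }
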
@@ -1121,4 +1151,39 @@
   new_lir->next->prev = new_lir;
 }
 
+bool Mir2Lir::IsPowerOfTwo(uint64_t x) {
+  return (x & (x - 1)) == 0;
+}
+
+// Returns the index of the lowest set bit in 'x'.
+int32_t Mir2Lir::LowestSetBit(uint64_t x) {
+  int bit_posn = 0;
+  while ((x & 0xf) == 0) {
+    bit_posn += 4;
+    x >>= 4;
+  }
+  while ((x & 1) == 0) {
+    bit_posn++;
+    x >>= 1;
+  }
+  return bit_posn;
+}
+
+bool Mir2Lir::BadOverlap(RegLocation rl_src, RegLocation rl_dest) {
+  DCHECK(rl_src.wide);
+  DCHECK(rl_dest.wide);
+  return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
+}
+
+LIR *Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                int offset, int check_value, LIR* target) {
+  // Handle this for architectures that can't compare to memory.
+  LoadWordDisp(base_reg, offset, temp_reg);
+  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
+  return branch;
+}
+
+void Mir2Lir::AddSlowPath(LIRSlowPath* slowpath) {
+  slow_paths_.Insert(slowpath);
+}
 }  // namespace art
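
The LowestSetBit() helper added above skips a nibble of zeros at a time before finishing bit by bit, and it assumes x is non-zero. A small self-contained check of the same logic; the test values are arbitrary:

    #include <cassert>
    #include <cstdint>

    // Same algorithm as Mir2Lir::LowestSetBit() above; x must be non-zero,
    // otherwise neither loop terminates.
    static int32_t LowestSetBitSketch(uint64_t x) {
      int32_t bit_posn = 0;
      while ((x & 0xf) == 0) {  // skip four zero bits at a time
        bit_posn += 4;
        x >>= 4;
      }
      while ((x & 1) == 0) {    // then single bits
        bit_posn++;
        x >>= 1;
      }
      return bit_posn;
    }

    int main() {
      assert(LowestSetBitSketch(1) == 0);
      assert(LowestSetBitSketch(0x80) == 7);
      assert(LowestSetBitSketch(UINT64_C(1) << 63) == 63);
      return 0;
    }
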
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 0937be3..0ad8abf 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -24,11 +24,27 @@
 #include "dex/mir_graph.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
+#include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
 
 #include "dex_file_method_inliner.h"
 
 namespace art {
 
+namespace {  // anonymous namespace
+
+constexpr uint8_t kIGetIPutOpSizes[] = {
+    kWord,          // IGET, IPUT
+    kLong,          // IGET_WIDE, IPUT_WIDE
+    kWord,          // IGET_OBJECT, IPUT_OBJECT
+    kSignedByte,    // IGET_BOOLEAN, IPUT_BOOLEAN
+    kSignedByte,    // IGET_BYTE, IPUT_BYTE
+    kUnsignedHalf,  // IGET_CHAR, IPUT_CHAR
+    kSignedHalf,    // IGET_SHORT, IPUT_SHORT
+};
+
+}  // anonymous namespace
+
 const uint32_t DexFileMethodInliner::kIndexUnresolved;
 const char* const DexFileMethodInliner::kClassCacheNames[] = {
     "Z",                       // kClassCacheBoolean
@@ -167,7 +183,7 @@
 
 const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods[] = {
 #define INTRINSIC(c, n, p, o, d) \
-    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, d } }
+    { { kClassCache ## c, kNameCache ## n, kProtoCache ## p }, { o, kInlineIntrinsic, { d } } }
 
     INTRINSIC(JavaLangDouble, DoubleToRawLongBits, D_J, kIntrinsicDoubleCvt, 0),
     INTRINSIC(JavaLangDouble, LongBitsToDouble, J_D, kIntrinsicDoubleCvt, 0),
@@ -248,57 +264,58 @@
 DexFileMethodInliner::~DexFileMethodInliner() {
 }
 
-bool DexFileMethodInliner::AnalyseMethodCode(uint32_t method_idx,
-                                             const DexFile::CodeItem* code_item) {
+bool DexFileMethodInliner::AnalyseMethodCode(verifier::MethodVerifier* verifier) {
   // We currently support only plain return or 2-instruction methods.
 
+  const DexFile::CodeItem* code_item = verifier->CodeItem();
   DCHECK_NE(code_item->insns_size_in_code_units_, 0u);
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
 
+  InlineMethod method;
+  bool success;
   switch (opcode) {
     case Instruction::RETURN_VOID:
-      return AddInlineMethod(method_idx, kInlineOpNop, kInlineSpecial, 0);
+      method.opcode = kInlineOpNop;
+      method.flags = kInlineSpecial;
+      method.d.data = 0u;
+      success = true;
+      break;
     case Instruction::RETURN:
     case Instruction::RETURN_OBJECT:
-      return AnalyseReturnMethod(method_idx, code_item, kWord);
     case Instruction::RETURN_WIDE:
-      return AnalyseReturnMethod(method_idx, code_item, kLong);
+      success = AnalyseReturnMethod(code_item, &method);
+      break;
     case Instruction::CONST:
     case Instruction::CONST_4:
     case Instruction::CONST_16:
     case Instruction::CONST_HIGH16:
       // TODO: Support wide constants (RETURN_WIDE).
-      return AnalyseConstMethod(method_idx, code_item);
+      success = AnalyseConstMethod(code_item, &method);
+      break;
     case Instruction::IGET:
-      return AnalyseIGetMethod(method_idx, code_item, kWord, false);
     case Instruction::IGET_OBJECT:
-      return AnalyseIGetMethod(method_idx, code_item, kWord, true);
     case Instruction::IGET_BOOLEAN:
     case Instruction::IGET_BYTE:
-      return AnalyseIGetMethod(method_idx, code_item, kSignedByte, false);
     case Instruction::IGET_CHAR:
-      return AnalyseIGetMethod(method_idx, code_item, kUnsignedHalf, false);
     case Instruction::IGET_SHORT:
-      return AnalyseIGetMethod(method_idx, code_item, kSignedHalf, false);
     case Instruction::IGET_WIDE:
-      return AnalyseIGetMethod(method_idx, code_item, kLong, false);
+      success = AnalyseIGetMethod(verifier, &method);
+      break;
     case Instruction::IPUT:
-      return AnalyseIPutMethod(method_idx, code_item, kWord, false);
     case Instruction::IPUT_OBJECT:
-      return AnalyseIPutMethod(method_idx, code_item, kWord, true);
     case Instruction::IPUT_BOOLEAN:
     case Instruction::IPUT_BYTE:
-      return AnalyseIPutMethod(method_idx, code_item, kSignedByte, false);
     case Instruction::IPUT_CHAR:
-      return AnalyseIPutMethod(method_idx, code_item, kUnsignedHalf, false);
     case Instruction::IPUT_SHORT:
-      return AnalyseIPutMethod(method_idx, code_item, kSignedHalf, false);
     case Instruction::IPUT_WIDE:
-      return AnalyseIPutMethod(method_idx, code_item, kLong, false);
+      success = AnalyseIPutMethod(verifier, &method);
+      break;
     default:
-      return false;
-    }
+      success = false;
+      break;
+  }
+  return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
 }
 
 bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index) {
@@ -323,13 +340,13 @@
     case kIntrinsicFloatCvt:
       return backend->GenInlinedFloatCvt(info);
     case kIntrinsicReverseBytes:
-      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.data));
+      return backend->GenInlinedReverseBytes(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicAbsInt:
       return backend->GenInlinedAbsInt(info);
     case kIntrinsicAbsLong:
       return backend->GenInlinedAbsLong(info);
     case kIntrinsicMinMaxInt:
-      return backend->GenInlinedMinMaxInt(info, intrinsic.data & kIntrinsicFlagMin);
+      return backend->GenInlinedMinMaxInt(info, intrinsic.d.data & kIntrinsicFlagMin);
     case kIntrinsicSqrt:
       return backend->GenInlinedSqrt(info);
     case kIntrinsicCharAt:
@@ -337,26 +354,27 @@
     case kIntrinsicCompareTo:
       return backend->GenInlinedStringCompareTo(info);
     case kIntrinsicIsEmptyOrLength:
-      return backend->GenInlinedStringIsEmptyOrLength(info, intrinsic.data & kIntrinsicFlagIsEmpty);
+      return backend->GenInlinedStringIsEmptyOrLength(
+          info, intrinsic.d.data & kIntrinsicFlagIsEmpty);
     case kIntrinsicIndexOf:
-      return backend->GenInlinedIndexOf(info, intrinsic.data & kIntrinsicFlagBase0);
+      return backend->GenInlinedIndexOf(info, intrinsic.d.data & kIntrinsicFlagBase0);
     case kIntrinsicCurrentThread:
       return backend->GenInlinedCurrentThread(info);
     case kIntrinsicPeek:
-      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.data));
+      return backend->GenInlinedPeek(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicPoke:
-      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.data));
+      return backend->GenInlinedPoke(info, static_cast<OpSize>(intrinsic.d.data));
     case kIntrinsicCas:
-      return backend->GenInlinedCas(info, intrinsic.data & kIntrinsicFlagIsLong,
-                                    intrinsic.data & kIntrinsicFlagIsObject);
+      return backend->GenInlinedCas(info, intrinsic.d.data & kIntrinsicFlagIsLong,
+                                    intrinsic.d.data & kIntrinsicFlagIsObject);
     case kIntrinsicUnsafeGet:
-      return backend->GenInlinedUnsafeGet(info, intrinsic.data & kIntrinsicFlagIsLong,
-                                          intrinsic.data & kIntrinsicFlagIsVolatile);
+      return backend->GenInlinedUnsafeGet(info, intrinsic.d.data & kIntrinsicFlagIsLong,
+                                          intrinsic.d.data & kIntrinsicFlagIsVolatile);
     case kIntrinsicUnsafePut:
-      return backend->GenInlinedUnsafePut(info, intrinsic.data & kIntrinsicFlagIsLong,
-                                          intrinsic.data & kIntrinsicFlagIsObject,
-                                          intrinsic.data & kIntrinsicFlagIsVolatile,
-                                          intrinsic.data & kIntrinsicFlagIsOrdered);
+      return backend->GenInlinedUnsafePut(info, intrinsic.d.data & kIntrinsicFlagIsLong,
+                                          intrinsic.d.data & kIntrinsicFlagIsObject,
+                                          intrinsic.d.data & kIntrinsicFlagIsVolatile,
+                                          intrinsic.d.data & kIntrinsicFlagIsOrdered);
     default:
       LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode;
       return false;  // avoid warning "control reaches end of non-void function"
@@ -505,12 +523,10 @@
   dex_file_ = dex_file;
 }
 
-bool DexFileMethodInliner::AddInlineMethod(int32_t method_idx, InlineMethodOpcode opcode,
-                                           InlineMethodFlags flags, uint32_t data) {
+bool DexFileMethodInliner::AddInlineMethod(int32_t method_idx, const InlineMethod& method) {
   WriterMutexLock mu(Thread::Current(), lock_);
   if (LIKELY(inline_methods_.find(method_idx) == inline_methods_.end())) {
-    InlineMethod im = {opcode, flags, data};
-    inline_methods_.Put(method_idx, im);
+    inline_methods_.Put(method_idx, method);
     return true;
   } else {
     if (PrettyMethod(method_idx, *dex_file_) == "int java.lang.String.length()") {
@@ -522,26 +538,30 @@
   }
 }
 
-bool DexFileMethodInliner::AnalyseReturnMethod(int32_t method_idx,
-                                               const DexFile::CodeItem* code_item, OpSize size) {
+bool DexFileMethodInliner::AnalyseReturnMethod(const DexFile::CodeItem* code_item,
+                                               InlineMethod* result) {
   const Instruction* return_instruction = Instruction::At(code_item->insns_);
-  if (return_instruction->Opcode() == Instruction::RETURN_VOID) {
-    return AddInlineMethod(method_idx, kInlineOpNop, kInlineSpecial, 0);
-  }
+  Instruction::Code return_opcode = return_instruction->Opcode();
+  uint16_t size = (return_opcode == Instruction::RETURN_WIDE) ? kLong : kWord;
+  uint16_t is_object = (return_opcode == Instruction::RETURN_OBJECT) ? 1u : 0u;
   uint32_t reg = return_instruction->VRegA_11x();
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
   DCHECK_GE(reg, arg_start);
   DCHECK_LT(size == kLong ? reg + 1 : reg, code_item->registers_size_);
 
-  InlineReturnArgData data;
-  data.d.arg = reg - arg_start;
-  data.d.op_size = size;
-  data.d.reserved = 0;
-  return AddInlineMethod(method_idx, kInlineOpReturnArg, kInlineSpecial, data.data);
+  result->opcode = kInlineOpReturnArg;
+  result->flags = kInlineSpecial;
+  InlineReturnArgData* data = &result->d.return_data;
+  data->arg = reg - arg_start;
+  data->op_size = size;
+  data->is_object = is_object;
+  data->reserved = 0u;
+  data->reserved2 = 0u;
+  return true;
 }
 
-bool DexFileMethodInliner::AnalyseConstMethod(int32_t method_idx,
-                                              const DexFile::CodeItem* code_item) {
+bool DexFileMethodInliner::AnalyseConstMethod(const DexFile::CodeItem* code_item,
+                                              InlineMethod* result) {
   const Instruction* instruction = Instruction::At(code_item->insns_);
   const Instruction* return_instruction = instruction->Next();
   Instruction::Code return_opcode = return_instruction->Opcode();
@@ -566,13 +586,20 @@
   if (return_opcode == Instruction::RETURN_OBJECT && vB != 0) {
     return false;  // Returning non-null reference constant?
   }
-  return AddInlineMethod(method_idx, kInlineOpConst, kInlineSpecial, vB);
+  result->opcode = kInlineOpConst;
+  result->flags = kInlineSpecial;
+  result->d.data = static_cast<uint64_t>(vB);
+  return true;
 }
 
-bool DexFileMethodInliner::AnalyseIGetMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                                             OpSize size, bool is_object) {
+bool DexFileMethodInliner::AnalyseIGetMethod(verifier::MethodVerifier* verifier,
+                                             InlineMethod* result) {
+  const DexFile::CodeItem* code_item = verifier->CodeItem();
   const Instruction* instruction = Instruction::At(code_item->insns_);
   Instruction::Code opcode = instruction->Opcode();
+  DCHECK_LT(static_cast<size_t>(opcode - Instruction::IGET), arraysize(kIGetIPutOpSizes));
+  uint16_t size = kIGetIPutOpSizes[opcode - Instruction::IGET];
+
   const Instruction* return_instruction = instruction->Next();
   Instruction::Code return_opcode = return_instruction->Opcode();
   if (!(return_opcode == Instruction::RETURN && size != kLong) &&
@@ -585,61 +612,74 @@
   DCHECK_LT(return_opcode == Instruction::RETURN_WIDE ? return_reg + 1 : return_reg,
             code_item->registers_size_);
 
-  uint32_t vA, vB, vC;
-  uint64_t dummy_wide;
-  instruction->Decode(vA, vB, dummy_wide, vC, nullptr);
+  uint32_t dst_reg = instruction->VRegA_22c();
+  uint32_t object_reg = instruction->VRegB_22c();
+  uint32_t field_idx = instruction->VRegC_22c();
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
-  DCHECK_GE(vB, arg_start);
-  DCHECK_LT(vB, code_item->registers_size_);
-  DCHECK_LT(size == kLong ? vA + 1 : vA, code_item->registers_size_);
-  if (vA != return_reg) {
-    return false;  // Not returning the value retrieved by iget?
+  DCHECK_GE(object_reg, arg_start);
+  DCHECK_LT(object_reg, code_item->registers_size_);
+  DCHECK_LT(size == kLong ? dst_reg + 1 : dst_reg, code_item->registers_size_);
+  if (dst_reg != return_reg) {
+    return false;  // Not returning the value retrieved by IGET?
   }
 
-  // TODO: Check that the field is FastInstance().
+  if (!CompilerDriver::ComputeSpecialAccessorInfo(field_idx, false, verifier,
+                                                  &result->d.ifield_data)) {
+    return false;
+  }
 
-  InlineIGetIPutData data;
-  data.d.field = vC;
-  data.d.op_size = size;
-  data.d.is_object = is_object;
-  data.d.object_arg = vB - arg_start;  // Allow iget on any register, not just "this"
-  data.d.src_arg = 0;
-  data.d.reserved = 0;
-  return AddInlineMethod(method_idx, kInlineOpIGet, kInlineSpecial, data.data);
+  result->opcode = kInlineOpIGet;
+  result->flags = kInlineSpecial;
+  InlineIGetIPutData* data = &result->d.ifield_data;
+  data->op_size = size;
+  data->is_object = (opcode == Instruction::IGET_OBJECT) ? 1u : 0u;
+  data->object_arg = object_reg - arg_start;  // Allow IGET on any register, not just "this".
+  data->src_arg = 0;
+  data->reserved = 0;
+  return true;
 }
 
-bool DexFileMethodInliner::AnalyseIPutMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                                             OpSize size, bool is_object) {
+bool DexFileMethodInliner::AnalyseIPutMethod(verifier::MethodVerifier* verifier,
+                                             InlineMethod* result) {
+  const DexFile::CodeItem* code_item = verifier->CodeItem();
   const Instruction* instruction = Instruction::At(code_item->insns_);
+  Instruction::Code opcode = instruction->Opcode();
+  DCHECK_LT(static_cast<size_t>(opcode - Instruction::IPUT), arraysize(kIGetIPutOpSizes));
+  uint16_t size = kIGetIPutOpSizes[opcode - Instruction::IPUT];
+
   const Instruction* return_instruction = instruction->Next();
   if (return_instruction->Opcode() != Instruction::RETURN_VOID) {
     // TODO: Support returning an argument.
     // This is needed by builder classes and generated accessor setters.
     //    builder.setX(value): iput value, this, fieldX; return-object this;
     //    object.access$nnn(value): iput value, this, fieldX; return value;
-    // Use InlineIGetIPutData::d::reserved to hold the information.
+    // Use InlineIGetIPutData::reserved to hold the information.
     return false;
   }
 
-  uint32_t vA, vB, vC;
-  uint64_t dummy_wide;
-  instruction->Decode(vA, vB, dummy_wide, vC, nullptr);
+  uint32_t src_reg = instruction->VRegA_22c();
+  uint32_t object_reg = instruction->VRegB_22c();
+  uint32_t field_idx = instruction->VRegC_22c();
   uint32_t arg_start = code_item->registers_size_ - code_item->ins_size_;
-  DCHECK_GE(vB, arg_start);
-  DCHECK_GE(vA, arg_start);
-  DCHECK_LT(vB, code_item->registers_size_);
-  DCHECK_LT(size == kLong ? vA + 1 : vA, code_item->registers_size_);
+  DCHECK_GE(object_reg, arg_start);
+  DCHECK_LT(object_reg, code_item->registers_size_);
+  DCHECK_GE(src_reg, arg_start);
+  DCHECK_LT(size == kLong ? src_reg + 1 : src_reg, code_item->registers_size_);
 
-  // TODO: Check that the field (vC) is FastInstance().
+  if (!CompilerDriver::ComputeSpecialAccessorInfo(field_idx, true, verifier,
+                                                  &result->d.ifield_data)) {
+    return false;
+  }
 
-  InlineIGetIPutData data;
-  data.d.field = vC;
-  data.d.op_size = size;
-  data.d.is_object = is_object;
-  data.d.object_arg = vB - arg_start;  // Allow iput on any register, not just "this"
-  data.d.src_arg = vA - arg_start;
-  data.d.reserved = 0;
-  return AddInlineMethod(method_idx, kInlineOpIPut, kInlineSpecial, data.data);
+  result->opcode = kInlineOpIPut;
+  result->flags = kInlineSpecial;
+  InlineIGetIPutData* data = &result->d.ifield_data;
+  data->op_size = size;
+  data->is_object = (opcode == Instruction::IPUT_OBJECT) ? 1u : 0u;
+  data->object_arg = object_reg - arg_start;  // Allow IPUT on any register, not just "this".
+  data->src_arg = src_reg - arg_start;
+  data->reserved = 0;
+  return true;
 }
 
 }  // namespace art
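Illustrative aside (not part of the patch): the object_arg/src_arg values recorded above are the instruction's virtual registers rebased against the first incoming argument, i.e. arg_start = registers_size_ - ins_size_. A minimal standalone sketch of that arithmetic, with made-up names:

    #include <cassert>
    #include <cstdint>

    // Sketch only: how object_arg is derived for a trivial instance getter whose
    // single in-register is "this". Mirrors the DCHECKs and subtraction above.
    int main() {
      uint16_t registers_size = 2;   // one local + one in-register
      uint16_t ins_size = 1;         // just "this"
      uint32_t object_reg = 1;       // the IGET reads the object from the last register
      uint32_t arg_start = registers_size - ins_size;
      assert(object_reg >= arg_start);
      uint32_t object_arg = object_reg - arg_start;  // what goes into InlineIGetIPutData::object_arg
      assert(object_arg == 0);       // "this" is argument 0
      return 0;
    }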
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 6e81303..fe0824c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -27,6 +27,10 @@
 
 namespace art {
 
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
 class CallInfo;
 class Mir2Lir;
 
@@ -62,13 +66,7 @@
   kInlineSpecial       = 0x0002,
 };
 
-struct InlineMethod {
-  InlineMethodOpcode opcode;
-  InlineMethodFlags flags;
-  uint32_t data;
-};
-
-// IntrinsicFlags are stored in InlineMethod::data
+// IntrinsicFlags are stored in InlineMethod::d::raw_data
 enum IntrinsicFlags {
   kIntrinsicFlagNone = 0,
 
@@ -97,28 +95,37 @@
 COMPILE_ASSERT(kWord < 8 && kLong < 8 && kSingle < 8 && kDouble < 8 && kUnsignedHalf < 8 &&
                kSignedHalf < 8 && kUnsignedByte < 8 && kSignedByte < 8, op_size_field_too_narrow);
 
-union InlineIGetIPutData {
-  uint32_t data;
-  struct {
-    uint16_t field;
-    uint32_t op_size : 3;  // OpSize
-    uint32_t is_object : 1;
-    uint32_t object_arg : 4;
-    uint32_t src_arg : 4;  // iput only
-    uint32_t reserved : 4;
-  } d;
+struct InlineIGetIPutData {
+  uint16_t op_size : 3;  // OpSize
+  uint16_t is_object : 1;
+  uint16_t object_arg : 4;
+  uint16_t src_arg : 4;  // iput only
+  uint16_t method_is_static : 1;
+  uint16_t reserved : 3;
+  uint16_t field_idx;
+  uint32_t is_volatile : 1;
+  uint32_t field_offset : 31;
 };
-COMPILE_ASSERT(sizeof(InlineIGetIPutData) == sizeof(uint32_t), InvalidSizeOfInlineIGetIPutData);
+COMPILE_ASSERT(sizeof(InlineIGetIPutData) == sizeof(uint64_t), InvalidSizeOfInlineIGetIPutData);
 
-union InlineReturnArgData {
-  uint32_t data;
-  struct {
-    uint16_t arg;
-    uint32_t op_size : 3;  // OpSize
-    uint32_t reserved : 13;
+struct InlineReturnArgData {
+  uint16_t arg;
+  uint16_t op_size : 3;  // OpSize
+  uint16_t is_object : 1;
+  uint16_t reserved : 12;
+  uint32_t reserved2;
+};
+COMPILE_ASSERT(sizeof(InlineReturnArgData) == sizeof(uint64_t), InvalidSizeOfInlineReturnArgData);
+
+struct InlineMethod {
+  InlineMethodOpcode opcode;
+  InlineMethodFlags flags;
+  union {
+    uint64_t data;
+    InlineIGetIPutData ifield_data;
+    InlineReturnArgData return_data;
   } d;
 };
-COMPILE_ASSERT(sizeof(InlineReturnArgData) == sizeof(uint32_t), InvalidSizeOfInlineReturnArgData);
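Illustrative aside (not part of the patch): the new ifield payload is meant to pack into the 64-bit InlineMethod::d slot. A standalone mirror of the layout, assuming a typical ABI where these bitfields pack without padding, can be compiled on its own as a sanity check:

    #include <cstdint>

    // Mirror of InlineIGetIPutData: 16 bits of flags, a 16-bit field index,
    // and a 1+31 bit volatile/offset word, for 8 bytes total.
    struct IGetIPutDataMirror {
      uint16_t op_size : 3;
      uint16_t is_object : 1;
      uint16_t object_arg : 4;
      uint16_t src_arg : 4;
      uint16_t method_is_static : 1;
      uint16_t reserved : 3;
      uint16_t field_idx;
      uint32_t is_volatile : 1;
      uint32_t field_offset : 31;
    };
    static_assert(sizeof(IGetIPutDataMirror) == sizeof(uint64_t),
                  "payload should fit the 64-bit InlineMethod::d::data slot");

    int main() { return 0; }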
 
 /**
  * Handles inlining of methods from a particular DexFile.
@@ -144,8 +151,8 @@
      * @param method_idx the index of the inlining candidate.
      * @param code_item a previously verified code item of the method.
      */
-    bool AnalyseMethodCode(uint32_t method_idx,
-                           const DexFile::CodeItem* code_item) LOCKS_EXCLUDED(lock_);
+    bool AnalyseMethodCode(verifier::MethodVerifier* verifier)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
     /**
      * Check whether a particular method index corresponds to an intrinsic function.
@@ -369,17 +376,14 @@
 
     friend class DexFileToMethodInlinerMap;
 
-    bool AddInlineMethod(int32_t method_idx, InlineMethodOpcode opcode,
-                         InlineMethodFlags flags, uint32_t data) LOCKS_EXCLUDED(lock_);
+    bool AddInlineMethod(int32_t method_idx, const InlineMethod& method) LOCKS_EXCLUDED(lock_);
 
-    bool AnalyseReturnMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                             OpSize size) LOCKS_EXCLUDED(lock_);
-    bool AnalyseConstMethod(int32_t method_idx, const DexFile::CodeItem* code_item)
-                            LOCKS_EXCLUDED(lock_);
-    bool AnalyseIGetMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                           OpSize size, bool is_object) LOCKS_EXCLUDED(lock_);
-    bool AnalyseIPutMethod(int32_t method_idx, const DexFile::CodeItem* code_item,
-                           OpSize size, bool is_object) LOCKS_EXCLUDED(lock_);
+    static bool AnalyseReturnMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
+    static bool AnalyseConstMethod(const DexFile::CodeItem* code_item, InlineMethod* result);
+    static bool AnalyseIGetMethod(verifier::MethodVerifier* verifier, InlineMethod* result)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+    static bool AnalyseIPutMethod(verifier::MethodVerifier* verifier, InlineMethod* result)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
     ReaderWriterMutex lock_;
     /*
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 3bd0298..c59f3b8 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -19,7 +19,9 @@
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mirror/array.h"
+#include "mirror/object-inl.h"
 #include "verifier/method_verifier.h"
+#include <functional>
 
 namespace art {
 
@@ -206,13 +208,43 @@
                           RegLocation rl_src) {
   FlushAllRegs();  /* Everything to home location */
   ThreadOffset func_offset(-1);
-  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
+  const DexFile* dex_file = cu_->dex_file;
+  CompilerDriver* driver = cu_->compiler_driver;
+  if (cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *dex_file,
                                                        type_idx)) {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArray);
+    bool is_type_initialized;  // Ignored as an array does not have an initializer.
+    bool use_direct_type_ptr;
+    uintptr_t direct_type_ptr;
+    if (kEmbedClassInCode &&
+        driver->CanEmbedTypeInCode(*dex_file, type_idx,
+                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+      // The fast path.
+      if (!use_direct_type_ptr) {
+        // Use the literal pool and a PC-relative load from a data word.
+        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+        if (data_target == nullptr) {
+          data_target = AddWordData(&class_literal_list_, type_idx);
+        }
+        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
+        AppendLIR(load_pc_rel);
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayResolved);
+        CallRuntimeHelperRegMethodRegLocation(func_offset, TargetReg(kArg0), rl_src, true);
+      } else {
+        // Use the direct pointer.
+        func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArrayResolved);
+        CallRuntimeHelperImmMethodRegLocation(func_offset, direct_type_ptr, rl_src, true);
+      }
+    } else {
+      // The slow path.
+      DCHECK_EQ(func_offset.Int32Value(), -1);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocArray);
+      CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
+    }
+    DCHECK_NE(func_offset.Int32Value(), -1);
   } else {
     func_offset= QUICK_ENTRYPOINT_OFFSET(pAllocArrayWithAccessCheck);
+    CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   }
-  CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true);
   RegLocation rl_result = GetReturn(false);
   StoreValue(rl_dest, rl_result);
 }
@@ -327,6 +359,34 @@
   }
 }
 
+//
+// Slow path to ensure a class is initialized for sget/sput.
+//
+class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
+ public:
+  StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont,
+           int storage_index, int r_base) :
+    LIRSlowPath(m2l, m2l->GetCurrentDexPc(), unresolved, cont), uninit_(uninit), storage_index_(storage_index),
+    r_base_(r_base) {
+  }
+
+  void Compile() {
+    LIR* unresolved_target = GenerateTargetLabel();
+    uninit_->target = unresolved_target;
+    m2l_->CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage),
+                            storage_index_, true);
+    // Copy helper's result into r_base, a no-op on all but MIPS.
+    m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0));
+
+    m2l_->OpUnconditionalBranch(cont_);
+  }
+
+ private:
+  LIR* const uninit_;
+  const int storage_index_;
+  const int r_base_;
+};
+
 void Mir2Lir::GenSput(uint32_t field_idx, RegLocation rl_src, bool is_long_or_double,
                       bool is_object) {
   int field_offset;
@@ -370,24 +430,20 @@
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!is_initialized) {
         // Check if r_base is NULL or a not yet initialized class.
-        // TUNING: fast path should fall through
+
+        // The slow path is invoked if r_base is NULL or if the class it points
+        // to is not yet initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
+        LIR* cont = NewLIR0(kPseudoTargetLabel);
 
-        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
-        unresolved_branch->target = unresolved_target;
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
-                             true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        OpRegCopy(r_base, TargetReg(kRet0));
-
-        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
-        initialized_branch->target = initialized_target;
+        AddSlowPath(new (arena_) StaticFieldSlowPath(this,
+                                                     unresolved_branch, uninit_branch, cont,
+                                                     storage_index, r_base));
 
         FreeTemp(r_tmp);
       }
@@ -464,24 +520,20 @@
       // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
       if (!is_initialized) {
         // Check if r_base is NULL or a not yet initialized class.
-        // TUNING: fast path should fall through
+
+        // The slow path is invoked if r_base is NULL or if the class it points
+        // to is not yet initialized.
         LIR* unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
         int r_tmp = TargetReg(kArg2);
         LockTemp(r_tmp);
-        // TODO: Fuse the compare of a constant with memory on X86 and avoid the load.
-        LoadWordDisp(r_base, mirror::Class::StatusOffset().Int32Value(), r_tmp);
-        LIR* initialized_branch = OpCmpImmBranch(kCondGe, r_tmp, mirror::Class::kStatusInitialized,
-                                                 NULL);
+        LIR* uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
+                                          mirror::Class::StatusOffset().Int32Value(),
+                                          mirror::Class::kStatusInitialized, NULL);
+        LIR* cont = NewLIR0(kPseudoTargetLabel);
 
-        LIR* unresolved_target = NewLIR0(kPseudoTargetLabel);
-        unresolved_branch->target = unresolved_target;
-        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeStaticStorage), storage_index,
-                             true);
-        // Copy helper's result into r_base, a no-op on all but MIPS.
-        OpRegCopy(r_base, TargetReg(kRet0));
-
-        LIR* initialized_target = NewLIR0(kPseudoTargetLabel);
-        initialized_branch->target = initialized_target;
+        AddSlowPath(new (arena_) StaticFieldSlowPath(this,
+                                                     unresolved_branch, uninit_branch, cont,
+                                                     storage_index, r_base));
 
         FreeTemp(r_tmp);
       }
@@ -521,6 +573,16 @@
   }
 }
 
+// Generate code for all slow paths.
+void Mir2Lir::HandleSlowPaths() {
+  int n = slow_paths_.Size();
+  for (int i = 0; i < n; ++i) {
+    LIRSlowPath* slowpath = slow_paths_.Get(i);
+    slowpath->Compile();
+  }
+  slow_paths_.Reset();
+}
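Illustrative aside (not part of the patch): the pattern introduced here is emit the fast path inline, queue a slow-path object, and compile all queued paths after the main pass. A simplified standalone model of that mechanism (plain C++ types, not the actual Mir2Lir/LIRSlowPath classes):

    #include <iostream>
    #include <memory>
    #include <vector>

    // Minimal model of the AddSlowPath / HandleSlowPaths mechanism.
    class SlowPath {
     public:
      virtual ~SlowPath() {}
      virtual void Compile() = 0;  // emits the out-of-line code and branches back to cont
    };

    class PrintSlowPath : public SlowPath {
     public:
      void Compile() override { std::cout << "emit out-of-line code, branch back to cont\n"; }
    };

    int main() {
      std::vector<std::unique_ptr<SlowPath>> slow_paths;
      // During main code generation: fast path emitted inline, slow path only queued.
      slow_paths.push_back(std::unique_ptr<SlowPath>(new PrintSlowPath()));
      // After all blocks are lowered, generate every queued slow path (cf. HandleSlowPaths).
      for (auto& sp : slow_paths) sp->Compile();
      slow_paths.clear();
      return 0;
    }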
+
 void Mir2Lir::HandleSuspendLaunchPads() {
   int num_elems = suspend_launchpads_.Size();
   ThreadOffset helper_offset = QUICK_ENTRYPOINT_OFFSET(pTestSuspend);
@@ -789,32 +851,40 @@
         type_idx) || SLOW_TYPE_PATH) {
       // Slow path, at runtime test if type is null and if so initialize
       FlushAllRegs();
-      LIR* branch1 = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL);
-      // Resolved, store and hop over following code
+      LIR* branch = OpCmpImmBranch(kCondEq, rl_result.low_reg, 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+      // Slow path object for class resolution.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, const int type_idx,
+                 const RegLocation& rl_method, const RegLocation& rl_result) :
+                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), type_idx_(type_idx),
+                   rl_method_(rl_method), rl_result_(rl_result) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx_,
+                                        rl_method_.low_reg, true);
+          m2l_->OpRegCopy(rl_result_.low_reg,  m2l_->TargetReg(kRet0));
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+        const int type_idx_;
+        const RegLocation rl_method_;
+        const RegLocation rl_result_;
+      };
+
+      // Queue the slow path; it is compiled later by HandleSlowPaths().
+      AddSlowPath(new (arena_) SlowPath(this, branch, cont,
+                                        type_idx, rl_method, rl_result));
+
       StoreValue(rl_dest, rl_result);
-      /*
-       * Because we have stores of the target value on two paths,
-       * clobber temp tracking for the destination using the ssa name
-       */
-      ClobberSReg(rl_dest.s_reg_low);
-      LIR* branch2 = OpUnconditionalBranch(0);
-      // TUNING: move slow path to end & remove unconditional branch
-      LIR* target1 = NewLIR0(kPseudoTargetLabel);
-      // Call out to helper, which will return resolved type in kArg0
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx,
-                              rl_method.low_reg, true);
-      RegLocation rl_result = GetReturn(false);
-      StoreValue(rl_dest, rl_result);
-      /*
-       * Because we have stores of the target value on two paths,
-       * clobber temp tracking for the destination using the ssa name
-       */
-      ClobberSReg(rl_dest.s_reg_low);
-      // Rejoin code paths
-      LIR* target2 = NewLIR0(kPseudoTargetLabel);
-      branch1->target = target1;
-      branch2->target = target2;
-    } else {
+     } else {
       // Fast path, we're done - just store result
       StoreValue(rl_dest, rl_result);
     }
@@ -830,36 +900,65 @@
     // slow path, resolve string if not in dex cache
     FlushAllRegs();
     LockCallTemps();  // Using explicit registers
-    LoadCurrMethodDirect(TargetReg(kArg2));
-    LoadWordDisp(TargetReg(kArg2),
-                 mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), TargetReg(kArg0));
+
+    // If the Method* is already in a register, we can save a copy.
+    RegLocation rl_method = mir_graph_->GetMethodLoc();
+    int r_method;
+    if (rl_method.location == kLocPhysReg) {
+      // A temp would conflict with register use below.
+      DCHECK(!IsTemp(rl_method.low_reg));
+      r_method = rl_method.low_reg;
+    } else {
+      r_method = TargetReg(kArg2);
+      LoadCurrMethodDirect(r_method);
+    }
+    LoadWordDisp(r_method, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(),
+                 TargetReg(kArg0));
+
     // Might call out to helper, which will return resolved string in kRet0
-    int r_tgt = CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
     LoadWordDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0));
-    LoadConstant(TargetReg(kArg1), string_idx);
-    if (cu_->instruction_set == kThumb2) {
-      OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
+    if (cu_->instruction_set == kThumb2 ||
+        cu_->instruction_set == kMips) {
+      //  OpRegImm(kOpCmp, TargetReg(kRet0), 0);  // Is resolved?
+      LoadConstant(TargetReg(kArg1), string_idx);
+      LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
       GenBarrier();
-      // For testing, always force through helper
-      if (!EXERCISE_SLOWEST_STRING_PATH) {
-        OpIT(kCondEq, "T");
-      }
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
-      LIR* call_inst = OpReg(kOpBlx, r_tgt);    // .eq, helper(Method*, string_idx)
-      MarkSafepointPC(call_inst);
-      FreeTemp(r_tgt);
-    } else if (cu_->instruction_set == kMips) {
-      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
-      OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));   // .eq
-      LIR* call_inst = OpReg(kOpBlx, r_tgt);
-      MarkSafepointPC(call_inst);
-      FreeTemp(r_tgt);
-      LIR* target = NewLIR0(kPseudoTargetLabel);
-      branch->target = target;
+
+      // Slow path object for string resolution.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, int r_method) :
+          LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), r_method_(r_method) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          int r_tgt = m2l_->CallHelperSetup(QUICK_ENTRYPOINT_OFFSET(pResolveString));
+
+          m2l_->OpRegCopy(m2l_->TargetReg(kArg0), r_method_);   // .eq
+          LIR* call_inst = m2l_->OpReg(kOpBlx, r_tgt);
+          m2l_->MarkSafepointPC(call_inst);
+          m2l_->FreeTemp(r_tgt);
+
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+
+       private:
+         int r_method_;
+      };
+
+      // Queue the slow path; it is compiled later by HandleSlowPaths().
+      AddSlowPath(new (arena_) SlowPath(this, fromfast, cont, r_method));
     } else {
       DCHECK_EQ(cu_->instruction_set, kX86);
-      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), TargetReg(kArg2),
+      LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kRet0), 0, NULL);
+      LoadConstant(TargetReg(kArg1), string_idx);
+      CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pResolveString), r_method,
                               TargetReg(kArg1), true);
+      LIR* target = NewLIR0(kPseudoTargetLabel);
+      branch->target = target;
     }
     GenBarrier();
     StoreValue(rl_dest, GetReturn(false));
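Illustrative aside (not part of the patch): the fast path above reduces to reading the dex-cache slot for the string and only calling pResolveString when that slot is still null. A hedged standalone model of that check, with made-up names:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Model only: a null dex-cache entry means the pResolveString helper must run.
    bool NeedsResolveStringSlowPath(const std::vector<const void*>& dex_cache_strings,
                                    uint32_t string_idx) {
      return dex_cache_strings[string_idx] == nullptr;
    }

    int main() {
      std::vector<const void*> cache(4, nullptr);
      assert(NeedsResolveStringSlowPath(cache, 2));   // unresolved -> slow path
      int dummy = 0;
      cache[2] = &dummy;
      assert(!NeedsResolveStringSlowPath(cache, 2));  // resolved -> fast path
      return 0;
    }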
@@ -883,13 +982,53 @@
   // alloc will always check for resolution, do we also need to verify
   // access because the verifier was unable to?
   ThreadOffset func_offset(-1);
-  if (cu_->compiler_driver->CanAccessInstantiableTypeWithoutChecks(
-      cu_->method_idx, *cu_->dex_file, type_idx)) {
-    func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObject);
+  const DexFile* dex_file = cu_->dex_file;
+  CompilerDriver* driver = cu_->compiler_driver;
+  if (driver->CanAccessInstantiableTypeWithoutChecks(
+      cu_->method_idx, *dex_file, type_idx)) {
+    bool is_type_initialized;
+    bool use_direct_type_ptr;
+    uintptr_t direct_type_ptr;
+    if (kEmbedClassInCode &&
+        driver->CanEmbedTypeInCode(*dex_file, type_idx,
+                                   &is_type_initialized, &use_direct_type_ptr, &direct_type_ptr)) {
+      // The fast path.
+      if (!use_direct_type_ptr) {
+        // Use the literal pool and a PC-relative load from a data word.
+        LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+        if (data_target == nullptr) {
+          data_target = AddWordData(&class_literal_list_, type_idx);
+        }
+        LIR* load_pc_rel = OpPcRelLoad(TargetReg(kArg0), data_target);
+        AppendLIR(load_pc_rel);
+        if (!is_type_initialized) {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
+          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+        } else {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectInitialized);
+          CallRuntimeHelperRegMethod(func_offset, TargetReg(kArg0), true);
+        }
+      } else {
+        // Use the direct pointer.
+        if (!is_type_initialized) {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectResolved);
+          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+        } else {
+          func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectInitialized);
+          CallRuntimeHelperImmMethod(func_offset, direct_type_ptr, true);
+        }
+      }
+    } else {
+      // The slow path.
+      DCHECK_EQ(func_offset.Int32Value(), -1);
+      func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObject);
+      CallRuntimeHelperImmMethod(func_offset, type_idx, true);
+    }
+    DCHECK_NE(func_offset.Int32Value(), -1);
   } else {
     func_offset = QUICK_ENTRYPOINT_OFFSET(pAllocObjectWithAccessCheck);
+    CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   }
-  CallRuntimeHelperImmMethod(func_offset, type_idx, true);
   RegLocation rl_result = GetReturn(false);
   StoreValue(rl_dest, rl_result);
 }
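Illustrative aside (not part of the patch): the entrypoint selection added above can be read as a small decision table. The helper below only illustrates that logic (kEmbedClassInCode is folded into the can_embed_type flag); it is not code from the patch:

    #include <cassert>
    #include <string>

    // Which quick entrypoint GenNewInstance picks, per the branches above.
    std::string SelectAllocObjectEntrypoint(bool can_access_without_checks, bool can_embed_type,
                                            bool is_type_initialized) {
      if (!can_access_without_checks) return "pAllocObjectWithAccessCheck";
      if (!can_embed_type) return "pAllocObject";  // slow path: resolve via type_idx
      return is_type_initialized ? "pAllocObjectInitialized" : "pAllocObjectResolved";
    }

    int main() {
      assert(SelectAllocObjectEntrypoint(false, false, false) == "pAllocObjectWithAccessCheck");
      assert(SelectAllocObjectEntrypoint(true, false, false) == "pAllocObject");
      assert(SelectAllocObjectEntrypoint(true, true, false) == "pAllocObjectResolved");
      assert(SelectAllocObjectEntrypoint(true, true, true) == "pAllocObjectInitialized");
      return 0;
    }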
@@ -903,6 +1042,9 @@
 // question with simple comparisons.
 void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                                  RegLocation rl_src) {
+  // X86 has its own implementation.
+  DCHECK_NE(cu_->instruction_set, kX86);
+
   RegLocation object = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   int result_reg = rl_result.low_reg;
@@ -1092,8 +1234,7 @@
   // Note: currently type_known_final is unused, as optimizing will only improve the performance
   // of the exception throw path.
   DexCompilationUnit* cu = mir_graph_->GetCurrentDexCompilationUnit();
-  const MethodReference mr(cu->GetDexFile(), cu->GetDexMethodIndex());
-  if (!needs_access_check && cu_->compiler_driver->IsSafeCast(mr, insn_idx)) {
+  if (!needs_access_check && cu_->compiler_driver->IsSafeCast(cu, insn_idx)) {
     // Verifier type analysis proved this check cast would never cause an exception.
     return;
   }
@@ -1122,37 +1263,90 @@
     LoadWordDisp(class_reg, offset_of_type, class_reg);
     if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx)) {
       // Need to test presence of type in dex cache at runtime
-      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
-      // Not resolved
-      // Call out to helper, which will return resolved type in kArg0
-      // InitializeTypeFromCode(idx, method)
-      CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx,
-                              TargetReg(kArg1), true);
-      OpRegCopy(class_reg, TargetReg(kRet0));  // Align usage with fast path
-      // Rejoin code paths
-      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
-      hop_branch->target = hop_target;
+      LIR* hop_branch = OpCmpImmBranch(kCondEq, class_reg, 0, NULL);
+      LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+      // Slow path to initialize the type.  Executed if the type is NULL.
+      class SlowPath : public LIRSlowPath {
+       public:
+        SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, const int type_idx,
+                 const int class_reg) :
+                   LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), type_idx_(type_idx),
+                   class_reg_(class_reg) {
+        }
+
+        void Compile() {
+          GenerateTargetLabel();
+
+          // Call out to helper, which will return resolved type in kArg0
+          // InitializeTypeFromCode(idx, method)
+          m2l_->CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx_,
+                                        m2l_->TargetReg(kArg1), true);
+          m2l_->OpRegCopy(class_reg_, m2l_->TargetReg(kRet0));  // Align usage with fast path
+          m2l_->OpUnconditionalBranch(cont_);
+        }
+       private:
+        const int type_idx_;
+        const int class_reg_;
+      };
+
+      AddSlowPath(new (arena_) SlowPath(this, hop_branch, cont,
+                                        type_idx, class_reg));
     }
   }
   // At this point, class_reg (kArg2) has class
   LoadValueDirectFixed(rl_src, TargetReg(kArg0));  // kArg0 <= ref
-  /* Null is OK - continue */
-  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
-  /* load object->klass_ */
-  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
-  LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
-  /* kArg1 now contains object->klass_ */
-  LIR* branch2 = NULL;
-  if (!type_known_abstract) {
-    branch2 = OpCmpBranch(kCondEq, TargetReg(kArg1), class_reg, NULL);
-  }
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), TargetReg(kArg2),
-                          TargetReg(kArg1), true);
-  /* branch target here */
-  LIR* target = NewLIR0(kPseudoTargetLabel);
-  branch1->target = target;
-  if (branch2 != NULL) {
-    branch2->target = target;
+
+  // Slow path for the case where the classes are not equal.  In this case we need
+  // to call a helper function to do the check.
+  class SlowPath : public LIRSlowPath {
+   public:
+    SlowPath(Mir2Lir* m2l, LIR* fromfast, LIR* cont, bool load):
+               LIRSlowPath(m2l, m2l->GetCurrentDexPc(), fromfast, cont), load_(load) {
+    }
+
+    void Compile() {
+      GenerateTargetLabel();
+
+      if (load_) {
+        m2l_->LoadWordDisp(m2l_->TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                           m2l_->TargetReg(kArg1));
+      }
+      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pCheckCast), m2l_->TargetReg(kArg2),
+                                    m2l_->TargetReg(kArg1), true);
+
+      m2l_->OpUnconditionalBranch(cont_);
+    }
+
+   private:
+    bool load_;
+  };
+
+  if (type_known_abstract) {
+    // Easier case: run the slow path if the target is non-null (the slow path will do the load).
+    LIR* branch = OpCmpImmBranch(kCondNe, TargetReg(kArg0), 0, NULL);
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+    AddSlowPath(new (arena_) SlowPath(this, branch, cont, true));
+  } else {
+    // Harder, more common case.  We need to generate a forward branch over the load
+    // if the target is null.  If it's non-null we perform the load and branch to the
+    // slow path if the classes are not equal.
+
+    /* Null is OK - continue */
+    LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);
+    /* load object->klass_ */
+    DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
+    LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(),
+                    TargetReg(kArg1));
+
+    LIR* branch2 = OpCmpBranch(kCondNe, TargetReg(kArg1), class_reg, NULL);
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+
+    // Add the slow path; it does not perform the load since that has already been done.
+    AddSlowPath(new (arena_) SlowPath(this, branch2, cont, false));
+
+    // Set the null check to branch to the continuation.
+    branch1->target = cont;
   }
 }
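Illustrative aside (not part of the patch): the restructured check-cast emits at most one call to pCheckCast, and whether that slow path is reached depends only on the nullness of the reference, the known-abstract flag, and the exact-class comparison. A small model of the branches above:

    #include <cassert>

    // Model only: does the generated code fall into the pCheckCast slow path?
    bool TakesCheckCastSlowPath(bool ref_is_null, bool type_known_abstract, bool klass_matches) {
      if (ref_is_null) return false;          // null always passes the cast
      if (type_known_abstract) return true;   // non-null ref: always defer to the helper
      return !klass_matches;                  // otherwise the exact-class fast path decides
    }

    int main() {
      assert(!TakesCheckCastSlowPath(true, false, false));
      assert(TakesCheckCastSlowPath(false, true, true));
      assert(!TakesCheckCastSlowPath(false, false, true));
      assert(TakesCheckCastSlowPath(false, false, false));
      return 0;
    }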
 
@@ -1232,6 +1426,7 @@
 
 void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
+  DCHECK_NE(cu_->instruction_set, kX86);
   OpKind op = kOpBkpt;
   bool is_div_rem = false;
   bool check_zero = false;
@@ -1310,15 +1505,9 @@
     } else {
       if (shift_op) {
         int t_reg = INVALID_REG;
-        if (cu_->instruction_set == kX86) {
-          // X86 doesn't require masking and must use ECX
-          t_reg = TargetReg(kCount);  // rCX
-          LoadValueDirectFixed(rl_src2, t_reg);
-        } else {
-          rl_src2 = LoadValue(rl_src2, kCoreReg);
-          t_reg = AllocTemp();
-          OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31);
-        }
+        rl_src2 = LoadValue(rl_src2, kCoreReg);
+        t_reg = AllocTemp();
+        OpRegRegImm(kOpAnd, t_reg, rl_src2.low_reg, 31);
         rl_src1 = LoadValue(rl_src1, kCoreReg);
         rl_result = EvalLoc(rl_dest, kCoreReg, true);
         OpRegRegReg(op, rl_result.low_reg, rl_src1.low_reg, t_reg);
@@ -1382,30 +1571,12 @@
  * or produce corresponding Thumb instructions directly.
  */
 
-static bool IsPowerOfTwo(int x) {
-  return (x & (x - 1)) == 0;
-}
-
 // Returns true if no more than two bits are set in 'x'.
 static bool IsPopCountLE2(unsigned int x) {
   x &= x - 1;
   return (x & (x - 1)) == 0;
 }
 
-// Returns the index of the lowest set bit in 'x'.
-static int32_t LowestSetBit(uint32_t x) {
-  int bit_posn = 0;
-  while ((x & 0xf) == 0) {
-    bit_posn += 4;
-    x >>= 4;
-  }
-  while ((x & 1) == 0) {
-    bit_posn++;
-    x >>= 1;
-  }
-  return bit_posn;
-}
-
 // Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
 // and store the result in 'rl_dest'.
 bool Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -1609,6 +1780,9 @@
         rl_src = LoadValue(rl_src, kCoreReg);
         rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
         done = true;
+      } else if (cu_->instruction_set == kX86) {
+        rl_result = GenDivRemLit(rl_dest, rl_src, lit, is_div);
+        done = true;
       } else if (cu_->instruction_set == kThumb2) {
         if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
           // Use ARM SDIV instruction for division.  For remainder we also need to
@@ -1677,7 +1851,7 @@
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenAddLong(rl_dest, rl_src1, rl_src2);
+        GenAddLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpAdd;
@@ -1686,7 +1860,7 @@
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenSubLong(rl_dest, rl_src1, rl_src2);
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpSub;
@@ -1694,8 +1868,8 @@
       break;
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
-      if (cu_->instruction_set == kThumb2) {
-        GenMulLong(rl_dest, rl_src1, rl_src2);
+      if (cu_->instruction_set != kMips) {
+        GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       } else {
         call_out = true;
@@ -1721,7 +1895,7 @@
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (cu_->instruction_set == kX86) {
-        return GenAndLong(rl_dest, rl_src1, rl_src2);
+        return GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
       }
       first_op = kOpAnd;
       second_op = kOpAnd;
@@ -1729,7 +1903,7 @@
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenOrLong(rl_dest, rl_src1, rl_src2);
+        GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpOr;
@@ -1738,7 +1912,7 @@
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenXorLong(rl_dest, rl_src1, rl_src2);
+        GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpXor;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index d942a24..6aaad66 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -142,6 +142,30 @@
   CallHelper(r_tgt, helper_offset, safepoint_pc);
 }
 
+void Mir2Lir::CallRuntimeHelperRegMethod(ThreadOffset helper_offset, int arg0, bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_NE(TargetReg(kArg1), arg0);
+  if (TargetReg(kArg0) != arg0) {
+    OpRegCopy(TargetReg(kArg0), arg0);
+  }
+  LoadCurrMethodDirect(TargetReg(kArg1));
+  ClobberCallerSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
+void Mir2Lir::CallRuntimeHelperRegMethodRegLocation(ThreadOffset helper_offset, int arg0,
+                                                    RegLocation arg2, bool safepoint_pc) {
+  int r_tgt = CallHelperSetup(helper_offset);
+  DCHECK_NE(TargetReg(kArg1), arg0);
+  if (TargetReg(kArg0) != arg0) {
+    OpRegCopy(TargetReg(kArg0), arg0);
+  }
+  LoadCurrMethodDirect(TargetReg(kArg1));
+  LoadValueDirectFixed(arg2, TargetReg(kArg2));
+  ClobberCallerSave();
+  CallHelper(r_tgt, helper_offset, safepoint_pc);
+}
+
 void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset helper_offset, RegLocation arg0,
                                                       RegLocation arg1, bool safepoint_pc) {
   int r_tgt = CallHelperSetup(helper_offset);
@@ -429,7 +453,7 @@
       if (cu->instruction_set != kX86) {
         if (direct_code == 0) {
           cg->LoadWordDisp(cg->TargetReg(kArg0),
-                           mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                           mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                            cg->TargetReg(kInvokeTgt));
         }
         break;
@@ -482,7 +506,7 @@
     case 4:  // Get the compiled code address [uses kArg0, sets kInvokeTgt]
       if (cu->instruction_set != kX86) {
         cg->LoadWordDisp(cg->TargetReg(kArg0),
-                         mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                          cg->TargetReg(kInvokeTgt));
         break;
       }
@@ -537,7 +561,7 @@
     case 5:  // Get the compiled code address [use kArg0, set kInvokeTgt]
       if (cu->instruction_set != kX86) {
         cg->LoadWordDisp(cg->TargetReg(kArg0),
-                         mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
                          cg->TargetReg(kInvokeTgt));
         break;
       }
@@ -787,42 +811,145 @@
     }
   }
 
+  // Logic below assumes that Method pointer is at offset zero from SP.
+  DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+  // The first 3 arguments are passed via registers.
+  // TODO: For 64-bit, instead of hardcoding 4 for the Method* size, we should use either
+  // the size of uintptr_t or the size of an object reference, according to the model being used.
+  int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t));
   int start_offset = SRegOffset(info->args[3].s_reg_low);
-  int outs_offset = 4 /* Method* */ + (3 * 4);
-  if (cu_->instruction_set != kThumb2) {
+  int regs_left_to_pass_via_stack = info->num_arg_words - 3;
+  DCHECK_GT(regs_left_to_pass_via_stack, 0);
+
+  if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) {
+    // Use vldm/vstm pair using kArg3 as a temp
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
+    LIR* ld = OpVldm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    // TUNING: loosen barrier
+    ld->u.m.def_mask = ENCODE_ALL;
+    SetMemRefType(ld, true /* is_load */, kDalvikReg);
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+    LIR* st = OpVstm(TargetReg(kArg3), regs_left_to_pass_via_stack);
+    SetMemRefType(st, false /* is_load */, kDalvikReg);
+    st->u.m.def_mask = ENCODE_ALL;
+    call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                             direct_code, direct_method, type);
+  } else if (cu_->instruction_set == kX86) {
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    while (regs_left_to_pass_via_stack > 0) {
+      // This is based on the knowledge that the stack itself is 16-byte aligned.
+      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+      size_t bytes_to_move;
+
+      /*
+       * The amount to move defaults to 32-bit. If there are exactly 4 registers left to move,
+       * do a 128-bit move, because there will be no later chance to get aligned. If more than
+       * 4 registers are left, do a 128-bit move only if either src or dest is already aligned,
+       * since otherwise a smaller move first could potentially get us to an aligned boundary.
+       */
+      if (regs_left_to_pass_via_stack == 4 ||
+          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+        // Moving 128-bits via xmm register.
+        bytes_to_move = sizeof(uint32_t) * 4;
+
+        // Allocate a free xmm temp. Since we are working through the calling sequence,
+        // we expect to have an xmm temporary available.
+        int temp = AllocTempDouble();
+        CHECK_GT(temp, 0);
+
+        LIR* ld1 = nullptr;
+        LIR* ld2 = nullptr;
+        LIR* st1 = nullptr;
+        LIR* st2 = nullptr;
+
+        /*
+         * The logic is similar for both loads and stores. If we have 16-byte alignment,
+         * do an aligned move. If we have 8-byte alignment, then do the move in two
+         * parts. This approach prevents possible cache line splits. Finally, fall back
+         * to doing an unaligned move. In most cases we likely won't split the cache
+         * line but we cannot prove it and thus take a conservative approach.
+         */
+        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+        if (src_is_16b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+        } else if (src_is_8b_aligned) {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1), kMovHi128FP);
+        } else {
+          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+        }
+
+        if (dest_is_16b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+        } else if (dest_is_8b_aligned) {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1), temp, kMovHi128FP);
+        } else {
+          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+        }
+
+        // TODO: If we could keep track of aliasing information for memory accesses that are wider
+        // than 64-bit, we wouldn't need to set up a barrier.
+        if (ld1 != nullptr) {
+          if (ld2 != nullptr) {
+            // For 64-bit load we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+          } else {
+            // Set barrier for 128-bit load.
+            SetMemRefType(ld1, true /* is_load */, kDalvikReg);
+            ld1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+        if (st1 != nullptr) {
+          if (st2 != nullptr) {
+            // For 64-bit store we can actually set up the aliasing information.
+            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+          } else {
+            // Set barrier for 128-bit store.
+            SetMemRefType(st1, false /* is_load */, kDalvikReg);
+            st1->u.m.def_mask = ENCODE_ALL;
+          }
+        }
+
+        // Free the temporary used for the data movement.
+        FreeTemp(temp);
+      } else {
+        // Moving 32-bits via general purpose register.
+        bytes_to_move = sizeof(uint32_t);
+
+        // Instead of allocating a new temp, simply reuse one of the registers being used
+        // for argument passing.
+        int temp = TargetReg(kArg3);
+
+        // Now load the argument VR and store to the outs.
+        LoadWordDisp(TargetReg(kSp), current_src_offset, temp);
+        StoreWordDisp(TargetReg(kSp), current_dest_offset, temp);
+      }
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+  } else {
     // Generate memcpy
     OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
     OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
     CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0),
                                TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
-  } else {
-    if (info->num_arg_words >= 20) {
-      // Generate memcpy
-      OpRegRegImm(kOpAdd, TargetReg(kArg0), TargetReg(kSp), outs_offset);
-      OpRegRegImm(kOpAdd, TargetReg(kArg1), TargetReg(kSp), start_offset);
-      CallRuntimeHelperRegRegImm(QUICK_ENTRYPOINT_OFFSET(pMemcpy), TargetReg(kArg0),
-                                 TargetReg(kArg1), (info->num_arg_words - 3) * 4, false);
-    } else {
-      // Use vldm/vstm pair using kArg3 as a temp
-      int regs_left = std::min(info->num_arg_words - 3, 16);
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), start_offset);
-      LIR* ld = OpVldm(TargetReg(kArg3), regs_left);
-      // TUNING: loosen barrier
-      ld->u.m.def_mask = ENCODE_ALL;
-      SetMemRefType(ld, true /* is_load */, kDalvikReg);
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      OpRegRegImm(kOpAdd, TargetReg(kArg3), TargetReg(kSp), 4 /* Method* */ + (3 * 4));
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-      LIR* st = OpVstm(TargetReg(kArg3), regs_left);
-      SetMemRefType(st, false /* is_load */, kDalvikReg);
-      st->u.m.def_mask = ENCODE_ALL;
-      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
-                               direct_code, direct_method, type);
-    }
   }
 
   call_state = LoadArgRegs(info, call_state, next_call_insn,
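Illustrative aside (not part of the patch): the 128-bit-vs-32-bit copy decision in the x86 loop above depends only on how many argument words remain and on 16-byte alignment of the source or destination. A standalone sketch of that rule, with made-up names:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Mirrors the choice in the argument-copy loop: move 16 bytes via an xmm register
    // when it cannot hurt alignment, otherwise move a single 32-bit word.
    size_t BytesToMove(int regs_left, uint32_t src_offset, uint32_t dest_offset) {
      bool src_aligned = (src_offset & 0xF) == 0;
      bool dest_aligned = (dest_offset & 0xF) == 0;
      if (regs_left == 4 || (regs_left > 4 && (src_aligned || dest_aligned))) {
        return sizeof(uint32_t) * 4;  // 128-bit move
      }
      return sizeof(uint32_t);        // 32-bit move, may bring us to a 16-byte boundary
    }

    int main() {
      assert(BytesToMove(4, 4, 8) == 16);   // exactly four words left: always 128-bit
      assert(BytesToMove(6, 0, 4) == 16);   // source is 16-byte aligned
      assert(BytesToMove(6, 4, 8) == 4);    // neither aligned: smaller move to reach alignment
      return 0;
    }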
@@ -1205,10 +1332,6 @@
     // TODO - add Mips implementation
     return false;
   }
-  if (cu_->instruction_set == kX86 && is_object) {
-    // TODO: fix X86, it exhausts registers for card marking.
-    return false;
-  }
   // Unused - RegLocation rl_src_unsafe = info->args[0];
   RegLocation rl_src_obj = info->args[1];  // Object
   RegLocation rl_src_offset = info->args[2];  // long low
@@ -1228,6 +1351,9 @@
     rl_value = LoadValue(rl_src_value, kCoreReg);
     StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
   }
+
+  // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
+  FreeTemp(rl_offset.low_reg);
   if (is_volatile) {
     GenMemBarrier(kStoreLoad);
   }
@@ -1311,7 +1437,7 @@
   } else {
     if (fast_path) {
       call_inst = OpMem(kOpBlx, TargetReg(kArg0),
-                        mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value());
+                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
     } else {
       ThreadOffset trampoline(-1);
       switch (info->type) {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 8f2f6ad..f7c2821 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -294,6 +294,85 @@
   }
 }
 
+void Mir2Lir::StoreFinalValue(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(rl_src.location, kLocPhysReg);
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopy(rl_dest.low_reg, rl_src.low_reg);
+  } else {
+    // Just re-assign the register.  Dest gets Src's reg.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to home location)
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+  MarkDirty(rl_dest);
+
+  ResetDefLoc(rl_dest);
+  if (IsDirty(rl_dest.low_reg) &&
+      oat_live_out(rl_dest.s_reg_low)) {
+    LIR *def_start = last_lir_insn_;
+    StoreBaseDisp(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                  rl_dest.low_reg, kWord);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    if (!rl_dest.ref) {
+      // Exclude references from store elimination
+      MarkDef(rl_dest, def_start, def_end);
+    }
+  }
+}
+
+void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg));
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_src.wide);
+  DCHECK_EQ(rl_src.location, kLocPhysReg);
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, rl_src.low_reg, rl_src.high_reg);
+  } else {
+    // Just re-assign the registers.  Dest gets Src's regs.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.high_reg = rl_src.high_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+    Clobber(rl_src.high_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to home location).
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+
+  // Does this wide value live in two registers (or in a single vector register)?
+  if (rl_dest.low_reg != rl_dest.high_reg) {
+    MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
+    MarkDirty(rl_dest);
+    MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+  } else {
+    // This must be an x86 vector register value.
+    DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86));
+    MarkDirty(rl_dest);
+  }
+
+  ResetDefLocWide(rl_dest);
+  if ((IsDirty(rl_dest.low_reg) ||
+      IsDirty(rl_dest.high_reg)) &&
+      (oat_live_out(rl_dest.s_reg_low) ||
+      oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) {
+    LIR *def_start = last_lir_insn_;
+    DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
+              mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
+    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                      rl_dest.low_reg, rl_dest.high_reg);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    MarkDefWide(rl_dest, def_start, def_end);
+  }
+}
+
 /* Utilities to load the current Method* */
 void Mir2Lir::LoadCurrMethodDirect(int r_tgt) {
   LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt);
@@ -303,4 +382,47 @@
   return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
 }
 
+RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
+  DCHECK(!loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
+  if (IsTemp(loc.low_reg)) {
+    Clobber(loc.low_reg);
+  } else {
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+
+  // Ensure that this doesn't represent the original SR any more.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
+RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
+  DCHECK(loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
+  if (IsTemp(loc.low_reg)) {
+    Clobber(loc.low_reg);
+  } else {
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+  if (IsTemp(loc.high_reg)) {
+    Clobber(loc.high_reg);
+  } else {
+    int temp_high = AllocTemp();
+    OpRegCopy(temp_high, loc.high_reg);
+    loc.high_reg = temp_high;
+  }
+
+  // Ensure that this doesn't represent the original SR any more.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index a5a14d5..11b8f83 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
@@ -151,6 +151,8 @@
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
@@ -175,6 +177,9 @@
 
   private:
     void ConvertShortToLongBranch(LIR* lir);
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2, bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 180d56c..013041a 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -250,6 +250,17 @@
   return rl_result;
 }
 
+RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Mips";
+  return rl_dest;
+}
+
+RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Mips";
+  return rl_dest;
+}
+
 void MipsMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) {
   LOG(FATAL) << "Unexpected use of OpLea for Arm";
 }
@@ -356,13 +367,13 @@
   return NULL;
 }
 
-void MipsMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenMulLong for Mips";
 }
 
-void MipsMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -383,8 +394,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -425,18 +436,19 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void MipsMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1,
                              RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Mips";
 }
 
-void MipsMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void MipsMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Mips";
 }
 
-void MipsMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenXorLong for Mips";
 }
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 1aee06c..b744adc 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -15,12 +15,15 @@
  */
 
 #include "codegen_mips.h"
+
+#include <inttypes.h>
+
+#include <string>
+
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "mips_lir.h"
 
-#include <string>
-
 namespace art {
 
 static int core_regs[] = {r_ZERO, r_AT, r_V0, r_V1, r_A0, r_A1, r_A2, r_A3,
@@ -203,9 +206,9 @@
              snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
              break;
            case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
-                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 2),
-                      lir->target);
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+                 lir->target);
              break;
            case 'T':
              snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index c5e2b36..21c971c 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -325,6 +325,16 @@
   return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* MipsMir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
+LIR* MipsMir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  UNIMPLEMENTED(FATAL);
+  return nullptr;
+}
+
 LIR* MipsMir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   LOG(FATAL) << "Unexpected use of OpCondRegReg for MIPS";
   return NULL;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6281eff..ae54fb8 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -342,8 +342,8 @@
       bool is_safe = is_null;  // Always safe to store null.
       if (!is_safe) {
         // Check safety from verifier type information.
-        const MethodReference mr(cu_->dex_file, cu_->method_idx);
-        is_safe = cu_->compiler_driver->IsSafeCast(mr, mir->offset);
+        const DexCompilationUnit* unit = mir_graph_->GetCurrentDexCompilationUnit();
+        is_safe = cu_->compiler_driver->IsSafeCast(unit, mir->offset);
       }
       if (is_null || is_safe) {
         // Store of constant null doesn't require an assignability test and can be generated inline
@@ -762,11 +762,13 @@
       // Combine check and work halves of throwing instruction.
       MIR* work_half = mir->meta.throw_insn;
       mir->dalvikInsn.opcode = work_half->dalvikInsn.opcode;
+      mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
       opcode = work_half->dalvikInsn.opcode;
       SSARepresentation* ssa_rep = work_half->ssa_rep;
       work_half->ssa_rep = mir->ssa_rep;
       mir->ssa_rep = ssa_rep;
       work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2);
+      work_half->meta.throw_insn = mir;
     }
 
     if (opcode >= kMirOpFirst) {
@@ -837,6 +839,8 @@
       next_bb = iter.Next();
     } while ((next_bb != NULL) && (next_bb->block_type == kDead));
   }
+  HandleSlowPaths();
+
   cu_->NewTimingSplit("Launchpads");
   HandleSuspendLaunchPads();
 
@@ -845,4 +849,15 @@
   HandleIntrinsicLaunchPads();
 }
 
+//
+// LIR Slow Path
+//
+
+LIR* Mir2Lir::LIRSlowPath::GenerateTargetLabel() {
+  LIR* target = m2l_->RawLIR(current_dex_pc_, kPseudoTargetLabel);
+  m2l_->AppendLIR(target);
+  fromfast_->target = target;
+  m2l_->SetCurrentDexPc(current_dex_pc_);
+  return target;
+}
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index c157327..3a68044 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -258,6 +258,63 @@
       bool first_in_pair;
     };
 
+    //
+    // Slow paths.  This object is used to generate a sequence of code that is executed in the
+    // slow path.  For example, resolving a string or class is slow, and it only needs to be
+    // executed once (after that it is resolved and doesn't need to be done again).  We want slow
+    // paths to be placed out-of-line, and not to require a (probably mispredicted) conditional
+    // forward branch over them.
+    //
+    // If you want to create a slow path, declare a class derived from LIRSlowPath and provide
+    // the Compile() function that will be called near the end of the code generated by the
+    // method.
+    //
+    // The basic flow for a slow path is:
+    //
+    //     CMP reg, #value
+    //     BEQ fromfast
+    //   cont:
+    //     ...
+    //     fast path code
+    //     ...
+    //     more code
+    //     ...
+    //     RETURN
+    //
+    //   fromfast:
+    //     ...
+    //     slow path code
+    //     ...
+    //     B cont
+    //
+    // So we need two labels and two branches.  The first branch (called fromfast) is the
+    // conditional branch into the slow path code.  The second label (called cont) is the
+    // target of the unconditional branch that returns to the code after the slow path
+    // has completed.
+    //
+
+    class LIRSlowPath {
+     public:
+      LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
+                  LIR* cont = nullptr) :
+        m2l_(m2l), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
+      }
+      virtual ~LIRSlowPath() {}
+      virtual void Compile() = 0;
+
+      static void* operator new(size_t size, ArenaAllocator* arena) {
+        return arena->Alloc(size, ArenaAllocator::kAllocData);
+      }
+
+     protected:
+      LIR* GenerateTargetLabel();
+
+      Mir2Lir* const m2l_;
+      const DexOffset current_dex_pc_;
+      LIR* const fromfast_;
+      LIR* const cont_;
+    };
+
     virtual ~Mir2Lir() {}
 
     int32_t s4FromSwitchData(const void* switch_data) {
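Reviewer note: a minimal sketch of how a backend is expected to use the new slow-path API (the class name, slow-path body, and fast-path snippet are illustrative only; LIRSlowPath, GenerateTargetLabel(), AddSlowPath() and the arena operator new are the pieces added by this change):

    // Typically declared locally in a target backend's codegen routine.
    class ResolveSomethingSlowPath : public Mir2Lir::LIRSlowPath {
     public:
      ResolveSomethingSlowPath(Mir2Lir* m2l, DexOffset dexpc, LIR* fromfast, LIR* cont)
          : LIRSlowPath(m2l, dexpc, fromfast, cont) {}

      void Compile() {
        GenerateTargetLabel();               // Emits the out-of-line 'fromfast' label.
        // ... slow-path work, e.g. a runtime helper call ...
        m2l_->OpUnconditionalBranch(cont_);  // Branch back to the fast path.
      }
    };

    // In the fast path (sketch):
    //   LIR* branch = OpCmpImmBranch(kCondEq, some_reg, 0, nullptr);  // becomes 'fromfast'
    //   LIR* cont = NewLIR0(kPseudoTargetLabel);                      // continuation label
    //   AddSlowPath(new (arena_) ResolveSomethingSlowPath(this, current_dalvik_offset_, branch, cont));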
@@ -310,6 +367,23 @@
     void InsertLIRBefore(LIR* current_lir, LIR* new_lir);
     void InsertLIRAfter(LIR* current_lir, LIR* new_lir);
 
+    /**
+     * @brief Provides the maximum number of compiler temporaries that the backend can/wants
+     * to place in a frame.
+     * @return Returns the maximum number of compiler temporaries.
+     */
+    size_t GetMaxPossibleCompilerTemps() const;
+
+    /**
+     * @brief Provides the number of bytes needed in the frame for spilling of compiler temporaries.
+     * @return Returns the size in bytes for space needed for compiler temporary spill region.
+     */
+    size_t GetNumBytesForCompilerTempSpillRegion();
+
+    DexOffset GetCurrentDexPc() const {
+      return current_dalvik_offset_;
+    }
+
     int ComputeFrameSize();
     virtual void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
@@ -457,6 +531,7 @@
     void HandleSuspendLaunchPads();
     void HandleIntrinsicLaunchPads();
     void HandleThrowLaunchPads();
+    void HandleSlowPaths();
     void GenBarrier();
     LIR* GenCheck(ConditionCode c_code, ThrowKind kind);
     LIR* GenImmedCheck(ConditionCode c_code, int reg, int imm_val,
@@ -497,8 +572,6 @@
                       RegLocation rl_src1, RegLocation rl_src2);
     void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift);
-    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                       RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src, int lit);
     void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
@@ -507,8 +580,11 @@
                            RegLocation rl_src);
     void GenSuspendTest(int opt_flags);
     void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
+    virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                       RegLocation rl_src1, RegLocation rl_src2);
 
     // Shared by all targets - implemented in gen_invoke.cc.
     int CallHelperSetup(ThreadOffset helper_offset);
@@ -529,6 +605,9 @@
                                  bool safepoint_pc);
     void CallRuntimeHelperImmMethod(ThreadOffset helper_offset, int arg0,
                                     bool safepoint_pc);
+    void CallRuntimeHelperRegMethod(ThreadOffset helper_offset, int arg0, bool safepoint_pc);
+    void CallRuntimeHelperRegMethodRegLocation(ThreadOffset helper_offset, int arg0,
+                                               RegLocation arg2, bool safepoint_pc);
     void CallRuntimeHelperRegLocationRegLocation(ThreadOffset helper_offset,
                                                  RegLocation arg0, RegLocation arg1,
                                                  bool safepoint_pc);
@@ -627,6 +706,30 @@
      */
     void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
+    /**
+     * @brief Used to do the final store to a destination as per bytecode semantics.
+     * @see StoreValue
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location. It must be kLocPhysReg
+     *
+     * This is used for x86 two operand computations, where we have computed the correct
+     * register value that now needs to be properly registered.  This is used to avoid an
+     * extra register copy that would result if StoreValue was called.
+     */
+    void StoreFinalValue(RegLocation rl_dest, RegLocation rl_src);
+
+    /**
+     * @brief Used to do the final store in a wide destination as per bytecode semantics.
+     * @see StoreValueWide
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location. It must be kLocPhysReg
+     *
+     * This is used for x86 two operand computations, where we have computed the correct
+     * register values that now need to be properly registered.  This is used to avoid an
+     * extra pair of register copies that would result if StoreValueWide was called.
+     */
+    void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
+
     // Shared by all targets - implemented in mir_to_lir.cc.
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
     void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
@@ -634,7 +737,19 @@
     void SpecialMIR2LIR(const InlineMethod& special);
     void MethodMIR2LIR();
 
-
+    // Routines that work for the generic case, but may be overridden by the target.
+    /*
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register that can be used if a compare-to-memory operation
+     * is not supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     * @returns The branch instruction that was generated.
+     */
+    virtual LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target);
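Reviewer note: conceptually, a generic fallback for targets without a compare-to-memory form would look roughly like the sketch below (illustrative only, not the code added by this change; the x86 backend overrides this to compare against memory directly):

    LIR* Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
                                    int offset, int check_value, LIR* target) {
      LoadWordDisp(base_reg, offset, temp_reg);   // Pull the word into the scratch register.
      return OpCmpImmBranch(cond, temp_reg, check_value, target);
    }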
 
     // Required for target - codegen helpers.
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -695,11 +810,14 @@
     // Required for target - Dalvik-level generators.
     virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2) = 0;
-    virtual void GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenMulLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAddLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAndLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual void GenArithOpDouble(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1,
@@ -727,11 +845,14 @@
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
     virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
     virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0;
-    virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenOrLong(Instruction::Code,
+                           RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) = 0;
-    virtual void GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenSubLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenXorLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base,
                                 int offset, ThrowKind kind) = 0;
@@ -739,6 +860,25 @@
                                   bool is_div) = 0;
     virtual RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit,
                                      bool is_div) = 0;
+    /*
+     * @brief Generate an integer div or rem operation.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param rl_src2 Divisor Location.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     */
+    virtual RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero) = 0;
+    /*
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param lit Divisor.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     */
+    virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1,
+                                     int lit, bool is_div) = 0;
     virtual void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
 
@@ -758,7 +898,14 @@
     virtual void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias,
                                      bool is_double) = 0;
     virtual void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) = 0;
+
+    /**
+     * @brief Lowers the kMirOpSelect MIR into LIR.
+     * @param bb The basic block that the MIR is in.
+     * @param mir The MIR whose opcode is kMirOpSelect.
+     */
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
+
     virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
@@ -799,6 +946,27 @@
     virtual LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2) = 0;
 
     /**
+     * @brief Used to generate an LIR that does a load from mem to reg.
+     * @param r_dest The destination physical register.
+     * @param r_base The base physical register for memory operand.
+     * @param offset The displacement for memory operand.
+     * @param move_type Specification on the move desired (size, alignment, register kind).
+     * @return Returns the generated move LIR.
+     */
+    virtual LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) = 0;
+
+    /**
+     * @brief Used to generate an LIR that does a store from reg to mem.
+     * @param r_base The base physical register for memory operand.
+     * @param offset The displacement for memory operand.
+     * @param r_src The source physical register.
+     * @param move_type Specification on the move desired (size, alignment, register kind).
+     * @return Returns the generated move LIR.
+     */
+    virtual LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) = 0;
+
+    /**
      * @brief Used for generating a conditional register to register operation.
      * @param op The opcode kind.
      * @param cc The condition code that when true will perform the opcode.
@@ -835,10 +1003,50 @@
     CompilationUnit* GetCompilationUnit() {
       return cu_;
     }
+    /*
+     * @brief Returns the index of the lowest set bit in 'x'.
+     * @param x Value to be examined.
+     * @returns The bit number of the lowest bit set in the value.
+     */
+    int32_t LowestSetBit(uint64_t x);
+    /*
+     * @brief Is this value a power of two?
+     * @param x Value to be examined.
+     * @returns 'true' if only 1 bit is set in the value.
+     */
+    bool IsPowerOfTwo(uint64_t x);
+    /*
+     * @brief Do these SRs overlap?
+     * @param rl_op1 One RegLocation
+     * @param rl_op2 The other RegLocation
+     * @return 'true' if the VR pairs overlap
+     *
+     * Check to see if a result pair has a misaligned overlap with an operand pair.  This
+     * is not usual for dx to generate, but it is legal (for now).  In a future rev of
+     * dex, we'll want to make this case illegal.
+     */
+    bool BadOverlap(RegLocation rl_op1, RegLocation rl_op2);
+
+    /*
+     * @brief Force a location (in a register) into a temporary register
+     * @param loc location of result
+     * @returns updated location
+     */
+    RegLocation ForceTemp(RegLocation loc);
+
+    /*
+     * @brief Force a wide location (in registers) into temporary registers
+     * @param loc location of result
+     * @returns updated location
+     */
+    RegLocation ForceTempWide(RegLocation loc);
+
+    virtual void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src);
+
+    void AddSlowPath(LIRSlowPath* slowpath);
 
   private:
-    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
-                            RegLocation rl_src);
     void GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                     bool type_known_abstract, bool use_declaring_class,
                                     bool can_assume_type_is_in_dex_cache,
@@ -851,10 +1059,16 @@
       p->def_end = NULL;
     }
 
+    void SetCurrentDexPc(DexOffset dexpc) {
+      current_dalvik_offset_ = dexpc;
+    }
+
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
     LIR* method_literal_list_;                 // Method literals requiring patching.
+    LIR* class_literal_list_;                  // Class literals requiring patching.
     LIR* code_literal_list_;                   // Code literals requiring patching.
     LIR* first_fixup_;                         // Doubly-linked list of LIR nodes requiring fixups.
 
@@ -905,6 +1119,8 @@
     unsigned int fp_spill_mask_;
     LIR* first_lir_insn_;
     LIR* last_lir_insn_;
+
+    GrowableArray<LIRSlowPath*> slow_paths_;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 32c22f2..eb70d8c 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -132,9 +132,15 @@
     DCHECK_LT(v_reg, cu_->num_dalvik_registers);
     return v_reg;
   } else {
-    int pos = std::abs(v_reg) - std::abs(SSA_METHOD_BASEREG);
-    DCHECK_LE(pos, cu_->num_compiler_temps);
-    return cu_->num_dalvik_registers + pos;
+    /*
+     * It must be the case that the v_reg for a temporary is less than or equal to the
+     * base reg for temps. For that reason, "position" must be zero or positive.
+     */
+    unsigned int position = std::abs(v_reg) - std::abs(static_cast<int>(kVRegTempBaseReg));
+
+    // The temporaries are placed after dalvik registers in the promotion map
+    DCHECK_LT(position, mir_graph_->GetNumUsedCompilerTemps());
+    return cu_->num_dalvik_registers + position;
   }
 }
 
@@ -897,10 +903,12 @@
  * optimization is disabled.
  */
 void Mir2Lir::DoPromotion() {
-  int reg_bias = cu_->num_compiler_temps + 1;
   int dalvik_regs = cu_->num_dalvik_registers;
-  int num_regs = dalvik_regs + reg_bias;
+  int num_regs = dalvik_regs + mir_graph_->GetNumUsedCompilerTemps();
   const int promotion_threshold = 1;
+  // Allocate the promotion map - one entry for each Dalvik vReg or compiler temp
+  promotion_map_ = static_cast<PromotionMap*>
+      (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), ArenaAllocator::kAllocRegAlloc));
 
   // Allow target code to add any special registers
   AdjustSpillMask();
@@ -926,16 +934,13 @@
   for (int i = 0; i < dalvik_regs; i++) {
     core_regs[i].s_reg = FpRegs[i].s_reg = i;
   }
-  // Set ssa name for Method*
-  core_regs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();
-  FpRegs[dalvik_regs].s_reg = mir_graph_->GetMethodSReg();  // For consistecy.
-  FpRegs[dalvik_regs + num_regs].s_reg = mir_graph_->GetMethodSReg();  // for consistency.
-  // Set ssa names for compiler_temps
-  for (int i = 1; i <= cu_->num_compiler_temps; i++) {
-    CompilerTemp* ct = mir_graph_->compiler_temps_.Get(i);
-    core_regs[dalvik_regs + i].s_reg = ct->s_reg;
-    FpRegs[dalvik_regs + i].s_reg = ct->s_reg;
-    FpRegs[num_regs + dalvik_regs + i].s_reg = ct->s_reg;
+
+  // Set ssa names for compiler temporaries
+  for (unsigned int ct_idx = 0; ct_idx < mir_graph_->GetNumUsedCompilerTemps(); ct_idx++) {
+    CompilerTemp* ct = mir_graph_->GetCompilerTemp(ct_idx);
+    core_regs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    FpRegs[dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
+    FpRegs[num_regs + dalvik_regs + ct_idx].s_reg = ct->s_reg_low;
   }
 
   // Duplicate in upper half to represent possible fp double starting sregs.
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 1dcff65..ae53ddb 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -211,6 +211,8 @@
 #undef SHIFT_ENCODING_MAP
 
   { kX86Cmc, kNullary, NO_OPERAND, { 0, 0, 0xF5, 0, 0, 0, 0, 0}, "Cmc", "" },
+  { kX86Shld32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32", "!0r,!1r,!2d" },
+  { kX86Shrd32RRI,  kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01  | SETS_CCODES, { 0,    0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32", "!0r,!1r,!2d" },
 
   { kX86Test8RI,  kRegImm,             IS_BINARY_OP   | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" },
   { kX86Test8MI,  kMemImm,   IS_LOAD | IS_TERTIARY_OP | REG_USE0  | SETS_CCODES, { 0,    0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" },
@@ -242,12 +244,13 @@
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
 
-  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
-  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
+  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
+  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
+  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
+  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
+  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" },
   { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
   { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0,    0, 0, 0, 0 }, "Push32R",  "!0r" },
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
@@ -287,12 +290,31 @@
   EXT_0F_ENCODING_MAP(Subss,     0xF3, 0x5C, REG_DEF0),
   EXT_0F_ENCODING_MAP(Divsd,     0xF2, 0x5E, REG_DEF0),
   EXT_0F_ENCODING_MAP(Divss,     0xF3, 0x5E, REG_DEF0),
+  EXT_0F_ENCODING_MAP(Punpckldq, 0x66, 0x62, REG_DEF0),
 
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
   { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
   { kX86FstpdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
 
+  EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
+  { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
+  { kX86MovupsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  EXT_0F_ENCODING_MAP(Movaps,    0x0, 0x28, REG_DEF0),
+  { kX86MovapsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsMR", "[!0r+!1d],!2r" },
+  { kX86MovapsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x29, 0, 0, 0, 0 }, "MovapsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovlpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovlpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x12, 0, 0, 0, 0 }, "MovlpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovlpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsMR", "[!0r+!1d],!2r" },
+  { kX86MovlpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x13, 0, 0, 0, 0 }, "MovlpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
+  { kX86MovhpsRM, kRegMem,      IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE01,  { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRM", "!0r,[!1r+!2d]" },
+  { kX86MovhpsRA, kRegArray,    IS_LOAD | IS_QUIN_OP     | REG_DEF0 | REG_USE012, { 0x0, 0, 0x0F, 0x16, 0, 0, 0, 0 }, "MovhpsRA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86MovhpsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsMR", "[!0r+!1d],!2r" },
+  { kX86MovhpsAR, kArrayReg,    IS_STORE | IS_QUIN_OP     | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" },
+
   EXT_0F_ENCODING_MAP(Movdxr,    0x66, 0x6E, REG_DEF0),
   { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0   | REG_USE01,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
   { kX86MovdrxMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" },
@@ -421,6 +443,7 @@
     case kThreadImm:  // lir operands - 0: disp, 1: imm
       return ComputeSize(entry, 0, 0x12345678, false);  // displacement size is always 32bit
     case kRegRegImm:  // lir operands - 0: reg, 1: reg, 2: imm
+    case kRegRegImmRev:
       return ComputeSize(entry, 0, 0, false);
     case kRegMemImm:  // lir operands - 0: reg, 1: base, 2: disp, 3: imm
       return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
@@ -641,7 +664,6 @@
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
-  DCHECK_NE(rX86_SP, base);
   EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -754,6 +776,22 @@
   EmitImm(entry, imm);
 }
 
+void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry,
+                                  uint8_t reg1, uint8_t reg2, int32_t imm) {
+  EmitRegRegImm(entry, reg2, reg1, imm);
+}
+
+void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry,
+                               uint8_t reg, uint8_t base, int disp, int32_t imm) {
+  EmitPrefixAndOpcode(entry);
+  DCHECK(!X86_FPREG(reg));
+  DCHECK_LT(reg, 8);
+  EmitModrmDisp(reg, base, disp);
+  DCHECK_EQ(0, entry->skeleton.modrm_opcode);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  EmitImm(entry, imm);
+}
+
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -843,6 +881,20 @@
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
 
+void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base,
+                                int displacement, uint8_t cl) {
+  DCHECK_EQ(cl, static_cast<uint8_t>(rCX));
+  EmitPrefix(entry);
+  code_buffer_.push_back(entry->skeleton.opcode);
+  DCHECK_NE(0x0F, entry->skeleton.opcode);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode1);
+  DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+  DCHECK_LT(base, 8);
+  EmitModrmDisp(entry->skeleton.modrm_opcode, base, displacement);
+  DCHECK_EQ(0, entry->skeleton.ax_opcode);
+  DCHECK_EQ(0, entry->skeleton.immediate_bytes);
+}
+
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
   if (entry->skeleton.prefix1 != 0) {
     code_buffer_.push_back(entry->skeleton.prefix1);
@@ -1120,6 +1172,17 @@
           break;
         }
         default:
+          if (lir->flags.fixup == kFixupLoad) {
+            LIR *target_lir = lir->target;
+            DCHECK(target_lir != NULL);
+            CodeOffset target = target_lir->offset;
+            lir->operands[2] = target;
+            int newSize = GetInsnSize(lir);
+            if (newSize != lir->flags.size) {
+              lir->flags.size = newSize;
+              res = kRetryAll;
+            }
+          }
           break;
       }
     }
@@ -1185,9 +1248,16 @@
       case kRegRegStore:  // lir operands - 0: reg2, 1: reg1
         EmitRegReg(entry, lir->operands[1], lir->operands[0]);
         break;
+      case kRegRegImmRev:
+        EmitRegRegImmRev(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegRegImm:
         EmitRegRegImm(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
+      case kRegMemImm:
+        EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
+                      lir->operands[3]);
+        break;
       case kRegImm:  // lir operands - 0: reg, 1: immediate
         EmitRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1203,6 +1273,9 @@
       case kShiftRegCl:  // lir operands - 0: reg, 1: cl
         EmitShiftRegCl(entry, lir->operands[0], lir->operands[1]);
         break;
+      case kShiftMemCl:  // lir operands - 0: base, 1: displacement, 2: cl
+        EmitShiftMemCl(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
+        break;
       case kRegCond:  // lir operands - 0: reg, 1: condition
         EmitRegCond(entry, lir->operands[0], lir->operands[1]);
         break;
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 4267b5b..93875c9 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -84,10 +84,19 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  int start_of_method_reg = AllocTemp();
-  // Materialize a pointer to the switch table
   // NewLIR0(kX86Bkpt);
-  NewLIR1(kX86StartOfMethod, start_of_method_reg);
+
+  // Materialize a pointer to the switch table
+  int start_of_method_reg;
+  if (base_of_code_ != nullptr) {
+    // We can use the saved value.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    rl_method = LoadValue(rl_method, kCoreReg);
+    start_of_method_reg = rl_method.low_reg;
+  } else {
+    start_of_method_reg = AllocTemp();
+    NewLIR1(kX86StartOfMethod, start_of_method_reg);
+  }
   int low_key = s4FromSwitchData(&table[2]);
   int keyReg;
   // Remove the bias, if necessary
@@ -142,7 +151,13 @@
   FlushAllRegs();   /* Everything to home location */
   LoadValueDirectFixed(rl_src, rX86_ARG0);
   // Materialize a pointer to the fill data image
-  NewLIR1(kX86StartOfMethod, rX86_ARG2);
+  if (base_of_code_ != nullptr) {
+    // We can use the saved value.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    LoadValueDirect(rl_method, rX86_ARG2);
+  } else {
+    NewLIR1(kX86StartOfMethod, rX86_ARG2);
+  }
   NewLIR2(kX86PcRelAdr, rX86_ARG1, WrapPointer(tab_rec));
   NewLIR2(kX86Add32RR, rX86_ARG1, rX86_ARG2);
   CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(pHandleFillArrayData), rX86_ARG0,
@@ -211,6 +226,13 @@
 
   FlushIns(ArgLocs, rl_method);
 
+  if (base_of_code_ != nullptr) {
+    // We have been asked to save the address of the method start for later use.
+    NewLIR1(kX86StartOfMethod, rX86_ARG0);
+    int displacement = SRegOffset(base_of_code_->s_reg_low);
+    StoreBaseDisp(rX86_SP, displacement, rX86_ARG0, kWord);
+  }
+
   FreeTemp(rX86_ARG0);
   FreeTemp(rX86_ARG1);
   FreeTemp(rX86_ARG2);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 816f2d0..4c1c171 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     LIR* GenMemImmedCheck(ConditionCode c_code, int base, int offset, int check_value,
@@ -136,7 +136,59 @@
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+    /*
+     * @brief Generate a two address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    /*
+     * @brief Generate a three address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src1 source operand
+     * @param rl_src2 constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation rl_src2, Instruction::Code op);
 
+    /**
+      * @brief Generate a long arithmetic operation.
+      * @param rl_dest The destination.
+      * @param rl_src1 First operand.
+      * @param rl_src2 Second operand.
+      * @param op The DEX opcode for the operation.
+      * @param is_commutative The sources can be swapped if needed.
+      */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, Instruction::Code op, bool is_commutative);
+
+    /**
+      * @brief Generate a two operand long arithmetic operation.
+      * @param rl_dest The destination.
+      * @param rl_src Second operand.
+      * @param op The DEX opcode for the operation.
+      */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
+    /**
+      * @brief Generate a long operation.
+      * @param rl_dest The destination.  Must be in a register
+      * @param rl_src The other operand.  May be in a register or in memory.
+      * @param op The DEX opcode for the operation.
+      */
+    void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
+    /**
+     * @brief Implement instanceof a final class with x86 specific code.
+     * @param use_declaring_class 'true' if we can use the class itself.
+     * @param type_idx Type index to use if use_declaring_class is 'false'.
+     * @param rl_dest Result to be set to 0 or 1.
+     * @param rl_src Object to be tested.
+     */
+    void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                            RegLocation rl_dest, RegLocation rl_src);
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
     LIR* OpCmpBranch(ConditionCode cond, int src1, int src2, LIR* target);
@@ -152,7 +204,11 @@
     LIR* OpRegCopyNoInsert(int r_dest, int r_src);
     LIR* OpRegImm(OpKind op, int r_dest_src1, int value);
     LIR* OpRegMem(OpKind op, int r_dest, int rBase, int offset);
+    LIR* OpMemReg(OpKind op, RegLocation rl_dest, int value);
+    LIR* OpRegMem(OpKind op, int r_dest, RegLocation value);
     LIR* OpRegReg(OpKind op, int r_dest_src1, int r_src2);
+    LIR* OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type);
+    LIR* OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type);
     LIR* OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src);
     LIR* OpRegRegImm(OpKind op, int r_dest, int r_src1, int value);
     LIR* OpRegRegReg(OpKind op, int r_dest, int r_src1, int r_src2);
@@ -179,6 +235,16 @@
     int AllocTempDouble();
     void ResetDefLocWide(RegLocation rl);
 
+    /*
+     * @brief x86 specific codegen for int operations.
+     * @param opcode Operation to perform.
+     * @param rl_dest Destination for the result.
+     * @param rl_lhs Left hand operand.
+     * @param rl_rhs Right hand operand.
+     */
+    void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                       RegLocation rl_lhs, RegLocation rl_rhs);
+
   private:
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
@@ -202,10 +268,13 @@
     void EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp);
     void EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2);
     void EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+    void EmitRegRegImmRev(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm);
+    void EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int disp, int32_t imm);
     void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm);
     void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
     void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm);
+    void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl);
     void EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl);
     void EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition);
 
@@ -230,6 +299,171 @@
                                   int64_t val, ConditionCode ccode);
     void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
     void GenConstWide(RegLocation rl_dest, int64_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param loc Register location of the operand
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @param value Immediate value for the operation.  Used for byte variants
+     * @returns the correct x86 opcode to perform the operation
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param dest location of the destination.  May be register or memory.
+     * @param rhs Location for the rhs of the operation.  May be in register or memory.
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @returns the correct x86 opcode to perform the operation
+     * @note at most one location may refer to memory
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                        bool is_high_op);
+
+    /*
+     * @brief Is this operation a no-op for this opcode and value
+     * @param op Dex opcode for the operation
+     * @param value Immediate value for the operation.
+     * @returns 'true' if the operation will have no effect
+     */
+    bool IsNoOp(Instruction::Code op, int32_t value);
+
+    /*
+     * @brief Dump a RegLocation using printf
+     * @param loc Register location to dump
+     */
+    static void DumpRegLocation(RegLocation loc);
+
+    /**
+     * @brief Calculate magic number and shift for a given divisor
+     * @param divisor divisor number for calculation
+     * @param magic hold calculated magic number
+     * @param shift hold calculated shift
+     */
+    void CalculateMagicAndShift(int divisor, int& magic, int& shift);
+
+    /*
+     * @brief Generate an integer div or rem operation.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param rl_src2 Divisor Location.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     */
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero);
+
+    /*
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src Numerator Location.
+     * @param lit Divisor.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     */
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
+
+    /*
+     * Generate code to implement long shift operations.
+     * @param opcode The DEX opcode to specify the shift type.
+     * @param rl_dest The destination.
+     * @param rl_src The value to be shifted.
+     * @param shift_amount How much to shift.
+     * @returns the RegLocation of the result.
+     */
+    RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                  RegLocation rl_src, int shift_amount);
+    /*
+     * Generate an imul of a register by a constant or a better sequence.
+     * @param dest Destination Register.
+     * @param src Source Register.
+     * @param val Constant multiplier.
+     */
+    void GenImulRegImm(int dest, int src, int val);
+
+    /*
+     * Generate an imul of a memory location by a constant or a better sequence.
+     * @param dest Destination Register.
+     * @param sreg Symbolic register.
+     * @param displacement Displacement on stack of Symbolic Register.
+     * @param val Constant multiplier.
+     */
+    void GenImulMemImm(int dest, int sreg, int displacement, int val);
+
+    /*
+     * @brief Compare memory to immediate, and branch if condition true.
+     * @param cond The condition code that when true will branch to the target.
+     * @param temp_reg A temporary register that can be used if a compare-to-memory operation
+     * is not supported by the architecture.
+     * @param base_reg The register holding the base address.
+     * @param offset The offset from the base.
+     * @param check_value The immediate to compare to.
+     */
+    LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                           int offset, int check_value, LIR* target);
+    /*
+     * Can this operation be implemented using core registers without temporaries?
+     * @param rl_lhs Left hand operand.
+     * @param rl_rhs Right hand operand.
+     * @returns 'true' if the operation can proceed without needing temporary regs.
+     */
+    bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
+
+    /*
+     * @brief Perform MIR analysis before compiling method.
+     * @note Invokes Mir2LiR::Materialize after analysis.
+     */
+    void Materialize();
+
+    /*
+     * @brief Analyze MIR before generating code, to prepare for the code generation.
+     */
+    void AnalyzeMIR();
+
+    /*
+     * @brief Analyze one basic block.
+     * @param bb Basic block to analyze.
+     */
+    void AnalyzeBB(BasicBlock * bb);
+
+    /*
+     * @brief Analyze one extended MIR instruction
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Extended instruction to analyze.
+     */
+    void AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir);
+
+    /*
+     * @brief Analyze one MIR instruction
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Instruction to analyze.
+     */
+    void AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir);
+
+    /*
+     * @brief Analyze one MIR float/double instruction
+     * @param opcode MIR instruction opcode.
+     * @param bb Basic block containing instruction.
+     * @param mir Instruction to analyze.
+     */
+    void AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir);
+
+    /*
+     * @brief Analyze one use of a double operand.
+     * @param rl_use Double RegLocation for the operand.
+     */
+    void AnalyzeDoubleUse(RegLocation rl_use);
+
+    // Information derived from analysis of MIR
+
+    // Have we decided to compute a pointer to the code and store it in a temporary VR?
+    bool store_method_addr_;
+
+    // The compiler temporary for the code address of the method.
+    CompilerTemp *base_of_code_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 6272498..006fe76 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -303,7 +303,7 @@
     rl_src2 = LoadValue(rl_src2, kFPReg);
     NewLIR2(kX86UcomissRR, rl_src1.low_reg, rl_src2.low_reg);
   }
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
       if (!gt_bias) {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 01479a9..a567a8a 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -157,8 +157,7 @@
       NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
       dest_hi = AllocTempDouble();
       NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
-      NewLIR2(kX86PsllqRI, dest_hi, 32);
-      NewLIR2(kX86OrpsRR, dest_lo, dest_hi);
+      NewLIR2(kX86PunpckldqRR, dest_lo, dest_hi);
       FreeTemp(dest_hi);
     }
   } else {
@@ -180,14 +179,104 @@
 }
 
 void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
-  UNIMPLEMENTED(FATAL) << "Need codegen for GenSelect";
+  RegLocation rl_result;
+  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
+  RegLocation rl_dest = mir_graph_->GetDest(mir);
+  rl_src = LoadValue(rl_src, kCoreReg);
+
+  // The kMirOpSelect has two variants, one for constants and one for moves.
+  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);
+
+  if (is_constant_case) {
+    int true_val = mir->dalvikInsn.vB;
+    int false_val = mir->dalvikInsn.vC;
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When the true case is zero and result_reg is not same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     * 2) When the false case is zero and result_reg is not same as src_reg:
+     *     xor result_reg, result_reg
+     *     cmp $0, src_reg
+     *     mov t1, $true_case
+     *     cmovz result_reg, t1
+     * 3) All other cases (we do compare first to set eflags):
+     *     cmp $0, src_reg
+     *     mov result_reg, $true_case
+     *     mov t1, $false_case
+     *     cmovnz result_reg, t1
+     */
+    const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg);
+    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
+    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
+    const bool catch_all_case = !(true_zero_case || false_zero_case);
+
+    if (true_zero_case || false_zero_case) {
+      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+    }
+
+    if (true_zero_case || false_zero_case || catch_all_case) {
+      OpRegImm(kOpCmp, rl_src.low_reg, 0);
+    }
+
+    if (catch_all_case) {
+      OpRegImm(kOpMov, rl_result.low_reg, true_val);
+    }
+
+    if (true_zero_case || false_zero_case || catch_all_case) {
+      int immediateForTemp = false_zero_case ? true_val : false_val;
+      int temp1_reg = AllocTemp();
+      OpRegImm(kOpMov, temp1_reg, immediateForTemp);
+
+      ConditionCode cc = false_zero_case ? kCondEq : kCondNe;
+      OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg);
+
+      FreeTemp(temp1_reg);
+    }
+  } else {
+    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
+    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
+    rl_true = LoadValue(rl_true, kCoreReg);
+    rl_false = LoadValue(rl_false, kCoreReg);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+    /*
+     * 1) When true case is already in place:
+     *     cmp $0, src_reg
+     *     cmovnz result_reg, false_reg
+     * 2) When false case is already in place:
+     *     cmp $0, src_reg
+     *     cmovz result_reg, true_reg
+     * 3) When neither cases are in place:
+     *     cmp $0, src_reg
+     *     mov result_reg, true_reg
+     *     cmovnz result_reg, false_reg
+     */
+
+    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
+    OpRegImm(kOpCmp, rl_src.low_reg, 0);
+
+    if (rl_result.low_reg == rl_true.low_reg) {
+      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+    } else if (rl_result.low_reg == rl_false.low_reg) {
+      OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg);
+    } else {
+      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
+      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
+    }
+  }
+
+  StoreValue(rl_dest, rl_result);
 }
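Reviewer note: for context, the constant variant of kMirOpSelect covers source shapes like the following (illustrative only; the exact MIR depends on the earlier select optimization in the MIR graph):

    //   int r = (x == 0) ? 1 : 0;        // true_val = 1, false_val = 0
    // which, per case 2 above (false case is zero), lowers roughly to:
    //   xor   result, result
    //   cmp   x, 0
    //   mov   t1, 1
    //   cmovz result, t1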
 
 void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
   LIR* taken = &block_label_list_[bb->taken];
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
 
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
@@ -284,18 +373,261 @@
   OpCmpImmBranch(ccode, low_reg, val_lo, taken);
 }
 
+void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
+  // It does not make sense to calculate magic and shift for zero divisor.
+  DCHECK_NE(divisor, 0);
+
+  /* According to H. S. Warren's Hacker's Delight Chapter 10 and
+   * T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication",
+   * the magic number M and shift S can be calculated in the following way:
+   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
+   * where divisor(d) >= 2.
+   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
+   * where divisor(d) <= -2.
+   * Thus nc can be calculated like:
+   * nc = 2^31 + 2^31 % d - 1, where d >= 2
+   * nc = -2^31 + (2^31 + 1) % d, where d <= -2.
+   *
+   * So the shift p is the smallest p satisfying
+   * 2^p > nc * (d - 2^p % d), where d >= 2
+   * 2^p > nc * (d + 2^p % d), where d <= -2.
+   *
+   * The magic number M is calculated by
+   * M = (2^p + d - 2^p % d) / d, where d >= 2
+   * M = (2^p - d - 2^p % d) / d, where d <= -2.
+   *
+   * Notice that p is always greater than or equal to 32, so we just return p - 32 as
+   * the shift number S.
+   */
+
+  int32_t p = 31;
+  const uint32_t two31 = 0x80000000U;
+
+  // Initialize the computations.
+  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
+  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
+  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
+  uint32_t quotient1 = two31 / abs_nc;
+  uint32_t remainder1 = two31 % abs_nc;
+  uint32_t quotient2 = two31 / abs_d;
+  uint32_t remainder2 = two31 % abs_d;
+
+  /*
+   * To avoid handling both positive and negative divisor, Hacker's Delight
+   * introduces a method to handle these 2 cases together to avoid duplication.
+   */
+  uint32_t delta;
+  do {
+    p++;
+    quotient1 = 2 * quotient1;
+    remainder1 = 2 * remainder1;
+    if (remainder1 >= abs_nc) {
+      quotient1++;
+      remainder1 = remainder1 - abs_nc;
+    }
+    quotient2 = 2 * quotient2;
+    remainder2 = 2 * remainder2;
+    if (remainder2 >= abs_d) {
+      quotient2++;
+      remainder2 = remainder2 - abs_d;
+    }
+    delta = abs_d - remainder2;
+  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
+
+  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
+  shift = p - 32;
+}
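
As a standalone reference (not part of this change), the following C++ sketch mirrors the loop above and checks the result against the well-known Hacker's Delight pair for d = 7; all names are illustrative.

#include <cassert>
#include <cstdint>

// Standalone mirror of CalculateMagicAndShift above: computes {magic, shift}.
void MagicAndShift(int32_t divisor, int32_t* magic, int32_t* shift) {
  int32_t p = 31;
  const uint32_t two31 = 0x80000000U;
  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
  uint32_t q1 = two31 / abs_nc, r1 = two31 % abs_nc;
  uint32_t q2 = two31 / abs_d,  r2 = two31 % abs_d;
  uint32_t delta;
  do {
    p++;
    q1 *= 2; r1 *= 2;
    if (r1 >= abs_nc) { q1++; r1 -= abs_nc; }
    q2 *= 2; r2 *= 2;
    if (r2 >= abs_d)  { q2++; r2 -= abs_d; }
    delta = abs_d - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));
  *magic = (divisor > 0) ? (q2 + 1) : (-q2 - 1);
  *shift = p - 32;
}

int main() {
  int32_t magic, shift;
  MagicAndShift(7, &magic, &shift);
  // Well-known pair from Hacker's Delight for d = 7: M = 0x92492493, S = 2.
  assert(static_cast<uint32_t>(magic) == 0x92492493u && shift == 2);
  return 0;
}
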
+
 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo,
                                      int lit, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
   return rl_dest;
 }
 
+RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
+                                     int imm, bool is_div) {
+  // Use a multiply (and fixup) to perform an int div/rem by a constant.
+
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Assume that the result will be in EDX.
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+                          r2, INVALID_REG, INVALID_SREG, INVALID_SREG};
+
+  // Handle the 0x80000000 / -1 special case.
+  LIR *minint_branch = 0;
+  if (imm == -1) {
+    if (is_div) {
+      LoadValueDirectFixed(rl_src, r0);
+      OpRegImm(kOpCmp, r0, 0x80000000);
+      minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
+
+      // for x != MIN_INT, x / -1 == -x.
+      NewLIR1(kX86Neg32R, r0);
+
+      LIR* branch_around = NewLIR1(kX86Jmp8, 0);
+      // The target for cmp/jmp above.
+      minint_branch->target = NewLIR0(kPseudoTargetLabel);
+      // EAX already contains the right value (0x80000000).
+      branch_around->target = NewLIR0(kPseudoTargetLabel);
+    } else {
+      // x % -1 == 0.
+      LoadConstantNoClobber(r0, 0);
+    }
+    // For this case, return the result in EAX.
+    rl_result.low_reg = r0;
+  } else {
+    DCHECK(imm <= -2 || imm >= 2);
+    // Use H. S. Warren's Hacker's Delight Chapter 10 and
+    // T. Granlund and P. L. Montgomery's "Division by invariant integers using multiplication".
+    int magic, shift;
+    CalculateMagicAndShift(imm, magic, shift);
+
+    /*
+     * For imm >= 2,
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n > 0
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n < 0.
+     * For imm <= -2,
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) + 1, when n > 0
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), when n < 0.
+     * We implement this algorithm in the following way:
+     * 1. Multiply the magic number M by the numerator n; the high 32 bits of the
+     *    result end up in EDX.
+     * 2. If imm > 0 and magic < 0, add the numerator to EDX;
+     *    if imm < 0 and magic > 0, subtract the numerator from EDX.
+     * 3. If S != 0, arithmetically shift EDX right by S bits (SAR).
+     * 4. Add 1 to EDX if EDX < 0.
+     * 5. EDX now holds the quotient.
+     */
+
+    // Numerator into EAX.
+    int numerator_reg = -1;
+    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
+      // We will need the value later.
+      if (rl_src.location == kLocPhysReg) {
+        // We can use it directly.
+        DCHECK(rl_src.low_reg != r0 && rl_src.low_reg != r2);
+        numerator_reg = rl_src.low_reg;
+      } else {
+        LoadValueDirectFixed(rl_src, r1);
+        numerator_reg = r1;
+      }
+      OpRegCopy(r0, numerator_reg);
+    } else {
+      // Only need this once.  Just put it into EAX.
+      LoadValueDirectFixed(rl_src, r0);
+    }
+
+    // EDX = magic.
+    LoadConstantNoClobber(r2, magic);
+
+    // EDX:EAX = magic * numerator.
+    NewLIR1(kX86Imul32DaR, r2);
+
+    if (imm > 0 && magic < 0) {
+      // Add numerator to EDX.
+      DCHECK_NE(numerator_reg, -1);
+      NewLIR2(kX86Add32RR, r2, numerator_reg);
+    } else if (imm < 0 && magic > 0) {
+      DCHECK_NE(numerator_reg, -1);
+      NewLIR2(kX86Sub32RR, r2, numerator_reg);
+    }
+
+    // Do we need the shift?
+    if (shift != 0) {
+      // Shift EDX by 'shift' bits.
+      NewLIR2(kX86Sar32RI, r2, shift);
+    }
+
+    // Add 1 to EDX if EDX < 0.
+
+    // Move EDX to EAX.
+    OpRegCopy(r0, r2);
+
+    // Move sign bit to bit 0, zeroing the rest.
+    NewLIR2(kX86Shr32RI, r2, 31);
+
+    // EDX = EDX + EAX.
+    NewLIR2(kX86Add32RR, r2, r0);
+
+    // Quotient is in EDX.
+    if (!is_div) {
+      // We need to compute the remainder.
+      // Remainder is numerator - (quotient * imm).
+      DCHECK_NE(numerator_reg, -1);
+      OpRegCopy(r0, numerator_reg);
+
+      // EDX = quotient * imm.
+      OpRegRegImm(kOpMul, r2, r2, imm);
+
+      // EAX = numerator - EDX, i.e. the remainder.
+      NewLIR2(kX86Sub32RR, r0, r2);
+
+      // For this case, return the result in EAX.
+      rl_result.low_reg = r0;
+    }
+  }
+
+  return rl_result;
+}
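
To make the fixup steps in the comment above concrete, here is a small standalone C++ sketch (illustrative only, not part of the patch); it assumes the known magic/shift pair for d = 7 and verifies the result against ordinary division.

#include <cassert>
#include <cstdint>

// Divide a 32-bit value by a constant using its magic number and shift,
// following the five fixup steps listed in the comment above.
int32_t DivByConstant(int32_t n, int32_t d, int32_t magic, int32_t shift) {
  int64_t product = static_cast<int64_t>(magic) * n;
  int32_t high = static_cast<int32_t>(product >> 32);   // high 32 bits (EDX)
  if (d > 0 && magic < 0) high += n;                     // add numerator
  if (d < 0 && magic > 0) high -= n;                     // subtract numerator
  high >>= shift;                                        // arithmetic shift right
  high += static_cast<uint32_t>(high) >> 31;             // add 1 if negative
  return high;
}

int main() {
  // Magic/shift pair for d = 7 as CalculateMagicAndShift would produce it.
  const int32_t magic7 = static_cast<int32_t>(0x92492493u);
  for (int32_t n : {-100, -7, -1, 0, 1, 6, 7, 100, 123456}) {
    assert(DivByConstant(n, 7, magic7, 2) == n / 7);
  }
  return 0;
}
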
+
 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo,
                                   int reg_hi, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRem for x86";
   return rl_dest;
 }
 
+RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero) {
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Load LHS into EAX.
+  LoadValueDirectFixed(rl_src1, r0);
+
+  // Load RHS into EBX.
+  LoadValueDirectFixed(rl_src2, r1);
+
+  // Copy LHS sign bit into EDX.
+  NewLIR0(kx86Cdq32Da);
+
+  if (check_zero) {
+    // Handle division by zero case.
+    GenImmedCheck(kCondEq, r1, 0, kThrowDivZero);
+  }
+
+  // Have to catch 0x80000000/-1 case, or we will get an exception!
+  OpRegImm(kOpCmp, r1, -1);
+  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // RHS is -1.
+  OpRegImm(kOpCmp, r0, 0x80000000);
+  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // In 0x80000000/-1 case.
+  if (!is_div) {
+    // For DIV, EAX is already right. For REM, we need EDX to be zero.
+    LoadConstantNoClobber(r2, 0);
+  }
+  LIR* done = NewLIR1(kX86Jmp8, 0);
+
+  // Expected case.
+  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
+  minint_branch->target = minus_one_branch->target;
+  NewLIR1(kX86Idivmod32DaR, r1);
+  done->target = NewLIR0(kPseudoTargetLabel);
+
+  // Result is in EAX for div and EDX for rem.
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+                          r0, INVALID_REG, INVALID_SREG, INVALID_SREG};
+  if (!is_div) {
+    rl_result.low_reg = r2;
+  }
+  return rl_result;
+}
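
For context on the 0x80000000 / -1 guard above: the true quotient, 2^31, is not representable as a signed 32-bit value, so a hardware idiv would raise a divide error; the guarded path instead returns MIN_INT for div and 0 for rem. A trivial standalone check of the overflow (illustrative only):

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  const int32_t min_int = std::numeric_limits<int32_t>::min();   // 0x80000000
  // The mathematically correct quotient of MIN_INT / -1 overflows int32_t,
  // which is why the generated code must special-case it.
  const int64_t true_quotient = -static_cast<int64_t>(min_int);  // 2147483648
  assert(true_quotient > std::numeric_limits<int32_t>::max());
  return 0;
}
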
+
 bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
   DCHECK_EQ(cu_->instruction_set, kX86);
 
@@ -498,7 +830,7 @@
 // Decrement register and branch on condition
 LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
   OpRegImm(kOpSub, reg, 1);
-  return OpCmpImmBranch(c_code, reg, 0, target);
+  return OpCondBranch(c_code, target);
 }
 
 bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
@@ -512,100 +844,353 @@
   return NULL;
 }
 
-void X86Mir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) {
+  switch (val) {
+    case 0:
+      NewLIR2(kX86Xor32RR, dest, dest);
+      break;
+    case 1:
+      OpRegCopy(dest, src);
+      break;
+    default:
+      OpRegRegImm(kOpMul, dest, src, val);
+      break;
+  }
+}
+
+void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) {
+  LIR *m;
+  switch (val) {
+    case 0:
+      NewLIR2(kX86Xor32RR, dest, dest);
+      break;
+    case 1:
+      LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg);
+      break;
+    default:
+      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP,
+                  displacement, val);
+      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
+      break;
+  }
+}
+
+void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
-  LOG(FATAL) << "Unexpected use of GenX86Long for x86";
-}
-void X86Mir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
+  if (rl_src1.is_const) {
+    std::swap(rl_src1, rl_src2);
+  }
+  // Are we multiplying by a constant?
+  if (rl_src2.is_const) {
+    // Handle multiplication by simple constant values specially.
+    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+    if (val == 0) {
+      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
+      OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg);
+      StoreValueWide(rl_dest, rl_result);
+      return;
+    } else if (val == 1) {
+      rl_src1 = EvalLocWide(rl_src1, kCoreReg, true);
+      StoreValueWide(rl_dest, rl_src1);
+      return;
+    } else if (val == 2) {
+      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
+      return;
+    } else if (IsPowerOfTwo(val)) {
+      int shift_amount = LowestSetBit(val);
+      if (!BadOverlap(rl_src1, rl_dest)) {
+        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
+                                                  rl_src1, shift_amount);
+        StoreValueWide(rl_dest, rl_result);
+        return;
+      }
+    }
+
+    // Okay, just bite the bullet and do it.
+    int32_t val_lo = Low32Bits(val);
+    int32_t val_hi = High32Bits(val);
+    FlushAllRegs();
+    LockCallTemps();  // Prepare for explicit register usage.
+    rl_src1 = UpdateLocWide(rl_src1);
+    bool src1_in_reg = rl_src1.location == kLocPhysReg;
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+
+    // ECX <- 1H * 2L
+    // EAX <- 1L * 2H
+    if (src1_in_reg) {
+      GenImulRegImm(r1, rl_src1.high_reg, val_lo);
+      GenImulRegImm(r0, rl_src1.low_reg, val_hi);
+    } else {
+      GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
+      GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
+    }
+
+    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
+    NewLIR2(kX86Add32RR, r1, r0);
+
+    // EAX <- 2L
+    LoadConstantNoClobber(r0, val_lo);
+
+    // EDX:EAX <- 2L * 1L (double precision)
+    if (src1_in_reg) {
+      NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+    } else {
+      LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                              true /* is_load */, true /* is_64bit */);
+    }
+
+    // EDX <- EDX + ECX (add high words)
+    NewLIR2(kX86Add32RR, r2, r1);
+
+    // Result is EDX:EAX
+    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+                             INVALID_SREG, INVALID_SREG};
+    StoreValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // Nope.  Do it the hard way.
   FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpAdd, r0, r2);  // r0 = r0 + r2
-  OpRegReg(kOpAdc, r1, r3);  // r1 = r1 + r3 + CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
+  LockCallTemps();  // Prepare for explicit register usage.
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // At this point, the VRs are in their home locations.
+  bool src1_in_reg = rl_src1.location == kLocPhysReg;
+  bool src2_in_reg = rl_src2.location == kLocPhysReg;
+
+  // ECX <- 1H
+  if (src1_in_reg) {
+    NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1,
+                 kWord, GetSRegHi(rl_src1.s_reg_low));
+  }
+
+  // EAX <- 2H
+  if (src2_in_reg) {
+    NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0,
+                 kWord, GetSRegHi(rl_src2.s_reg_low));
+  }
+
+  // EAX <- EAX * 1L  (2H * 1L)
+  if (src1_in_reg) {
+    NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+    LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // ECX <- ECX * 2L  (1H * 2L)
+  if (src2_in_reg) {
+    NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src2.s_reg_low);
+    LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
+  NewLIR2(kX86Add32RR, r1, r0);
+
+  // EAX <- 2L
+  if (src2_in_reg) {
+    NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg);
+  } else {
+    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0,
+                 kWord, rl_src2.s_reg_low);
+  }
+
+  // EDX:EAX <- 2L * 1L (double precision)
+  if (src1_in_reg) {
+    NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
+  } else {
+    int displacement = SRegOffset(rl_src1.s_reg_low);
+    LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
+    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is_64bit */);
+  }
+
+  // EDX <- EDX + ECX (add high words)
+  NewLIR2(kX86Add32RR, r2, r1);
+
+  // Result is EDX:EAX
+  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
+                           INVALID_SREG, INVALID_SREG};
   StoreValueWide(rl_dest, rl_result);
 }
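
For readers tracing the register choreography above, this standalone C++ sketch (not part of the change) performs the same decomposition of a 64-bit multiply into 32-bit partial products:

#include <cassert>
#include <cstdint>

// low64(a * b) built from 32-bit pieces, mirroring the EDX:EAX sequence above:
//   hi = a_hi*b_lo + a_lo*b_hi + high32(a_lo*b_lo), lo = low32(a_lo*b_lo).
uint64_t MulLong(uint64_t a, uint64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a), a_hi = static_cast<uint32_t>(a >> 32);
  uint32_t b_lo = static_cast<uint32_t>(b), b_hi = static_cast<uint32_t>(b >> 32);
  uint64_t lo_product = static_cast<uint64_t>(a_lo) * b_lo;   // the unsigned mul (EDX:EAX)
  uint32_t hi = a_hi * b_lo + a_lo * b_hi                     // the two imuls plus add
              + static_cast<uint32_t>(lo_product >> 32);      // add high words
  return (static_cast<uint64_t>(hi) << 32) | static_cast<uint32_t>(lo_product);
}

int main() {
  assert(MulLong(0x123456789ULL, 0x0FEDCBA98ULL) == 0x123456789ULL * 0x0FEDCBA98ULL);
  assert(MulLong(UINT64_MAX, 3) == UINT64_MAX * 3);  // low 64 bits match even on wrap
  return 0;
}
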
 
-void X86Mir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpSub, r0, r2);  // r0 = r0 - r2
-  OpRegReg(kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
+                                   Instruction::Code op) {
+  DCHECK_EQ(rl_dest.location, kLocPhysReg);
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
+  if (rl_src.location == kLocPhysReg) {
+    // Both operands are in registers.
+    if (rl_dest.low_reg == rl_src.high_reg) {
+      // The registers are the same, so we would clobber it before the use.
+      int temp_reg = AllocTemp();
+      OpRegCopy(temp_reg, rl_dest.low_reg);
+      rl_src.high_reg = temp_reg;
+    }
+    NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg);
+
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg);
+    FreeTemp(rl_src.low_reg);
+    FreeTemp(rl_src.high_reg);
+    return;
+  }
+
+  // RHS is in memory.
+  DCHECK((rl_src.location == kLocDalvikFrame) ||
+         (rl_src.location == kLocCompilerTemp));
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_src.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);
+  lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
 }
 
-void X86Mir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) & (r2:r3)
-  OpRegReg(kOpAnd, r0, r2);  // r0 = r0 & r2
-  OpRegReg(kOpAnd, r1, r3);  // r1 = r1 & r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  rl_dest = UpdateLocWide(rl_dest);
+  if (rl_dest.location == kLocPhysReg) {
+    // Ensure we are in a register pair
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+
+    rl_src = UpdateLocWide(rl_src);
+    GenLongRegOrMemOp(rl_result, rl_src, op);
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+
+  // Operate directly into memory.
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);
+  lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  FreeTemp(rl_src.low_reg);
+  FreeTemp(rl_src.high_reg);
 }
 
-void X86Mir2Lir::GenOrLong(RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) | (r2:r3)
-  OpRegReg(kOpOr, r0, r2);  // r0 = r0 | r2
-  OpRegReg(kOpOr, r1, r3);  // r1 = r1 | r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2, Instruction::Code op,
+                              bool is_commutative) {
+  // Is this really a 2 operand operation?
+  switch (op) {
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+      GenLongArith(rl_dest, rl_src2, op);
+      return;
+    default:
+      break;
+  }
+
+  if (rl_dest.location == kLocPhysReg) {
+    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
+
+    // We are about to clobber the LHS, so it needs to be a temp.
+    rl_result = ForceTempWide(rl_result);
+
+    // Perform the operation using the RHS.
+    rl_src2 = UpdateLocWide(rl_src2);
+    GenLongRegOrMemOp(rl_result, rl_src2, op);
+
+    // And now record that the result is in the temp.
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // Get one of the source operands into a temporary register.
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  if (IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg)) {
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  } else if (is_commutative) {
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+    // We need at least one of them to be a temporary.
+    if (!(IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg))) {
+      rl_src1 = ForceTempWide(rl_src1);
+    }
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  } else {
+    // Need LHS to be the temp.
+    rl_src1 = ForceTempWide(rl_src1);
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_src1);
 }
 
-void X86Mir2Lir::GenXorLong(RegLocation rl_dest,
+void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) ^ (r2:r3)
-  OpRegReg(kOpXor, r0, r2);  // r0 = r0 ^ r2
-  OpRegReg(kOpXor, r1, r3);  // r1 = r1 ^ r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
+}
+
+void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
 }
 
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src, r0, r1);
-  // Compute (r1:r0) = -(r1:r0)
-  OpRegReg(kOpNeg, r0, r0);  // r0 = -r0
-  OpRegImm(kOpAdc, r1, 0);   // r1 = r1 + CF
-  OpRegReg(kOpNeg, r1, r1);  // r1 = -r1
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = ForceTempWide(rl_src);
+  if (rl_dest.low_reg == rl_src.high_reg) {
+    // The registers are the same, so we would clobber it before the use.
+    int temp_reg = AllocTemp();
+    OpRegCopy(temp_reg, rl_result.low_reg);
+    rl_result.high_reg = temp_reg;
+  }
+  OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg);    // rLow = -rLow
+  OpRegImm(kOpAdc, rl_result.high_reg, 0);                   // rHigh = rHigh + CF
+  OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg);  // rHigh = -rHigh
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -740,16 +1325,626 @@
   }
 }
 
+RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
+                                          RegLocation rl_src, int shift_amount) {
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  switch (opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+        LoadConstant(rl_result.low_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
+        FreeTemp(rl_src.high_reg);
+        NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32);
+        LoadConstant(rl_result.low_reg, 0);
+      } else {
+        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount);
+        NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount);
+      }
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+      } else if (shift_amount > 31) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32);
+        NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
+      } else {
+        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+        NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount);
+      }
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        LoadConstant(rl_result.high_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
+        NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32);
+        LoadConstant(rl_result.high_reg, 0);
+      } else {
+        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
+        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
+        NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
+        NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+  }
+  return rl_result;
+}
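
A standalone C++ sketch (illustrative names, not part of the patch) of the word-pair decomposition these cases implement, shown for the left-shift path:

#include <cassert>
#include <cstdint>

// Left-shift a 64-bit value held as two 32-bit words: the shld/sal sequence
// above handles amounts below 32, the copy-and-clear path handles 32 or more.
void ShlLong(uint32_t* lo, uint32_t* hi, int shift) {
  if (shift >= 32) {
    *hi = *lo << (shift - 32);                      // high word takes shifted low word
    *lo = 0;
  } else if (shift > 0) {
    *hi = (*hi << shift) | (*lo >> (32 - shift));   // shld
    *lo <<= shift;                                  // sal
  }
}

int main() {
  const uint64_t v = 0x0123456789ABCDEFULL;
  for (int s : {0, 1, 5, 31, 32, 40, 63}) {
    uint32_t lo = static_cast<uint32_t>(v), hi = static_cast<uint32_t>(v >> 32);
    ShlLong(&lo, &hi, s);
    assert(((static_cast<uint64_t>(hi) << 32) | lo) == (v << s));
  }
  return 0;
}
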
+
 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_shift) {
-  // Default implementation is just to ignore the constant case.
-  GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
+                                   RegLocation rl_src, RegLocation rl_shift) {
+  // Per the spec, we only care about the low 6 bits of the shift amount.
+  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
+  if (shift_amount == 0) {
+    rl_src = LoadValueWide(rl_src, kCoreReg);
+    StoreValueWide(rl_dest, rl_src);
+    return;
+  } else if (shift_amount == 1 &&
+            (opcode ==  Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
+    // Need to handle this here to avoid calling StoreValueWide twice.
+    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
+    return;
+  }
+  if (BadOverlap(rl_src, rl_dest)) {
+    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
+    return;
+  }
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
+  StoreValueWide(rl_dest, rl_result);
 }
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  // Default - bail to non-const handler.
-  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::AND_LONG:
+    case Instruction::OR_LONG:
+    case Instruction::XOR_LONG:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+      }
+      break;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
+      }
+      break;
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongImm(rl_dest, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+      }
+      break;
+    default:
+      // Default - bail to non-const handler.
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      break;
+  }
 }
 
+bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
+  switch (op) {
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      return value == -1;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      return value == 0;
+    default:
+      return false;
+  }
+}
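
The algebraic identities this helper relies on, checked by a trivial standalone program (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 0x12345678;
  assert((x & -1) == x);  // AND with all-ones is a no-op
  assert((x | 0) == x);   // OR with zero is a no-op
  assert((x ^ 0) == x);   // XOR with zero is a no-op
  return 0;
}
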
+
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                                bool is_high_op) {
+  bool rhs_in_mem = rhs.location != kLocPhysReg;
+  bool dest_in_mem = dest.location != kLocPhysReg;
+  DCHECK(!rhs_in_mem || !dest_in_mem);
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Adc32MR : kX86Add32MR;
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Adc32RM : kX86Add32RM;
+      }
+      return is_high_op ? kX86Adc32RR : kX86Add32RR;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
+      }
+      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (dest_in_mem) {
+        return kX86And32MR;
+      }
+      return rhs_in_mem ? kX86And32RM : kX86And32RR;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Or32MR;
+      }
+      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Xor32MR;
+      }
+      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32RR;
+  }
+}
+
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
+                                int32_t value) {
+  bool in_mem = loc.location != kLocPhysReg;
+  bool byte_imm = IS_SIMM8(value);
+  DCHECK(in_mem || !IsFpReg(loc.low_reg));
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
+        }
+        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Adc32MI : kX86Add32MI;
+      }
+      return is_high_op ? kX86Adc32RI : kX86Add32RI;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
+        }
+        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
+      }
+      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (byte_imm) {
+        return in_mem ? kX86And32MI8 : kX86And32RI8;
+      }
+      return in_mem ? kX86And32MI : kX86And32RI;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
+      }
+      return in_mem ? kX86Or32MI : kX86Or32RI;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
+      }
+      return in_mem ? kX86Xor32MI : kX86Xor32RI;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32MI;
+  }
+}
+
+void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  DCHECK(rl_src.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+
+  // Can we just do this into memory?
+  if ((rl_dest.location == kLocDalvikFrame) ||
+      (rl_dest.location == kLocCompilerTemp)) {
+    int rBase = TargetReg(kSp);
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+
+    if (!IsNoOp(op, val_lo)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi);
+      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, true /* is64bit */);
+    }
+    return;
+  }
+
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  DCHECK_EQ(rl_result.location, kLocPhysReg);
+  DCHECK(!IsFpReg(rl_result.low_reg));
+
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                                RegLocation rl_src2, Instruction::Code op) {
+  DCHECK(rl_src2.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+  rl_src1 = UpdateLocWide(rl_src1);
+
+  // Can we do this directly into the destination registers?
+  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
+      rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg &&
+      !IsFpReg(rl_dest.low_reg)) {
+    if (!IsNoOp(op, val_lo)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      NewLIR2(x86op, rl_dest.low_reg, val_lo);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
+      NewLIR2(x86op, rl_dest.high_reg, val_hi);
+    }
+    return;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  DCHECK_EQ(rl_src1.location, kLocPhysReg);
+
+  // We need the values to be in a temporary
+  RegLocation rl_result = ForceTempWide(rl_src1);
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_result);
+}
+
+// For final classes there are no subclasses to check, so we can answer the instance-of
+// question with simple comparisons. Use compares against memory and SETEQ to optimize for x86.
+void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
+                                    RegLocation rl_dest, RegLocation rl_src) {
+  RegLocation object = LoadValue(rl_src, kCoreReg);
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  int result_reg = rl_result.low_reg;
+
+  // SETcc only works with EAX..EDX.
+  if (result_reg == object.low_reg || result_reg >= 4) {
+    result_reg = AllocTypedTemp(false, kCoreReg);
+    DCHECK_LT(result_reg, 4);
+  }
+
+  // Assume that there is no match.
+  LoadConstant(result_reg, 0);
+  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL);
+
+  int check_class = AllocTypedTemp(false, kCoreReg);
+
+  // If Method* is already in a register, we can save a copy.
+  RegLocation rl_method = mir_graph_->GetMethodLoc();
+  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
+    (sizeof(mirror::Class*) * type_idx);
+
+  if (rl_method.location == kLocPhysReg) {
+    if (use_declaring_class) {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(rl_method.low_reg,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  } else {
+    LoadCurrMethodDirect(check_class);
+    if (use_declaring_class) {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
+                   check_class);
+    } else {
+      LoadWordDisp(check_class,
+                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
+                   check_class);
+      LoadWordDisp(check_class, offset_of_type, check_class);
+    }
+  }
+
+  // Compare the computed class to the class in the object.
+  DCHECK_EQ(object.location, kLocPhysReg);
+  OpRegMem(kOpCmp, check_class, object.low_reg,
+           mirror::Object::ClassOffset().Int32Value());
+
+  // Set the low byte of the result to 0 or 1 from the compare condition code.
+  NewLIR2(kX86Set8R, result_reg, kX86CondEq);
+
+  LIR* target = NewLIR0(kPseudoTargetLabel);
+  null_branchover->target = target;
+  FreeTemp(check_class);
+  if (IsTemp(result_reg)) {
+    OpRegCopy(rl_result.low_reg, result_reg);
+    FreeTemp(result_reg);
+  }
+  StoreValue(rl_dest, rl_result);
+}
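
In plain C++ terms the sequence above computes the following; the struct here is a simplified stand-in for mirror::Object and is illustrative only:

#include <cstdint>

struct Class {};
struct Object { Class* klass; };  // stand-in for the object's class field

// What GenInstanceofFinal computes for a final class: a null check (the early
// branch) plus a single class-pointer comparison (cmp + set8), no subclass walk.
bool InstanceOfFinal(const Object* obj, const Class* check_class) {
  return obj != nullptr && obj->klass == check_class;
}

int main() {
  Class c;
  Object o{&c};
  return (InstanceOfFinal(&o, &c) && !InstanceOfFinal(nullptr, &c)) ? 0 : 1;
}
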
+
+void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_lhs, RegLocation rl_rhs) {
+  OpKind op = kOpBkpt;
+  bool is_div_rem = false;
+  bool unary = false;
+  bool shift_op = false;
+  bool is_two_addr = false;
+  RegLocation rl_result;
+  switch (opcode) {
+    case Instruction::NEG_INT:
+      op = kOpNeg;
+      unary = true;
+      break;
+    case Instruction::NOT_INT:
+      op = kOpMvn;
+      unary = true;
+      break;
+    case Instruction::ADD_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::ADD_INT:
+      op = kOpAdd;
+      break;
+    case Instruction::SUB_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SUB_INT:
+      op = kOpSub;
+      break;
+    case Instruction::MUL_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::MUL_INT:
+      op = kOpMul;
+      break;
+    case Instruction::DIV_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::DIV_INT:
+      op = kOpDiv;
+      is_div_rem = true;
+      break;
+    /* NOTE: returns in kArg1 */
+    case Instruction::REM_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::REM_INT:
+      op = kOpRem;
+      is_div_rem = true;
+      break;
+    case Instruction::AND_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::AND_INT:
+      op = kOpAnd;
+      break;
+    case Instruction::OR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::OR_INT:
+      op = kOpOr;
+      break;
+    case Instruction::XOR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::XOR_INT:
+      op = kOpXor;
+      break;
+    case Instruction::SHL_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHL_INT:
+      shift_op = true;
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::SHR_INT:
+      shift_op = true;
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_INT_2ADDR:
+      is_two_addr = true;
+      // Fallthrough
+    case Instruction::USHR_INT:
+      shift_op = true;
+      op = kOpLsr;
+      break;
+    default:
+      LOG(FATAL) << "Invalid word arith op: " << opcode;
+  }
+
+  // Can we convert to a two address instruction?
+  if (!is_two_addr &&
+      (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
+       mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
+    is_two_addr = true;
+  }
+
+  // Get the div/rem stuff out of the way.
+  if (is_div_rem) {
+    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
+    StoreValue(rl_dest, rl_result);
+    return;
+  }
+
+  if (unary) {
+    rl_lhs = LoadValue(rl_lhs, kCoreReg);
+    rl_result = UpdateLoc(rl_dest);
+    rl_result = EvalLoc(rl_dest, kCoreReg, true);
+    OpRegReg(op, rl_result.low_reg, rl_lhs.low_reg);
+  } else {
+    if (shift_op) {
+      // X86 masks the shift count in hardware, so no explicit masking is needed, but the count must be in ECX.
+      int t_reg = TargetReg(kCount);  // rCX
+      LoadValueDirectFixed(rl_rhs, t_reg);
+      if (is_two_addr) {
+        // Can we do this directly into memory?
+        rl_result = UpdateLoc(rl_dest);
+        rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        if (rl_result.location != kLocPhysReg) {
+          // Okay, we can do this into memory
+          OpMemReg(op, rl_result, t_reg);
+          FreeTemp(t_reg);
+          return;
+        } else if (!IsFpReg(rl_result.low_reg)) {
+          // Can do this directly into the result register
+          OpRegReg(op, rl_result.low_reg, t_reg);
+          FreeTemp(t_reg);
+          StoreFinalValue(rl_dest, rl_result);
+          return;
+        }
+      }
+      // Three address form, or we can't do directly.
+      rl_lhs = LoadValue(rl_lhs, kCoreReg);
+      rl_result = EvalLoc(rl_dest, kCoreReg, true);
+      OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, t_reg);
+      FreeTemp(t_reg);
+    } else {
+      // Multiply is 3 operand only (sort of).
+      if (is_two_addr && op != kOpMul) {
+        // Can we do this directly into memory?
+        rl_result = UpdateLoc(rl_dest);
+        if (rl_result.location == kLocPhysReg) {
+          // Can we do this from memory directly?
+          rl_rhs = UpdateLoc(rl_rhs);
+          if (rl_rhs.location != kLocPhysReg) {
+            OpRegMem(op, rl_result.low_reg, rl_rhs);
+            StoreFinalValue(rl_dest, rl_result);
+            return;
+          } else if (!IsFpReg(rl_rhs.low_reg)) {
+            OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg);
+            StoreFinalValue(rl_dest, rl_result);
+            return;
+          }
+        }
+        rl_rhs = LoadValue(rl_rhs, kCoreReg);
+        if (rl_result.location != kLocPhysReg) {
+          // Okay, we can do this into memory.
+          OpMemReg(op, rl_result, rl_rhs.low_reg);
+          return;
+        } else if (!IsFpReg(rl_result.low_reg)) {
+          // Can do this directly into the result register.
+          OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg);
+          StoreFinalValue(rl_dest, rl_result);
+          return;
+        } else {
+          rl_lhs = LoadValue(rl_lhs, kCoreReg);
+          rl_result = EvalLoc(rl_dest, kCoreReg, true);
+          OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+        }
+      } else {
+        // Try to use reg/memory instructions.
+        rl_lhs = UpdateLoc(rl_lhs);
+        rl_rhs = UpdateLoc(rl_rhs);
+        // We can't optimize with FP registers.
+        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
+          // Something is difficult, so fall back to the standard case.
+          rl_lhs = LoadValue(rl_lhs, kCoreReg);
+          rl_rhs = LoadValue(rl_rhs, kCoreReg);
+          rl_result = EvalLoc(rl_dest, kCoreReg, true);
+          OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+        } else {
+          // We can optimize by moving to result and using memory operands.
+          if (rl_rhs.location != kLocPhysReg) {
+            // Force LHS into result.
+            rl_result = EvalLoc(rl_dest, kCoreReg, true);
+            LoadValueDirect(rl_lhs, rl_result.low_reg);
+            OpRegMem(op, rl_result.low_reg, rl_rhs);
+          } else if (rl_lhs.location != kLocPhysReg) {
+            // RHS is in a register; LHS is in memory.
+            if (op != kOpSub) {
+              // Force RHS into result and operate on memory.
+              rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              OpRegCopy(rl_result.low_reg, rl_rhs.low_reg);
+              OpRegMem(op, rl_result.low_reg, rl_lhs);
+            } else {
+              // Subtraction isn't commutative.
+              rl_lhs = LoadValue(rl_lhs, kCoreReg);
+              rl_rhs = LoadValue(rl_rhs, kCoreReg);
+              rl_result = EvalLoc(rl_dest, kCoreReg, true);
+              OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+            }
+          } else {
+            // Both are in registers.
+            rl_lhs = LoadValue(rl_lhs, kCoreReg);
+            rl_rhs = LoadValue(rl_rhs, kCoreReg);
+            rl_result = EvalLoc(rl_dest, kCoreReg, true);
+            OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg);
+          }
+        }
+      }
+    }
+  }
+  StoreValue(rl_dest, rl_result);
+}
+
+bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
+  // If either operand is in an FP (non-core) register, we can't use the reg/mem forms.
+  if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.low_reg)) {
+    return false;
+  }
+  if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.low_reg)) {
+    return false;
+  }
+
+  // Everything will be fine :-).
+  return true;
+}
 }  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5c993c5..a347d8b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -243,9 +243,9 @@
             }
             break;
           case 't':
-            buf += StringPrintf("0x%08x (L%p)",
-                                reinterpret_cast<uintptr_t>(base_addr)
-                                + lir->offset + operand, lir->target);
+            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
+                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
+                                lir->target);
             break;
           default:
             buf += StringPrintf("DecodeError '%c'", fmt[i]);
@@ -679,31 +679,24 @@
   }
 
   DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-  if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
-    // Need a wide vector register.
-    low_reg = AllocTypedTemp(true, reg_class);
-    loc.low_reg = low_reg;
-    loc.high_reg = low_reg;  // Play nice with existing code.
-    loc.vec_len = kVectorLength8;
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
-    }
+  DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+  new_regs = AllocTypedTempPair(loc.fp, reg_class);
+  loc.low_reg = new_regs & 0xff;
+  loc.high_reg = (new_regs >> 8) & 0xff;
+
+  if (loc.low_reg == loc.high_reg) {
     DCHECK(IsFpReg(loc.low_reg));
+    loc.vec_len = kVectorLength8;
   } else {
-    DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
-    new_regs = AllocTypedTempPair(loc.fp, reg_class);
-    loc.low_reg = new_regs & 0xff;
-    loc.high_reg = (new_regs >> 8) & 0xff;
-
     MarkPair(loc.low_reg, loc.high_reg);
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
+  }
+  if (update) {
+    loc.location = kLocPhysReg;
+    MarkLive(loc.low_reg, loc.s_reg_low);
+    if (loc.low_reg != loc.high_reg) {
       MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
     }
-    DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
   }
   return loc;
 }
@@ -796,4 +789,31 @@
   // Just use the standard code to do the generation.
   Mir2Lir::GenConstWide(rl_dest, value);
 }
+
+// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
+void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
+  LOG(INFO)  << "location: " << loc.location << ','
+             << (loc.wide ? " w" : "  ")
+             << (loc.defined ? " D" : "  ")
+             << (loc.is_const ? " c" : "  ")
+             << (loc.fp ? " F" : "  ")
+             << (loc.core ? " C" : "  ")
+             << (loc.ref ? " r" : "  ")
+             << (loc.high_word ? " h" : "  ")
+             << (loc.home ? " H" : "  ")
+             << " vec_len: " << loc.vec_len
+             << ", low: " << static_cast<int>(loc.low_reg)
+             << ", high: " << static_cast<int>(loc.high_reg)
+             << ", s_reg: " << loc.s_reg_low
+             << ", orig: " << loc.orig_sreg;
+}
+
+void X86Mir2Lir::Materialize() {
+  // Perform the MIR analysis pass before starting code generation.
+  AnalyzeMIR();
+
+  // Now continue with regular code generation.
+  Mir2Lir::Materialize();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 91c39fa..e2744d0 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -16,6 +16,7 @@
 
 #include "codegen_x86.h"
 #include "dex/quick/mir_to_lir-inl.h"
+#include "dex/dataflow_iterator-inl.h"
 #include "x86_lir.h"
 
 namespace art {
@@ -61,7 +62,7 @@
 }
 
 bool X86Mir2Lir::InexpensiveConstantDouble(int64_t value) {
-  return false;  // TUNING
+  return value == 0;
 }
 
 /*
@@ -141,7 +142,14 @@
     case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
     case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
     case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov: return LoadConstantNoClobber(r_dest_src1, value);
+    case kOpMov:
+      /*
+       * Moving the constant zero into a register could be specialized as an xor of that register.
+       * However, xor sets EFLAGS while the move does not. For that reason we always emit the move
+       * here; callers that can tolerate clobbered flags should call LoadConstantNoClobber instead.
+       */
+      opcode = kX86Mov32RI;
+      break;
     case kOpMul:
       opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
       return NewLIR3(opcode, r_dest_src1, r_dest_src1, value);
@@ -203,6 +211,110 @@
     return NewLIR2(opcode, r_dest_src1, r_src2);
 }
 
+LIR* X86Mir2Lir::OpMovRegMem(int r_dest, int r_base, int offset, MoveType move_type) {
+  DCHECK(!(X86_FPREG(r_base)));
+
+  X86OpCode opcode = kX86Nop;
+  switch (move_type) {
+    case kMov8GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov8RM;
+      break;
+    case kMov16GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov16RM;
+      break;
+    case kMov32GP:
+      CHECK(!X86_FPREG(r_dest));
+      opcode = kX86Mov32RM;
+      break;
+    case kMov32FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovssRM;
+      break;
+    case kMov64FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovsdRM;
+      break;
+    case kMovU128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovupsRM;
+      break;
+    case kMovA128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovapsRM;
+      break;
+    case kMovLo128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovlpsRM;
+      break;
+    case kMovHi128FP:
+      CHECK(X86_FPREG(r_dest));
+      opcode = kX86MovhpsRM;
+      break;
+    case kMov64GP:
+    case kMovLo64FP:
+    case kMovHi64FP:
+    default:
+      LOG(FATAL) << "Bad case in OpMovRegMem";
+      break;
+  }
+
+  return NewLIR3(opcode, r_dest, r_base, offset);
+}
+
+LIR* X86Mir2Lir::OpMovMemReg(int r_base, int offset, int r_src, MoveType move_type) {
+  DCHECK(!(X86_FPREG(r_base)));
+
+  X86OpCode opcode = kX86Nop;
+  switch (move_type) {
+    case kMov8GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov8MR;
+      break;
+    case kMov16GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov16MR;
+      break;
+    case kMov32GP:
+      CHECK(!X86_FPREG(r_src));
+      opcode = kX86Mov32MR;
+      break;
+    case kMov32FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovssMR;
+      break;
+    case kMov64FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovsdMR;
+      break;
+    case kMovU128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovupsMR;
+      break;
+    case kMovA128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovapsMR;
+      break;
+    case kMovLo128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovlpsMR;
+      break;
+    case kMovHi128FP:
+      CHECK(X86_FPREG(r_src));
+      opcode = kX86MovhpsMR;
+      break;
+    case kMov64GP:
+    case kMovLo64FP:
+    case kMovHi64FP:
+    default:
+      LOG(FATAL) << "Bad case in OpMovMemReg";
+      break;
+  }
+
+  return NewLIR3(opcode, r_base, offset, r_src);
+}
+
 LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, int r_dest, int r_src) {
   // The only conditional reg to reg operation supported is Cmov
   DCHECK_EQ(op, kOpCmov);
@@ -229,7 +341,57 @@
       LOG(FATAL) << "Bad case in OpRegMem " << op;
       break;
   }
-  return NewLIR3(opcode, r_dest, rBase, offset);
+  LIR *l = NewLIR3(opcode, r_dest, rBase, offset);
+  if (rBase == rX86_SP) {
+    AnnotateDalvikRegAccess(l, offset >> 2, true /* is_load */, false /* is_64bit */);
+  }
+  return l;
+}
+
+LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) {
+  DCHECK_NE(rl_dest.location, kLocPhysReg);
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+    case kOpSub: opcode = kX86Sub32MR; break;
+    case kOpMov: opcode = kX86Mov32MR; break;
+    case kOpCmp: opcode = kX86Cmp32MR; break;
+    case kOpAdd: opcode = kX86Add32MR; break;
+    case kOpAnd: opcode = kX86And32MR; break;
+    case kOpOr:  opcode = kX86Or32MR; break;
+    case kOpXor: opcode = kX86Xor32MR; break;
+    case kOpLsl: opcode = kX86Sal32MC; break;
+    case kOpLsr: opcode = kX86Shr32MC; break;
+    case kOpAsr: opcode = kX86Sar32MC; break;
+    default:
+      LOG(FATAL) << "Bad case in OpMemReg " << op;
+      break;
+  }
+  LIR *l = NewLIR3(opcode, rX86_SP, displacement, r_value);
+  AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */);
+  return l;
+}
+
+LIR* X86Mir2Lir::OpRegMem(OpKind op, int r_dest, RegLocation rl_value) {
+  DCHECK_NE(rl_value.location, kLocPhysReg);
+  int displacement = SRegOffset(rl_value.s_reg_low);
+  X86OpCode opcode = kX86Nop;
+  switch (op) {
+    case kOpSub: opcode = kX86Sub32RM; break;
+    case kOpMov: opcode = kX86Mov32RM; break;
+    case kOpCmp: opcode = kX86Cmp32RM; break;
+    case kOpAdd: opcode = kX86Add32RM; break;
+    case kOpAnd: opcode = kX86And32RM; break;
+    case kOpOr:  opcode = kX86Or32RM; break;
+    case kOpXor: opcode = kX86Xor32RM; break;
+    case kOpMul: opcode = kX86Imul32RM; break;
+    default:
+      LOG(FATAL) << "Bad case in OpRegMem " << op;
+      break;
+  }
+  LIR *l = NewLIR3(opcode, r_dest, rX86_SP, displacement);
+  AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */);
+  return l;
 }
 
 LIR* X86Mir2Lir::OpRegRegReg(OpKind op, int r_dest, int r_src1,
@@ -337,6 +499,27 @@
       DCHECK_EQ(r_dest_lo, r_dest_hi);
       if (value == 0) {
         return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
+      } else if (base_of_code_ != nullptr) {
+        // We will load the value from the literal area.
+        LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
+        if (data_target == NULL) {
+          data_target = AddWideData(&literal_list_, val_lo, val_hi);
+        }
+
+        // Address the start of the method
+        RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+        rl_method = LoadValue(rl_method, kCoreReg);
+
+        // Load the proper value from the literal area.
+        // We don't know the proper offset for the value, so pick one that will force
+        // a 4-byte offset.  We will fix this up in the assembler later to have the right
+        // value.
+        res = LoadBaseDisp(rl_method.low_reg, 256 /* bogus */, r_dest_lo, kDouble, INVALID_SREG);
+        res->target = data_target;
+        res->flags.fixup = kFixupLoad;
+        SetMemRefType(res, true, kLiteral);
+        // Redo after we assign target to ensure size is correct.
+        SetupResourceMasks(res);
       } else {
         if (val_lo == 0) {
           res = NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
@@ -346,8 +529,7 @@
         if (val_hi != 0) {
           r_dest_hi = AllocTempDouble();
           LoadConstantNoClobber(r_dest_hi, val_hi);
-          NewLIR2(kX86PsllqRI, r_dest_hi, 32);
-          NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi);
+          NewLIR2(kX86PunpckldqRR, r_dest_lo, r_dest_hi);
           FreeTemp(r_dest_hi);
         }
       }
@@ -435,15 +617,37 @@
                      displacement + LOWORD_OFFSET);
     } else {
       if (rBase == r_dest) {
-        load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
-                        displacement + HIWORD_OFFSET);
-        load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
-                       displacement + LOWORD_OFFSET);
+        if (r_dest_hi == r_index) {
+          // We can't use either register for the first load.
+          int temp = AllocTemp();
+          load2 = NewLIR5(opcode, temp, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          OpRegCopy(r_dest_hi, temp);
+          FreeTemp(temp);
+        } else {
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+        }
       } else {
-        load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
-                       displacement + LOWORD_OFFSET);
-        load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
-                        displacement + HIWORD_OFFSET);
+        if (r_dest == r_index) {
+          // We can't use either register for the first load.
+          int temp = AllocTemp();
+          load = NewLIR5(opcode, temp, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          OpRegCopy(r_dest, temp);
+          FreeTemp(temp);
+        } else {
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+        }
       }
     }
   }
@@ -572,9 +776,133 @@
   NewLIR2(kX86MovdxrRR, fp_reg, low_reg);
   int tmp_reg = AllocTempDouble();
   NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);
-  NewLIR2(kX86PsllqRI, tmp_reg, 32);
-  NewLIR2(kX86OrpsRR, fp_reg, tmp_reg);
+  NewLIR2(kX86PunpckldqRR, fp_reg, tmp_reg);
   FreeTemp(tmp_reg);
 }
 
+LIR* X86Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
+                                   int offset, int check_value, LIR* target) {
+    NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base_reg, offset,
+            check_value);
+    LIR* branch = OpCondBranch(cond, target);
+    return branch;
+}
+
+void X86Mir2Lir::AnalyzeMIR() {
+  // Assume we don't need a pointer to the base of the code.
+  cu_->NewTimingSplit("X86 MIR Analysis");
+  store_method_addr_ = false;
+
+  // Walk the MIR looking for interesting items.
+  PreOrderDfsIterator iter(mir_graph_);
+  BasicBlock* curr_bb = iter.Next();
+  while (curr_bb != NULL) {
+    AnalyzeBB(curr_bb);
+    curr_bb = iter.Next();
+  }
+
+  // Did we need a pointer to the method code?
+  if (store_method_addr_) {
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false);
+  } else {
+    base_of_code_ = nullptr;
+  }
+}
+
+void X86Mir2Lir::AnalyzeBB(BasicBlock * bb) {
+  if (bb->block_type == kDead) {
+    // Ignore dead blocks
+    return;
+  }
+
+  for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
+    int opcode = mir->dalvikInsn.opcode;
+    if (opcode >= kMirOpFirst) {
+      AnalyzeExtendedMIR(opcode, bb, mir);
+    } else {
+      AnalyzeMIR(opcode, bb, mir);
+    }
+  }
+}
+
+
+void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock * bb, MIR *mir) {
+  switch (opcode) {
+    // Instructions referencing doubles.
+    case kMirOpFusedCmplDouble:
+    case kMirOpFusedCmpgDouble:
+      AnalyzeFPInstruction(opcode, bb, mir);
+      break;
+    default:
+      // Ignore the rest.
+      break;
+  }
+}
+
+void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock * bb, MIR *mir) {
+  // Looking for:
+  // - Do we need a pointer to the code (used for packed switches and double literals)?
+
+  switch (opcode) {
+    // Instructions referencing doubles.
+    case Instruction::CMPL_DOUBLE:
+    case Instruction::CMPG_DOUBLE:
+    case Instruction::NEG_DOUBLE:
+    case Instruction::ADD_DOUBLE:
+    case Instruction::SUB_DOUBLE:
+    case Instruction::MUL_DOUBLE:
+    case Instruction::DIV_DOUBLE:
+    case Instruction::REM_DOUBLE:
+    case Instruction::ADD_DOUBLE_2ADDR:
+    case Instruction::SUB_DOUBLE_2ADDR:
+    case Instruction::MUL_DOUBLE_2ADDR:
+    case Instruction::DIV_DOUBLE_2ADDR:
+    case Instruction::REM_DOUBLE_2ADDR:
+      AnalyzeFPInstruction(opcode, bb, mir);
+      break;
+    // Packed switches and array fills need a pointer to the base of the method.
+    case Instruction::FILL_ARRAY_DATA:
+    case Instruction::PACKED_SWITCH:
+      store_method_addr_ = true;
+      break;
+    default:
+      // Other instructions are not interesting yet.
+      break;
+  }
+}
+
+void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock * bb, MIR *mir) {
+  // Look at all the uses, and see if they are double constants.
+  uint64_t attrs = mir_graph_->oat_data_flow_attributes_[opcode];
+  int next_sreg = 0;
+  if (attrs & DF_UA) {
+    if (attrs & DF_A_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      next_sreg += 2;
+    } else {
+      next_sreg++;
+    }
+  }
+  if (attrs & DF_UB) {
+    if (attrs & DF_B_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      next_sreg += 2;
+    } else {
+      next_sreg++;
+    }
+  }
+  if (attrs & DF_UC) {
+    if (attrs & DF_C_WIDE) {
+      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+    }
+  }
+}
+
+void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
+  // If this is a double literal, we will want it in the literal pool.
+  if (use.is_const) {
+    store_method_addr_ = true;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 1488f5d..6962ff7 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -304,6 +304,8 @@
   BinaryShiftOpCode(kX86Sar),
 #undef BinaryShiftOpcode
   kX86Cmc,
+  kX86Shld32RRI,
+  kX86Shrd32RRI,
 #define UnaryOpcode(opcode, reg, mem, array) \
   opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
   opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
@@ -316,6 +318,7 @@
   UnaryOpcode(kX86Imul, DaR, DaM, DaA),
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+  kx86Cdq32Da,
   kX86Bswap32R,
   kX86Push32R, kX86Pop32R,
 #undef UnaryOpcode
@@ -349,10 +352,19 @@
   Binary0fOpCode(kX86Subss),    // float subtract
   Binary0fOpCode(kX86Divsd),    // double divide
   Binary0fOpCode(kX86Divss),    // float divide
+  Binary0fOpCode(kX86Punpckldq),  // Interleave low-order double words
   kX86PsrlqRI,                  // right shift of floating point registers
   kX86PsllqRI,                  // left shift of floating point registers
   kX86SqrtsdRR,                 // sqrt of floating point register
   kX86FstpdM,                   // Store and pop top x87 fp stack
+  Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
+  kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
+  Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
+  kX86MovapsMR, kX86MovapsAR,   // store aligned packed single FP values from xmm1 to m128
+  kX86MovlpsRM, kX86MovlpsRA,   // load packed single FP values from m64 to low quadword of xmm
+  kX86MovlpsMR, kX86MovlpsAR,   // store packed single FP values from low quadword of xmm to m64
+  kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
+  kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
   Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
   kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
   kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
@@ -397,6 +409,7 @@
   kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                              // Shorter form move RI.
+  kRegRegImmRev,                           // RRI with first reg in r/m
   kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
   kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
   kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
new file mode 100644
index 0000000..edccec5
--- /dev/null
+++ b/compiler/dex/verification_results.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "verification_results.h"
+
+#include "base/stl_util.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
+#include "verified_method.h"
+#include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
+
+namespace art {
+
+VerificationResults::VerificationResults()
+    : verified_methods_lock_("compiler verified methods lock"),
+      verified_methods_(),
+      rejected_classes_lock_("compiler rejected classes lock"),
+      rejected_classes_() {
+}
+
+VerificationResults::~VerificationResults() {
+  Thread* self = Thread::Current();
+  {
+    WriterMutexLock mu(self, verified_methods_lock_);
+    STLDeleteValues(&verified_methods_);
+  }
+}
+
+bool VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
+  MethodReference ref = method_verifier->GetMethodReference();
+  bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
+  // TODO: Check also for virtual/interface invokes when DEX-to-DEX supports devirtualization.
+  if (!compile && !method_verifier->HasCheckCasts()) {
+    return true;
+  }
+
+  const VerifiedMethod* verified_method = VerifiedMethod::Create(method_verifier, compile);
+  if (verified_method == nullptr) {
+    DCHECK(method_verifier->HasFailures());
+    return false;
+  }
+
+  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+  auto it = verified_methods_.find(ref);
+  if (it != verified_methods_.end()) {
+    // TODO: Investigate why we are doing the work again for this method and try to avoid it.
+    LOG(WARNING) << "Method processed more than once: "
+        << PrettyMethod(ref.dex_method_index, *ref.dex_file);
+    DCHECK_EQ(it->second->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
+    DCHECK_EQ(it->second->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
+    DCHECK_EQ(it->second->GetDexGcMap().size(), verified_method->GetDexGcMap().size());
+    delete it->second;
+    verified_methods_.erase(it);
+  }
+  verified_methods_.Put(ref, verified_method);
+  DCHECK(verified_methods_.find(ref) != verified_methods_.end());
+  return true;
+}
+
+const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) {
+  ReaderMutexLock mu(Thread::Current(), verified_methods_lock_);
+  auto it = verified_methods_.find(ref);
+  return (it != verified_methods_.end()) ? it->second : nullptr;
+}
+
+void VerificationResults::AddRejectedClass(ClassReference ref) {
+  {
+    WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
+    rejected_classes_.insert(ref);
+  }
+  DCHECK(IsClassRejected(ref));
+}
+
+bool VerificationResults::IsClassRejected(ClassReference ref) {
+  ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_);
+  return (rejected_classes_.find(ref) != rejected_classes_.end());
+}
+
+bool VerificationResults::IsCandidateForCompilation(MethodReference& method_ref,
+                                                    const uint32_t access_flags) {
+#ifdef ART_SEA_IR_MODE
+    bool use_sea = Runtime::Current()->IsSeaIRMode();
+    use_sea = use_sea && (std::string::npos != PrettyMethod(
+                          method_ref.dex_method_index, *(method_ref.dex_file)).find("fibonacci"));
+    if (use_sea) return true;
+#endif
+  // Don't compile class initializers, ever.
+  if (((access_flags & kAccConstructor) != 0) && ((access_flags & kAccStatic) != 0)) {
+    return false;
+  }
+  return (Runtime::Current()->GetCompilerFilter() != Runtime::kInterpretOnly);
+}
+
+}  // namespace art
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
new file mode 100644
index 0000000..2eb0713
--- /dev/null
+++ b/compiler/dex/verification_results.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
+#define ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
+
+#include <stdint.h>
+#include <set>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "class_reference.h"
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
+class VerifiedMethod;
+
+class VerificationResults {
+  public:
+    VerificationResults();
+    ~VerificationResults();
+
+    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        LOCKS_EXCLUDED(verified_methods_lock_);
+
+    const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
+        LOCKS_EXCLUDED(verified_methods_lock_);
+
+    void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
+    bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
+
+    static bool IsCandidateForCompilation(MethodReference& method_ref,
+                                          const uint32_t access_flags);
+
+  private:
+    // Verified methods.
+    typedef SafeMap<MethodReference, const VerifiedMethod*,
+        MethodReferenceComparator> VerifiedMethodMap;
+    ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+    VerifiedMethodMap verified_methods_;
+
+    // Rejected classes.
+    ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+    std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_VERIFICATION_RESULTS_H_
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
new file mode 100644
index 0000000..0f812a4
--- /dev/null
+++ b/compiler/dex/verified_method.cc
@@ -0,0 +1,312 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "verified_method.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "base/logging.h"
+#include "base/stl_util.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "dex_instruction-inl.h"
+#include "base/mutex.h"
+#include "base/mutex-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "mirror/class.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
+#include "mirror/dex_cache-inl.h"
+#include "mirror/object.h"
+#include "mirror/object-inl.h"
+#include "UniquePtr.h"
+#include "verifier/dex_gc_map.h"
+#include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
+#include "verifier/register_line.h"
+#include "verifier/register_line-inl.h"
+
+namespace art {
+
+const VerifiedMethod* VerifiedMethod::Create(verifier::MethodVerifier* method_verifier,
+                                             bool compile) {
+  UniquePtr<VerifiedMethod> verified_method(new VerifiedMethod);
+  if (compile) {
+    /* Generate a register map. */
+    if (!verified_method->GenerateGcMap(method_verifier)) {
+      CHECK(method_verifier->HasFailures());
+      return nullptr;  // Not a real failure, but a failure to encode.
+    }
+    if (kIsDebugBuild) {
+      VerifyGcMap(method_verifier, verified_method->dex_gc_map_);
+    }
+
+    // TODO: move this out when DEX-to-DEX supports devirtualization.
+    if (method_verifier->HasVirtualOrInterfaceInvokes()) {
+      verified_method->GenerateDevirtMap(method_verifier);
+    }
+  }
+
+  if (method_verifier->HasCheckCasts()) {
+    verified_method->GenerateSafeCastSet(method_verifier);
+  }
+  return verified_method.release();
+}
+
+const MethodReference* VerifiedMethod::GetDevirtTarget(uint32_t dex_pc) const {
+  auto it = devirt_map_.find(dex_pc);
+  return (it != devirt_map_.end()) ? &it->second : nullptr;
+}
+
+bool VerifiedMethod::IsSafeCast(uint32_t pc) const {
+  return std::binary_search(safe_cast_set_.begin(), safe_cast_set_.end(), pc);
+}
+
+bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) {
+  DCHECK(dex_gc_map_.empty());
+  size_t num_entries, ref_bitmap_bits, pc_bits;
+  ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
+  // There's a single byte to encode the size of each bitmap.
+  if (ref_bitmap_bits >= (8 /* bits per byte */ * 8192 /* 13-bit size */ )) {
+    // TODO: either a better GC map format or per method failures
+    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers";
+    return false;
+  }
+  size_t ref_bitmap_bytes = (ref_bitmap_bits + 7) / 8;
+  // There are 2 bytes to encode the number of entries.
+  if (num_entries >= 65536) {
+    // TODO: Either a better GC map format or per method failures.
+    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Cannot encode GC map for method with " << num_entries << " entries";
+    return false;
+  }
+  size_t pc_bytes;
+  verifier::RegisterMapFormat format;
+  if (pc_bits <= 8) {
+    format = verifier::kRegMapFormatCompact8;
+    pc_bytes = 1;
+  } else if (pc_bits <= 16) {
+    format = verifier::kRegMapFormatCompact16;
+    pc_bytes = 2;
+  } else {
+    // TODO: Either a better GC map format or per method failures.
+    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
+        << "Cannot encode GC map for method with "
+        << (1 << pc_bits) << " instructions (number is rounded up to nearest power of 2)";
+    return false;
+  }
+  size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
+  dex_gc_map_.reserve(table_size);
+  // Write table header.
+  dex_gc_map_.push_back(format | ((ref_bitmap_bytes & ~0xFF) >> 5));
+  dex_gc_map_.push_back(ref_bitmap_bytes & 0xFF);
+  dex_gc_map_.push_back(num_entries & 0xFF);
+  dex_gc_map_.push_back((num_entries >> 8) & 0xFF);
+  // Write table data.
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
+    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
+      dex_gc_map_.push_back(i & 0xFF);
+      if (pc_bytes == 2) {
+        dex_gc_map_.push_back((i >> 8) & 0xFF);
+      }
+      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
+      line->WriteReferenceBitMap(dex_gc_map_, ref_bitmap_bytes);
+    }
+  }
+  DCHECK_EQ(dex_gc_map_.size(), table_size);
+  return true;
+}
+
+void VerifiedMethod::VerifyGcMap(verifier::MethodVerifier* method_verifier,
+                                 const std::vector<uint8_t>& data) {
+  // Check that for every GC point there is a map entry and there are no entries for non-GC points,
+  // that the table data is well formed, and that all references are marked (or not) in the bitmap.
+  verifier::DexPcToReferenceMap map(&data[0]);
+  DCHECK_EQ(data.size(), map.RawSize());
+  size_t map_index = 0;
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
+    const uint8_t* reg_bitmap = map.FindBitMap(i, false);
+    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
+      DCHECK_LT(map_index, map.NumEntries());
+      DCHECK_EQ(map.GetDexPc(map_index), i);
+      DCHECK_EQ(map.GetBitMap(map_index), reg_bitmap);
+      map_index++;
+      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
+      for (size_t j = 0; j < code_item->registers_size_; j++) {
+        if (line->GetRegisterType(j).IsNonZeroReferenceTypes()) {
+          DCHECK_LT(j / 8, map.RegWidth());
+          DCHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 1);
+        } else if ((j / 8) < map.RegWidth()) {
+          DCHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 0);
+        } else {
+          // If a register doesn't contain a reference then the bitmap may be shorter than the line.
+        }
+      }
+    } else {
+      DCHECK(reg_bitmap == NULL);
+    }
+  }
+}
+
+void VerifiedMethod::ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
+                                       size_t* gc_points, size_t* ref_bitmap_bits,
+                                       size_t* log2_max_gc_pc) {
+  size_t local_gc_points = 0;
+  size_t max_insn = 0;
+  size_t max_ref_reg = -1;
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
+    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
+      local_gc_points++;
+      max_insn = i;
+      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
+      max_ref_reg = line->GetMaxNonZeroReferenceReg(max_ref_reg);
+    }
+  }
+  *gc_points = local_gc_points;
+  *ref_bitmap_bits = max_ref_reg + 1;  // If max register is 0 we need 1 bit to encode (i.e. +1).
+  size_t i = 0;
+  while ((1U << i) <= max_insn) {
+    i++;
+  }
+  *log2_max_gc_pc = i;
+}
+
+void VerifiedMethod::GenerateDevirtMap(verifier::MethodVerifier* method_verifier) {
+  // It is risky to rely on reg_types for sharpening in cases of soft
+  // verification; we might end up sharpening to a wrong implementation. Just abort.
+  if (method_verifier->HasFailures()) {
+    return;
+  }
+
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  const uint16_t* insns = code_item->insns_;
+  const Instruction* inst = Instruction::At(insns);
+  const Instruction* end = Instruction::At(insns + code_item->insns_size_in_code_units_);
+
+  for (; inst < end; inst = inst->Next()) {
+    bool is_virtual   = (inst->Opcode() == Instruction::INVOKE_VIRTUAL) ||
+        (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE);
+    bool is_interface = (inst->Opcode() == Instruction::INVOKE_INTERFACE) ||
+        (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
+
+    if (!is_interface && !is_virtual) {
+      continue;
+    }
+    // Get reg type for register holding the reference to the object that will be dispatched upon.
+    uint32_t dex_pc = inst->GetDexPc(insns);
+    verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
+    bool is_range = (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE) ||
+        (inst->Opcode() ==  Instruction::INVOKE_INTERFACE_RANGE);
+    const verifier::RegType&
+        reg_type(line->GetRegisterType(is_range ? inst->VRegC_3rc() : inst->VRegC_35c()));
+
+    if (!reg_type.HasClass()) {
+      // We will compute devirtualization information only when we know the Class of the reg type.
+      continue;
+    }
+    mirror::Class* reg_class = reg_type.GetClass();
+    if (reg_class->IsInterface()) {
+      // We can't devirtualize when the known type of the register is an interface.
+      continue;
+    }
+    if (reg_class->IsAbstract() && !reg_class->IsArrayClass()) {
+      // We can't devirtualize abstract classes except on arrays of abstract classes.
+      continue;
+    }
+    mirror::ArtMethod* abstract_method = method_verifier->GetDexCache()->GetResolvedMethod(
+        is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
+    if (abstract_method == NULL) {
+      // If the method is not found in the cache, this means that it was never found
+      // by ResolveMethodAndCheckAccess() called when verifying invoke_*.
+      continue;
+    }
+    // Find the concrete method.
+    mirror::ArtMethod* concrete_method = NULL;
+    if (is_interface) {
+      concrete_method = reg_type.GetClass()->FindVirtualMethodForInterface(abstract_method);
+    }
+    if (is_virtual) {
+      concrete_method = reg_type.GetClass()->FindVirtualMethodForVirtual(abstract_method);
+    }
+    if (concrete_method == NULL || concrete_method->IsAbstract()) {
+      // In cases where concrete_method is not found, or is abstract, continue to the next invoke.
+      continue;
+    }
+    if (reg_type.IsPreciseReference() || concrete_method->IsFinal() ||
+        concrete_method->GetDeclaringClass()->IsFinal()) {
+      // If we knew exactly the class being dispatched upon, or if the target method cannot be
+      // overridden, record the target to be used in the compiler driver.
+      MethodReference concrete_ref(
+          concrete_method->GetDeclaringClass()->GetDexCache()->GetDexFile(),
+          concrete_method->GetDexMethodIndex());
+      devirt_map_.Put(dex_pc, concrete_ref);
+    }
+  }
+}
+
+void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifier) {
+  /*
+   * Walks over the method code and adds to a set any cast instructions in which
+   * the type cast is implicit; the set is used during code generation
+   * to elide these casts.
+   */
+  if (method_verifier->HasFailures()) {
+    return;
+  }
+  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
+  const Instruction* inst = Instruction::At(code_item->insns_);
+  const Instruction* end = Instruction::At(code_item->insns_ +
+                                           code_item->insns_size_in_code_units_);
+
+  for (; inst < end; inst = inst->Next()) {
+    Instruction::Code code = inst->Opcode();
+    if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
+      uint32_t dex_pc = inst->GetDexPc(code_item->insns_);
+      const verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
+      bool is_safe_cast = false;
+      if (code == Instruction::CHECK_CAST) {
+        const verifier::RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
+        const verifier::RegType& cast_type =
+            method_verifier->ResolveCheckedClass(inst->VRegB_21c());
+        is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type);
+      } else {
+        const verifier::RegType& array_type(line->GetRegisterType(inst->VRegB_23x()));
+        // We only know it's safe to assign to an array if the array type is precise. For example,
+        // an Object[] can have any type of object stored in it, but it may also be assigned a
+        // String[] in which case the stores need to be of Strings.
+        if (array_type.IsPreciseReference()) {
+          const verifier::RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
+          const verifier::RegType& component_type = method_verifier->GetRegTypeCache()
+              ->GetComponentType(array_type, method_verifier->GetClassLoader());
+          is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
+        }
+      }
+      if (is_safe_cast) {
+        // Verify ordering for push_back() to the sorted vector.
+        DCHECK(safe_cast_set_.empty() || safe_cast_set_.back() < dex_pc);
+        safe_cast_set_.push_back(dex_pc);
+      }
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h
new file mode 100644
index 0000000..aa0e72a
--- /dev/null
+++ b/compiler/dex/verified_method.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_VERIFIED_METHOD_H_
+#define ART_COMPILER_DEX_VERIFIED_METHOD_H_
+
+#include <vector>
+
+#include "method_reference.h"
+#include "safe_map.h"
+
+namespace art {
+
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
+class VerifiedMethod {
+ public:
+  // Cast elision set type.
+  // Since we're adding the dex PCs to the set in increasing order, a sorted vector
+  // is better for performance (not just memory usage), especially for large sets.
+  typedef std::vector<uint32_t> SafeCastSet;
+
+  // Devirtualization map type; maps a dex offset to a concrete method reference.
+  typedef SafeMap<uint32_t, MethodReference> DevirtualizationMap;
+
+  static const VerifiedMethod* Create(verifier::MethodVerifier* method_verifier, bool compile)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ~VerifiedMethod() = default;
+
+  const std::vector<uint8_t>& GetDexGcMap() const {
+    return dex_gc_map_;
+  }
+
+  const DevirtualizationMap& GetDevirtMap() const {
+    return devirt_map_;
+  }
+
+  const SafeCastSet& GetSafeCastSet() const {
+    return safe_cast_set_;
+  }
+
+  // Returns the devirtualization target method, or nullptr if none.
+  const MethodReference* GetDevirtTarget(uint32_t dex_pc) const;
+
+  // Returns true if the cast can statically be verified to be redundant
+  // by using the check-cast elision peephole optimization in the verifier.
+  bool IsSafeCast(uint32_t pc) const;
+
+ private:
+  VerifiedMethod() = default;
+
+  /*
+   * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
+   * verification). For type-precise determination we have all the data we need, so we just need to
+   * encode it in some clever fashion.
+   * Stores the data in dex_gc_map_; returns true on success and false on failure.
+   */
+  bool GenerateGcMap(verifier::MethodVerifier* method_verifier);
+
+  // Verify that the GC map associated with method_ is well formed.
+  static void VerifyGcMap(verifier::MethodVerifier* method_verifier,
+                          const std::vector<uint8_t>& data);
+
+  // Compute sizes for GC map data.
+  static void ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
+                                size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
+
+  // Generate devirtualization map into devirt_map_.
+  void GenerateDevirtMap(verifier::MethodVerifier* method_verifier)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Generate safe cast set into safe_cast_set_.
+  void GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  std::vector<uint8_t> dex_gc_map_;
+  DevirtualizationMap devirt_map_;
+  SafeCastSet safe_cast_set_;
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_VERIFIED_METHOD_H_
diff --git a/compiler/dex/verified_methods_data.cc b/compiler/dex/verified_methods_data.cc
deleted file mode 100644
index e6c4dda..0000000
--- a/compiler/dex/verified_methods_data.cc
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "base/stl_util.h"
-#include "dex_file.h"
-#include "dex_instruction.h"
-#include "dex_instruction-inl.h"
-#include "base/mutex.h"
-#include "base/mutex-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/class.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
-#include "mirror/dex_cache-inl.h"
-#include "mirror/object.h"
-#include "mirror/object-inl.h"
-#include "verified_methods_data.h"
-#include "verifier/dex_gc_map.h"
-#include "verifier/method_verifier.h"
-#include "verifier/method_verifier-inl.h"
-#include "verifier/register_line.h"
-#include "verifier/register_line-inl.h"
-
-namespace art {
-
-VerifiedMethodsData::VerifiedMethodsData()
-    : dex_gc_maps_lock_("compiler GC maps lock"),
-      dex_gc_maps_(),
-      safecast_map_lock_("compiler Cast Elision lock"),
-      safecast_map_(),
-      devirt_maps_lock_("compiler Devirtualization lock"),
-      devirt_maps_(),
-      rejected_classes_lock_("compiler rejected classes lock"),
-      rejected_classes_() {
-}
-
-VerifiedMethodsData::~VerifiedMethodsData() {
-  Thread* self = Thread::Current();
-  {
-    WriterMutexLock mu(self, dex_gc_maps_lock_);
-    STLDeleteValues(&dex_gc_maps_);
-  }
-  {
-    WriterMutexLock mu(self, safecast_map_lock_);
-    STLDeleteValues(&safecast_map_);
-  }
-  {
-    WriterMutexLock mu(self, devirt_maps_lock_);
-    STLDeleteValues(&devirt_maps_);
-  }
-}
-
-bool VerifiedMethodsData::ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier) {
-  MethodReference ref = method_verifier->GetMethodReference();
-  bool compile = IsCandidateForCompilation(ref, method_verifier->GetAccessFlags());
-  if (compile) {
-    /* Generate a register map and add it to the method. */
-    const std::vector<uint8_t>* dex_gc_map = GenerateGcMap(method_verifier);
-    if (dex_gc_map == NULL) {
-      DCHECK(method_verifier->HasFailures());
-      return false;  // Not a real failure, but a failure to encode
-    }
-    if (kIsDebugBuild) {
-      VerifyGcMap(method_verifier, *dex_gc_map);
-    }
-    SetDexGcMap(ref, dex_gc_map);
-
-    // TODO: move this out when DEX-to-DEX supports devirtualization.
-    if (method_verifier->HasVirtualOrInterfaceInvokes()) {
-      PcToConcreteMethodMap* pc_to_concrete_method = GenerateDevirtMap(method_verifier);
-      if (pc_to_concrete_method != NULL) {
-        SetDevirtMap(ref, pc_to_concrete_method);
-      }
-    }
-  }
-
-  if (method_verifier->HasCheckCasts()) {
-    MethodSafeCastSet* method_to_safe_casts = GenerateSafeCastSet(method_verifier);
-    if (method_to_safe_casts != NULL) {
-      SetSafeCastMap(ref, method_to_safe_casts);
-    }
-  }
-  return true;
-}
-
-const std::vector<uint8_t>* VerifiedMethodsData::GetDexGcMap(MethodReference ref) {
-  ReaderMutexLock mu(Thread::Current(), dex_gc_maps_lock_);
-  DexGcMapTable::const_iterator it = dex_gc_maps_.find(ref);
-  CHECK(it != dex_gc_maps_.end())
-    << "Didn't find GC map for: " << PrettyMethod(ref.dex_method_index, *ref.dex_file);
-  CHECK(it->second != NULL);
-  return it->second;
-}
-
-const MethodReference* VerifiedMethodsData::GetDevirtMap(const MethodReference& ref,
-                                                                    uint32_t dex_pc) {
-  ReaderMutexLock mu(Thread::Current(), devirt_maps_lock_);
-  DevirtualizationMapTable::const_iterator it = devirt_maps_.find(ref);
-  if (it == devirt_maps_.end()) {
-    return NULL;
-  }
-
-  // Look up the PC in the map, get the concrete method to execute and return its reference.
-  PcToConcreteMethodMap::const_iterator pc_to_concrete_method = it->second->find(dex_pc);
-  if (pc_to_concrete_method != it->second->end()) {
-    return &(pc_to_concrete_method->second);
-  } else {
-    return NULL;
-  }
-}
-
-bool VerifiedMethodsData::IsSafeCast(MethodReference ref, uint32_t pc) {
-  ReaderMutexLock mu(Thread::Current(), safecast_map_lock_);
-  SafeCastMap::const_iterator it = safecast_map_.find(ref);
-  if (it == safecast_map_.end()) {
-    return false;
-  }
-
-  // Look up the cast address in the set of safe casts
-  // Use binary_search for lookup in the sorted vector.
-  return std::binary_search(it->second->begin(), it->second->end(), pc);
-}
-
-void VerifiedMethodsData::AddRejectedClass(ClassReference ref) {
-  {
-    WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
-    rejected_classes_.insert(ref);
-  }
-  DCHECK(IsClassRejected(ref));
-}
-
-bool VerifiedMethodsData::IsClassRejected(ClassReference ref) {
-  ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_);
-  return (rejected_classes_.find(ref) != rejected_classes_.end());
-}
-
-bool VerifiedMethodsData::IsCandidateForCompilation(MethodReference& method_ref,
-                                                    const uint32_t access_flags) {
-#ifdef ART_SEA_IR_MODE
-    bool use_sea = Runtime::Current()->IsSeaIRMode();
-    use_sea = use_sea && (std::string::npos != PrettyMethod(
-                          method_ref.dex_method_index, *(method_ref.dex_file)).find("fibonacci"));
-    if (use_sea) return true;
-#endif
-  // Don't compile class initializers, ever.
-  if (((access_flags & kAccConstructor) != 0) && ((access_flags & kAccStatic) != 0)) {
-    return false;
-  }
-  return (Runtime::Current()->GetCompilerFilter() != Runtime::kInterpretOnly);
-}
-
-const std::vector<uint8_t>* VerifiedMethodsData::GenerateGcMap(
-    verifier::MethodVerifier* method_verifier) {
-  size_t num_entries, ref_bitmap_bits, pc_bits;
-  ComputeGcMapSizes(method_verifier, &num_entries, &ref_bitmap_bits, &pc_bits);
-  // There's a single byte to encode the size of each bitmap
-  if (ref_bitmap_bits >= (8 /* bits per byte */ * 8192 /* 13-bit size */ )) {
-    // TODO: either a better GC map format or per method failures
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Cannot encode GC map for method with " << ref_bitmap_bits << " registers";
-    return NULL;
-  }
-  size_t ref_bitmap_bytes = (ref_bitmap_bits + 7) / 8;
-  // There are 2 bytes to encode the number of entries
-  if (num_entries >= 65536) {
-    // TODO: either a better GC map format or per method failures
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Cannot encode GC map for method with " << num_entries << " entries";
-    return NULL;
-  }
-  size_t pc_bytes;
-  verifier::RegisterMapFormat format;
-  if (pc_bits <= 8) {
-    format = verifier::kRegMapFormatCompact8;
-    pc_bytes = 1;
-  } else if (pc_bits <= 16) {
-    format = verifier::kRegMapFormatCompact16;
-    pc_bytes = 2;
-  } else {
-    // TODO: either a better GC map format or per method failures
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Cannot encode GC map for method with "
-        << (1 << pc_bits) << " instructions (number is rounded up to nearest power of 2)";
-    return NULL;
-  }
-  size_t table_size = ((pc_bytes + ref_bitmap_bytes) * num_entries) + 4;
-  std::vector<uint8_t>* table = new std::vector<uint8_t>;
-  if (table == NULL) {
-    method_verifier->Fail(verifier::VERIFY_ERROR_BAD_CLASS_HARD)
-        << "Failed to encode GC map (size=" << table_size << ")";
-    return NULL;
-  }
-  table->reserve(table_size);
-  // Write table header
-  table->push_back(format | ((ref_bitmap_bytes & ~0xFF) >> 5));
-  table->push_back(ref_bitmap_bytes & 0xFF);
-  table->push_back(num_entries & 0xFF);
-  table->push_back((num_entries >> 8) & 0xFF);
-  // Write table data
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      table->push_back(i & 0xFF);
-      if (pc_bytes == 2) {
-        table->push_back((i >> 8) & 0xFF);
-      }
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      line->WriteReferenceBitMap(*table, ref_bitmap_bytes);
-    }
-  }
-  DCHECK_EQ(table->size(), table_size);
-  return table;
-}
-
-void VerifiedMethodsData::VerifyGcMap(verifier::MethodVerifier* method_verifier,
-                                      const std::vector<uint8_t>& data) {
-  // Check that for every GC point there is a map entry, there aren't entries for non-GC points,
-  // that the table data is well formed and all references are marked (or not) in the bitmap
-  verifier::DexPcToReferenceMap map(&data[0]);
-  DCHECK_EQ(data.size(), map.RawSize());
-  size_t map_index = 0;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    const uint8_t* reg_bitmap = map.FindBitMap(i, false);
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      CHECK_LT(map_index, map.NumEntries());
-      CHECK_EQ(map.GetDexPc(map_index), i);
-      CHECK_EQ(map.GetBitMap(map_index), reg_bitmap);
-      map_index++;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      for (size_t j = 0; j < code_item->registers_size_; j++) {
-        if (line->GetRegisterType(j).IsNonZeroReferenceTypes()) {
-          CHECK_LT(j / 8, map.RegWidth());
-          CHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 1);
-        } else if ((j / 8) < map.RegWidth()) {
-          CHECK_EQ((reg_bitmap[j / 8] >> (j % 8)) & 1, 0);
-        } else {
-          // If a register doesn't contain a reference then the bitmap may be shorter than the line
-        }
-      }
-    } else {
-      CHECK(reg_bitmap == NULL);
-    }
-  }
-}
-
-void VerifiedMethodsData::ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
-                                            size_t* gc_points, size_t* ref_bitmap_bits,
-                                            size_t* log2_max_gc_pc) {
-  size_t local_gc_points = 0;
-  size_t max_insn = 0;
-  size_t max_ref_reg = -1;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  for (size_t i = 0; i < code_item->insns_size_in_code_units_; i++) {
-    if (method_verifier->GetInstructionFlags(i).IsCompileTimeInfoPoint()) {
-      local_gc_points++;
-      max_insn = i;
-      verifier::RegisterLine* line = method_verifier->GetRegLine(i);
-      max_ref_reg = line->GetMaxNonZeroReferenceReg(max_ref_reg);
-    }
-  }
-  *gc_points = local_gc_points;
-  *ref_bitmap_bits = max_ref_reg + 1;  // if max register is 0 we need 1 bit to encode (ie +1)
-  size_t i = 0;
-  while ((1U << i) <= max_insn) {
-    i++;
-  }
-  *log2_max_gc_pc = i;
-}
-
-void VerifiedMethodsData::SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* gc_map) {
-  DCHECK(Runtime::Current()->IsCompiler());
-  {
-    WriterMutexLock mu(Thread::Current(), dex_gc_maps_lock_);
-    DexGcMapTable::iterator it = dex_gc_maps_.find(ref);
-    if (it != dex_gc_maps_.end()) {
-      delete it->second;
-      dex_gc_maps_.erase(it);
-    }
-    dex_gc_maps_.Put(ref, gc_map);
-  }
-  DCHECK(GetDexGcMap(ref) != NULL);
-}
-
-VerifiedMethodsData::MethodSafeCastSet* VerifiedMethodsData::GenerateSafeCastSet(
-    verifier::MethodVerifier* method_verifier) {
-  /*
-   * Walks over the method code and adds any cast instructions in which
-   * the type cast is implicit to a set, which is used in the code generation
-   * to elide these casts.
-   */
-  if (method_verifier->HasFailures()) {
-    return NULL;
-  }
-  UniquePtr<MethodSafeCastSet> mscs;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  const Instruction* inst = Instruction::At(code_item->insns_);
-  const Instruction* end = Instruction::At(code_item->insns_ +
-                                           code_item->insns_size_in_code_units_);
-
-  for (; inst < end; inst = inst->Next()) {
-    Instruction::Code code = inst->Opcode();
-    if ((code == Instruction::CHECK_CAST) || (code == Instruction::APUT_OBJECT)) {
-      uint32_t dex_pc = inst->GetDexPc(code_item->insns_);
-      const verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
-      bool is_safe_cast = false;
-      if (code == Instruction::CHECK_CAST) {
-        const verifier::RegType& reg_type(line->GetRegisterType(inst->VRegA_21c()));
-        const verifier::RegType& cast_type =
-            method_verifier->ResolveCheckedClass(inst->VRegB_21c());
-        is_safe_cast = cast_type.IsStrictlyAssignableFrom(reg_type);
-      } else {
-        const verifier::RegType& array_type(line->GetRegisterType(inst->VRegB_23x()));
-        // We only know its safe to assign to an array if the array type is precise. For example,
-        // an Object[] can have any type of object stored in it, but it may also be assigned a
-        // String[] in which case the stores need to be of Strings.
-        if (array_type.IsPreciseReference()) {
-          const verifier::RegType& value_type(line->GetRegisterType(inst->VRegA_23x()));
-          const verifier::RegType& component_type = method_verifier->GetRegTypeCache()
-              ->GetComponentType(array_type, method_verifier->GetClassLoader());
-          is_safe_cast = component_type.IsStrictlyAssignableFrom(value_type);
-        }
-      }
-      if (is_safe_cast) {
-        if (mscs.get() == nullptr) {
-          mscs.reset(new MethodSafeCastSet());
-        } else {
-          DCHECK_LT(mscs->back(), dex_pc);  // Verify ordering for push_back() to the sorted vector.
-        }
-        mscs->push_back(dex_pc);
-      }
-    }
-  }
-  return mscs.release();
-}
-
-void  VerifiedMethodsData::SetSafeCastMap(MethodReference ref, const MethodSafeCastSet* cast_set) {
-  WriterMutexLock mu(Thread::Current(), safecast_map_lock_);
-  SafeCastMap::iterator it = safecast_map_.find(ref);
-  if (it != safecast_map_.end()) {
-    delete it->second;
-    safecast_map_.erase(it);
-  }
-  safecast_map_.Put(ref, cast_set);
-  DCHECK(safecast_map_.find(ref) != safecast_map_.end());
-}
-
-VerifiedMethodsData::PcToConcreteMethodMap* VerifiedMethodsData::GenerateDevirtMap(
-    verifier::MethodVerifier* method_verifier) {
-  // It is risky to rely on reg_types for sharpening in cases of soft
-  // verification, we might end up sharpening to a wrong implementation. Just abort.
-  if (method_verifier->HasFailures()) {
-    return NULL;
-  }
-
-  UniquePtr<PcToConcreteMethodMap> pc_to_concrete_method_map;
-  const DexFile::CodeItem* code_item = method_verifier->CodeItem();
-  const uint16_t* insns = code_item->insns_;
-  const Instruction* inst = Instruction::At(insns);
-  const Instruction* end = Instruction::At(insns + code_item->insns_size_in_code_units_);
-
-  for (; inst < end; inst = inst->Next()) {
-    bool is_virtual   = (inst->Opcode() == Instruction::INVOKE_VIRTUAL) ||
-        (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE);
-    bool is_interface = (inst->Opcode() == Instruction::INVOKE_INTERFACE) ||
-        (inst->Opcode() == Instruction::INVOKE_INTERFACE_RANGE);
-
-    if (!is_interface && !is_virtual) {
-      continue;
-    }
-    // Get reg type for register holding the reference to the object that will be dispatched upon.
-    uint32_t dex_pc = inst->GetDexPc(insns);
-    verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
-    bool is_range = (inst->Opcode() ==  Instruction::INVOKE_VIRTUAL_RANGE) ||
-        (inst->Opcode() ==  Instruction::INVOKE_INTERFACE_RANGE);
-    const verifier::RegType&
-        reg_type(line->GetRegisterType(is_range ? inst->VRegC_3rc() : inst->VRegC_35c()));
-
-    if (!reg_type.HasClass()) {
-      // We will compute devirtualization information only when we know the Class of the reg type.
-      continue;
-    }
-    mirror::Class* reg_class = reg_type.GetClass();
-    if (reg_class->IsInterface()) {
-      // We can't devirtualize when the known type of the register is an interface.
-      continue;
-    }
-    if (reg_class->IsAbstract() && !reg_class->IsArrayClass()) {
-      // We can't devirtualize abstract classes except on arrays of abstract classes.
-      continue;
-    }
-    mirror::ArtMethod* abstract_method = method_verifier->GetDexCache()->GetResolvedMethod(
-        is_range ? inst->VRegB_3rc() : inst->VRegB_35c());
-    if (abstract_method == NULL) {
-      // If the method is not found in the cache this means that it was never found
-      // by ResolveMethodAndCheckAccess() called when verifying invoke_*.
-      continue;
-    }
-    // Find the concrete method.
-    mirror::ArtMethod* concrete_method = NULL;
-    if (is_interface) {
-      concrete_method = reg_type.GetClass()->FindVirtualMethodForInterface(abstract_method);
-    }
-    if (is_virtual) {
-      concrete_method = reg_type.GetClass()->FindVirtualMethodForVirtual(abstract_method);
-    }
-    if (concrete_method == NULL || concrete_method->IsAbstract()) {
-      // In cases where concrete_method is not found, or is abstract, continue to the next invoke.
-      continue;
-    }
-    if (reg_type.IsPreciseReference() || concrete_method->IsFinal() ||
-        concrete_method->GetDeclaringClass()->IsFinal()) {
-      // If we knew exactly the class being dispatched upon, or if the target method cannot be
-      // overridden record the target to be used in the compiler driver.
-      if (pc_to_concrete_method_map.get() == NULL) {
-        pc_to_concrete_method_map.reset(new PcToConcreteMethodMap());
-      }
-      MethodReference concrete_ref(
-          concrete_method->GetDeclaringClass()->GetDexCache()->GetDexFile(),
-          concrete_method->GetDexMethodIndex());
-      pc_to_concrete_method_map->Put(dex_pc, concrete_ref);
-    }
-  }
-  return pc_to_concrete_method_map.release();
-}
-
-void  VerifiedMethodsData::SetDevirtMap(MethodReference ref,
-                                   const PcToConcreteMethodMap* devirt_map) {
-  WriterMutexLock mu(Thread::Current(), devirt_maps_lock_);
-  DevirtualizationMapTable::iterator it = devirt_maps_.find(ref);
-  if (it != devirt_maps_.end()) {
-    delete it->second;
-    devirt_maps_.erase(it);
-  }
-
-  devirt_maps_.Put(ref, devirt_map);
-  DCHECK(devirt_maps_.find(ref) != devirt_maps_.end());
-}
-
-}  // namespace art
diff --git a/compiler/dex/verified_methods_data.h b/compiler/dex/verified_methods_data.h
deleted file mode 100644
index d495dff..0000000
--- a/compiler/dex/verified_methods_data.h
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DEX_VERIFIED_METHODS_DATA_H_
-#define ART_COMPILER_DEX_VERIFIED_METHODS_DATA_H_
-
-#include <stdint.h>
-#include <set>
-#include <vector>
-
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "class_reference.h"
-#include "method_reference.h"
-#include "safe_map.h"
-
-namespace art {
-
-namespace verifier {
-class MethodVerifier;
-}  // namespace verifier
-
-class VerifiedMethodsData {
-  public:
-    VerifiedMethodsData();
-    ~VerifiedMethodsData();
-
-    bool ProcessVerifiedMethod(verifier::MethodVerifier* method_verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-        LOCKS_EXCLUDED(dex_gc_maps_lock_, devirt_maps_lock_, safecast_map_lock_);
-
-    const std::vector<uint8_t>* GetDexGcMap(MethodReference ref)
-        LOCKS_EXCLUDED(dex_gc_maps_lock_);
-
-    const MethodReference* GetDevirtMap(const MethodReference& ref, uint32_t dex_pc)
-        LOCKS_EXCLUDED(devirt_maps_lock_);
-
-    // Returns true if the cast can statically be verified to be redundant
-    // by using the check-cast elision peephole optimization in the verifier
-    bool IsSafeCast(MethodReference ref, uint32_t pc) LOCKS_EXCLUDED(safecast_map_lock_);
-
-    void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
-    bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
-
-    static bool IsCandidateForCompilation(MethodReference& method_ref,
-                                          const uint32_t access_flags);
-
-  private:
-    /*
-     * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of
-     * verification). For type-precise determination we have all the data we need, so we just need to
-     * encode it in some clever fashion.
-     * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
-     */
-    const std::vector<uint8_t>* GenerateGcMap(verifier::MethodVerifier* method_verifier);
-
-    // Verify that the GC map associated with method_ is well formed
-    void VerifyGcMap(verifier::MethodVerifier* method_verifier, const std::vector<uint8_t>& data);
-
-    // Compute sizes for GC map data
-    void ComputeGcMapSizes(verifier::MethodVerifier* method_verifier,
-                           size_t* gc_points, size_t* ref_bitmap_bits, size_t* log2_max_gc_pc);
-
-    // All the GC maps that the verifier has created
-    typedef SafeMap<const MethodReference, const std::vector<uint8_t>*,
-        MethodReferenceComparator> DexGcMapTable;
-    ReaderWriterMutex dex_gc_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    DexGcMapTable dex_gc_maps_ GUARDED_BY(dex_gc_maps_lock_);
-    void SetDexGcMap(MethodReference ref, const std::vector<uint8_t>* dex_gc_map)
-        LOCKS_EXCLUDED(dex_gc_maps_lock_);
-
-    // Cast elision types.
-    // Since we're adding the dex PCs to the set in increasing order, a sorted vector
-    // is better for performance (not just memory usage), especially for large sets.
-    typedef std::vector<uint32_t> MethodSafeCastSet;
-    typedef SafeMap<MethodReference, const MethodSafeCastSet*,
-        MethodReferenceComparator> SafeCastMap;
-    MethodSafeCastSet* GenerateSafeCastSet(verifier::MethodVerifier* method_verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-    void SetSafeCastMap(MethodReference ref, const MethodSafeCastSet* mscs)
-        LOCKS_EXCLUDED(safecast_map_lock_);
-    ReaderWriterMutex safecast_map_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    SafeCastMap safecast_map_ GUARDED_BY(safecast_map_lock_);
-
-    // Devirtualization map.
-    typedef SafeMap<uint32_t, MethodReference> PcToConcreteMethodMap;
-    typedef SafeMap<MethodReference, const PcToConcreteMethodMap*,
-        MethodReferenceComparator> DevirtualizationMapTable;
-    PcToConcreteMethodMap* GenerateDevirtMap(verifier::MethodVerifier* method_verifier)
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-    ReaderWriterMutex devirt_maps_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    DevirtualizationMapTable devirt_maps_ GUARDED_BY(devirt_maps_lock_);
-    void SetDevirtMap(MethodReference ref, const PcToConcreteMethodMap* pc_method_map)
-          LOCKS_EXCLUDED(devirt_maps_lock_);
-
-    // Rejected classes
-    typedef std::set<ClassReference> RejectedClassesTable;
-    ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-    RejectedClassesTable rejected_classes_ GUARDED_BY(rejected_classes_lock_);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_DEX_VERIFIED_METHODS_DATA_H_
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index f211e3f..f8dc223 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -410,7 +410,8 @@
 
 void MIRGraph::InitRegLocations() {
   /* Allocate the location map */
-  RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(GetNumSSARegs() * sizeof(*loc),
+  int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps();
+  RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc),
                                                              ArenaAllocator::kAllocRegAlloc));
   for (int i = 0; i < GetNumSSARegs(); i++) {
     loc[i] = fresh_loc;
@@ -418,13 +419,11 @@
     loc[i].is_const = is_constant_v_->IsBitSet(i);
   }
 
-  /* Patch up the locations for Method* and the compiler temps */
-  loc[method_sreg_].location = kLocCompilerTemp;
-  loc[method_sreg_].defined = true;
-  for (int i = 0; i < cu_->num_compiler_temps; i++) {
-    CompilerTemp* ct = compiler_temps_.Get(i);
-    loc[ct->s_reg].location = kLocCompilerTemp;
-    loc[ct->s_reg].defined = true;
+  /* Patch up the locations for the compiler temps */
+  GrowableArray<CompilerTemp*>::Iterator iter(&compiler_temps_);
+  for (CompilerTemp* ct = iter.Next(); ct != NULL; ct = iter.Next()) {
+    loc[ct->s_reg_low].location = kLocCompilerTemp;
+    loc[ct->s_reg_low].defined = true;
   }
 
   reg_location_ = loc;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 714dc4c..5b9d66c 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -27,7 +27,9 @@
 #include "class_linker.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
+#include "dex/quick/dex_file_method_inliner.h"
 #include "jni_internal.h"
 #include "object_utils.h"
 #include "runtime.h"
@@ -48,6 +50,7 @@
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "verifier/method_verifier.h"
+#include "verifier/method_verifier-inl.h"
 
 #if defined(ART_USE_PORTABLE_COMPILER)
 #include "elf_writer_mclinker.h"
@@ -336,13 +339,13 @@
 extern "C" void compilerLLVMSetBitcodeFileName(art::CompilerDriver& driver,
                                                std::string const& filename);
 
-CompilerDriver::CompilerDriver(VerifiedMethodsData* verified_methods_data,
+CompilerDriver::CompilerDriver(VerificationResults* verification_results,
                                DexFileToMethodInlinerMap* method_inliner_map,
                                CompilerBackend compiler_backend, InstructionSet instruction_set,
                                InstructionSetFeatures instruction_set_features,
                                bool image, DescriptorSet* image_classes, size_t thread_count,
-                               bool dump_stats, bool dump_passes, CumulativeLogger* timer)
-    : verified_methods_data_(verified_methods_data),
+                               bool dump_stats)
+    : verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
       compiler_backend_(compiler_backend),
       instruction_set_(instruction_set),
@@ -356,8 +359,6 @@
       start_ns_(0),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
-      dump_passes_(dump_passes),
-      timings_logger_(timer),
       compiler_library_(NULL),
       compiler_(NULL),
       compiler_context_(NULL),
@@ -441,6 +442,10 @@
     MutexLock mu(self, compiled_methods_lock_);
     STLDeleteElements(&methods_to_patch_);
   }
+  {
+    MutexLock mu(self, compiled_methods_lock_);
+    STLDeleteElements(&classes_to_patch_);
+  }
   CHECK_PTHREAD_CALL(pthread_key_delete, (tls_key_), "delete tls key");
   typedef void (*UninitCompilerContextFn)(CompilerDriver&);
   UninitCompilerContextFn uninit_compiler_context;
@@ -553,12 +558,15 @@
   }
 }
 
-void CompilerDriver::CompileOne(const mirror::ArtMethod* method, TimingLogger& timings) {
+void CompilerDriver::CompileOne(mirror::ArtMethod* method, TimingLogger& timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   Thread* self = Thread::Current();
   jobject jclass_loader;
   const DexFile* dex_file;
   uint16_t class_def_idx;
+  uint32_t method_idx = method->GetDexMethodIndex();
+  uint32_t access_flags = method->GetAccessFlags();
+  InvokeType invoke_type = method->GetInvokeType();
   {
     ScopedObjectAccessUnchecked soa(self);
     ScopedLocalRef<jobject>
@@ -570,6 +578,7 @@
     dex_file = &mh.GetDexFile();
     class_def_idx = mh.GetClassDefIndex();
   }
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
   self->TransitionFromRunnableToSuspended(kNative);
 
   std::vector<const DexFile*> dex_files;
@@ -578,8 +587,6 @@
   UniquePtr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
   PreCompile(jclass_loader, dex_files, *thread_pool.get(), timings);
 
-  uint32_t method_idx = method->GetDexMethodIndex();
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
   // Can we run DEX-to-DEX compiler on this class ?
   DexToDexCompilationLevel dex_to_dex_compilation_level = kDontDexToDexCompile;
   {
@@ -589,8 +596,8 @@
                                               soa.Decode<mirror::ClassLoader*>(jclass_loader));
     dex_to_dex_compilation_level = GetDexToDexCompilationlevel(class_loader, *dex_file, class_def);
   }
-  CompileMethod(code_item, method->GetAccessFlags(), method->GetInvokeType(),
-                class_def_idx, method_idx, jclass_loader, *dex_file, dex_to_dex_compilation_level);
+  CompileMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader,
+                *dex_file, dex_to_dex_compilation_level);
 
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 
@@ -908,6 +915,51 @@
   return result;
 }
 
+bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
+                                        bool* is_type_initialized, bool* use_direct_type_ptr,
+                                        uintptr_t* direct_type_ptr) {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
+  mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
+  if (resolved_class == nullptr) {
+    return false;
+  }
+  const bool compiling_boot = Runtime::Current()->GetHeap()->IsCompilingBoot();
+  if (compiling_boot) {
+    // boot -> boot class pointers.
+    // True if the class will be in the image being compiled (boot image compilation).
+    const bool is_image_class = IsImage() && IsImageClass(
+        dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
+    // True if pc relative load works.
+    const bool support_boot_image_fixup = GetSupportBootImageFixup();
+    if (is_image_class && support_boot_image_fixup) {
+      *is_type_initialized = resolved_class->IsInitialized();
+      *use_direct_type_ptr = false;
+      *direct_type_ptr = 0;
+      return true;
+    } else {
+      return false;
+    }
+  } else {
+    // True if the class is already in the boot image when the app is compiled.
+    const bool class_in_image =
+        Runtime::Current()->GetHeap()->FindSpaceFromObject(resolved_class, false)->IsImageSpace();
+    if (class_in_image) {
+      // boot -> app class pointers.
+      *is_type_initialized = resolved_class->IsInitialized();
+      *use_direct_type_ptr = true;
+      *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class);
+      return true;
+    } else {
+      // app -> app class pointers.
+      // Give up because app does not have an image and class
+      // isn't created at compile time.  TODO: implement this
+      // if/when each app gets an image.
+      return false;
+    }
+  }
+}
+
 static mirror::Class* ComputeCompilingMethodsClass(ScopedObjectAccess& soa,
                                                    SirtRef<mirror::DexCache>& dex_cache,
                                                    const DexCompilationUnit* mUnit)
@@ -945,6 +997,30 @@
                                                 class_loader, NULL, type);
 }
 
+bool CompilerDriver::ComputeSpecialAccessorInfo(uint32_t field_idx, bool is_put,
+                                                verifier::MethodVerifier* verifier,
+                                                InlineIGetIPutData* result) {
+  mirror::DexCache* dex_cache = verifier->GetDexCache();
+  uint32_t method_idx = verifier->GetMethodReference().dex_method_index;
+  mirror::ArtMethod* method = dex_cache->GetResolvedMethod(method_idx);
+  mirror::ArtField* field = dex_cache->GetResolvedField(field_idx);
+  if (method == nullptr || field == nullptr) {
+    return false;
+  }
+  mirror::Class* method_class = method->GetDeclaringClass();
+  mirror::Class* field_class = field->GetDeclaringClass();
+  if (!method_class->CanAccessResolvedField(field_class, field, dex_cache, field_idx) ||
+      (is_put && field->IsFinal() && method_class != field_class)) {
+    return false;
+  }
+  DCHECK_GE(field->GetOffset().Int32Value(), 0);
+  result->method_is_static = method->IsStatic();
+  result->field_idx = field_idx;
+  result->field_offset = field->GetOffset().Int32Value();
+  result->is_volatile = field->IsVolatile();
+  return true;
+}
+
 bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
                                               bool is_put, int* field_offset, bool* is_volatile) {
   ScopedObjectAccess soa(Thread::Current());
@@ -960,21 +1036,8 @@
         ComputeCompilingMethodsClass(soa, dex_cache, mUnit);
     if (referrer_class != NULL) {
       mirror::Class* fields_class = resolved_field->GetDeclaringClass();
-      bool access_ok = referrer_class->CanAccess(fields_class) &&
-                       referrer_class->CanAccessMember(fields_class,
-                                                       resolved_field->GetAccessFlags());
-      if (!access_ok) {
-        // The referring class can't access the resolved field, this may occur as a result of a
-        // protected field being made public by a sub-class. Resort to the dex file to determine
-        // the correct class for the access check.
-        const DexFile& dex_file = *referrer_class->GetDexCache()->GetDexFile();
-        mirror::Class* dex_fields_class = mUnit->GetClassLinker()->ResolveType(dex_file,
-                                                         dex_file.GetFieldId(field_idx).class_idx_,
-                                                         referrer_class);
-        access_ok = referrer_class->CanAccess(dex_fields_class) &&
-                    referrer_class->CanAccessMember(dex_fields_class,
-                                                    resolved_field->GetAccessFlags());
-      }
+      bool access_ok = referrer_class->CanAccessResolvedField(fields_class, resolved_field,
+                                                              dex_cache.get(), field_idx);
       bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal() &&
           fields_class != referrer_class;
       if (access_ok && !is_write_to_final_from_wrong_class) {
@@ -1020,23 +1083,8 @@
         stats_->ResolvedLocalStaticField();
         return true;  // fast path
       } else {
-        bool access_ok = referrer_class->CanAccess(fields_class) &&
-                         referrer_class->CanAccessMember(fields_class,
-                                                         resolved_field->GetAccessFlags());
-        if (!access_ok) {
-          // The referring class can't access the resolved field, this may occur as a result of a
-          // protected field being made public by a sub-class. Resort to the dex file to determine
-          // the correct class for the access check. Don't change the field's class as that is
-          // used to identify the SSB.
-          const DexFile& dex_file = *referrer_class->GetDexCache()->GetDexFile();
-          mirror::Class* dex_fields_class =
-              mUnit->GetClassLinker()->ResolveType(dex_file,
-                                                   dex_file.GetFieldId(field_idx).class_idx_,
-                                                   referrer_class);
-          access_ok = referrer_class->CanAccess(dex_fields_class) &&
-                      referrer_class->CanAccessMember(dex_fields_class,
-                                                      resolved_field->GetAccessFlags());
-        }
+        bool access_ok = referrer_class->CanAccessResolvedField(fields_class, resolved_field,
+                                                                dex_cache.get(), field_idx);
         bool is_write_to_final_from_wrong_class = is_put && resolved_field->IsFinal();
         if (access_ok && !is_write_to_final_from_wrong_class) {
           // We have the resolved field, we must make it into a index for the referrer
@@ -1150,7 +1198,7 @@
     if (no_guarantee_of_dex_cache_entry) {
       // See if the method is also declared in this dex cache.
       uint32_t dex_method_idx = MethodHelper(method).FindDexMethodIndexInOtherDexFile(
-          *referrer_class->GetDexCache()->GetDexFile());
+          *target_method->dex_file);
       if (dex_method_idx != DexFile::kDexNoIndex) {
         target_method->dex_method_index = dex_method_idx;
       } else {
@@ -1178,13 +1226,23 @@
         CHECK(!method->IsAbstract());
         *type = sharp_type;
         *direct_method = reinterpret_cast<uintptr_t>(method);
-        *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode());
+        if (compiler_backend_ == kQuick) {
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode());
+        } else {
+          CHECK_EQ(compiler_backend_, kPortable);
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromPortableCompiledCode());
+        }
         target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile();
         target_method->dex_method_index = method->GetDexMethodIndex();
       } else if (!must_use_direct_pointers) {
         // Set the code and rely on the dex cache for the method.
         *type = sharp_type;
-        *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromCompiledCode());
+        if (compiler_backend_ == kQuick) {
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode());
+        } else {
+          CHECK_EQ(compiler_backend_, kPortable);
+          *direct_code = reinterpret_cast<uintptr_t>(method->GetEntryPointFromPortableCompiledCode());
+        }
       } else {
         // Direct pointers were required but none were available.
         VLOG(compiler) << "Dex cache devirtualization failed for: " << PrettyMethod(method);
@@ -1219,20 +1277,8 @@
     bool icce = resolved_method->CheckIncompatibleClassChange(*invoke_type);
     if (referrer_class != NULL && !icce) {
       mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-      if (!referrer_class->CanAccess(methods_class) ||
-          !referrer_class->CanAccessMember(methods_class,
-                                           resolved_method->GetAccessFlags())) {
-        // The referring class can't access the resolved method, this may occur as a result of a
-        // protected method being made public by implementing an interface that re-declares the
-        // method public. Resort to the dex file to determine the correct class for the access
-        // check.
-        uint16_t class_idx =
-            target_method->dex_file->GetMethodId(target_method->dex_method_index).class_idx_;
-        methods_class = mUnit->GetClassLinker()->ResolveType(*target_method->dex_file,
-                                                             class_idx, referrer_class);
-      }
-      if (referrer_class->CanAccess(methods_class) &&
-          referrer_class->CanAccessMember(methods_class, resolved_method->GetAccessFlags())) {
+      if (referrer_class->CanAccessResolvedMethod(methods_class, resolved_method, dex_cache.get(),
+                                                  target_method->dex_method_index)) {
         const bool enableFinalBasedSharpening = enable_devirtualization;
         // Sharpen a virtual call into a direct call when the target is known not to have been
         // overridden (ie is final).
@@ -1247,9 +1293,11 @@
 
         if (enableFinalBasedSharpening && (can_sharpen_virtual_based_on_type ||
                                             can_sharpen_super_based_on_type)) {
-          // Sharpen a virtual call into a direct call. The method_idx is into referrer's
-          // dex cache, check that this resolved method is where we expect it.
-          CHECK(referrer_class->GetDexCache()->GetResolvedMethod(target_method->dex_method_index) ==
+          // Sharpen a virtual call into a direct call. The method_idx is into the DexCache
+          // associated with target_method->dex_file.
+          CHECK(target_method->dex_file == mUnit->GetDexFile());
+          DCHECK(dex_cache.get() == mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile()));
+          CHECK(dex_cache->GetResolvedMethod(target_method->dex_method_index) ==
                 resolved_method) << PrettyMethod(resolved_method);
           InvokeType orig_invoke_type = *invoke_type;
           GetCodeAndMethodForDirectCall(invoke_type, kDirect, false, referrer_class, resolved_method,
@@ -1265,9 +1313,9 @@
         if (enableVerifierBasedSharpening && (*invoke_type == kVirtual ||
                                               *invoke_type == kInterface)) {
           // Did the verifier record a more precise invoke target based on its type information?
-          const MethodReference caller_method(mUnit->GetDexFile(), mUnit->GetDexMethodIndex());
+          DCHECK(mUnit->GetVerifiedMethod() != nullptr);
           const MethodReference* devirt_map_target =
-              verified_methods_data_->GetDevirtMap(caller_method, dex_pc);
+              mUnit->GetVerifiedMethod()->GetDevirtTarget(dex_pc);
           if (devirt_map_target != NULL) {
             SirtRef<mirror::DexCache> target_dex_cache(soa.Self(), mUnit->GetClassLinker()->FindDexCache(*devirt_map_target->dex_file));
             SirtRef<mirror::ClassLoader> class_loader(soa.Self(), soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader()));
@@ -1314,8 +1362,15 @@
   return false;  // Incomplete knowledge needs slow path.
 }
 
-bool CompilerDriver::IsSafeCast(const MethodReference& mr, uint32_t dex_pc) {
-  bool result = verified_methods_data_->IsSafeCast(mr, dex_pc);
+const VerifiedMethod* CompilerDriver::GetVerifiedMethod(const DexFile* dex_file,
+                                                        uint32_t method_idx) const {
+  MethodReference ref(dex_file, method_idx);
+  return verification_results_->GetVerifiedMethod(ref);
+}
+
+bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc) {
+  DCHECK(mUnit->GetVerifiedMethod() != nullptr);
+  bool result = mUnit->GetVerifiedMethod()->IsSafeCast(dex_pc);
   if (result) {
     stats_->SafeCast();
   } else {
@@ -1333,13 +1388,13 @@
                                   InvokeType target_invoke_type,
                                   size_t literal_offset) {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  code_to_patch_.push_back(new PatchInformation(dex_file,
-                                                referrer_class_def_idx,
-                                                referrer_method_idx,
-                                                referrer_invoke_type,
-                                                target_method_idx,
-                                                target_invoke_type,
-                                                literal_offset));
+  code_to_patch_.push_back(new CallPatchInformation(dex_file,
+                                                    referrer_class_def_idx,
+                                                    referrer_method_idx,
+                                                    referrer_invoke_type,
+                                                    target_method_idx,
+                                                    target_invoke_type,
+                                                    literal_offset));
 }
 void CompilerDriver::AddMethodPatch(const DexFile* dex_file,
                                     uint16_t referrer_class_def_idx,
@@ -1349,13 +1404,25 @@
                                     InvokeType target_invoke_type,
                                     size_t literal_offset) {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
-  methods_to_patch_.push_back(new PatchInformation(dex_file,
-                                                   referrer_class_def_idx,
-                                                   referrer_method_idx,
-                                                   referrer_invoke_type,
-                                                   target_method_idx,
-                                                   target_invoke_type,
-                                                   literal_offset));
+  methods_to_patch_.push_back(new CallPatchInformation(dex_file,
+                                                       referrer_class_def_idx,
+                                                       referrer_method_idx,
+                                                       referrer_invoke_type,
+                                                       target_method_idx,
+                                                       target_invoke_type,
+                                                       literal_offset));
+}
+void CompilerDriver::AddClassPatch(const DexFile* dex_file,
+                                    uint16_t referrer_class_def_idx,
+                                    uint32_t referrer_method_idx,
+                                    uint32_t target_type_idx,
+                                    size_t literal_offset) {
+  MutexLock mu(Thread::Current(), compiled_methods_lock_);
+  classes_to_patch_.push_back(new TypePatchInformation(dex_file,
+                                                       referrer_class_def_idx,
+                                                       referrer_method_idx,
+                                                       target_type_idx,
+                                                       literal_offset));
 }
 
 class ParallelCompilationManager {
@@ -2249,7 +2316,7 @@
   }
   ClassReference ref(&dex_file, class_def_index);
   // Skip compiling classes with generic verifier failures since they will still fail at runtime
-  if (manager->GetCompiler()->verified_methods_data_->IsClassRejected(ref)) {
+  if (manager->GetCompiler()->verification_results_->IsClassRejected(ref)) {
     return;
   }
   const byte* class_data = dex_file.GetClassData(class_def);
@@ -2332,7 +2399,7 @@
   } else if ((access_flags & kAccAbstract) != 0) {
   } else {
     MethodReference method_ref(&dex_file, method_idx);
-    bool compile = VerifiedMethodsData::IsCandidateForCompilation(method_ref, access_flags);
+    bool compile = VerificationResults::IsCandidateForCompilation(method_ref, access_flags);
 
     if (compile) {
       CompilerFn compiler = compiler_;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index aabdf2f..ea43e4f 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -22,7 +22,6 @@
 #include <vector>
 
 #include "base/mutex.h"
-#include "base/timing_logger.h"
 #include "class_reference.h"
 #include "compiled_class.h"
 #include "compiled_method.h"
@@ -39,13 +38,19 @@
 
 namespace art {
 
+namespace verifier {
+class MethodVerifier;
+}  // namespace verifier
+
 class AOTCompilationStats;
 class ParallelCompilationManager;
 class DexCompilationUnit;
 class DexFileToMethodInlinerMap;
+class InlineIGetIPutData;
 class OatWriter;
 class TimingLogger;
-class VerifiedMethodsData;
+class VerificationResults;
+class VerifiedMethod;
 
 enum CompilerBackend {
   kQuick,
@@ -93,13 +98,12 @@
   // enabled.  "image_classes" lets the compiler know what classes it
   // can assume will be in the image, with NULL implying all available
   // classes.
-  explicit CompilerDriver(VerifiedMethodsData* verified_methods_data,
+  explicit CompilerDriver(VerificationResults* verification_results,
                           DexFileToMethodInlinerMap* method_inliner_map,
                           CompilerBackend compiler_backend, InstructionSet instruction_set,
                           InstructionSetFeatures instruction_set_features,
                           bool image, DescriptorSet* image_classes,
-                          size_t thread_count, bool dump_stats, bool dump_passes,
-                          CumulativeLogger* timer);
+                          size_t thread_count, bool dump_stats);
 
   ~CompilerDriver();
 
@@ -107,12 +111,12 @@
                   TimingLogger& timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  // Compile a single Method
-  void CompileOne(const mirror::ArtMethod* method, TimingLogger& timings)
+  // Compile a single Method.
+  void CompileOne(mirror::ArtMethod* method, TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  VerifiedMethodsData* GetVerifiedMethodsData() const {
-    return verified_methods_data_;
+  VerificationResults* GetVerificationResults() const {
+    return verification_results_;
   }
 
   DexFileToMethodInlinerMap* GetMethodInlinerMap() const {
@@ -191,6 +195,17 @@
                                               uint32_t type_idx)
      LOCKS_EXCLUDED(Locks::mutator_lock_);
 
+  bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
+                          bool* is_type_initialized, bool* use_direct_type_ptr,
+                          uintptr_t* direct_type_ptr);
+
+  // Can we fast path instance field access in a verified accessor?
+  // If yes, computes field's offset and volatility and whether the method is static or not.
+  static bool ComputeSpecialAccessorInfo(uint32_t field_idx, bool is_put,
+                                         verifier::MethodVerifier* verifier,
+                                         InlineIGetIPutData* result)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Can we fast path instance field access? Computes field's offset and volatility.
   bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
                                 int* field_offset, bool* is_volatile)
@@ -211,7 +226,8 @@
                          uintptr_t* direct_code, uintptr_t* direct_method)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  bool IsSafeCast(const MethodReference& mr, uint32_t dex_pc);
+  const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const;
+  bool IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc);
 
   // Record patch information for later fix up.
   void AddCodePatch(const DexFile* dex_file,
@@ -230,6 +246,12 @@
                       InvokeType target_invoke_type,
                       size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
+  void AddClassPatch(const DexFile* dex_file,
+                     uint16_t referrer_class_def_idx,
+                     uint32_t referrer_method_idx,
+                     uint32_t target_type_idx,
+                     size_t literal_offset)
+      LOCKS_EXCLUDED(compiled_methods_lock_);
 
   void SetBitcodeFileName(std::string const& filename);
 
@@ -269,14 +291,8 @@
     return thread_count_;
   }
 
-  bool GetDumpPasses() const {
-    return dump_passes_;
-  }
-
-  CumulativeLogger& GetTimingsLogger() const {
-    return *timings_logger_;
-  }
-
+  class CallPatchInformation;
+  class TypePatchInformation;
   class PatchInformation {
    public:
     const DexFile& GetDexFile() const {
@@ -288,6 +304,48 @@
     uint32_t GetReferrerMethodIdx() const {
       return referrer_method_idx_;
     }
+    size_t GetLiteralOffset() const {
+      return literal_offset_;
+    }
+
+    virtual bool IsCall() const {
+      return false;
+    }
+    virtual bool IsType() const {
+      return false;
+    }
+    virtual const CallPatchInformation* AsCall() const {
+      LOG(FATAL) << "Unreachable";
+      return nullptr;
+    }
+    virtual const TypePatchInformation* AsType() const {
+      LOG(FATAL) << "Unreachable";
+      return nullptr;
+    }
+
+   protected:
+    PatchInformation(const DexFile* dex_file,
+                     uint16_t referrer_class_def_idx,
+                     uint32_t referrer_method_idx,
+                     size_t literal_offset)
+      : dex_file_(dex_file),
+        referrer_class_def_idx_(referrer_class_def_idx),
+        referrer_method_idx_(referrer_method_idx),
+        literal_offset_(literal_offset) {
+      CHECK(dex_file_ != NULL);
+    }
+    virtual ~PatchInformation() {}
+
+    const DexFile* const dex_file_;
+    const uint16_t referrer_class_def_idx_;
+    const uint32_t referrer_method_idx_;
+    const size_t literal_offset_;
+
+    friend class CompilerDriver;
+  };
+
+  class CallPatchInformation : public PatchInformation {
+   public:
     InvokeType GetReferrerInvokeType() const {
       return referrer_invoke_type_;
     }
@@ -297,46 +355,76 @@
     InvokeType GetTargetInvokeType() const {
       return target_invoke_type_;
     }
-    size_t GetLiteralOffset() const {;
-      return literal_offset_;
+
+    const CallPatchInformation* AsCall() const {
+      return this;
+    }
+    bool IsCall() const {
+      return true;
     }
 
    private:
-    PatchInformation(const DexFile* dex_file,
-                     uint16_t referrer_class_def_idx,
-                     uint32_t referrer_method_idx,
-                     InvokeType referrer_invoke_type,
-                     uint32_t target_method_idx,
-                     InvokeType target_invoke_type,
-                     size_t literal_offset)
-      : dex_file_(dex_file),
-        referrer_class_def_idx_(referrer_class_def_idx),
-        referrer_method_idx_(referrer_method_idx),
-        referrer_invoke_type_(referrer_invoke_type),
-        target_method_idx_(target_method_idx),
-        target_invoke_type_(target_invoke_type),
-        literal_offset_(literal_offset) {
-      CHECK(dex_file_ != NULL);
+    CallPatchInformation(const DexFile* dex_file,
+                         uint16_t referrer_class_def_idx,
+                         uint32_t referrer_method_idx,
+                         InvokeType referrer_invoke_type,
+                         uint32_t target_method_idx,
+                         InvokeType target_invoke_type,
+                         size_t literal_offset)
+        : PatchInformation(dex_file, referrer_class_def_idx,
+                           referrer_method_idx, literal_offset),
+          referrer_invoke_type_(referrer_invoke_type),
+          target_method_idx_(target_method_idx),
+          target_invoke_type_(target_invoke_type) {
     }
 
-    const DexFile* const dex_file_;
-    const uint16_t referrer_class_def_idx_;
-    const uint32_t referrer_method_idx_;
     const InvokeType referrer_invoke_type_;
     const uint32_t target_method_idx_;
     const InvokeType target_invoke_type_;
-    const size_t literal_offset_;
 
     friend class CompilerDriver;
-    DISALLOW_COPY_AND_ASSIGN(PatchInformation);
+    DISALLOW_COPY_AND_ASSIGN(CallPatchInformation);
   };
 
-  const std::vector<const PatchInformation*>& GetCodeToPatch() const {
+  class TypePatchInformation : public PatchInformation {
+   public:
+    uint32_t GetTargetTypeIdx() const {
+      return target_type_idx_;
+    }
+
+    bool IsType() const {
+      return true;
+    }
+    const TypePatchInformation* AsType() const {
+      return this;
+    }
+
+   private:
+    TypePatchInformation(const DexFile* dex_file,
+                         uint16_t referrer_class_def_idx,
+                         uint32_t referrer_method_idx,
+                         uint32_t target_type_idx,
+                         size_t literal_offset)
+        : PatchInformation(dex_file, referrer_class_def_idx,
+                           referrer_method_idx, literal_offset),
+          target_type_idx_(target_type_idx) {
+    }
+
+    const uint32_t target_type_idx_;
+
+    friend class CompilerDriver;
+    DISALLOW_COPY_AND_ASSIGN(TypePatchInformation);
+  };
+
+  const std::vector<const CallPatchInformation*>& GetCodeToPatch() const {
     return code_to_patch_;
   }
-  const std::vector<const PatchInformation*>& GetMethodsToPatch() const {
+  const std::vector<const CallPatchInformation*>& GetMethodsToPatch() const {
     return methods_to_patch_;
   }
+  const std::vector<const TypePatchInformation*>& GetClassesToPatch() const {
+    return classes_to_patch_;
+  }
 
   // Checks if class specified by type_idx is one of the image_classes_
   bool IsImageClass(const char* descriptor) const;
@@ -408,10 +496,11 @@
   static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  std::vector<const PatchInformation*> code_to_patch_;
-  std::vector<const PatchInformation*> methods_to_patch_;
+  std::vector<const CallPatchInformation*> code_to_patch_;
+  std::vector<const CallPatchInformation*> methods_to_patch_;
+  std::vector<const TypePatchInformation*> classes_to_patch_;
 
-  VerifiedMethodsData* verified_methods_data_;
+  VerificationResults* verification_results_;
   DexFileToMethodInlinerMap* method_inliner_map_;
 
   CompilerBackend compiler_backend_;
@@ -446,9 +535,6 @@
   UniquePtr<AOTCompilationStats> stats_;
 
   bool dump_stats_;
-  const bool dump_passes_;
-
-  CumulativeLogger* const timings_logger_;
 
   typedef void (*CompilerCallbackFn)(CompilerDriver& driver);
   typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver);
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index a5eb94f..0d0c204 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -122,7 +122,11 @@
     EXPECT_TRUE(method != NULL) << "method_idx=" << i
                                 << " " << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
                                 << " " << dex->GetMethodName(dex->GetMethodId(i));
-    EXPECT_TRUE(method->GetEntryPointFromCompiledCode() != NULL) << "method_idx=" << i
+    EXPECT_TRUE(method->GetEntryPointFromQuickCompiledCode() != NULL) << "method_idx=" << i
+                                           << " "
+                                           << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
+                                           << " " << dex->GetMethodName(dex->GetMethodId(i));
+    EXPECT_TRUE(method->GetEntryPointFromPortableCompiledCode() != NULL) << "method_idx=" << i
                                            << " "
                                            << dex->GetMethodDeclaringClassDescriptor(dex->GetMethodId(i))
                                            << " " << dex->GetMethodName(dex->GetMethodId(i));
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index c441d09..840b0ad 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -31,7 +31,8 @@
       code_item_(cu->code_item),
       class_def_idx_(cu->class_def_idx),
       dex_method_idx_(cu->method_idx),
-      access_flags_(cu->access_flags) {
+      access_flags_(cu->access_flags),
+      verified_method_(cu_->compiler_driver->GetVerifiedMethod(cu->dex_file, cu->method_idx)) {
 }
 
 DexCompilationUnit::DexCompilationUnit(CompilationUnit* cu,
@@ -41,7 +42,8 @@
                                        const DexFile::CodeItem* code_item,
                                        uint16_t class_def_idx,
                                        uint32_t method_idx,
-                                       uint32_t access_flags)
+                                       uint32_t access_flags,
+                                       const VerifiedMethod* verified_method)
     : cu_(cu),
       class_loader_(class_loader),
       class_linker_(class_linker),
@@ -49,7 +51,8 @@
       code_item_(code_item),
       class_def_idx_(class_def_idx),
       dex_method_idx_(method_idx),
-      access_flags_(access_flags) {
+      access_flags_(access_flags),
+      verified_method_(verified_method) {
 }
 
 const std::string& DexCompilationUnit::GetSymbol() {
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 3df50ff..84f5799 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -29,6 +29,7 @@
 }  // namespace mirror
 class ClassLinker;
 struct CompilationUnit;
+class VerifiedMethod;
 
 class DexCompilationUnit {
  public:
@@ -36,7 +37,8 @@
 
   DexCompilationUnit(CompilationUnit* cu, jobject class_loader, ClassLinker* class_linker,
                      const DexFile& dex_file, const DexFile::CodeItem* code_item,
-                     uint16_t class_def_idx, uint32_t method_idx, uint32_t access_flags);
+                     uint16_t class_def_idx, uint32_t method_idx, uint32_t access_flags,
+                     const VerifiedMethod* verified_method);
 
   CompilationUnit* GetCompilationUnit() const {
     return cu_;
@@ -96,6 +98,10 @@
     return ((access_flags_ & kAccSynchronized) != 0);
   }
 
+  const VerifiedMethod* GetVerifiedMethod() const {
+    return verified_method_;
+  }
+
   const std::string& GetSymbol();
 
  private:
@@ -111,6 +117,7 @@
   const uint16_t class_def_idx_;
   const uint32_t dex_method_idx_;
   const uint32_t access_flags_;
+  const VerifiedMethod* const verified_method_;
 
   std::string symbol_;
 };
diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc
index c571288..66c8da1 100644
--- a/compiler/elf_fixup.cc
+++ b/compiler/elf_fixup.cc
@@ -177,7 +177,7 @@
     if (elf_dyn_needs_fixup) {
       uint32_t d_ptr = elf_dyn.d_un.d_ptr;
       if (DEBUG_FIXUP) {
-        LOG(INFO) << StringPrintf("In %s moving Elf32_Dyn[%d] from 0x%08x to 0x%08x",
+        LOG(INFO) << StringPrintf("In %s moving Elf32_Dyn[%d] from 0x%08x to 0x%08" PRIxPTR,
                                   elf_file.GetFile().GetPath().c_str(), i,
                                   d_ptr, d_ptr + base_address);
       }
@@ -196,7 +196,7 @@
       continue;
     }
     if (DEBUG_FIXUP) {
-      LOG(INFO) << StringPrintf("In %s moving Elf32_Shdr[%d] from 0x%08x to 0x%08x",
+      LOG(INFO) << StringPrintf("In %s moving Elf32_Shdr[%d] from 0x%08x to 0x%08" PRIxPTR,
                                 elf_file.GetFile().GetPath().c_str(), i,
                                 sh.sh_addr, sh.sh_addr + base_address);
     }
@@ -213,7 +213,7 @@
     CHECK((ph.p_align == 0) || (0 == ((ph.p_vaddr - ph.p_offset) & (ph.p_align - 1))))
             << elf_file.GetFile().GetPath() << " i=" << i;
     if (DEBUG_FIXUP) {
-      LOG(INFO) << StringPrintf("In %s moving Elf32_Phdr[%d] from 0x%08x to 0x%08x",
+      LOG(INFO) << StringPrintf("In %s moving Elf32_Phdr[%d] from 0x%08x to 0x%08" PRIxPTR,
                                 elf_file.GetFile().GetPath().c_str(), i,
                                 ph.p_vaddr, ph.p_vaddr + base_address);
     }
@@ -238,7 +238,7 @@
     ::llvm::ELF::Elf32_Sym& symbol = elf_file.GetSymbol(section_type, i);
     if (symbol.st_value != 0) {
       if (DEBUG_FIXUP) {
-        LOG(INFO) << StringPrintf("In %s moving Elf32_Sym[%d] from 0x%08x to 0x%08x",
+        LOG(INFO) << StringPrintf("In %s moving Elf32_Sym[%d] from 0x%08x to 0x%08" PRIxPTR,
                                   elf_file.GetFile().GetPath().c_str(), i,
                                   symbol.st_value, symbol.st_value + base_address);
       }
@@ -255,7 +255,7 @@
       for (uint32_t i = 0; i < elf_file.GetRelNum(sh); i++) {
         llvm::ELF::Elf32_Rel& rel = elf_file.GetRel(sh, i);
         if (DEBUG_FIXUP) {
-          LOG(INFO) << StringPrintf("In %s moving Elf32_Rel[%d] from 0x%08x to 0x%08x",
+          LOG(INFO) << StringPrintf("In %s moving Elf32_Rel[%d] from 0x%08x to 0x%08" PRIxPTR,
                                     elf_file.GetFile().GetPath().c_str(), i,
                                     rel.r_offset, rel.r_offset + base_address);
         }
@@ -265,7 +265,7 @@
       for (uint32_t i = 0; i < elf_file.GetRelaNum(sh); i++) {
         llvm::ELF::Elf32_Rela& rela = elf_file.GetRela(sh, i);
         if (DEBUG_FIXUP) {
-          LOG(INFO) << StringPrintf("In %s moving Elf32_Rela[%d] from 0x%08x to 0x%08x",
+          LOG(INFO) << StringPrintf("In %s moving Elf32_Rela[%d] from 0x%08x to 0x%08" PRIxPTR,
                                     elf_file.GetFile().GetPath().c_str(), i,
                                     rela.r_offset, rela.r_offset + base_address);
         }
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 0ef4185..dbc986a 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -44,7 +44,7 @@
                                    size_t& oat_data_offset);
 
   // Returns runtime oat_data runtime address for an opened ElfFile.
-  static llvm::ELF::Elf32_Addr GetOatDataAddress(ElfFile* elf_file);
+  static ::llvm::ELF::Elf32_Addr GetOatDataAddress(ElfFile* elf_file);
 
  protected:
   ElfWriter(const CompilerDriver& driver, File* elf_file);
diff --git a/compiler/elf_writer_mclinker.cc b/compiler/elf_writer_mclinker.cc
index f3fef23..c7baf4f 100644
--- a/compiler/elf_writer_mclinker.cc
+++ b/compiler/elf_writer_mclinker.cc
@@ -22,6 +22,7 @@
 #include <mcld/IRBuilder.h>
 #include <mcld/Linker.h>
 #include <mcld/LinkerConfig.h>
+#include <mcld/LinkerScript.h>
 #include <mcld/MC/ZOption.h>
 #include <mcld/Module.h>
 #include <mcld/Support/Path.h>
@@ -142,13 +143,14 @@
   }
 
   // Based on alone::Linker::config
-  module_.reset(new mcld::Module(linker_config_->options().soname()));
+  linker_script_.reset(new mcld::LinkerScript());
+  module_.reset(new mcld::Module(linker_config_->options().soname(), *linker_script_.get()));
   CHECK(module_.get() != NULL);
   ir_builder_.reset(new mcld::IRBuilder(*module_.get(), *linker_config_.get()));
   CHECK(ir_builder_.get() != NULL);
   linker_.reset(new mcld::Linker());
   CHECK(linker_.get() != NULL);
-  linker_->config(*linker_config_.get());
+  linker_->emulate(*linker_script_.get(), *linker_config_.get());
 }
 
 void ElfWriterMclinker::AddOatInput(std::vector<uint8_t>& oat_contents) {
@@ -263,12 +265,12 @@
   added_symbols_.Put(&symbol, &symbol);
 
   // Add input to supply code for symbol
-  const std::vector<uint8_t>& code = compiled_code.GetCode();
+  const std::vector<uint8_t>* code = compiled_code.GetPortableCode();
   // TODO: ownership of code_input?
   // TODO: why does IRBuilder::ReadInput take a non-const pointer?
   mcld::Input* code_input = ir_builder_->ReadInput(symbol,
-                                                   const_cast<uint8_t*>(&code[0]),
-                                                   code.size());
+                                                   const_cast<uint8_t*>(&(*code)[0]),
+                                                   code->size());
   CHECK(code_input != NULL);
 }
 
@@ -374,7 +376,7 @@
           (!method->IsStatic() ||
            method->IsConstructor() ||
            method->GetDeclaringClass()->IsInitialized())) {
-        method->SetOatCodeOffset(offset);
+        method->SetPortableOatCodeOffset(offset);
       }
     }
     it.Next();
diff --git a/compiler/elf_writer_mclinker.h b/compiler/elf_writer_mclinker.h
index 5da178c..8ee7231 100644
--- a/compiler/elf_writer_mclinker.h
+++ b/compiler/elf_writer_mclinker.h
@@ -29,6 +29,7 @@
 class LDSymbol;
 class Linker;
 class LinkerConfig;
+class LinkerScript;
 class Module;
 }  // namespace mcld
 
@@ -68,12 +69,13 @@
   void FixupOatMethodOffsets(const std::vector<const DexFile*>& dex_files)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t FixupCompiledCodeOffset(ElfFile& elf_file,
-                                   llvm::ELF::Elf32_Addr oatdata_address,
+                                   ::llvm::ELF::Elf32_Addr oatdata_address,
                                    const CompiledCode& compiled_code);
 #endif
 
   // Setup by Init()
   UniquePtr<mcld::LinkerConfig> linker_config_;
+  UniquePtr<mcld::LinkerScript> linker_script_;
   UniquePtr<mcld::Module> module_;
   UniquePtr<mcld::IRBuilder> ir_builder_;
   UniquePtr<mcld::Linker> linker_;
diff --git a/compiler/file_output_stream.cc b/compiler/file_output_stream.cc
index 0e4a294..3ee16f5 100644
--- a/compiler/file_output_stream.cc
+++ b/compiler/file_output_stream.cc
@@ -25,7 +25,7 @@
 
 FileOutputStream::FileOutputStream(File* file) : OutputStream(file->GetPath()), file_(file) {}
 
-bool FileOutputStream::WriteFully(const void* buffer, int64_t byte_count) {
+bool FileOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   return file_->WriteFully(buffer, byte_count);
 }
 
diff --git a/compiler/file_output_stream.h b/compiler/file_output_stream.h
index bde9e68..76b00fe 100644
--- a/compiler/file_output_stream.h
+++ b/compiler/file_output_stream.h
@@ -29,7 +29,7 @@
 
   virtual ~FileOutputStream() {}
 
-  virtual bool WriteFully(const void* buffer, int64_t byte_count);
+  virtual bool WriteFully(const void* buffer, size_t byte_count);
 
   virtual off_t Seek(off_t offset, Whence whence);
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 556dec2..67cd51b 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -208,12 +208,12 @@
   DCHECK_LT(image_end_, image_->Size());
 }
 
-bool ImageWriter::IsImageOffsetAssigned(const mirror::Object* object) const {
+bool ImageWriter::IsImageOffsetAssigned(mirror::Object* object) const {
   DCHECK(object != nullptr);
   return object->GetLockWord().GetState() == LockWord::kForwardingAddress;
 }
 
-size_t ImageWriter::GetImageOffset(const mirror::Object* object) const {
+size_t ImageWriter::GetImageOffset(mirror::Object* object) const {
   DCHECK(object != nullptr);
   DCHECK(IsImageOffsetAssigned(object));
   LockWord lock_word = object->GetLockWord();
@@ -226,7 +226,7 @@
   size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
-                                    &error_msg));
+                                    true, &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
@@ -281,7 +281,7 @@
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
 
-bool ImageWriter::IsImageClass(const Class* klass) {
+bool ImageWriter::IsImageClass(Class* klass) {
   return compiler_driver_.IsImageClass(ClassHelper(klass).GetDescriptor());
 }
 
@@ -447,7 +447,7 @@
   for (size_t i = 0; i < num_reference_fields; ++i) {
     mirror::ArtField* field = sirt_class->GetInstanceField(i);
     MemberOffset field_offset = field->GetOffset();
-    mirror::Object* value = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+    mirror::Object* value = obj->GetFieldObject<mirror::Object>(field_offset, false);
     if (value != nullptr) {
       WalkFieldsInOrder(value);
     }
@@ -470,7 +470,7 @@
       for (size_t i = 0; i < num_static_fields; ++i) {
         mirror::ArtField* field = klass->GetStaticField(i);
         MemberOffset field_offset = field->GetOffset();
-        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object*>(field_offset, false);
+        mirror::Object* value = sirt_obj->GetFieldObject<mirror::Object>(field_offset, false);
         if (value != nullptr) {
           WalkFieldsInOrder(value);
         }
@@ -527,16 +527,16 @@
   const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * gc::accounting::SpaceBitmap::kAlignment;
   const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
       heap_bytes_per_bitmap_byte;
-  ImageHeader image_header(reinterpret_cast<uint32_t>(image_begin_),
+  ImageHeader image_header(PointerToLowMemUInt32(image_begin_),
                            static_cast<uint32_t>(image_end_),
                            RoundUp(image_end_, kPageSize),
                            RoundUp(bitmap_bytes, kPageSize),
-                           reinterpret_cast<uint32_t>(GetImageAddress(image_roots.get())),
+                           PointerToLowMemUInt32(GetImageAddress(image_roots.get())),
                            oat_file_->GetOatHeader().GetChecksum(),
-                           reinterpret_cast<uint32_t>(oat_file_begin),
-                           reinterpret_cast<uint32_t>(oat_data_begin_),
-                           reinterpret_cast<uint32_t>(oat_data_end),
-                           reinterpret_cast<uint32_t>(oat_file_end));
+                           PointerToLowMemUInt32(oat_file_begin),
+                           PointerToLowMemUInt32(oat_data_begin_),
+                           PointerToLowMemUInt32(oat_data_end),
+                           PointerToLowMemUInt32(oat_file_end));
   memcpy(image_->Begin(), &image_header, sizeof(image_header));
 
   // Note that image_end_ is left at end of used space
@@ -578,7 +578,7 @@
   image_writer->FixupObject(obj, copy);
 }
 
-void ImageWriter::FixupObject(const Object* orig, Object* copy) {
+void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
   copy->SetClass(down_cast<Class*>(GetImageAddress(orig->GetClass())));
@@ -594,12 +594,12 @@
   }
 }
 
-void ImageWriter::FixupClass(const Class* orig, Class* copy) {
+void ImageWriter::FixupClass(Class* orig, Class* copy) {
   FixupInstanceFields(orig, copy);
   FixupStaticFields(orig, copy);
 }
 
-void ImageWriter::FixupMethod(const ArtMethod* orig, ArtMethod* copy) {
+void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
   FixupInstanceFields(orig, copy);
 
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
@@ -607,43 +607,36 @@
 
   // The resolution method has a special trampoline to call.
   if (UNLIKELY(orig == Runtime::Current()->GetResolutionMethod())) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
-#else
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
-#endif
+    copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
+    copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
   } else if (UNLIKELY(orig == Runtime::Current()->GetImtConflictMethod())) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_imt_conflict_trampoline_offset_));
-#else
-    copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_imt_conflict_trampoline_offset_));
-#endif
+    copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_imt_conflict_trampoline_offset_));
+    copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_imt_conflict_trampoline_offset_));
   } else {
     // We assume all methods have code. If they don't currently then we set them to the use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
     if (UNLIKELY(orig->IsAbstract())) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-      copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_to_interpreter_bridge_offset_));
-#else
-      copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_to_interpreter_bridge_offset_));
-#endif
+      copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_to_interpreter_bridge_offset_));
+      copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_to_interpreter_bridge_offset_));
       copy->SetEntryPointFromInterpreter(reinterpret_cast<EntryPointFromInterpreter*>
-      (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
+          (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
     } else {
       copy->SetEntryPointFromInterpreter(reinterpret_cast<EntryPointFromInterpreter*>
-      (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_))));
+          (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_))));
       // Use original code if it exists. Otherwise, set the code pointer to the resolution
       // trampoline.
-      const byte* code = GetOatAddress(orig->GetOatCodeOffset());
-      if (code != NULL) {
-        copy->SetEntryPointFromCompiledCode(code);
+      const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset());
+      if (quick_code != nullptr) {
+        copy->SetEntryPointFromQuickCompiledCode(quick_code);
       } else {
-#if defined(ART_USE_PORTABLE_COMPILER)
-        copy->SetEntryPointFromCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
-#else
-        copy->SetEntryPointFromCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
-#endif
+        copy->SetEntryPointFromQuickCompiledCode(GetOatAddress(quick_resolution_trampoline_offset_));
+      }
+      const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset());
+      if (portable_code != nullptr) {
+        copy->SetEntryPointFromPortableCompiledCode(portable_code);
+      } else {
+        copy->SetEntryPointFromPortableCompiledCode(GetOatAddress(portable_resolution_trampoline_offset_));
       }
       if (orig->IsNative()) {
         // The native method's pointer is set to a stub to lookup via dlsym.
@@ -667,14 +660,14 @@
   }
 }
 
-void ImageWriter::FixupObjectArray(const ObjectArray<Object>* orig, ObjectArray<Object>* copy) {
+void ImageWriter::FixupObjectArray(ObjectArray<Object>* orig, ObjectArray<Object>* copy) {
   for (int32_t i = 0; i < orig->GetLength(); ++i) {
-    const Object* element = orig->Get(i);
-    copy->SetPtrWithoutChecks(i, GetImageAddress(element));
+    Object* element = orig->Get(i);
+    copy->SetWithoutChecksAndWriteBarrier(i, GetImageAddress(element));
   }
 }
 
-void ImageWriter::FixupInstanceFields(const Object* orig, Object* copy) {
+void ImageWriter::FixupInstanceFields(Object* orig, Object* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
   Class* klass = orig->GetClass();
@@ -682,13 +675,13 @@
   FixupFields(orig, copy, klass->GetReferenceInstanceOffsets(), false);
 }
 
-void ImageWriter::FixupStaticFields(const Class* orig, Class* copy) {
+void ImageWriter::FixupStaticFields(Class* orig, Class* copy) {
   DCHECK(orig != NULL);
   DCHECK(copy != NULL);
   FixupFields(orig, copy, orig->GetReferenceStaticOffsets(), true);
 }
 
-void ImageWriter::FixupFields(const Object* orig,
+void ImageWriter::FixupFields(Object* orig,
                               Object* copy,
                               uint32_t ref_offsets,
                               bool is_static) {
@@ -697,9 +690,10 @@
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset byte_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      const Object* ref = orig->GetFieldObject<const Object*>(byte_offset, false);
-      // Use SetFieldPtr to avoid card marking since we are writing to the image.
-      copy->SetFieldPtr(byte_offset, GetImageAddress(ref), false);
+      Object* ref = orig->GetFieldObject<Object>(byte_offset, false);
+      // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+      // image.
+      copy->SetFieldObjectWithoutWriteBarrier(byte_offset, GetImageAddress(ref), false);
       ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
     }
   } else {
@@ -707,7 +701,7 @@
     // walk up the class inheritance hierarchy and find reference
     // offsets the hard way. In the static case, just consider this
     // class.
-    for (const Class *klass = is_static ? orig->AsClass() : orig->GetClass();
+    for (Class *klass = is_static ? orig->AsClass() : orig->GetClass();
          klass != NULL;
          klass = is_static ? NULL : klass->GetSuperClass()) {
       size_t num_reference_fields = (is_static
@@ -718,9 +712,10 @@
                            ? klass->GetStaticField(i)
                            : klass->GetInstanceField(i));
         MemberOffset field_offset = field->GetOffset();
-        const Object* ref = orig->GetFieldObject<const Object*>(field_offset, false);
-        // Use SetFieldPtr to avoid card marking since we are writing to the image.
-        copy->SetFieldPtr(field_offset, GetImageAddress(ref), false);
+        Object* ref = orig->GetFieldObject<Object>(field_offset, false);
+        // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+        // image.
+        copy->SetFieldObjectWithoutWriteBarrier(field_offset, GetImageAddress(ref), false);
       }
     }
   }
@@ -728,13 +723,14 @@
     // Fix-up referent, that isn't marked as an object field, for References.
     ArtField* field = orig->GetClass()->FindInstanceField("referent", "Ljava/lang/Object;");
     MemberOffset field_offset = field->GetOffset();
-    const Object* ref = orig->GetFieldObject<const Object*>(field_offset, false);
-    // Use SetFieldPtr to avoid card marking since we are writing to the image.
-    copy->SetFieldPtr(field_offset, GetImageAddress(ref), false);
+    Object* ref = orig->GetFieldObject<Object>(field_offset, false);
+    // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
+    // image.
+    copy->SetFieldObjectWithoutWriteBarrier(field_offset, GetImageAddress(ref), false);
   }
 }
 
-static ArtMethod* GetTargetMethod(const CompilerDriver::PatchInformation* patch)
+static ArtMethod* GetTargetMethod(const CompilerDriver::CallPatchInformation* patch)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Thread* self = Thread::Current();
@@ -757,27 +753,54 @@
   return method;
 }
 
+static Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  Thread* self = Thread::Current();
+  SirtRef<mirror::DexCache> dex_cache(self, class_linker->FindDexCache(patch->GetDexFile()));
+  SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+  Class* klass = class_linker->ResolveType(patch->GetDexFile(),
+                                           patch->GetTargetTypeIdx(),
+                                           dex_cache,
+                                           class_loader);
+  CHECK(klass != NULL)
+    << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
+  CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
+    << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
+    << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " "
+    << PrettyClass(klass);
+  return klass;
+}
+
 void ImageWriter::PatchOatCodeAndMethods() {
   Thread* self = Thread::Current();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter");
 
-  typedef std::vector<const CompilerDriver::PatchInformation*> Patches;
-  const Patches& code_to_patch = compiler_driver_.GetCodeToPatch();
+  typedef std::vector<const CompilerDriver::CallPatchInformation*> CallPatches;
+  const CallPatches& code_to_patch = compiler_driver_.GetCodeToPatch();
   for (size_t i = 0; i < code_to_patch.size(); i++) {
-    const CompilerDriver::PatchInformation* patch = code_to_patch[i];
+    const CompilerDriver::CallPatchInformation* patch = code_to_patch[i];
     ArtMethod* target = GetTargetMethod(patch);
-    uint32_t code = reinterpret_cast<uint32_t>(class_linker->GetOatCodeFor(target));
-    uint32_t code_base = reinterpret_cast<uint32_t>(&oat_file_->GetOatHeader());
-    uint32_t code_offset = code - code_base;
-    SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetOatAddress(code_offset)));
+    uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target));
+    uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader());
+    uintptr_t code_offset = quick_code - code_base;
+    SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset)));
   }
 
-  const Patches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
+  const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch();
   for (size_t i = 0; i < methods_to_patch.size(); i++) {
-    const CompilerDriver::PatchInformation* patch = methods_to_patch[i];
+    const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i];
     ArtMethod* target = GetTargetMethod(patch);
-    SetPatchLocation(patch, reinterpret_cast<uint32_t>(GetImageAddress(target)));
+    SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)));
+  }
+
+  const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch =
+      compiler_driver_.GetClassesToPatch();
+  for (size_t i = 0; i < classes_to_patch.size(); i++) {
+    const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i];
+    Class* target = GetTargetType(patch);
+    SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)));
   }
 
   // Update the image header with the new checksum after patching
@@ -788,21 +811,34 @@
 
 void ImageWriter::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  const void* oat_code = class_linker->GetOatCodeFor(patch->GetDexFile(),
-                                                     patch->GetReferrerClassDefIdx(),
-                                                     patch->GetReferrerMethodIdx());
+  const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(),
+                                                                patch->GetReferrerClassDefIdx(),
+                                                                patch->GetReferrerMethodIdx());
   OatHeader& oat_header = const_cast<OatHeader&>(oat_file_->GetOatHeader());
   // TODO: make this Thumb2 specific
-  uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uint32_t>(oat_code) & ~0x1);
+  uint8_t* base = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(quick_oat_code) & ~0x1);
   uint32_t* patch_location = reinterpret_cast<uint32_t*>(base + patch->GetLiteralOffset());
   if (kIsDebugBuild) {
-    const DexFile::MethodId& id = patch->GetDexFile().GetMethodId(patch->GetTargetMethodIdx());
-    uint32_t expected = reinterpret_cast<uint32_t>(&id);
-    uint32_t actual = *patch_location;
-    CHECK(actual == expected || actual == value) << std::hex
-      << "actual=" << actual
-      << "expected=" << expected
-      << "value=" << value;
+    if (patch->IsCall()) {
+      const CompilerDriver::CallPatchInformation* cpatch = patch->AsCall();
+      const DexFile::MethodId& id = cpatch->GetDexFile().GetMethodId(cpatch->GetTargetMethodIdx());
+      uintptr_t expected = reinterpret_cast<uintptr_t>(&id);
+      uint32_t actual = *patch_location;
+      CHECK(actual == expected || actual == value) << std::hex
+          << "actual=" << actual
+          << "expected=" << expected
+          << "value=" << value;
+    }
+    if (patch->IsType()) {
+      const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
+      const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
+      uintptr_t expected = reinterpret_cast<uintptr_t>(&id);
+      uint32_t actual = *patch_location;
+      CHECK(actual == expected || actual == value) << std::hex
+          << "actual=" << actual
+          << "expected=" << expected
+          << "value=" << value;
+    }
   }
   *patch_location = value;
   oat_header.UpdateChecksum(patch_location, sizeof(value));
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 695f59b..a1504ee 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -66,17 +66,17 @@
   void AssignImageOffset(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetImageOffset(mirror::Object* object, size_t offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsImageOffsetAssigned(const mirror::Object* object) const;
-  size_t GetImageOffset(const mirror::Object* object) const;
+  bool IsImageOffsetAssigned(mirror::Object* object) const;
+  size_t GetImageOffset(mirror::Object* object) const;
 
-  mirror::Object* GetImageAddress(const mirror::Object* object) const {
+  mirror::Object* GetImageAddress(mirror::Object* object) const {
     if (object == NULL) {
       return NULL;
     }
     return reinterpret_cast<mirror::Object*>(image_begin_ + GetImageOffset(object));
   }
 
-  mirror::Object* GetLocalAddress(const mirror::Object* object) const {
+  mirror::Object* GetLocalAddress(mirror::Object* object) const {
     size_t offset = GetImageOffset(object);
     byte* dst = image_->Begin() + offset;
     return reinterpret_cast<mirror::Object*>(dst);
@@ -96,7 +96,7 @@
   }
 
   // Returns true if the class was in the original requested image classes list.
-  bool IsImageClass(const mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsImageClass(mirror::Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Debug aid that list of requested image classes.
   void DumpImageClasses();
@@ -141,20 +141,20 @@
   void CopyAndFixupObjects();
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupClass(const mirror::Class* orig, mirror::Class* copy)
+  void FixupClass(mirror::Class* orig, mirror::Class* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupMethod(const mirror::ArtMethod* orig, mirror::ArtMethod* copy)
+  void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupObject(const mirror::Object* orig, mirror::Object* copy)
+  void FixupObject(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupObjectArray(const mirror::ObjectArray<mirror::Object>* orig,
+  void FixupObjectArray(mirror::ObjectArray<mirror::Object>* orig,
                         mirror::ObjectArray<mirror::Object>* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupInstanceFields(const mirror::Object* orig, mirror::Object* copy)
+  void FixupInstanceFields(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupStaticFields(const mirror::Class* orig, mirror::Class* copy)
+  void FixupStaticFields(mirror::Class* orig, mirror::Class* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void FixupFields(const mirror::Object* orig, mirror::Object* copy, uint32_t ref_offsets,
+  void FixupFields(mirror::Object* orig, mirror::Object* copy, uint32_t ref_offsets,
                    bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 21dd11e..c77d319 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -58,11 +58,14 @@
       method = c->FindVirtualMethod(method_name, method_sig);
     }
     ASSERT_TRUE(method != NULL) << method_name << " " << method_sig;
-    if (method->GetEntryPointFromCompiledCode() != NULL) {
-      return;
+    if (method->GetEntryPointFromQuickCompiledCode() == nullptr) {
+      ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() == nullptr);
+      CompileMethod(method);
+      ASSERT_TRUE(method->GetEntryPointFromQuickCompiledCode() != nullptr)
+          << method_name << " " << method_sig;
+      ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() != nullptr)
+          << method_name << " " << method_sig;
     }
-    CompileMethod(method);
-    ASSERT_TRUE(method->GetEntryPointFromCompiledCode() != NULL) << method_name << " " << method_sig;
   }
 
   void SetUpForTest(bool direct, const char* method_name, const char* method_sig,
@@ -122,19 +125,19 @@
 int gJava_MyClassNatives_foo_calls = 0;
 void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   Locks::mutator_lock_->AssertNotHeld(Thread::Current());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_foo_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
 }
 
 TEST_F(JniCompilerTest, CompileAndRunNoArgMethod) {
   TEST_DISABLED_FOR_PORTABLE();
-  SetUpForTest(false, "foo", "()V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_foo));
+  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
 
   EXPECT_EQ(0, gJava_MyClassNatives_foo_calls);
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
@@ -150,9 +153,10 @@
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(class_loader_));
   ASSERT_TRUE(
-      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<mirror::ClassLoader*>(class_loader_),
-                                                         &reason)) << reason;
+      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader, &reason)) << reason;
 
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24);
   EXPECT_EQ(25, result);
@@ -165,9 +169,10 @@
 
   ScopedObjectAccess soa(Thread::Current());
   std::string reason;
+  SirtRef<mirror::ClassLoader> class_loader(soa.Self(),
+                                            soa.Decode<mirror::ClassLoader*>(class_loader_));
   ASSERT_TRUE(
-      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", soa.Decode<mirror::ClassLoader*>(class_loader_),
-                                                         &reason)) << reason;
+      Runtime::Current()->GetJavaVM()->LoadNativeLibrary("", class_loader, &reason)) << reason;
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42);
   EXPECT_EQ(43, result);
@@ -176,12 +181,13 @@
 int gJava_MyClassNatives_fooI_calls = 0;
 jint Java_MyClassNatives_fooI(JNIEnv* env, jobject thisObj, jint x) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooI_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x;
 }
 
@@ -202,12 +208,13 @@
 int gJava_MyClassNatives_fooII_calls = 0;
 jint Java_MyClassNatives_fooII(JNIEnv* env, jobject thisObj, jint x, jint y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooII_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -229,12 +236,13 @@
 int gJava_MyClassNatives_fooJJ_calls = 0;
 jlong Java_MyClassNatives_fooJJ(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooJJ_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -257,12 +265,13 @@
 int gJava_MyClassNatives_fooDD_calls = 0;
 jdouble Java_MyClassNatives_fooDD(JNIEnv* env, jobject thisObj, jdouble x, jdouble y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooDD_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -286,12 +295,13 @@
 int gJava_MyClassNatives_fooJJ_synchronized_calls = 0;
 jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
   // 1 = thisObj
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooJJ_synchronized_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x | y;
 }
 
@@ -312,12 +322,13 @@
 jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject y,
                             jobject z) {
   // 3 = this + y + z
-  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(thisObj != NULL);
   EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
   gJava_MyClassNatives_fooIOO_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   switch (x) {
     case 1:
       return y;
@@ -363,12 +374,13 @@
 int gJava_MyClassNatives_fooSII_calls = 0;
 jint Java_MyClassNatives_fooSII(JNIEnv* env, jclass klass, jint x, jint y) {
   // 1 = klass
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSII_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x + y;
 }
 
@@ -386,12 +398,13 @@
 int gJava_MyClassNatives_fooSDD_calls = 0;
 jdouble Java_MyClassNatives_fooSDD(JNIEnv* env, jclass klass, jdouble x, jdouble y) {
   // 1 = klass
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSDD_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
   return x - y;  // non-commutative operator
 }
 
@@ -415,12 +428,13 @@
 jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y,
                              jobject z) {
   // 3 = klass + y + z
-  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSIOO_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   switch (x) {
     case 1:
       return y;
@@ -467,12 +481,13 @@
 int gJava_MyClassNatives_fooSSIOO_calls = 0;
 jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject y, jobject z) {
   // 3 = klass + y + z
-  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   EXPECT_EQ(kNative, Thread::Current()->GetState());
   EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
   EXPECT_TRUE(klass != NULL);
   EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
   gJava_MyClassNatives_fooSSIOO_calls++;
+  ScopedObjectAccess soa(Thread::Current());
+  EXPECT_EQ(3U, Thread::Current()->NumStackReferences());
   switch (x) {
     case 1:
       return y;
diff --git a/compiler/leb128_encoder_test.cc b/compiler/leb128_encoder_test.cc
index c63dfa2..7af8518 100644
--- a/compiler/leb128_encoder_test.cc
+++ b/compiler/leb128_encoder_test.cc
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include "base/histogram-inl.h"
-#include "common_test.h"
 #include "leb128.h"
 #include "leb128_encoder.h"
 
-namespace art {
+#include "gtest/gtest.h"
+#include "base/histogram-inl.h"
 
-class Leb128Test : public CommonTest {};
+namespace art {
 
 struct DecodeUnsignedLeb128TestCase {
   uint32_t decoded;
@@ -92,7 +91,7 @@
     {(-1) << 31, {0x80, 0x80, 0x80, 0x80, 0x78}},
 };
 
-TEST_F(Leb128Test, UnsignedSinglesVector) {
+TEST(Leb128Test, UnsignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
     Leb128EncodingVector builder;
@@ -111,7 +110,7 @@
   }
 }
 
-TEST_F(Leb128Test, UnsignedSingles) {
+TEST(Leb128Test, UnsignedSingles) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
     uint8_t encoded_data[5];
@@ -130,7 +129,7 @@
   }
 }
 
-TEST_F(Leb128Test, UnsignedStreamVector) {
+TEST(Leb128Test, UnsignedStreamVector) {
   // Encode a number of entries.
   Leb128EncodingVector builder;
   for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
@@ -151,7 +150,7 @@
             static_cast<size_t>(encoded_data_ptr - &builder.GetData()[0]));
 }
 
-TEST_F(Leb128Test, UnsignedStream) {
+TEST(Leb128Test, UnsignedStream) {
   // Encode a number of entries.
   uint8_t encoded_data[5 * arraysize(uleb128_tests)];
   uint8_t* end = encoded_data;
@@ -173,7 +172,7 @@
   EXPECT_EQ(data_size, static_cast<size_t>(encoded_data_ptr - encoded_data));
 }
 
-TEST_F(Leb128Test, SignedSinglesVector) {
+TEST(Leb128Test, SignedSinglesVector) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
     Leb128EncodingVector builder;
@@ -192,7 +191,7 @@
   }
 }
 
-TEST_F(Leb128Test, SignedSingles) {
+TEST(Leb128Test, SignedSingles) {
   // Test individual encodings.
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
     uint8_t encoded_data[5];
@@ -211,7 +210,7 @@
   }
 }
 
-TEST_F(Leb128Test, SignedStreamVector) {
+TEST(Leb128Test, SignedStreamVector) {
   // Encode a number of entries.
   Leb128EncodingVector builder;
   for (size_t i = 0; i < arraysize(sleb128_tests); ++i) {
@@ -232,7 +231,7 @@
             static_cast<size_t>(encoded_data_ptr - &builder.GetData()[0]));
 }
 
-TEST_F(Leb128Test, SignedStream) {
+TEST(Leb128Test, SignedStream) {
   // Encode a number of entries.
   uint8_t encoded_data[5 * arraysize(sleb128_tests)];
   uint8_t* end = encoded_data;
@@ -254,7 +253,7 @@
   EXPECT_EQ(data_size, static_cast<size_t>(encoded_data_ptr - encoded_data));
 }
 
-TEST_F(Leb128Test, Speed) {
+TEST(Leb128Test, Speed) {
   UniquePtr<Histogram<uint64_t> > enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
   UniquePtr<Histogram<uint64_t> > dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
   Leb128EncodingVector builder;
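
As a side illustration (not part of the change itself): the converted tests above check that unsigned and signed LEB128 values survive an encode/decode round trip. A minimal standalone sketch of the unsigned scheme they exercise, with hypothetical helper names rather than ART's leb128.h API:

#include <cstdint>
#include <vector>

// Emit 7 payload bits per byte, least-significant group first; bit 7 marks
// that more bytes follow.
static void EncodeUnsignedLeb128(uint32_t value, std::vector<uint8_t>* out) {
  do {
    uint8_t byte = value & 0x7f;
    value >>= 7;
    if (value != 0) {
      byte |= 0x80;
    }
    out->push_back(byte);
  } while (value != 0);
}

// Read bytes until one arrives with bit 7 clear, accumulating 7 bits at a time.
static uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*data)++;
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

Encoding 300 this way yields {0xac, 0x02}, which is the kind of decoded/encoded pair the uleb128_tests table pins down.
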
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index 35d1ecd..6563eb5 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -20,7 +20,8 @@
 #include "base/stl_util.h"
 #include "class_linker.h"
 #include "compiled_method.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "globals.h"
@@ -125,7 +126,7 @@
   MutexLock GUARD(Thread::Current(), next_cunit_id_lock_);
   LlvmCompilationUnit* cunit = new LlvmCompilationUnit(this, next_cunit_id_++);
   if (!bitcode_filename_.empty()) {
-    cunit->SetBitcodeFileName(StringPrintf("%s-%zu",
+    cunit->SetBitcodeFileName(StringPrintf("%s-%u",
                                            bitcode_filename_.c_str(),
                                            cunit->GetCompilationUnitId()));
   }
@@ -153,11 +154,9 @@
 
   cunit->Materialize();
 
-  MethodReference mref(dex_compilation_unit->GetDexFile(),
-                       dex_compilation_unit->GetDexMethodIndex());
   return new CompiledMethod(*compiler_driver_, compiler_driver_->GetInstructionSet(),
                             cunit->GetElfObject(),
-                            *compiler_driver_->GetVerifiedMethodsData()->GetDexGcMap(mref),
+                            dex_compilation_unit->GetVerifiedMethod()->GetDexGcMap(),
                             cunit->GetDexCompilationUnit()->GetSymbol());
 }
 
@@ -214,7 +213,7 @@
 
   art::DexCompilationUnit dex_compilation_unit(
     NULL, class_loader, class_linker, dex_file, code_item,
-    class_def_idx, method_idx, access_flags);
+    class_def_idx, method_idx, access_flags, driver.GetVerifiedMethod(&dex_file, method_idx));
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileDexMethod(&dex_compilation_unit, invoke_type);
   return result;
@@ -226,8 +225,8 @@
   art::ClassLinker *class_linker = art::Runtime::Current()->GetClassLinker();
 
   art::DexCompilationUnit dex_compilation_unit(
-    NULL, NULL, class_linker, dex_file, NULL,
-    0, method_idx, access_flags);
+      nullptr, nullptr, class_linker, dex_file, nullptr,
+      0, method_idx, access_flags, nullptr);
 
   art::llvm::CompilerLLVM* compiler_llvm = ContextOf(driver);
   art::CompiledMethod* result = compiler_llvm->CompileNativeMethod(&dex_compilation_unit);
diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc
index 6423cd7..8f22a97 100644
--- a/compiler/llvm/gbc_expander.cc
+++ b/compiler/llvm/gbc_expander.cc
@@ -897,7 +897,7 @@
   } else {
     code_addr =
         irb_.LoadFromObjectOffset(callee_method_object_addr,
-                                  art::mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                                  art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value(),
                                   func_type->getPointerTo(), kTBAARuntimeInfo);
   }
 
@@ -1234,7 +1234,7 @@
 
   llvm::Value* code_addr =
     irb_.LoadFromObjectOffset(callee_method_object_addr,
-                              art::mirror::ArtMethod::GetEntryPointFromCompiledCodeOffset().Int32Value(),
+                              art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value(),
                               callee_method_type->getPointerTo(),
                               kTBAARuntimeInfo);
 
diff --git a/compiler/llvm/llvm_compilation_unit.cc b/compiler/llvm/llvm_compilation_unit.cc
index 038f5dc..d23706d 100644
--- a/compiler/llvm/llvm_compilation_unit.cc
+++ b/compiler/llvm/llvm_compilation_unit.cc
@@ -151,7 +151,7 @@
 void LlvmCompilationUnit::DumpBitcodeToFile() {
   std::string bitcode;
   DumpBitcodeToString(bitcode);
-  std::string filename(StringPrintf("%s/Art%u.bc", DumpDirectory().c_str(), cunit_id_));
+  std::string filename(StringPrintf("%s/Art%zu.bc", DumpDirectory().c_str(), cunit_id_));
   UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
   output->WriteFully(bitcode.data(), bitcode.size());
   LOG(INFO) << ".bc file written successfully: " << filename;
@@ -178,7 +178,7 @@
   const bool kDumpELF = false;
   if (kDumpELF) {
     // Dump the ELF image for debugging
-    std::string filename(StringPrintf("%s/Art%u.o", DumpDirectory().c_str(), cunit_id_));
+    std::string filename(StringPrintf("%s/Art%zu.o", DumpDirectory().c_str(), cunit_id_));
     UniquePtr<File> output(OS::CreateEmptyFile(filename.c_str()));
     output->WriteFully(elf_object_.data(), elf_object_.size());
     LOG(INFO) << ".o file written successfully: " << filename;
diff --git a/compiler/llvm/llvm_compilation_unit.h b/compiler/llvm/llvm_compilation_unit.h
index ced9f81..58aa6fd 100644
--- a/compiler/llvm/llvm_compilation_unit.h
+++ b/compiler/llvm/llvm_compilation_unit.h
@@ -101,10 +101,10 @@
 
  private:
   LlvmCompilationUnit(const CompilerLLVM* compiler_llvm,
-                      uint32_t cunit_id);
+                      size_t cunit_id);
 
   const CompilerLLVM* compiler_llvm_;
-  const uint32_t cunit_id_;
+  const size_t cunit_id_;
 
   UniquePtr< ::llvm::LLVMContext> context_;
   UniquePtr<IRBuilder> irb_;
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 12d8212..b3070b6 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -39,29 +39,42 @@
                                                             method->GetDexMethodIndex()));
 
     if (compiled_method == NULL) {
-      EXPECT_TRUE(oat_method.GetCode() == NULL) << PrettyMethod(method) << " "
-                                                << oat_method.GetCode();
-#if !defined(ART_USE_PORTABLE_COMPILER)
-      EXPECT_EQ(oat_method.GetFrameSizeInBytes(), kCompile ? kStackAlignment : 0);
+      EXPECT_TRUE(oat_method.GetQuickCode() == NULL) << PrettyMethod(method) << " "
+                                                     << oat_method.GetQuickCode();
+      EXPECT_TRUE(oat_method.GetPortableCode() == NULL) << PrettyMethod(method) << " "
+                                                        << oat_method.GetPortableCode();
+      EXPECT_EQ(oat_method.GetFrameSizeInBytes(), 0U);
       EXPECT_EQ(oat_method.GetCoreSpillMask(), 0U);
       EXPECT_EQ(oat_method.GetFpSpillMask(), 0U);
-#endif
     } else {
-      const void* oat_code = oat_method.GetCode();
-      EXPECT_TRUE(oat_code != NULL) << PrettyMethod(method);
-      uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(oat_code), 2);
-      oat_code = reinterpret_cast<const void*>(oat_code_aligned);
-
-      const std::vector<uint8_t>& code = compiled_method->GetCode();
-      size_t code_size = code.size() * sizeof(code[0]);
-      EXPECT_EQ(0, memcmp(oat_code, &code[0], code_size))
-          << PrettyMethod(method) << " " << code_size;
-      CHECK_EQ(0, memcmp(oat_code, &code[0], code_size));
-#if !defined(ART_USE_PORTABLE_COMPILER)
-      EXPECT_EQ(oat_method.GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
-      EXPECT_EQ(oat_method.GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
-      EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
-#endif
+      const void* quick_oat_code = oat_method.GetQuickCode();
+      if (quick_oat_code != nullptr) {
+        EXPECT_EQ(oat_method.GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
+        EXPECT_EQ(oat_method.GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
+        EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
+        uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(quick_oat_code), 2);
+        quick_oat_code = reinterpret_cast<const void*>(oat_code_aligned);
+        const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
+        EXPECT_TRUE(quick_code != nullptr);
+        size_t code_size = quick_code->size() * sizeof(uint8_t);
+        EXPECT_EQ(0, memcmp(quick_oat_code, &(*quick_code)[0], code_size))
+            << PrettyMethod(method) << " " << code_size;
+        CHECK_EQ(0, memcmp(quick_oat_code, &(*quick_code)[0], code_size));
+      } else {
+        const void* portable_oat_code = oat_method.GetPortableCode();
+        EXPECT_TRUE(portable_oat_code != nullptr) << PrettyMethod(method);
+        EXPECT_EQ(oat_method.GetFrameSizeInBytes(), 0U);
+        EXPECT_EQ(oat_method.GetCoreSpillMask(), 0U);
+        EXPECT_EQ(oat_method.GetFpSpillMask(), 0U);
+        uintptr_t oat_code_aligned = RoundDown(reinterpret_cast<uintptr_t>(portable_oat_code), 2);
+        portable_oat_code = reinterpret_cast<const void*>(oat_code_aligned);
+        const std::vector<uint8_t>* portable_code = compiled_method->GetPortableCode();
+        EXPECT_TRUE(portable_code != nullptr);
+        size_t code_size = portable_code->size() * sizeof(uint8_t);
+        EXPECT_EQ(0, memcmp(portable_oat_code, &(*portable_code)[0], code_size))
+            << PrettyMethod(method) << " " << code_size;
+        CHECK_EQ(0, memcmp(portable_oat_code, &(*portable_code)[0], code_size));
+      }
     }
   }
 };
@@ -70,23 +83,18 @@
   TimingLogger timings("CommonTest::WriteRead", false, false);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
-  // TODO: make selectable
-#if defined(ART_USE_PORTABLE_COMPILER)
-  CompilerBackend compiler_backend = kPortable;
-#else
-  CompilerBackend compiler_backend = kQuick;
-#endif
+  // TODO: make selectable.
+  CompilerBackend compiler_backend = kUsePortableCompiler ? kPortable : kQuick;
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
 
   InstructionSetFeatures insn_features;
-  verified_methods_data_.reset(new VerifiedMethodsData);
+  verification_results_.reset(new VerificationResults);
   method_inliner_map_.reset(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr);
-  callbacks_.Reset(verified_methods_data_.get(), method_inliner_map_.get());
-  CumulativeLogger timer("Compilation times");
-  compiler_driver_.reset(new CompilerDriver(verified_methods_data_.get(),
+  callbacks_.Reset(verification_results_.get(), method_inliner_map_.get());
+  compiler_driver_.reset(new CompilerDriver(verification_results_.get(),
                                             method_inliner_map_.get(),
                                             compiler_backend, insn_set,
-                                            insn_features, false, NULL, 2, true, true, &timer));
+                                            insn_features, false, NULL, 2, true));
   jobject class_loader = NULL;
   if (kCompile) {
     TimingLogger timings("OatTest::WriteRead", false, false);
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 199a2b8..7c5669a 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -23,7 +23,7 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
 #include "gc/space/space.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/array.h"
@@ -39,7 +39,7 @@
 
 OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files,
                      uint32_t image_file_location_oat_checksum,
-                     uint32_t image_file_location_oat_begin,
+                     uintptr_t image_file_location_oat_begin,
                      const std::string& image_file_location,
                      const CompilerDriver* compiler,
                      TimingLogger* timings)
@@ -218,7 +218,7 @@
       mirror::Class::Status status;
       if (compiled_class != NULL) {
         status = compiled_class->GetStatus();
-      } else if (compiler_driver_->GetVerifiedMethodsData()->IsClassRejected(class_ref)) {
+      } else if (compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
         status = mirror::Class::kStatusError;
       } else {
         status = mirror::Class::kStatusNotReady;
@@ -348,8 +348,8 @@
                                     bool __attribute__((unused)) is_native,
                                     InvokeType invoke_type,
                                     uint32_t method_idx, const DexFile& dex_file) {
-  // derived from CompiledMethod if available
-  uint32_t code_offset = 0;
+  // Derived from CompiledMethod if available.
+  uint32_t quick_code_offset = 0;
   uint32_t frame_size_in_bytes = kStackAlignment;
   uint32_t core_spill_mask = 0;
   uint32_t fp_spill_mask = 0;
@@ -358,36 +358,38 @@
   uint32_t gc_map_offset = 0;
 
   OatClass* oat_class = oat_classes_[oat_class_index];
-#if defined(ART_USE_PORTABLE_COMPILER)
-  size_t oat_method_offsets_offset =
-      oat_class->GetOatMethodOffsetsOffsetFromOatHeader(class_def_method_index);
-#endif
-
   CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
-  if (compiled_method != NULL) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-    compiled_method->AddOatdataOffsetToCompliledCodeOffset(
-        oat_method_offsets_offset + OFFSETOF_MEMBER(OatMethodOffsets, code_offset_));
-#else
-    const std::vector<uint8_t>& code = compiled_method->GetCode();
-    offset = compiled_method->AlignCode(offset);
-    DCHECK_ALIGNED(offset, kArmAlignment);
-    uint32_t code_size = code.size() * sizeof(code[0]);
-    CHECK_NE(code_size, 0U);
-    uint32_t thumb_offset = compiled_method->CodeDelta();
-    code_offset = offset + sizeof(code_size) + thumb_offset;
 
-    // Deduplicate code arrays
-    SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter = code_offsets_.find(&code);
-    if (code_iter != code_offsets_.end()) {
-      code_offset = code_iter->second;
+  if (compiled_method != NULL) {
+    const std::vector<uint8_t>* portable_code = compiled_method->GetPortableCode();
+    const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
+    if (portable_code != nullptr) {
+      CHECK(quick_code == nullptr);
+      size_t oat_method_offsets_offset =
+          oat_class->GetOatMethodOffsetsOffsetFromOatHeader(class_def_method_index);
+      compiled_method->AddOatdataOffsetToCompliledCodeOffset(
+          oat_method_offsets_offset + OFFSETOF_MEMBER(OatMethodOffsets, code_offset_));
     } else {
-      code_offsets_.Put(&code, code_offset);
-      offset += sizeof(code_size);  // code size is prepended before code
-      offset += code_size;
-      oat_header_->UpdateChecksum(&code[0], code_size);
+      CHECK(quick_code != nullptr);
+      offset = compiled_method->AlignCode(offset);
+      DCHECK_ALIGNED(offset, kArmAlignment);
+      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      CHECK_NE(code_size, 0U);
+      uint32_t thumb_offset = compiled_method->CodeDelta();
+      quick_code_offset = offset + sizeof(code_size) + thumb_offset;
+
+      // Deduplicate code arrays
+      SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
+          code_offsets_.find(quick_code);
+      if (code_iter != code_offsets_.end()) {
+        quick_code_offset = code_iter->second;
+      } else {
+        code_offsets_.Put(quick_code, quick_code_offset);
+        offset += sizeof(code_size);  // code size is prepended before code
+        offset += code_size;
+        oat_header_->UpdateChecksum(&(*quick_code)[0], code_size);
+      }
     }
-#endif
     frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
     core_spill_mask = compiled_method->GetCoreSpillMask();
     fp_spill_mask = compiled_method->GetFpSpillMask();
@@ -433,7 +435,7 @@
       mirror::Class::Status status;
       if (compiled_class != NULL) {
         status = compiled_class->GetStatus();
-      } else if (compiler_driver_->GetVerifiedMethodsData()->IsClassRejected(class_ref)) {
+      } else if (compiler_driver_->GetVerificationResults()->IsClassRejected(class_ref)) {
         status = mirror::Class::kStatusError;
       } else {
         status = mirror::Class::kStatusNotReady;
@@ -456,7 +458,7 @@
     }
 
     oat_class->method_offsets_[*method_offsets_index] =
-        OatMethodOffsets(code_offset,
+        OatMethodOffsets(quick_code_offset,
                          frame_size_in_bytes,
                          core_spill_mask,
                          fp_spill_mask,
@@ -483,9 +485,11 @@
     // Don't overwrite static method trampoline
     if (!method->IsStatic() || method->IsConstructor() ||
         method->GetDeclaringClass()->IsInitialized()) {
-      method->SetOatCodeOffset(code_offset);
+      // TODO: record portable code offsets: method->SetPortableOatCodeOffset(portable_code_offset);
+      method->SetQuickOatCodeOffset(quick_code_offset);
     } else {
-      method->SetEntryPointFromCompiledCode(NULL);
+      method->SetEntryPointFromPortableCompiledCode(nullptr);
+      method->SetEntryPointFromQuickCompiledCode(nullptr);
     }
     method->SetOatVmapTableOffset(vmap_table_offset);
     method->SetOatNativeGcMapOffset(gc_map_offset);
@@ -753,52 +757,52 @@
   if (compiled_method != NULL) {  // ie. not an abstract method
     const OatMethodOffsets method_offsets = oat_class->method_offsets_[*method_offsets_index];
     (*method_offsets_index)++;
-
-#if !defined(ART_USE_PORTABLE_COMPILER)
-    uint32_t aligned_offset = compiled_method->AlignCode(relative_offset);
-    uint32_t aligned_code_delta = aligned_offset - relative_offset;
-    if (aligned_code_delta != 0) {
-      off_t new_offset = out.Seek(aligned_code_delta, kSeekCurrent);
-      size_code_alignment_ += aligned_code_delta;
-      uint32_t expected_offset = file_offset + aligned_offset;
-      if (static_cast<uint32_t>(new_offset) != expected_offset) {
-        PLOG(ERROR) << "Failed to seek to align oat code. Actual: " << new_offset
-                    << " Expected: " << expected_offset << " File: " << out.GetLocation();
-        return 0;
+    const std::vector<uint8_t>* quick_code = compiled_method->GetQuickCode();
+    if (quick_code != nullptr) {
+      CHECK(compiled_method->GetPortableCode() == nullptr);
+      uint32_t aligned_offset = compiled_method->AlignCode(relative_offset);
+      uint32_t aligned_code_delta = aligned_offset - relative_offset;
+      if (aligned_code_delta != 0) {
+        off_t new_offset = out.Seek(aligned_code_delta, kSeekCurrent);
+        size_code_alignment_ += aligned_code_delta;
+        uint32_t expected_offset = file_offset + aligned_offset;
+        if (static_cast<uint32_t>(new_offset) != expected_offset) {
+          PLOG(ERROR) << "Failed to seek to align oat code. Actual: " << new_offset
+              << " Expected: " << expected_offset << " File: " << out.GetLocation();
+          return 0;
+        }
+        relative_offset += aligned_code_delta;
+        DCHECK_OFFSET();
       }
-      relative_offset += aligned_code_delta;
+      DCHECK_ALIGNED(relative_offset, kArmAlignment);
+      uint32_t code_size = quick_code->size() * sizeof(uint8_t);
+      CHECK_NE(code_size, 0U);
+
+      // Deduplicate code arrays
+      size_t code_offset = relative_offset + sizeof(code_size) + compiled_method->CodeDelta();
+      SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter =
+          code_offsets_.find(quick_code);
+      if (code_iter != code_offsets_.end() && code_offset != method_offsets.code_offset_) {
+        DCHECK(code_iter->second == method_offsets.code_offset_)
+              << PrettyMethod(method_idx, dex_file);
+      } else {
+        DCHECK(code_offset == method_offsets.code_offset_) << PrettyMethod(method_idx, dex_file);
+        if (!out.WriteFully(&code_size, sizeof(code_size))) {
+          ReportWriteFailure("method code size", method_idx, dex_file, out);
+          return 0;
+        }
+        size_code_size_ += sizeof(code_size);
+        relative_offset += sizeof(code_size);
+        DCHECK_OFFSET();
+        if (!out.WriteFully(&(*quick_code)[0], code_size)) {
+          ReportWriteFailure("method code", method_idx, dex_file, out);
+          return 0;
+        }
+        size_code_ += code_size;
+        relative_offset += code_size;
+      }
       DCHECK_OFFSET();
     }
-    DCHECK_ALIGNED(relative_offset, kArmAlignment);
-    const std::vector<uint8_t>& code = compiled_method->GetCode();
-    uint32_t code_size = code.size() * sizeof(code[0]);
-    CHECK_NE(code_size, 0U);
-
-    // Deduplicate code arrays
-    size_t code_offset = relative_offset + sizeof(code_size) + compiled_method->CodeDelta();
-    SafeMap<const std::vector<uint8_t>*, uint32_t>::iterator code_iter = code_offsets_.find(&code);
-    if (code_iter != code_offsets_.end() && code_offset != method_offsets.code_offset_) {
-      DCHECK(code_iter->second == method_offsets.code_offset_)
-          << PrettyMethod(method_idx, dex_file);
-    } else {
-      DCHECK(code_offset == method_offsets.code_offset_) << PrettyMethod(method_idx, dex_file);
-      if (!out.WriteFully(&code_size, sizeof(code_size))) {
-        ReportWriteFailure("method code size", method_idx, dex_file, out);
-        return 0;
-      }
-      size_code_size_ += sizeof(code_size);
-      relative_offset += sizeof(code_size);
-      DCHECK_OFFSET();
-      if (!out.WriteFully(&code[0], code_size)) {
-        ReportWriteFailure("method code", method_idx, dex_file, out);
-        return 0;
-      }
-      size_code_ += code_size;
-      relative_offset += code_size;
-    }
-    DCHECK_OFFSET();
-#endif
-
     const std::vector<uint8_t>& mapping_table = compiled_method->GetMappingTable();
     size_t mapping_table_size = mapping_table.size() * sizeof(mapping_table[0]);
 
@@ -994,7 +998,6 @@
   delete compiled_methods_;
 }
 
-#if defined(ART_USE_PORTABLE_COMPILER)
 size_t OatWriter::OatClass::GetOatMethodOffsetsOffsetFromOatHeader(
     size_t class_def_method_index_) const {
   uint32_t method_offset = GetOatMethodOffsetsOffsetFromOatClass(class_def_method_index_);
@@ -1008,7 +1011,6 @@
     size_t class_def_method_index_) const {
   return oat_method_offsets_offsets_from_oat_class_[class_def_method_index_];
 }
-#endif
 
 size_t OatWriter::OatClass::SizeOf() const {
   return sizeof(status_)
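
As a side illustration (not part of the change itself): the rewritten offset logic above deduplicates quick code by keying a map on the address of the code vector, relying on identical code arrays being shared as the same vector instance so one offset can be reused. A plain-STL sketch of that idea, with illustrative names rather than the OatWriter API:

#include <cstdint>
#include <map>
#include <vector>

// Returns the offset at which this code array lives in the oat file, reusing a
// previously recorded offset when the same (shared) vector has been seen.
static uint32_t OffsetForCode(const std::vector<uint8_t>* code,
                              uint32_t next_offset,
                              std::map<const std::vector<uint8_t>*, uint32_t>* seen) {
  auto it = seen->find(code);
  if (it != seen->end()) {
    return it->second;  // duplicate: point at the copy already written
  }
  seen->emplace(code, next_offset);  // first occurrence: it will be written here
  return next_offset;
}
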
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 64275e6..067c789 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -65,7 +65,7 @@
  public:
   OatWriter(const std::vector<const DexFile*>& dex_files,
             uint32_t image_file_location_oat_checksum,
-            uint32_t image_file_location_oat_begin,
+            uintptr_t image_file_location_oat_begin,
             const std::string& image_file_location,
             const CompilerDriver* compiler,
             TimingLogger* timings);
@@ -150,10 +150,8 @@
                       uint32_t num_non_null_compiled_methods,
                       mirror::Class::Status status);
     ~OatClass();
-#if defined(ART_USE_PORTABLE_COMPILER)
     size_t GetOatMethodOffsetsOffsetFromOatHeader(size_t class_def_method_index_) const;
     size_t GetOatMethodOffsetsOffsetFromOatClass(size_t class_def_method_index_) const;
-#endif
     size_t SizeOf() const;
     void UpdateChecksum(OatHeader& oat_header) const;
     bool Write(OatWriter* oat_writer, OutputStream& out, const size_t file_offset) const;
@@ -217,7 +215,7 @@
 
   // dependencies on the image.
   uint32_t image_file_location_oat_checksum_;
-  uint32_t image_file_location_oat_begin_;
+  uintptr_t image_file_location_oat_begin_;
   std::string image_file_location_;
 
   // data to write
diff --git a/compiler/output_stream.h b/compiler/output_stream.h
index 112dcfc..478a854 100644
--- a/compiler/output_stream.h
+++ b/compiler/output_stream.h
@@ -41,7 +41,7 @@
     return location_;
   }
 
-  virtual bool WriteFully(const void* buffer, int64_t byte_count) = 0;
+  virtual bool WriteFully(const void* buffer, size_t byte_count) = 0;
 
   virtual off_t Seek(off_t offset, Whence whence) = 0;
 
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 638e0ec..7cc253c 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -62,7 +62,9 @@
 
   explicit DedupeSet(const char* set_name) {
     for (HashType i = 0; i < kShard; ++i) {
-      lock_name_[i] = StringPrintf("%s lock %d", set_name, i);
+      std::ostringstream oss;
+      oss << set_name << " lock " << i;
+      lock_name_[i] = oss.str();
       lock_[i].reset(new Mutex(lock_name_[i].c_str()));
     }
   }
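
As a side illustration (not part of the change itself): the dedupe_set.h hunk swaps StringPrintf for an ostringstream when naming the shard locks, since operator<< picks the right overload for the loop counter whatever integral type HashType is, whereas a fixed "%d" only matches int. A minimal sketch of the safer form, using a hypothetical helper:

#include <sstream>
#include <string>

template <typename HashType>
std::string MakeShardLockName(const char* set_name, HashType shard) {
  std::ostringstream oss;
  oss << set_name << " lock " << shard;  // correct for any integral HashType
  return oss.str();
}
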
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 2be3d56..fdd2bab 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -23,18 +23,6 @@
 
 namespace art {
 namespace mips {
-#if 0
-class DirectCallRelocation : public AssemblerFixup {
- public:
-  void Process(const MemoryRegion& region, int position) {
-    // Direct calls are relative to the following instruction on mips.
-    int32_t pointer = region.Load<int32_t>(position);
-    int32_t start = reinterpret_cast<int32_t>(region.start());
-    int32_t delta = start + position + sizeof(int32_t);
-    region.Store<int32_t>(position, pointer - delta);
-  }
-};
-#endif
 
 std::ostream& operator<<(std::ostream& os, const DRegister& rhs) {
   if (rhs >= D0 && rhs < kNumberOfDRegisters) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 9095180..136d248 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -24,17 +24,6 @@
 namespace art {
 namespace x86 {
 
-class DirectCallRelocation : public AssemblerFixup {
- public:
-  void Process(const MemoryRegion& region, int position) {
-    // Direct calls are relative to the following instruction on x86.
-    int32_t pointer = region.Load<int32_t>(position);
-    int32_t start = reinterpret_cast<int32_t>(region.start());
-    int32_t delta = start + position + sizeof(int32_t);
-    region.Store<int32_t>(position, pointer - delta);
-  }
-};
-
 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
   return os << "XMM" << static_cast<int>(reg);
 }
@@ -1304,15 +1293,6 @@
 }
 
 
-void X86Assembler::Stop(const char* message) {
-  // Emit the message address as immediate operand in the test rax instruction,
-  // followed by the int3 instruction.
-  // Execution can be resumed with the 'cont' command in gdb.
-  testl(EAX, Immediate(reinterpret_cast<int32_t>(message)));
-  int3();
-}
-
-
 void X86Assembler::EmitOperand(int reg_or_opcode, const Operand& operand) {
   CHECK_GE(reg_or_opcode, 0);
   CHECK_LT(reg_or_opcode, 8);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4ba03d1..0fa8e00 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -452,9 +452,6 @@
   void Align(int alignment, int offset);
   void Bind(Label* label);
 
-  // Debugging and bringup support.
-  void Stop(const char* message);
-
   //
   // Overridden common assembler high-level functionality
   //
diff --git a/compiler/vector_output_stream.h b/compiler/vector_output_stream.h
index a3f8226..09daa12 100644
--- a/compiler/vector_output_stream.h
+++ b/compiler/vector_output_stream.h
@@ -31,7 +31,7 @@
 
   virtual ~VectorOutputStream() {}
 
-  bool WriteFully(const void* buffer, int64_t byte_count) {
+  bool WriteFully(const void* buffer, size_t byte_count) {
     if (static_cast<size_t>(offset_) == vector_.size()) {
       const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer);
       vector_.insert(vector_.end(), &start[0], &start[byte_count]);
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 05dcd7b..6cd0538 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -33,7 +33,7 @@
   ifeq ($(ART_BUILD_NDEBUG),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
   endif
-  ifeq ($(ART_BUILD_NDEBUG),true)
+  ifeq ($(ART_BUILD_DEBUG),true)
     $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
   endif
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 20fafe2..90eea5e 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -32,7 +32,7 @@
 #include "class_linker.h"
 #include "compiler_callbacks.h"
 #include "dex_file-inl.h"
-#include "dex/verified_methods_data.h"
+#include "dex/verification_results.h"
 #include "driver/compiler_driver.h"
 #include "elf_fixup.h"
 #include "elf_stripper.h"
@@ -180,7 +180,11 @@
 
   ~Dex2Oat() {
     delete runtime_;
-    VLOG(compiler) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
+    LogCompletionTime();
+  }
+
+  void LogCompletionTime() {
+    LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
               << " (threads: " << thread_count_ << ")";
   }
 
@@ -249,9 +253,7 @@
                                       bool image,
                                       UniquePtr<CompilerDriver::DescriptorSet>& image_classes,
                                       bool dump_stats,
-                                      bool dump_passes,
-                                      TimingLogger& timings,
-                                      CumulativeLogger& compiler_phases_timings) {
+                                      TimingLogger& timings) {
     // SirtRef and ClassLoader creation needs to come after Runtime::Create
     jobject class_loader = NULL;
     Thread* self = Thread::Current();
@@ -270,7 +272,7 @@
       Runtime::Current()->SetCompileTimeClassPath(class_loader, class_path_files);
     }
 
-    UniquePtr<CompilerDriver> driver(new CompilerDriver(verified_methods_data_.get(),
+    UniquePtr<CompilerDriver> driver(new CompilerDriver(verification_results_.get(),
                                                         method_inliner_map_.get(),
                                                         compiler_backend_,
                                                         instruction_set_,
@@ -278,9 +280,7 @@
                                                         image,
                                                         image_classes.release(),
                                                         thread_count_,
-                                                        dump_stats,
-                                                        dump_passes,
-                                                        &compiler_phases_timings));
+                                                        dump_stats));
 
     if (compiler_backend_ == kPortable) {
       driver->SetBitcodeFileName(bitcode_filename);
@@ -291,13 +291,13 @@
     timings.NewSplit("dex2oat OatWriter");
     std::string image_file_location;
     uint32_t image_file_location_oat_checksum = 0;
-    uint32_t image_file_location_oat_data_begin = 0;
+    uintptr_t image_file_location_oat_data_begin = 0;
     if (!driver->IsImage()) {
       TimingLogger::ScopedSplit split("Loading image checksum", &timings);
       gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace();
       image_file_location_oat_checksum = image_space->GetImageHeader().GetOatChecksum();
       image_file_location_oat_data_begin =
-          reinterpret_cast<uint32_t>(image_space->GetImageHeader().GetOatDataBegin());
+          reinterpret_cast<uintptr_t>(image_space->GetImageHeader().GetOatDataBegin());
       image_file_location = image_space->GetImageFilename();
       if (host_prefix != NULL && StartsWith(image_file_location, host_prefix->c_str())) {
         image_file_location = image_file_location.substr(host_prefix->size());
@@ -352,28 +352,28 @@
  private:
   class Dex2OatCompilerCallbacks : public CompilerCallbacks {
     public:
-      Dex2OatCompilerCallbacks(VerifiedMethodsData* verified_methods_data,
+      Dex2OatCompilerCallbacks(VerificationResults* verification_results,
                                DexFileToMethodInlinerMap* method_inliner_map)
-          : verified_methods_data_(verified_methods_data),
+          : verification_results_(verification_results),
             method_inliner_map_(method_inliner_map) { }
       virtual ~Dex2OatCompilerCallbacks() { }
 
       virtual bool MethodVerified(verifier::MethodVerifier* verifier)
           SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-        bool result = verified_methods_data_->ProcessVerifiedMethod(verifier);
+        bool result = verification_results_->ProcessVerifiedMethod(verifier);
         if (result && method_inliner_map_ != nullptr) {
           MethodReference ref = verifier->GetMethodReference();
           method_inliner_map_->GetMethodInliner(ref.dex_file)
-              ->AnalyseMethodCode(ref.dex_method_index, verifier->CodeItem());
+              ->AnalyseMethodCode(verifier);
         }
         return result;
       }
       virtual void ClassRejected(ClassReference ref) {
-        verified_methods_data_->AddRejectedClass(ref);
+        verification_results_->AddRejectedClass(ref);
       }
 
     private:
-      VerifiedMethodsData* verified_methods_data_;
+      VerificationResults* verification_results_;
       DexFileToMethodInlinerMap* method_inliner_map_;
   };
 
@@ -384,9 +384,9 @@
       : compiler_backend_(compiler_backend),
         instruction_set_(instruction_set),
         instruction_set_features_(instruction_set_features),
-        verified_methods_data_(new VerifiedMethodsData),
+        verification_results_(new VerificationResults),
         method_inliner_map_(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr),
-        callbacks_(verified_methods_data_.get(), method_inliner_map_.get()),
+        callbacks_(verification_results_.get(), method_inliner_map_.get()),
         runtime_(nullptr),
         thread_count_(thread_count),
         start_ns_(NanoTime()) {
@@ -450,7 +450,7 @@
   const InstructionSet instruction_set_;
   const InstructionSetFeatures instruction_set_features_;
 
-  UniquePtr<VerifiedMethodsData> verified_methods_data_;
+  UniquePtr<VerificationResults> verification_results_;
   UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
   Dex2OatCompilerCallbacks callbacks_;
   Runtime* runtime_;
@@ -654,7 +654,6 @@
 
 static int dex2oat(int argc, char** argv) {
   TimingLogger timings("compiler", false, false);
-  CumulativeLogger compiler_phases_timings("compilation times");
 
   InitLogging(argv);
 
@@ -684,11 +683,7 @@
   std::string android_root;
   std::vector<const char*> runtime_args;
   int thread_count = sysconf(_SC_NPROCESSORS_CONF);
-#if defined(ART_USE_PORTABLE_COMPILER)
-  CompilerBackend compiler_backend = kPortable;
-#else
-  CompilerBackend compiler_backend = kQuick;
-#endif
+  CompilerBackend compiler_backend = kUsePortableCompiler ? kPortable : kQuick;
 
   // Take the default set of instruction features from the build.
   InstructionSetFeatures instruction_set_features =
@@ -701,14 +696,13 @@
 #elif defined(__mips__)
   InstructionSet instruction_set = kMips;
 #else
-#error "Unsupported architecture"
+  InstructionSet instruction_set = kNone;
 #endif
 
 
   bool is_host = false;
   bool dump_stats = false;
   bool dump_timing = false;
-  bool dump_passes = false;
   bool dump_slow_timing = kIsDebugBuild;
   bool watch_dog_enabled = !kIsTargetBuild;
 
@@ -779,6 +773,8 @@
         instruction_set = kMips;
       } else if (instruction_set_str == "x86") {
         instruction_set = kX86;
+      } else if (instruction_set_str == "x86_64") {
+        instruction_set = kX86_64;
       }
     } else if (option.starts_with("--instruction-set-features=")) {
       StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
@@ -802,8 +798,6 @@
       runtime_args.push_back(argv[i]);
     } else if (option == "--dump-timing") {
       dump_timing = true;
-    } else if (option == "--dump-passes") {
-      dump_passes = true;
     } else if (option == "--dump-stats") {
       dump_stats = true;
     } else {
@@ -1075,9 +1069,7 @@
                                                                   image,
                                                                   image_classes,
                                                                   dump_stats,
-                                                                  dump_passes,
-                                                                  timings,
-                                                                  compiler_phases_timings));
+                                                                  timings));
 
   if (compiler.get() == NULL) {
     LOG(ERROR) << "Failed to create oat file: " << oat_location;
@@ -1153,9 +1145,6 @@
     if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
       LOG(INFO) << Dumpable<TimingLogger>(timings);
     }
-    if (dump_passes) {
-      LOG(INFO) << Dumpable<CumulativeLogger>(compiler.get()->GetTimingsLogger());
-    }
     return EXIT_SUCCESS;
   }
 
@@ -1198,13 +1187,11 @@
   if (dump_timing || (dump_slow_timing && timings.GetTotalNs() > MsToNs(1000))) {
     LOG(INFO) << Dumpable<TimingLogger>(timings);
   }
-  if (dump_passes) {
-    LOG(INFO) << Dumpable<CumulativeLogger>(compiler_phases_timings);
-  }
 
   // Everything was successfully written, do an explicit exit here to avoid running Runtime
   // destructors that take time (bug 10645725) unless we're a debug build or running on valgrind.
   if (!kIsDebugBuild || (RUNNING_ON_VALGRIND == 0)) {
+    dex2oat->LogCompletionTime();
     exit(EXIT_SUCCESS);
   }
 
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 68626f6..3e6e33f 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -16,6 +16,8 @@
 
 #include "disassembler_arm.h"
 
+#include <inttypes.h>
+
 #include <iostream>
 
 #include "base/logging.h"
@@ -711,7 +713,7 @@
                 if (Rn.r == 15 && U == 1) {
                   intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
                   lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2);
-                  args << StringPrintf("  ; 0x%llx", *reinterpret_cast<int64_t*>(lit_adr));
+                  args << StringPrintf("  ; 0x%" PRIx64, *reinterpret_cast<int64_t*>(lit_adr));
                 }
               } else if (Rn.r == 13 && W == 1 && U == L) {  // VPUSH/VPOP
                 opcode << (L == 1 ? "vpop" : "vpush");
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index c51ea7b..903d755 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -246,6 +246,42 @@
         load = *instr == 0x10;
         store = !load;
         break;
+      case 0x12: case 0x13:
+        if (prefix[2] == 0x66) {
+          opcode << "movlpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movlps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x12;
+        store = !load;
+        break;
+      case 0x16: case 0x17:
+        if (prefix[2] == 0x66) {
+          opcode << "movhpd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movhps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x16;
+        store = !load;
+        break;
+      case 0x28: case 0x29:
+        if (prefix[2] == 0x66) {
+          opcode << "movapd";
+          prefix[2] = 0;  // clear prefix now it's served its purpose as part of the opcode
+        } else if (prefix[0] == 0) {
+          opcode << "movaps";
+        }
+        has_modrm = true;
+        src_reg_file = dst_reg_file = SSE;
+        load = *instr == 0x28;
+        store = !load;
+        break;
       case 0x2A:
         if (prefix[2] == 0x66) {
           opcode << "cvtpi2pd";
@@ -392,6 +428,17 @@
         has_modrm = true;
         src_reg_file = dst_reg_file = SSE;
         break;
+      case 0x62:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // Clear prefix now. It has served its purpose as part of the opcode.
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        opcode << "punpckldq";
+        load = true;
+        has_modrm = true;
+        break;
       case 0x6E:
         if (prefix[2] == 0x66) {
           dst_reg_file = SSE;
@@ -485,6 +532,18 @@
         has_modrm = true;
         store = true;
         break;
+      case 0xA4:
+        opcode << "shld";
+        has_modrm = true;
+        load = true;
+        immediate_bytes = 1;
+        break;
+      case 0xAC:
+        opcode << "shrd";
+        has_modrm = true;
+        load = true;
+        immediate_bytes = 1;
+        break;
       case 0xAE:
         if (prefix[0] == 0xF3) {
           prefix[0] = 0;  // clear prefix now it's served its purpose as part of the opcode
@@ -571,6 +630,9 @@
     reg_is_opcode = true;
     store = true;
     break;
+  case 0x99:
+    opcode << "cdq";
+    break;
   case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
     opcode << "mov";
     immediate_bytes = 1;
diff --git a/jdwpspy/Android.mk b/jdwpspy/Android.mk
deleted file mode 100644
index 97162f0..0000000
--- a/jdwpspy/Android.mk
+++ /dev/null
@@ -1,25 +0,0 @@
-#
-# Copyright (C) 2006 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-LOCAL_PATH:= $(call my-dir)
-
-include $(CLEAR_VARS)
-LOCAL_SRC_FILES:= Main.cpp Net.cpp
-LOCAL_C_INCLUDES += art/runtime
-LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
-LOCAL_MODULE := jdwpspy
-include $(BUILD_HOST_EXECUTABLE)
-ART_HOST_EXECUTABLES += $(HOST_OUT_EXECUTABLES)/$(LOCAL_MODULE)
diff --git a/jdwpspy/Common.h b/jdwpspy/Common.h
deleted file mode 100644
index 30a49fb..0000000
--- a/jdwpspy/Common.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * jdwpspy common stuff.
- */
-#ifndef ART_JDWPSPY_COMMON_H_
-#define ART_JDWPSPY_COMMON_H_
-
-#include <stdint.h>
-#include <stdio.h>
-#include <sys/types.h>
-
-typedef uint8_t u1;
-typedef uint16_t u2;
-typedef uint32_t u4;
-typedef uint64_t u8;
-
-#define NELEM(x) (sizeof(x) / sizeof((x)[0]))
-
-#ifndef _JDWP_MISC_INLINE
-# define INLINE extern inline
-#else
-# define INLINE
-#endif
-
-/*
- * Get 1 byte.  (Included to make the code more legible.)
- */
-INLINE u1 get1(unsigned const char* pSrc) {
-    return *pSrc;
-}
-
-/*
- * Get 2 big-endian bytes.
- */
-INLINE u2 get2BE(unsigned char const* pSrc) {
-    u2 result;
-
-    result = *pSrc++ << 8;
-    result |= *pSrc++;
-
-    return result;
-}
-
-/*
- * Get 4 big-endian bytes.
- */
-INLINE u4 get4BE(unsigned char const* pSrc) {
-    u4 result;
-
-    result = *pSrc++ << 24;
-    result |= *pSrc++ << 16;
-    result |= *pSrc++ << 8;
-    result |= *pSrc++;
-
-    return result;
-}
-
-/*
- * Get 8 big-endian bytes.
- */
-INLINE u8 get8BE(unsigned char const* pSrc) {
-    u8 result;
-
-    result = (u8) *pSrc++ << 56;
-    result |= (u8) *pSrc++ << 48;
-    result |= (u8) *pSrc++ << 40;
-    result |= (u8) *pSrc++ << 32;
-    result |= (u8) *pSrc++ << 24;
-    result |= (u8) *pSrc++ << 16;
-    result |= (u8) *pSrc++ << 8;
-    result |= (u8) *pSrc++;
-
-    return result;
-}
-
-
-/*
- * Start here.
- */
-int run(const char* connectHost, int connectPort, int listenPort);
-
-/*
- * Print a hex dump to the specified file pointer.
- *
- * "local" mode prints a hex dump starting from offset 0 (roughly equivalent
- * to "xxd -g1").
- *
- * "mem" mode shows the actual memory address, and will offset the start
- * so that the low nibble of the address is always zero.
- */
-enum HexDumpMode { kHexDumpLocal, kHexDumpMem };
-void printHexDump(const void* vaddr, size_t length);
-void printHexDump2(const void* vaddr, size_t length, const char* prefix);
-void printHexDumpEx(FILE* fp, const void* vaddr, size_t length,
-    HexDumpMode mode, const char* prefix);
-
-#endif  // ART_JDWPSPY_COMMON_H_
diff --git a/jdwpspy/Main.cpp b/jdwpspy/Main.cpp
deleted file mode 100644
index 0f68d52..0000000
--- a/jdwpspy/Main.cpp
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * JDWP spy.
- */
-#define _JDWP_MISC_INLINE
-#include "Common.h"
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <ctype.h>
-
-static const char gHexDigit[] = "0123456789abcdef";
-
-/*
- * Print a hex dump.  Just hands control off to the fancy version.
- */
-void printHexDump(const void* vaddr, size_t length)
-{
-    printHexDumpEx(stdout, vaddr, length, kHexDumpLocal, "");
-}
-void printHexDump2(const void* vaddr, size_t length, const char* prefix)
-{
-    printHexDumpEx(stdout, vaddr, length, kHexDumpLocal, prefix);
-}
-
-/*
- * Print a hex dump in this format:
- *
-01234567: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef\n
- */
-void printHexDumpEx(FILE* fp, const void* vaddr, size_t length,
-    HexDumpMode mode, const char* prefix)
-{
-    const unsigned char* addr = reinterpret_cast<const unsigned char*>(vaddr);
-    char out[77];       /* exact fit */
-    unsigned int offset;    /* offset to show while printing */
-    char* hex;
-    char* asc;
-    int gap;
-
-    if (mode == kHexDumpLocal)
-        offset = 0;
-    else
-        offset = (int) addr;
-
-    memset(out, ' ', sizeof(out)-1);
-    out[8] = ':';
-    out[sizeof(out)-2] = '\n';
-    out[sizeof(out)-1] = '\0';
-
-    gap = (int) offset & 0x0f;
-    while (length) {
-        unsigned int lineOffset = offset & ~0x0f;
-        char* hex = out;
-        char* asc = out + 59;
-
-        for (int i = 0; i < 8; i++) {
-            *hex++ = gHexDigit[lineOffset >> 28];
-            lineOffset <<= 4;
-        }
-        hex++;
-        hex++;
-
-        int count = ((int)length > 16-gap) ? 16-gap : (int) length; /* cap length */
-        assert(count != 0);
-        assert(count+gap <= 16);
-
-        if (gap) {
-            /* only on first line */
-            hex += gap * 3;
-            asc += gap;
-        }
-
-        int i;
-        for (i = gap ; i < count+gap; i++) {
-            *hex++ = gHexDigit[*addr >> 4];
-            *hex++ = gHexDigit[*addr & 0x0f];
-            hex++;
-            if (isprint(*addr))
-                *asc++ = *addr;
-            else
-                *asc++ = '.';
-            addr++;
-        }
-        for ( ; i < 16; i++) {
-            /* erase extra stuff; only happens on last line */
-            *hex++ = ' ';
-            *hex++ = ' ';
-            hex++;
-            *asc++ = ' ';
-        }
-
-        fprintf(fp, "%s%s", prefix, out);
-
-        gap = 0;
-        length -= count;
-        offset += count;
-    }
-}
-
-
-/*
- * Explain it.
- */
-static void usage(const char* progName)
-{
-    fprintf(stderr, "Usage: %s VM-port [debugger-listen-port]\n\n", progName);
-    fprintf(stderr,
-"When a debugger connects to the debugger-listen-port, jdwpspy will connect\n");
-    fprintf(stderr, "to the VM on the VM-port.\n");
-}
-
-/*
- * Parse args.
- */
-int main(int argc, char* argv[])
-{
-    if (argc < 2 || argc > 3) {
-        usage("jdwpspy");
-        return 2;
-    }
-
-    setvbuf(stdout, NULL, _IONBF, 0);
-
-    /* may want this to be host:port */
-    int connectPort = atoi(argv[1]);
-
-    int listenPort;
-    if (argc > 2)
-        listenPort = atoi(argv[2]);
-    else
-        listenPort = connectPort + 1;
-
-    int cc = run("localhost", connectPort, listenPort);
-
-    return (cc != 0);
-}
diff --git a/jdwpspy/Net.cpp b/jdwpspy/Net.cpp
deleted file mode 100644
index 38d4e26..0000000
--- a/jdwpspy/Net.cpp
+++ /dev/null
@@ -1,751 +0,0 @@
-/*
- * Copyright 2006 The Android Open Source Project
- *
- * JDWP spy.  This is a rearranged version of the JDWP code from the VM.
- */
-#include "Common.h"
-#include "jdwp/jdwp_constants.h"
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <time.h>
-#include <errno.h>
-#include <assert.h>
-
-#include <iostream>
-#include <sstream>
-
-#define kInputBufferSize    (256*1024)
-
-#define kMagicHandshakeLen  14      /* "JDWP-Handshake" */
-#define kJDWPHeaderLen      11
-#define kJDWPFlagReply      0x80
-
-
-/*
- * Information about the remote end.
- */
-struct Peer {
-    char    label[2];           /* 'D' or 'V' */
-
-    int     sock;
-    unsigned char   inputBuffer[kInputBufferSize];
-    int     inputCount;
-
-    bool    awaitingHandshake;  /* waiting for "JDWP-Handshake" */
-};
-
-
-/*
- * Network state.
- */
-struct NetState {
-    /* listen here for connection from debugger */
-    int     listenSock;
-
-    /* connect here to contact VM */
-    in_addr vmAddr;
-    uint16_t vmPort;
-
-    Peer    dbg;
-    Peer    vm;
-};
-
-/*
- * Function names.
- */
-struct JdwpHandlerMap {
-    u1  cmdSet;
-    u1  cmd;
-    const char* descr;
-};
-
-/*
- * Map commands to names.
- *
- * Command sets 0-63 are incoming requests, 64-127 are outbound requests,
- * and 128-256 are vendor-defined.
- */
-static const JdwpHandlerMap gHandlerMap[] = {
-    /* VirtualMachine command set (1) */
-    { 1,    1,  "VirtualMachine.Version" },
-    { 1,    2,  "VirtualMachine.ClassesBySignature" },
-    { 1,    3,  "VirtualMachine.AllClasses" },
-    { 1,    4,  "VirtualMachine.AllThreads" },
-    { 1,    5,  "VirtualMachine.TopLevelThreadGroups" },
-    { 1,    6,  "VirtualMachine.Dispose" },
-    { 1,    7,  "VirtualMachine.IDSizes" },
-    { 1,    8,  "VirtualMachine.Suspend" },
-    { 1,    9,  "VirtualMachine.Resume" },
-    { 1,    10, "VirtualMachine.Exit" },
-    { 1,    11, "VirtualMachine.CreateString" },
-    { 1,    12, "VirtualMachine.Capabilities" },
-    { 1,    13, "VirtualMachine.ClassPaths" },
-    { 1,    14, "VirtualMachine.DisposeObjects" },
-    { 1,    15, "VirtualMachine.HoldEvents" },
-    { 1,    16, "VirtualMachine.ReleaseEvents" },
-    { 1,    17, "VirtualMachine.CapabilitiesNew" },
-    { 1,    18, "VirtualMachine.RedefineClasses" },
-    { 1,    19, "VirtualMachine.SetDefaultStratum" },
-    { 1,    20, "VirtualMachine.AllClassesWithGeneric"},
-    { 1,    21, "VirtualMachine.InstanceCounts"},
-
-    /* ReferenceType command set (2) */
-    { 2,    1,  "ReferenceType.Signature" },
-    { 2,    2,  "ReferenceType.ClassLoader" },
-    { 2,    3,  "ReferenceType.Modifiers" },
-    { 2,    4,  "ReferenceType.Fields" },
-    { 2,    5,  "ReferenceType.Methods" },
-    { 2,    6,  "ReferenceType.GetValues" },
-    { 2,    7,  "ReferenceType.SourceFile" },
-    { 2,    8,  "ReferenceType.NestedTypes" },
-    { 2,    9,  "ReferenceType.Status" },
-    { 2,    10, "ReferenceType.Interfaces" },
-    { 2,    11, "ReferenceType.ClassObject" },
-    { 2,    12, "ReferenceType.SourceDebugExtension" },
-    { 2,    13, "ReferenceType.SignatureWithGeneric" },
-    { 2,    14, "ReferenceType.FieldsWithGeneric" },
-    { 2,    15, "ReferenceType.MethodsWithGeneric" },
-    { 2,    16, "ReferenceType.Instances" },
-    { 2,    17, "ReferenceType.ClassFileVersion" },
-    { 2,    18, "ReferenceType.ConstantPool" },
-
-    /* ClassType command set (3) */
-    { 3,    1,  "ClassType.Superclass" },
-    { 3,    2,  "ClassType.SetValues" },
-    { 3,    3,  "ClassType.InvokeMethod" },
-    { 3,    4,  "ClassType.NewInstance" },
-
-    /* ArrayType command set (4) */
-    { 4,    1,  "ArrayType.NewInstance" },
-
-    /* InterfaceType command set (5) */
-
-    /* Method command set (6) */
-    { 6,    1,  "Method.LineTable" },
-    { 6,    2,  "Method.VariableTable" },
-    { 6,    3,  "Method.Bytecodes" },
-    { 6,    4,  "Method.IsObsolete" },
-    { 6,    5,  "Method.VariableTableWithGeneric" },
-
-    /* Field command set (8) */
-
-    /* ObjectReference command set (9) */
-    { 9,    1,  "ObjectReference.ReferenceType" },
-    { 9,    2,  "ObjectReference.GetValues" },
-    { 9,    3,  "ObjectReference.SetValues" },
-    { 9,    4,  "ObjectReference.UNUSED" },
-    { 9,    5,  "ObjectReference.MonitorInfo" },
-    { 9,    6,  "ObjectReference.InvokeMethod" },
-    { 9,    7,  "ObjectReference.DisableCollection" },
-    { 9,    8,  "ObjectReference.EnableCollection" },
-    { 9,    9,  "ObjectReference.IsCollected" },
-    { 9,    10, "ObjectReference.ReferringObjects" },
-
-    /* StringReference command set (10) */
-    { 10,   1,  "StringReference.Value" },
-
-    /* ThreadReference command set (11) */
-    { 11,   1,  "ThreadReference.Name" },
-    { 11,   2,  "ThreadReference.Suspend" },
-    { 11,   3,  "ThreadReference.Resume" },
-    { 11,   4,  "ThreadReference.Status" },
-    { 11,   5,  "ThreadReference.ThreadGroup" },
-    { 11,   6,  "ThreadReference.Frames" },
-    { 11,   7,  "ThreadReference.FrameCount" },
-    { 11,   8,  "ThreadReference.OwnedMonitors" },
-    { 11,   9,  "ThreadReference.CurrentContendedMonitor" },
-    { 11,   10, "ThreadReference.Stop" },
-    { 11,   11, "ThreadReference.Interrupt" },
-    { 11,   12, "ThreadReference.SuspendCount" },
-    { 11,   13, "ThreadReference.OwnedMonitorsStackDepthInfo" },
-    { 11,   14, "ThreadReference.ForceEarlyReturn" },
-
-    /* ThreadGroupReference command set (12) */
-    { 12,   1,  "ThreadGroupReference.Name" },
-    { 12,   2,  "ThreadGroupReference.Parent" },
-    { 12,   3,  "ThreadGroupReference.Children" },
-
-    /* ArrayReference command set (13) */
-    { 13,   1,  "ArrayReference.Length" },
-    { 13,   2,  "ArrayReference.GetValues" },
-    { 13,   3,  "ArrayReference.SetValues" },
-
-    /* ClassLoaderReference command set (14) */
-    { 14,   1,  "ArrayReference.VisibleClasses" },
-
-    /* EventRequest command set (15) */
-    { 15,   1,  "EventRequest.Set" },
-    { 15,   2,  "EventRequest.Clear" },
-    { 15,   3,  "EventRequest.ClearAllBreakpoints" },
-
-    /* StackFrame command set (16) */
-    { 16,   1,  "StackFrame.GetValues" },
-    { 16,   2,  "StackFrame.SetValues" },
-    { 16,   3,  "StackFrame.ThisObject" },
-    { 16,   4,  "StackFrame.PopFrames" },
-
-    /* ClassObjectReference command set (17) */
-    { 17,   1,  "ClassObjectReference.ReflectedType" },
-
-    /* Event command set (64) */
-    { 64,  100, "Event.Composite" },
-
-    /* DDMS */
-    { 199,  1,  "DDMS.Chunk" },
-};
-
-/*
- * Look up a command's name.
- */
-static const char* getCommandName(int cmdSet, int cmd)
-{
-    for (int i = 0; i < (int) NELEM(gHandlerMap); i++) {
-        if (gHandlerMap[i].cmdSet == cmdSet &&
-            gHandlerMap[i].cmd == cmd)
-        {
-            return gHandlerMap[i].descr;
-        }
-    }
-
-    return "?UNKNOWN?";
-}
-
-
-void jdwpNetFree(NetState* netState);       /* fwd */
-
-/*
- * Allocate state structure and bind to the listen port.
- *
- * Returns 0 on success.
- */
-NetState* jdwpNetStartup(uint16_t listenPort, const char* connectHost, uint16_t connectPort) {
-    NetState* netState = new NetState;
-    memset(netState, 0, sizeof(*netState));
-    netState->listenSock = -1;
-    netState->dbg.sock = netState->vm.sock = -1;
-
-    strcpy(netState->dbg.label, "D");
-    strcpy(netState->vm.label, "V");
-
-    /*
-     * Set up a socket to listen for connections from the debugger.
-     */
-
-    netState->listenSock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-    if (netState->listenSock < 0) {
-        fprintf(stderr, "Socket create failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    /* allow immediate re-use if we die */
-    {
-        int one = 1;
-        if (setsockopt(netState->listenSock, SOL_SOCKET, SO_REUSEADDR, &one,
-                sizeof(one)) < 0)
-        {
-            fprintf(stderr, "setsockopt(SO_REUSEADDR) failed: %s\n",
-                strerror(errno));
-            goto fail;
-        }
-    }
-
-    sockaddr_in addr;
-    addr.sin_family = AF_INET;
-    addr.sin_port = htons(listenPort);
-    addr.sin_addr.s_addr = INADDR_ANY;
-
-    if (bind(netState->listenSock, (sockaddr*) &addr, sizeof(addr)) != 0)
-    {
-        fprintf(stderr, "attempt to bind to port %u failed: %s\n",
-            listenPort, strerror(errno));
-        goto fail;
-    }
-
-    fprintf(stderr, "+++ bound to port %u\n", listenPort);
-
-    if (listen(netState->listenSock, 5) != 0) {
-        fprintf(stderr, "Listen failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    /*
-     * Do the hostname lookup for the VM.
-     */
-    hostent* pHost;
-
-    pHost = gethostbyname(connectHost);
-    if (pHost == NULL) {
-        fprintf(stderr, "Name lookup of '%s' failed: %s\n",
-            connectHost, strerror(h_errno));
-        goto fail;
-    }
-
-    netState->vmAddr = *((in_addr*) pHost->h_addr_list[0]);
-    netState->vmPort = connectPort;
-
-    fprintf(stderr, "+++ connect host resolved to %s\n",
-        inet_ntoa(netState->vmAddr));
-
-    return netState;
-
-fail:
-    jdwpNetFree(netState);
-    return NULL;
-}
-
-/*
- * Shut down JDWP listener.  Don't free state.
- *
- * Note that "netState" may be partially initialized if "startup" failed.
- */
-void jdwpNetShutdown(NetState* netState)
-{
-    int listenSock = netState->listenSock;
-    int dbgSock = netState->dbg.sock;
-    int vmSock = netState->vm.sock;
-
-    /* clear these out so it doesn't wake up and try to reuse them */
-    /* (important when multi-threaded) */
-    netState->listenSock = netState->dbg.sock = netState->vm.sock = -1;
-
-    if (listenSock >= 0) {
-        shutdown(listenSock, SHUT_RDWR);
-        close(listenSock);
-    }
-    if (dbgSock >= 0) {
-        shutdown(dbgSock, SHUT_RDWR);
-        close(dbgSock);
-    }
-    if (vmSock >= 0) {
-        shutdown(vmSock, SHUT_RDWR);
-        close(vmSock);
-    }
-}
-
-/*
- * Shut down JDWP listener and free its state.
- */
-void jdwpNetFree(NetState* netState)
-{
-    if (netState == NULL)
-        return;
-
-    jdwpNetShutdown(netState);
-    delete netState;
-}
-
-/*
- * Disable the TCP Nagle algorithm, which delays transmission of outbound
- * packets until the previous transmissions have been acked.  JDWP does a
- * lot of back-and-forth with small packets, so this may help.
- */
-static int setNoDelay(int fd)
-{
-    int cc, on = 1;
-
-    cc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
-    assert(cc == 0);
-    return cc;
-}
-
-/*
- * Accept a connection.  This will block waiting for somebody to show up.
- */
-bool jdwpAcceptConnection(NetState* netState)
-{
-    sockaddr_in addr;
-    socklen_t addrlen;
-    int sock;
-
-    if (netState->listenSock < 0)
-        return false;       /* you're not listening! */
-
-    assert(netState->dbg.sock < 0);     /* must not already be talking */
-
-    addrlen = sizeof(addr);
-    do {
-        sock = accept(netState->listenSock, (sockaddr*) &addr, &addrlen);
-        if (sock < 0 && errno != EINTR) {
-            fprintf(stderr, "accept failed: %s\n", strerror(errno));
-            return false;
-        }
-    } while (sock < 0);
-
-    fprintf(stderr, "+++ accepted connection from %s:%u\n",
-        inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
-
-    netState->dbg.sock = sock;
-    netState->dbg.awaitingHandshake = true;
-    netState->dbg.inputCount = 0;
-
-    setNoDelay(sock);
-
-    return true;
-}
-
-/*
- * Close the connections to the debugger and VM.
- *
- * Reset the state so we're ready to receive a new connection.
- */
-void jdwpCloseConnection(NetState* netState)
-{
-    if (netState->dbg.sock >= 0) {
-        fprintf(stderr, "+++ closing connection to debugger\n");
-        close(netState->dbg.sock);
-        netState->dbg.sock = -1;
-    }
-    if (netState->vm.sock >= 0) {
-        fprintf(stderr, "+++ closing connection to vm\n");
-        close(netState->vm.sock);
-        netState->vm.sock = -1;
-    }
-}
-
-/*
- * Figure out if we have a full packet in the buffer.
- */
-static bool haveFullPacket(Peer* pPeer)
-{
-    long length;
-
-    if (pPeer->awaitingHandshake)
-        return (pPeer->inputCount >= kMagicHandshakeLen);
-
-    if (pPeer->inputCount < 4)
-        return false;
-
-    length = get4BE(pPeer->inputBuffer);
-    return (pPeer->inputCount >= length);
-}
-
-/*
- * Consume bytes from the buffer.
- *
- * This would be more efficient with a circular buffer.  However, we're
- * usually only going to find one packet, which is trivial to handle.
- */
-static void consumeBytes(Peer* pPeer, int count)
-{
-    assert(count > 0);
-    assert(count <= pPeer->inputCount);
-
-    if (count == pPeer->inputCount) {
-        pPeer->inputCount = 0;
-        return;
-    }
-
-    memmove(pPeer->inputBuffer, pPeer->inputBuffer + count,
-        pPeer->inputCount - count);
-    pPeer->inputCount -= count;
-}
-
-/*
- * Get the current time.
- */
-static void getCurrentTime(int* pMin, int* pSec)
-{
-    time_t now;
-    tm* ptm;
-
-    now = time(NULL);
-    ptm = localtime(&now);
-    *pMin = ptm->tm_min;
-    *pSec = ptm->tm_sec;
-}
-
-/*
- * Dump the contents of a packet to stdout.
- */
-static void dumpPacket(const unsigned char* packetBuf, const char* srcName,
-    const char* dstName)
-{
-    const unsigned char* buf = packetBuf;
-    char prefix[3];
-    u4 length, id;
-    u1 flags, cmdSet=0, cmd=0;
-    art::JDWP::JdwpError error = art::JDWP::ERR_NONE;
-    bool reply;
-    int dataLen;
-
-    length = get4BE(buf+0);
-    id = get4BE(buf+4);
-    flags = get1(buf+8);
-    if ((flags & kJDWPFlagReply) != 0) {
-        reply = true;
-        error = static_cast<art::JDWP::JdwpError>(get2BE(buf+9));
-    } else {
-        reply = false;
-        cmdSet = get1(buf+9);
-        cmd = get1(buf+10);
-    }
-
-    buf += kJDWPHeaderLen;
-    dataLen = length - (buf - packetBuf);
-
-    if (!reply) {
-        prefix[0] = srcName[0];
-        prefix[1] = '>';
-    } else {
-        prefix[0] = dstName[0];
-        prefix[1] = '<';
-    }
-    prefix[2] = '\0';
-
-    int min, sec;
-    getCurrentTime(&min, &sec);
-
-    if (!reply) {
-        printf("%s REQUEST dataLen=%-5u id=0x%08x flags=0x%02x cmd=%d/%d [%02d:%02d]\n",
-            prefix, dataLen, id, flags, cmdSet, cmd, min, sec);
-        printf("%s   --> %s\n", prefix, getCommandName(cmdSet, cmd));
-    } else {
-        std::ostringstream ss;
-        ss << "TODO";  // get access to the operator<<, or regenerate it for jdwpspy?
-        printf("%s REPLY   dataLen=%-5u id=0x%08x flags=0x%02x err=%d (%s) [%02d:%02d]\n",
-            prefix, dataLen, id, flags, error, ss.str().c_str(), min,sec);
-    }
-    if (dataLen > 0)
-        printHexDump2(buf, dataLen, prefix);
-    printf("%s ----------\n", prefix);
-}
-
-/*
- * Handle a packet.  Returns "false" if we encounter a connection-fatal error.
- */
-static bool handlePacket(Peer* pDst, Peer* pSrc)
-{
-    const unsigned char* buf = pSrc->inputBuffer;
-    u4 length;
-    u1 flags;
-    int cc;
-
-    length = get4BE(buf+0);
-    flags = get1(buf+9);
-
-    assert((int) length <= pSrc->inputCount);
-
-    dumpPacket(buf, pSrc->label, pDst->label);
-
-    cc = write(pDst->sock, buf, length);
-    if (cc != (int) length) {
-        fprintf(stderr, "Failed sending packet: %s\n", strerror(errno));
-        return false;
-    }
-    /*printf("*** wrote %d bytes from %c to %c\n",
-        cc, pSrc->label[0], pDst->label[0]);*/
-
-    consumeBytes(pSrc, length);
-    return true;
-}
-
-/*
- * Handle incoming data.  If we have a full packet in the buffer, process it.
- */
-static bool handleIncoming(Peer* pWritePeer, Peer* pReadPeer)
-{
-    if (haveFullPacket(pReadPeer)) {
-        if (pReadPeer->awaitingHandshake) {
-            printf("Handshake [%c]: %.14s\n",
-                pReadPeer->label[0], pReadPeer->inputBuffer);
-            if (write(pWritePeer->sock, pReadPeer->inputBuffer,
-                    kMagicHandshakeLen) != kMagicHandshakeLen)
-            {
-                fprintf(stderr,
-                    "+++ [%c] handshake write failed\n", pReadPeer->label[0]);
-                goto fail;
-            }
-            consumeBytes(pReadPeer, kMagicHandshakeLen);
-            pReadPeer->awaitingHandshake = false;
-        } else {
-            if (!handlePacket(pWritePeer, pReadPeer))
-                goto fail;
-        }
-    } else {
-        /*printf("*** %c not full yet\n", pReadPeer->label[0]);*/
-    }
-
-    return true;
-
-fail:
-    return false;
-}
-
-/*
- * Process incoming data.  If no data is available, this will block until
- * some arrives.
- *
- * Returns "false" on error (indicating that the connection has been severed).
- */
-bool jdwpProcessIncoming(NetState* netState)
-{
-    int cc;
-
-    assert(netState->dbg.sock >= 0);
-    assert(netState->vm.sock >= 0);
-
-    while (!haveFullPacket(&netState->dbg) && !haveFullPacket(&netState->vm)) {
-        /* read some more */
-        int highFd;
-        fd_set readfds;
-
-        highFd = (netState->dbg.sock > netState->vm.sock) ?
-            netState->dbg.sock+1 : netState->vm.sock+1;
-        FD_ZERO(&readfds);
-        FD_SET(netState->dbg.sock, &readfds);
-        FD_SET(netState->vm.sock, &readfds);
-
-        errno = 0;
-        cc = select(highFd, &readfds, NULL, NULL, NULL);
-        if (cc < 0) {
-            if (errno == EINTR) {
-                fprintf(stderr, "+++ EINTR on select\n");
-                continue;
-            }
-            fprintf(stderr, "+++ select failed: %s\n", strerror(errno));
-            goto fail;
-        }
-
-        if (FD_ISSET(netState->dbg.sock, &readfds)) {
-            cc = read(netState->dbg.sock,
-                netState->dbg.inputBuffer + netState->dbg.inputCount,
-                sizeof(netState->dbg.inputBuffer) - netState->dbg.inputCount);
-            if (cc < 0) {
-                if (errno == EINTR) {
-                    fprintf(stderr, "+++ EINTR on read\n");
-                    continue;
-                }
-                fprintf(stderr, "+++ dbg read failed: %s\n", strerror(errno));
-                goto fail;
-            }
-            if (cc == 0) {
-                if (sizeof(netState->dbg.inputBuffer) ==
-                        netState->dbg.inputCount)
-                    fprintf(stderr, "+++ debugger sent huge message\n");
-                else
-                    fprintf(stderr, "+++ debugger disconnected\n");
-                goto fail;
-            }
-
-            /*printf("*** %d bytes from dbg\n", cc);*/
-            netState->dbg.inputCount += cc;
-        }
-
-        if (FD_ISSET(netState->vm.sock, &readfds)) {
-            cc = read(netState->vm.sock,
-                netState->vm.inputBuffer + netState->vm.inputCount,
-                sizeof(netState->vm.inputBuffer) - netState->vm.inputCount);
-            if (cc < 0) {
-                if (errno == EINTR) {
-                    fprintf(stderr, "+++ EINTR on read\n");
-                    continue;
-                }
-                fprintf(stderr, "+++ vm read failed: %s\n", strerror(errno));
-                goto fail;
-            }
-            if (cc == 0) {
-                if (sizeof(netState->vm.inputBuffer) ==
-                        netState->vm.inputCount)
-                    fprintf(stderr, "+++ vm sent huge message\n");
-                else
-                    fprintf(stderr, "+++ vm disconnected\n");
-                goto fail;
-            }
-
-            /*printf("*** %d bytes from vm\n", cc);*/
-            netState->vm.inputCount += cc;
-        }
-    }
-
-    if (!handleIncoming(&netState->dbg, &netState->vm))
-        goto fail;
-    if (!handleIncoming(&netState->vm, &netState->dbg))
-        goto fail;
-
-    return true;
-
-fail:
-    jdwpCloseConnection(netState);
-    return false;
-}
-
-/*
- * Connect to the VM.
- */
-bool jdwpConnectToVm(NetState* netState)
-{
-    sockaddr_in addr;
-    int sock = -1;
-
-    sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
-    if (sock < 0) {
-        fprintf(stderr, "Socket create failed: %s\n", strerror(errno));
-        goto fail;
-    }
-
-    addr.sin_family = AF_INET;
-    addr.sin_addr = netState->vmAddr;
-    addr.sin_port = htons(netState->vmPort);
-    if (connect(sock, (struct sockaddr*) &addr, sizeof(addr)) != 0) {
-        fprintf(stderr, "Connection to %s:%u failed: %s\n",
-            inet_ntoa(addr.sin_addr), ntohs(addr.sin_port), strerror(errno));
-        goto fail;
-    }
-    fprintf(stderr, "+++ connected to VM %s:%u\n",
-        inet_ntoa(addr.sin_addr), ntohs(addr.sin_port));
-
-    netState->vm.sock = sock;
-    netState->vm.awaitingHandshake = true;
-    netState->vm.inputCount = 0;
-
-    setNoDelay(netState->vm.sock);
-    return true;
-
-fail:
-    if (sock >= 0)
-        close(sock);
-    return false;
-}
-
-/*
- * Establish network connections and start things running.
- *
- * We wait for a new connection from the debugger.  When one arrives we
- * open a connection to the VM.  If one side or the other goes away, we
- * drop both ends and go back to listening.
- */
-int run(const char* connectHost, int connectPort, int listenPort)
-{
-    NetState* state;
-
-    state = jdwpNetStartup(listenPort, connectHost, connectPort);
-    if (state == NULL)
-        return -1;
-
-    while (true) {
-        if (!jdwpAcceptConnection(state))
-            break;
-
-        if (jdwpConnectToVm(state)) {
-            while (true) {
-                if (!jdwpProcessIncoming(state))
-                    break;
-            }
-        }
-
-        jdwpCloseConnection(state);
-    }
-
-    jdwpNetFree(state);
-
-    return 0;
-}
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 9bde30d..53b07f9 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -49,6 +49,7 @@
 #include "runtime.h"
 #include "safe_map.h"
 #include "scoped_thread_state_change.h"
+#include "verifier/dex_gc_map.h"
 #include "verifier/method_verifier.h"
 #include "vmap_table.h"
 
@@ -162,12 +163,11 @@
         reinterpret_cast<const byte*>(oat_data) > oat_file_.End()) {
       return 0;  // Address not in oat file
     }
-    uint32_t begin_offset = reinterpret_cast<size_t>(oat_data) -
-                            reinterpret_cast<size_t>(oat_file_.Begin());
-    typedef std::set<uint32_t>::iterator It;
-    It it = offsets_.upper_bound(begin_offset);
+    uintptr_t begin_offset = reinterpret_cast<uintptr_t>(oat_data) -
+                             reinterpret_cast<uintptr_t>(oat_file_.Begin());
+    auto it = offsets_.upper_bound(begin_offset);
     CHECK(it != offsets_.end());
-    uint32_t end_offset = *it;
+    uintptr_t end_offset = *it;
     return end_offset - begin_offset;
   }
 
@@ -175,7 +175,7 @@
     return oat_file_.GetOatHeader().GetInstructionSet();
   }
 
-  const void* GetOatCode(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  const void* GetQuickOatCode(mirror::ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     MethodHelper mh(m);
     for (size_t i = 0; i < oat_dex_files_.size(); i++) {
       const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
@@ -193,7 +193,7 @@
           const OatFile::OatClass* oat_class = oat_dex_file->GetOatClass(class_def_index);
           CHECK(oat_class != NULL);
           size_t method_index = m->GetMethodIndex();
-          return oat_class->GetOatMethod(method_index).GetCode();
+          return oat_class->GetOatMethod(method_index).GetQuickCode();
         }
       }
     }
@@ -216,7 +216,7 @@
             << "': " << error_msg;
         continue;
       }
-      offsets_.insert(reinterpret_cast<uint32_t>(&dex_file->GetHeader()));
+      offsets_.insert(reinterpret_cast<uintptr_t>(&dex_file->GetHeader()));
       for (size_t class_def_index = 0; class_def_index < dex_file->NumClassDefs(); class_def_index++) {
         const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
         UniquePtr<const OatFile::OatClass> oat_class(oat_dex_file->GetOatClass(class_def_index));
@@ -240,7 +240,7 @@
     // If the last thing in the file is code for a method, there won't be an offset for the "next"
     // thing. Instead of having a special case in the upper_bound code, let's just add an entry
     // for the end of the file.
-    offsets_.insert(static_cast<uint32_t>(oat_file_.Size()));
+    offsets_.insert(oat_file_.Size());
   }
 
   void AddOffsets(const OatFile::OatMethod& oat_method) {
@@ -374,11 +374,17 @@
       }
     }
     {
+      const void* code = oat_method.GetQuickCode();
+      uint32_t code_size = oat_method.GetQuickCodeSize();
+      if (code == nullptr) {
+        code = oat_method.GetPortableCode();
+        code_size = oat_method.GetPortableCodeSize();
+      }
       indent1_os << StringPrintf("CODE: %p (offset=0x%08x size=%d)%s\n",
-                                 oat_method.GetCode(),
+                                 code,
                                  oat_method.GetCodeOffset(),
-                                 oat_method.GetCodeSize(),
-                                 oat_method.GetCode() != NULL ? "..." : "");
+                                 code_size,
+                                 code != nullptr ? "..." : "");
       Indenter indent2_filter(indent1_os.rdbuf(), kIndentChar, kIndentBy1Count);
       std::ostream indent2_os(&indent2_filter);
 
@@ -468,42 +474,60 @@
     }
   }
 
+  void DumpGcMapRegisters(std::ostream& os, const OatFile::OatMethod& oat_method,
+                          const DexFile::CodeItem* code_item,
+                          size_t num_regs, const uint8_t* reg_bitmap) {
+    bool first = true;
+    for (size_t reg = 0; reg < num_regs; reg++) {
+      if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
+        if (first) {
+          os << "  v" << reg << " (";
+          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
+          os << ")";
+          first = false;
+        } else {
+          os << ", v" << reg << " (";
+          DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
+          os << ")";
+        }
+      }
+    }
+    if (first) {
+      os << "No registers in GC map\n";
+    } else {
+      os << "\n";
+    }
+  }
   void DumpGcMap(std::ostream& os, const OatFile::OatMethod& oat_method,
                  const DexFile::CodeItem* code_item) {
     const uint8_t* gc_map_raw = oat_method.GetNativeGcMap();
-    if (gc_map_raw == NULL) {
-      return;
+    if (gc_map_raw == nullptr) {
+      return;  // No GC map.
     }
-    NativePcOffsetToReferenceMap map(gc_map_raw);
-    const void* code = oat_method.GetCode();
-    for (size_t entry = 0; entry < map.NumEntries(); entry++) {
-      const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(code) +
-                                 map.GetNativePcOffset(entry);
-      os << StringPrintf("%p", native_pc);
-      size_t num_regs = map.RegWidth() * 8;
-      const uint8_t* reg_bitmap = map.GetBitMap(entry);
-      bool first = true;
-      for (size_t reg = 0; reg < num_regs; reg++) {
-        if (((reg_bitmap[reg / 8] >> (reg % 8)) & 0x01) != 0) {
-          if (first) {
-            os << "  v" << reg << " (";
-            DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-            os << ")";
-            first = false;
-          } else {
-            os << ", v" << reg << " (";
-            DescribeVReg(os, oat_method, code_item, reg, kReferenceVReg);
-            os << ")";
-          }
-        }
+    const void* quick_code = oat_method.GetQuickCode();
+    if (quick_code != nullptr) {
+      NativePcOffsetToReferenceMap map(gc_map_raw);
+      for (size_t entry = 0; entry < map.NumEntries(); entry++) {
+        const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(quick_code) +
+            map.GetNativePcOffset(entry);
+        os << StringPrintf("%p", native_pc);
+        DumpGcMapRegisters(os, oat_method, code_item, map.RegWidth() * 8, map.GetBitMap(entry));
       }
-      os << "\n";
+    } else {
+      const void* portable_code = oat_method.GetPortableCode();
+      CHECK(portable_code != nullptr);
+      verifier::DexPcToReferenceMap map(gc_map_raw);
+      for (size_t entry = 0; entry < map.NumEntries(); entry++) {
+        uint32_t dex_pc = map.GetDexPc(entry);
+        os << StringPrintf("0x%08x", dex_pc);
+        DumpGcMapRegisters(os, oat_method, code_item, map.RegWidth() * 8, map.GetBitMap(entry));
+      }
     }
   }
 
   void DumpMappingTable(std::ostream& os, const OatFile::OatMethod& oat_method) {
-    const void* code = oat_method.GetCode();
-    if (code == NULL) {
+    const void* quick_code = oat_method.GetQuickCode();
+    if (quick_code == nullptr) {
       return;
     }
     MappingTable table(oat_method.GetMappingTable());
@@ -645,31 +669,37 @@
 
   void DumpCode(std::ostream& os, verifier::MethodVerifier* verifier,
                 const OatFile::OatMethod& oat_method, const DexFile::CodeItem* code_item) {
-    const void* code = oat_method.GetCode();
-    size_t code_size = oat_method.GetCodeSize();
-    if (code == NULL || code_size == 0) {
+    const void* portable_code = oat_method.GetPortableCode();
+    const void* quick_code = oat_method.GetQuickCode();
+
+    size_t code_size = oat_method.GetQuickCodeSize();
+    if ((code_size == 0) || ((portable_code == nullptr) && (quick_code == nullptr))) {
       os << "NO CODE!\n";
       return;
-    }
-    const uint8_t* native_pc = reinterpret_cast<const uint8_t*>(code);
-    size_t offset = 0;
-    while (offset < code_size) {
-      DumpMappingAtOffset(os, oat_method, offset, false);
-      offset += disassembler_->Dump(os, native_pc + offset);
-      uint32_t dex_pc = DumpMappingAtOffset(os, oat_method, offset, true);
-      if (dex_pc != DexFile::kDexNoIndex) {
-        DumpGcMapAtNativePcOffset(os, oat_method, code_item, offset);
-        if (verifier != nullptr) {
-          DumpVRegsAtDexPc(os, verifier, oat_method, code_item, dex_pc);
+    } else if (quick_code != nullptr) {
+      const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code);
+      size_t offset = 0;
+      while (offset < code_size) {
+        DumpMappingAtOffset(os, oat_method, offset, false);
+        offset += disassembler_->Dump(os, quick_native_pc + offset);
+        uint32_t dex_pc = DumpMappingAtOffset(os, oat_method, offset, true);
+        if (dex_pc != DexFile::kDexNoIndex) {
+          DumpGcMapAtNativePcOffset(os, oat_method, code_item, offset);
+          if (verifier != nullptr) {
+            DumpVRegsAtDexPc(os, verifier, oat_method, code_item, dex_pc);
+          }
         }
       }
+    } else {
+      CHECK(portable_code != nullptr);
+      CHECK_EQ(code_size, 0U);  // TODO: disassembly of portable is currently not supported.
     }
   }
 
   const std::string host_prefix_;
   const OatFile& oat_file_;
   std::vector<const OatFile::OatDexFile*> oat_dex_files_;
-  std::set<uint32_t> offsets_;
+  std::set<uintptr_t> offsets_;
   UniquePtr<Disassembler> disassembler_;
 };
 
@@ -856,7 +886,7 @@
     if (descriptor[0] != 'L' && descriptor[0] != '[') {
       mirror::Class* type = fh.GetType();
       if (type->IsPrimitiveLong()) {
-        os << StringPrintf("%lld (0x%llx)\n", field->Get64(obj), field->Get64(obj));
+        os << StringPrintf("%" PRId64 " (0x%" PRIx64 ")\n", field->Get64(obj), field->Get64(obj));
       } else if (type->IsPrimitiveDouble()) {
         os << StringPrintf("%f (%a)\n", field->GetDouble(obj), field->GetDouble(obj));
       } else if (type->IsPrimitiveFloat()) {
@@ -902,34 +932,34 @@
     return image_space_.Contains(object);
   }
 
-  const void* GetOatCodeBegin(mirror::ArtMethod* m)
+  const void* GetQuickOatCodeBegin(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const void* code = m->GetEntryPointFromCompiledCode();
-    if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
-      code = oat_dumper_->GetOatCode(m);
+    const void* quick_code = m->GetEntryPointFromQuickCompiledCode();
+    if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker())) {
+      quick_code = oat_dumper_->GetQuickOatCode(m);
     }
     if (oat_dumper_->GetInstructionSet() == kThumb2) {
-      code = reinterpret_cast<void*>(reinterpret_cast<uint32_t>(code) & ~0x1);
+      quick_code = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(quick_code) & ~0x1);
     }
-    return code;
+    return quick_code;
   }
 
-  uint32_t GetOatCodeSize(mirror::ArtMethod* m)
+  uint32_t GetQuickOatCodeSize(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const uint32_t* oat_code_begin = reinterpret_cast<const uint32_t*>(GetOatCodeBegin(m));
-    if (oat_code_begin == NULL) {
+    const uint32_t* oat_code_begin = reinterpret_cast<const uint32_t*>(GetQuickOatCodeBegin(m));
+    if (oat_code_begin == nullptr) {
       return 0;
     }
     return oat_code_begin[-1];
   }
 
-  const void* GetOatCodeEnd(mirror::ArtMethod* m)
+  const void* GetQuickOatCodeEnd(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const uint8_t* oat_code_begin = reinterpret_cast<const uint8_t*>(GetOatCodeBegin(m));
+    const uint8_t* oat_code_begin = reinterpret_cast<const uint8_t*>(GetQuickOatCodeBegin(m));
     if (oat_code_begin == NULL) {
       return NULL;
     }
-    return oat_code_begin + GetOatCodeSize(m);
+    return oat_code_begin + GetQuickOatCodeSize(m);
   }
 
   static void Callback(mirror::Object* obj, void* arg)
@@ -1006,17 +1036,18 @@
     } else if (obj->IsArtMethod()) {
       mirror::ArtMethod* method = obj->AsArtMethod();
       if (method->IsNative()) {
-        DCHECK(method->GetNativeGcMap() == NULL) << PrettyMethod(method);
-        DCHECK(method->GetMappingTable() == NULL) << PrettyMethod(method);
+        // TODO: portable dumping.
+        DCHECK(method->GetNativeGcMap() == nullptr) << PrettyMethod(method);
+        DCHECK(method->GetMappingTable() == nullptr) << PrettyMethod(method);
         bool first_occurrence;
-        const void* oat_code = state->GetOatCodeBegin(method);
-        uint32_t oat_code_size = state->GetOatCodeSize(method);
-        state->ComputeOatSize(oat_code, &first_occurrence);
+        const void* quick_oat_code = state->GetQuickOatCodeBegin(method);
+        uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
+        state->ComputeOatSize(quick_oat_code, &first_occurrence);
         if (first_occurrence) {
-          state->stats_.native_to_managed_code_bytes += oat_code_size;
+          state->stats_.native_to_managed_code_bytes += quick_oat_code_size;
         }
-        if (oat_code != method->GetEntryPointFromCompiledCode()) {
-          indent_os << StringPrintf("OAT CODE: %p\n", oat_code);
+        if (quick_oat_code != method->GetEntryPointFromQuickCompiledCode()) {
+          indent_os << StringPrintf("OAT CODE: %p\n", quick_oat_code);
         }
       } else if (method->IsAbstract() || method->IsCalleeSaveMethod() ||
           method->IsResolutionMethod() || method->IsImtConflictMethod() ||
@@ -1050,33 +1081,34 @@
           state->stats_.vmap_table_bytes += vmap_table_bytes;
         }
 
-        const void* oat_code_begin = state->GetOatCodeBegin(method);
-        const void* oat_code_end = state->GetOatCodeEnd(method);
-        uint32_t oat_code_size = state->GetOatCodeSize(method);
-        state->ComputeOatSize(oat_code_begin, &first_occurrence);
+        // TODO: portable dumping.
+        const void* quick_oat_code_begin = state->GetQuickOatCodeBegin(method);
+        const void* quick_oat_code_end = state->GetQuickOatCodeEnd(method);
+        uint32_t quick_oat_code_size = state->GetQuickOatCodeSize(method);
+        state->ComputeOatSize(quick_oat_code_begin, &first_occurrence);
         if (first_occurrence) {
-          state->stats_.managed_code_bytes += oat_code_size;
+          state->stats_.managed_code_bytes += quick_oat_code_size;
           if (method->IsConstructor()) {
             if (method->IsStatic()) {
-              state->stats_.class_initializer_code_bytes += oat_code_size;
+              state->stats_.class_initializer_code_bytes += quick_oat_code_size;
             } else if (dex_instruction_bytes > kLargeConstructorDexBytes) {
-              state->stats_.large_initializer_code_bytes += oat_code_size;
+              state->stats_.large_initializer_code_bytes += quick_oat_code_size;
             }
           } else if (dex_instruction_bytes > kLargeMethodDexBytes) {
-            state->stats_.large_method_code_bytes += oat_code_size;
+            state->stats_.large_method_code_bytes += quick_oat_code_size;
           }
         }
-        state->stats_.managed_code_bytes_ignoring_deduplication += oat_code_size;
+        state->stats_.managed_code_bytes_ignoring_deduplication += quick_oat_code_size;
 
-        indent_os << StringPrintf("OAT CODE: %p-%p\n", oat_code_begin, oat_code_end);
+        indent_os << StringPrintf("OAT CODE: %p-%p\n", quick_oat_code_begin, quick_oat_code_end);
         indent_os << StringPrintf("SIZE: Dex Instructions=%zd GC=%zd Mapping=%zd\n",
                                   dex_instruction_bytes, gc_map_bytes, pc_mapping_table_bytes);
 
         size_t total_size = dex_instruction_bytes + gc_map_bytes + pc_mapping_table_bytes +
-            vmap_table_bytes + oat_code_size + object_bytes;
+            vmap_table_bytes + quick_oat_code_size + object_bytes;
 
         double expansion =
-            static_cast<double>(oat_code_size) / static_cast<double>(dex_instruction_bytes);
+            static_cast<double>(quick_oat_code_size) / static_cast<double>(dex_instruction_bytes);
         state->stats_.ComputeOutliers(total_size, expansion, method);
       }
     }
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 353f160..223ae7c 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -23,6 +23,7 @@
 	barrier.cc \
 	base/allocator.cc \
 	base/bit_vector.cc \
+	base/hex_dump.cc \
 	base/logging.cc \
 	base/mutex.cc \
 	base/stringpiece.cc \
@@ -65,6 +66,7 @@
 	gc/space/malloc_space.cc \
 	gc/space/rosalloc_space.cc \
 	gc/space/space.cc \
+	gc/space/zygote_space.cc \
 	hprof/hprof.cc \
 	image.cc \
 	indirect_reference_table.cc \
@@ -213,12 +215,13 @@
 else # TARGET_ARCH != x86
 ifeq ($(TARGET_ARCH),x86_64)
 LIBART_TARGET_SRC_FILES += \
-	arch/x86/context_x86.cc \
-	arch/x86/entrypoints_init_x86.cc \
-	arch/x86/jni_entrypoints_x86.S \
-	arch/x86/portable_entrypoints_x86.S \
-	arch/x86/quick_entrypoints_x86.S \
-	arch/x86/thread_x86.cc
+	arch/x86_64/context_x86_64.cc \
+	arch/x86_64/entrypoints_init_x86_64.cc \
+	arch/x86_64/jni_entrypoints_x86_64.S \
+	arch/x86_64/portable_entrypoints_x86_64.S \
+	arch/x86_64/quick_entrypoints_x86_64.S \
+	arch/x86_64/thread_x86_64.cc \
+	monitor_pool.cc
 LIBART_LDFLAGS += -Wl,--no-fatal-warnings
 else # TARGET_ARCH != x86_64
 ifeq ($(TARGET_ARCH),mips)
@@ -230,14 +233,18 @@
 	arch/mips/quick_entrypoints_mips.S \
 	arch/mips/thread_mips.cc
 else # TARGET_ARCH != mips
-ifeq ($(TARGET_ARCH),aarch64)
-$(info TODOAArch64: $(LOCAL_PATH)/Android.mk Add AArch64 specific runtime files)
+ifeq ($(TARGET_ARCH),arm64)
+$(info TODOArm64: $(LOCAL_PATH)/Android.mk Add Arm64 specific runtime files)
+else
+ifeq ($(TARGET_ARCH),mips64)
+$(info TODOMips64: $(LOCAL_PATH)/Android.mk Add mips64 specific runtime files)
 else
 $(error unsupported TARGET_ARCH=$(TARGET_ARCH))
-endif # TARGET_ARCH != aarch64
+endif # TARGET_ARCH != mips64
+endif # TARGET_ARCH != arm64
 endif # TARGET_ARCH != mips
-endif # TARGET_ARCH != x86
 endif # TARGET_ARCH != x86_64
+endif # TARGET_ARCH != x86
 endif # TARGET_ARCH != arm
 
 
@@ -249,6 +256,16 @@
 	thread_linux.cc
 
 ifeq ($(HOST_ARCH),x86)
+ifneq ($(BUILD_HOST_64bit),)
+LIBART_HOST_SRC_FILES += \
+	arch/x86_64/context_x86_64.cc \
+	arch/x86_64/entrypoints_init_x86_64.cc \
+	arch/x86_64/jni_entrypoints_x86_64.S \
+	arch/x86_64/portable_entrypoints_x86_64.S \
+	arch/x86_64/quick_entrypoints_x86_64.S \
+	arch/x86_64/thread_x86_64.cc \
+	monitor_pool.cc
+else
 LIBART_HOST_SRC_FILES += \
 	arch/x86/context_x86.cc \
 	arch/x86/entrypoints_init_x86.cc \
@@ -256,6 +273,7 @@
 	arch/x86/portable_entrypoints_x86.S \
 	arch/x86/quick_entrypoints_x86.S \
 	arch/x86/thread_x86.cc
+endif
 else # HOST_ARCH != x86
 $(error unsupported HOST_ARCH=$(HOST_ARCH))
 endif # HOST_ARCH != x86
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 00651ff..020cae0 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -35,7 +35,7 @@
 
   virtual void Reset();
 
-  virtual void FillCalleeSaves(const StackVisitor& fr);
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void SetSP(uintptr_t new_sp) {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/arm/portable_entrypoints_arm.S b/runtime/arch/arm/portable_entrypoints_arm.S
index ac519d5..98d17dc 100644
--- a/runtime/arch/arm/portable_entrypoints_arm.S
+++ b/runtime/arch/arm/portable_entrypoints_arm.S
@@ -53,7 +53,7 @@
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
     add    sp, #16                         @ first 4 args are not passed on stack for portable
-    ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
+    ldr    ip, [r0, #METHOD_PORTABLE_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 34de93f..0e5c60a 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -302,7 +302,7 @@
     ldr    r3, [sp, #12]                   @ copy arg value for r3
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
-    ldr    ip, [r0, #METHOD_CODE_OFFSET]   @ get pointer to the code
+    ldr    ip, [r0, #METHOD_QUICK_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
diff --git a/runtime/arch/arm/thread_arm.cc b/runtime/arch/arm/thread_arm.cc
index 8c1efeb..df4a04a 100644
--- a/runtime/arch/arm/thread_arm.cc
+++ b/runtime/arch/arm/thread_arm.cc
@@ -28,4 +28,8 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  // Do nothing.
+}
+
 }  // namespace art
diff --git a/runtime/arch/context.cc b/runtime/arch/context.cc
index 7075e42..5eaf809 100644
--- a/runtime/arch/context.cc
+++ b/runtime/arch/context.cc
@@ -22,6 +22,10 @@
 #include "mips/context_mips.h"
 #elif defined(__i386__)
 #include "x86/context_x86.h"
+#elif defined(__x86_64__)
+#include "x86_64/context_x86_64.h"
+#else
+#include "base/logging.h"
 #endif
 
 namespace art {
@@ -33,8 +37,11 @@
   return new mips::MipsContext();
 #elif defined(__i386__)
   return new x86::X86Context();
+#elif defined(__x86_64__)
+  return new x86_64::X86_64Context();
 #else
   UNIMPLEMENTED(FATAL);
+  return nullptr;
 #endif
 }
 
diff --git a/runtime/arch/context.h b/runtime/arch/context.h
index 91e0cd6..3d11178 100644
--- a/runtime/arch/context.h
+++ b/runtime/arch/context.h
@@ -20,6 +20,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include "locks.h"
+
 namespace art {
 
 class StackVisitor;
@@ -38,7 +40,8 @@
 
   // Read values from callee saves in the given frame. The frame also holds
   // the method that holds the layout.
-  virtual void FillCalleeSaves(const StackVisitor& fr) = 0;
+  virtual void FillCalleeSaves(const StackVisitor& fr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Set the stack pointer value
   virtual void SetSP(uintptr_t new_sp) = 0;
diff --git a/runtime/arch/mips/context_mips.h b/runtime/arch/mips/context_mips.h
index 5595f86..4145cd3 100644
--- a/runtime/arch/mips/context_mips.h
+++ b/runtime/arch/mips/context_mips.h
@@ -33,7 +33,7 @@
 
   virtual void Reset();
 
-  virtual void FillCalleeSaves(const StackVisitor& fr);
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void SetSP(uintptr_t new_sp) {
     SetGPR(SP, new_sp);
diff --git a/runtime/arch/mips/portable_entrypoints_mips.S b/runtime/arch/mips/portable_entrypoints_mips.S
index 9208a8a..7545ce0 100644
--- a/runtime/arch/mips/portable_entrypoints_mips.S
+++ b/runtime/arch/mips/portable_entrypoints_mips.S
@@ -61,5 +61,73 @@
     .cfi_adjust_cfa_offset -64
 END art_portable_proxy_invoke_handler
 
+    /*
+     * Invocation stub for portable code.
+     * On entry:
+     *   a0 = method pointer
+     *   a1 = argument array or NULL for no argument methods
+     *   a2 = size of argument array in bytes
+     *   a3 = (managed) thread pointer
+     *   [sp + 16] = JValue* result
+     *   [sp + 20] = result type char
+     */
+ENTRY art_portable_invoke_stub
+    GENERATE_GLOBAL_POINTER
+    sw    $a0, 0($sp)           # save out a0
+    addiu $sp, $sp, -16         # spill s0, s1, fp, ra
+    .cfi_adjust_cfa_offset 16
+    sw    $ra, 12($sp)
+    .cfi_rel_offset 31, 12
+    sw    $fp, 8($sp)
+    .cfi_rel_offset 30, 8
+    sw    $s1, 4($sp)
+    .cfi_rel_offset 17, 4
+    sw    $s0, 0($sp)
+    .cfi_rel_offset 16, 0
+    move  $fp, $sp              # save sp in fp
+    .cfi_def_cfa_register 30
+    move  $s1, $a3              # move managed thread pointer into s1
+    addiu $s0, $zero, SUSPEND_CHECK_INTERVAL  # reset s0 to suspend check interval
+    addiu $t0, $a2, 16          # create space for method pointer in frame
+    srl   $t0, $t0, 3           # shift the frame size right 3
+    sll   $t0, $t0, 3           # shift the frame size left 3 to align to 16 bytes
+    subu  $sp, $sp, $t0         # reserve stack space for argument array
+    addiu $a0, $sp, 4           # pass stack pointer + method ptr as dest for memcpy
+    jal   memcpy                # (dest, src, bytes)
+    addiu $sp, $sp, -16         # make space for argument slots for memcpy
+    addiu $sp, $sp, 16          # restore stack after memcpy
+    lw    $a0, 16($fp)          # restore method*
+    lw    $a1, 4($sp)           # copy arg value for a1
+    lw    $a2, 8($sp)           # copy arg value for a2
+    lw    $a3, 12($sp)          # copy arg value for a3
+    lw    $t9, METHOD_PORTABLE_CODE_OFFSET($a0)  # get pointer to the code
+    jalr  $t9                   # call the method
+    sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
+    move  $sp, $fp              # restore the stack
+    lw    $s0, 0($sp)
+    .cfi_restore 16
+    lw    $s1, 4($sp)
+    .cfi_restore 17
+    lw    $fp, 8($sp)
+    .cfi_restore 30
+    lw    $ra, 12($sp)
+    .cfi_restore 31
+    addiu $sp, $sp, 16
+    .cfi_adjust_cfa_offset -16
+    lw    $t0, 16($sp)          # get result pointer
+    lw    $t1, 20($sp)          # get result type char
+    li    $t2, 68               # put char 'D' into t2
+    beq   $t1, $t2, 1f          # branch if result type char == 'D'
+    li    $t3, 70               # put char 'F' into t3
+    beq   $t1, $t3, 1f          # branch if result type char == 'F'
+    sw    $v0, 0($t0)           # store the result
+    jr    $ra
+    sw    $v1, 4($t0)           # store the other half of the result
+1:
+    s.s   $f0, 0($t0)           # store floating point result
+    jr    $ra
+    s.s   $f1, 4($t0)           # store other half of floating point result
+END art_portable_invoke_stub
+
 UNIMPLEMENTED art_portable_resolution_trampoline
 UNIMPLEMENTED art_portable_to_interpreter_bridge
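Note (not part of the patch): the a0/a1/a2/a3 plus [sp + 16]/[sp + 20] interface documented in the new MIPS portable invoke stub above — and reused by art_quick_invoke_stub in the next file — corresponds roughly to a C-level declaration like the following declaration-only sketch; the union layout, parameter names, and exact types are assumptions for illustration, not the runtime's real declarations.

// Declaration-level sketch of the stub interface documented above; names and
// types here are illustrative assumptions.
#include <cstdint>

union JValueSketch { int32_t i; int64_t j; float f; double d; void* l; };

// a0..a3 map to the first four parameters; the result pointer and the result
// type character are passed on the stack at [sp + 16] and [sp + 20].
extern "C" void art_portable_invoke_stub(void* method,          // a0: ArtMethod*
                                         uint32_t* args,        // a1: argument array, or nullptr
                                         uint32_t args_size,    // a2: size of the array in bytes
                                         void* self,            // a3: managed Thread*
                                         JValueSketch* result,  // [sp + 16]
                                         char result_type);     // [sp + 20]: shorty char such as 'D' or 'F'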
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 2d1e87a..c60bca0 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -449,7 +449,7 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Common invocation stub for portable and quick.
+     * Invocation stub for quick code.
      * On entry:
      *   a0 = method pointer
      *   a1 = argument array or NULL for no argument methods
@@ -458,9 +458,6 @@
      *   [sp + 16] = JValue* result
      *   [sp + 20] = result type char
      */
-    .type art_portable_invoke_stub, %function
-    .global art_portable_invoke_stub
-art_portable_invoke_stub:
 ENTRY art_quick_invoke_stub
     GENERATE_GLOBAL_POINTER
     sw    $a0, 0($sp)           # save out a0
@@ -490,7 +487,7 @@
     lw    $a1, 4($sp)           # copy arg value for a1
     lw    $a2, 8($sp)           # copy arg value for a2
     lw    $a3, 12($sp)          # copy arg value for a3
-    lw    $t9, METHOD_CODE_OFFSET($a0)  # get pointer to the code
+    lw    $t9, METHOD_QUICK_CODE_OFFSET($a0)  # get pointer to the code
     jalr  $t9                   # call the method
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
@@ -518,7 +515,6 @@
     jr    $ra
     s.s   $f1, 4($t0)           # store other half of floating point result
 END art_quick_invoke_stub
-    .size art_portable_invoke_stub, .-art_portable_invoke_stub
 
     /*
      * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
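A side note on the quick invoke stub just above (an assumption-level sketch, not part of the patch): the result-type character at [sp + 20] decides whether the return value is read back from the floating-point registers or the integer registers, which in C++ terms amounts to a dispatch like this.

// Illustrative only: how the result-type char routes the stub's return value.
// gp_result stands in for $v0/$v1, fp_result for $f0/$f1 (collapsed to a
// double here for brevity).
#include <cstdint>

union JValueResultSketch { int64_t j; double d; };

void StoreStubResult(char result_type, int64_t gp_result, double fp_result,
                     JValueResultSketch* out) {
  if (result_type == 'D' || result_type == 'F') {
    out->d = fp_result;   // floating-point results come back in $f0/$f1
  } else {
    out->j = gp_result;   // everything else comes back in $v0/$v1
  }
}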
diff --git a/runtime/arch/mips/thread_mips.cc b/runtime/arch/mips/thread_mips.cc
index bd54549..f5d211f 100644
--- a/runtime/arch/mips/thread_mips.cc
+++ b/runtime/arch/mips/thread_mips.cc
@@ -27,4 +27,8 @@
   CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
 }
 
+void Thread::CleanupCpu() {
+  // Do nothing.
+}
+
 }  // namespace art
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index d32f998..632c5f3 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -17,11 +17,17 @@
 .macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix
 // Called by managed code to allocate an object.
 TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an object of a resolved class.
+TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an object of an initialized class.
+TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 // Called by managed code to allocate an object when the caller doesn't know whether it has access
 // to the created type.
 TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 // Called by managed code to allocate an array.
 THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+// Called by managed code to allocate an array of a resolved class.
+THREE_ARG_DOWNCALL art_quick_alloc_array_resolved\c_suffix, artAllocArrayFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
 // Called by managed code to allocate an array when the caller doesn't know whether it has access
 // to the created type.
 THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check\c_suffix, artAllocArrayFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
diff --git a/runtime/arch/quick_alloc_entrypoints.cc b/runtime/arch/quick_alloc_entrypoints.cc
index 457c73a..9363f81 100644
--- a/runtime/arch/quick_alloc_entrypoints.cc
+++ b/runtime/arch/quick_alloc_entrypoints.cc
@@ -19,29 +19,41 @@
 
 #define GENERATE_ENTRYPOINTS(suffix) \
 extern "C" void* art_quick_alloc_array##suffix(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_resolved##suffix(void* klass, void*, int32_t); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_resolved##suffix(void* klass, void* method); \
+extern "C" void* art_quick_alloc_object_initialized##suffix(void* klass, void* method); \
 extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, void* method); \
 extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
+extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(void* klass, void*, int32_t); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, void* method); \
+extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(void* klass, void* method); \
+extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(void* klass, void* method); \
 extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, void* method); \
 extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, void*, int32_t); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, void*, int32_t); \
 void SetQuickAllocEntryPoints##suffix(QuickEntryPoints* qpoints, bool instrumented) { \
   if (instrumented) { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
+    qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix##_instrumented; \
     qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \
     qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \
+    qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix##_instrumented; \
+    qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix##_instrumented; \
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
   } else { \
     qpoints->pAllocArray = art_quick_alloc_array##suffix; \
+    qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \
     qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \
     qpoints->pAllocObject = art_quick_alloc_object##suffix; \
+    qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix; \
+    qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix; \
     qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
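For orientation (a simplified sketch, not the runtime's real declarations): the GENERATE_ENTRYPOINTS macro above boils down to a per-allocator setter that flips every allocation entrypoint between its plain and _instrumented variant in one place. The struct and function names below are hypothetical stand-ins.

#include <cstdint>

// Simplified stand-in for the QuickEntryPoints slots touched by this patch.
struct QuickEntryPointsSketch {
  void* (*pAllocObject)(uint32_t type_idx, void* method);
  void* (*pAllocObjectResolved)(void* klass, void* method);
  void* (*pAllocObjectInitialized)(void* klass, void* method);
};

// Hypothetical allocator entrypoints standing in for the generated
// art_quick_alloc_object_dlmalloc / ..._instrumented families.
static void* AllocObject(uint32_t, void*) { return nullptr; }
static void* AllocObjectResolved(void*, void*) { return nullptr; }
static void* AllocObjectInitialized(void*, void*) { return nullptr; }
static void* AllocObjectInstr(uint32_t, void*) { return nullptr; }
static void* AllocObjectResolvedInstr(void*, void*) { return nullptr; }
static void* AllocObjectInitializedInstr(void*, void*) { return nullptr; }

// Mirrors the spirit of SetQuickAllocEntryPoints_dlmalloc: one switch point
// chooses the instrumented or plain variant of every allocation entrypoint.
static void SetQuickAllocEntryPointsSketch(QuickEntryPointsSketch* qpoints,
                                           bool instrumented) {
  if (instrumented) {
    qpoints->pAllocObject = AllocObjectInstr;
    qpoints->pAllocObjectResolved = AllocObjectResolvedInstr;
    qpoints->pAllocObjectInitialized = AllocObjectInitializedInstr;
  } else {
    qpoints->pAllocObject = AllocObject;
    qpoints->pAllocObjectResolved = AllocObjectResolved;
    qpoints->pAllocObjectInitialized = AllocObjectInitialized;
  }
}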
diff --git a/runtime/arch/x86/asm_support_x86.S b/runtime/arch/x86/asm_support_x86.S
index 3701b22..9ec1995 100644
--- a/runtime/arch/x86/asm_support_x86.S
+++ b/runtime/arch/x86/asm_support_x86.S
@@ -31,11 +31,26 @@
     // are mangled with an extra underscore prefix. The use of $x for arguments
     // mean that literals need to be represented with $$x in macros.
     #define SYMBOL(name) _ ## name
+    #define PLT_SYMBOL(name) _ ## name
     #define VAR(name,index) SYMBOL($index)
+    #define PLT_VAR(name, index) SYMBOL($index)
     #define REG_VAR(name,index) %$index
     #define CALL_MACRO(name,index) $index
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $$value
+
+    // Mac OS doesn't like cfi_* directives
+    #define CFI_STARTPROC
+    #define CFI_ENDPROC
+    #define CFI_ADJUST_CFA_OFFSET(size)
+    #define CFI_DEF_CFA(reg,size)
+    #define CFI_DEF_CFA_REGISTER(reg)
+    #define CFI_RESTORE(reg)
+    #define CFI_REL_OFFSET(reg,size)
+
+    // Mac OS doesn't support certain directives
+    #define FUNCTION_TYPE(name)
+    #define SIZE(name)
 #else
     // Regular gas(1) lets you name macro parameters.
     #define MACRO0(macro_name) .macro macro_name
@@ -51,11 +66,25 @@
     // special character meaning care needs to be taken when passing registers as macro arguments.
     .altmacro
     #define SYMBOL(name) name
+    #define PLT_SYMBOL(name) name@PLT
     #define VAR(name,index) name&
+    #define PLT_VAR(name, index) name&@PLT
     #define REG_VAR(name,index) %name
     #define CALL_MACRO(name,index) name&
     #define LITERAL(value) $value
     #define MACRO_LITERAL(value) $value
+
+    // CFI support
+    #define CFI_STARTPROC .cfi_startproc
+    #define CFI_ENDPROC .cfi_endproc
+    #define CFI_ADJUST_CFA_OFFSET(size) .cfi_adjust_cfa_offset size
+    #define CFI_DEF_CFA(reg,size) .cfi_def_cfa reg,size
+    #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+    #define CFI_RESTORE(reg) .cfi_restore reg
+    #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+
+    #define FUNCTION_TYPE(name) .type name&, @function
+    #define SIZE(name) .size name, .-name
 #endif
 
     /* Cache alignment for function entry */
@@ -64,40 +93,40 @@
 END_MACRO
 
 MACRO1(DEFINE_FUNCTION, c_name)
-    .type VAR(c_name, 0), @function
+    FUNCTION_TYPE(\c_name)
     .globl VAR(c_name, 0)
     ALIGN_FUNCTION_ENTRY
 VAR(c_name, 0):
-    .cfi_startproc
+    CFI_STARTPROC
 END_MACRO
 
 MACRO1(END_FUNCTION, c_name)
-    .cfi_endproc
-    .size \c_name, .-\c_name
+    CFI_ENDPROC
+    SIZE(\c_name)
 END_MACRO
 
 MACRO1(PUSH, reg)
     pushl REG_VAR(reg, 0)
-    .cfi_adjust_cfa_offset 4
-    .cfi_rel_offset REG_VAR(reg, 0), 0
+    CFI_ADJUST_CFA_OFFSET(4)
+    CFI_REL_OFFSET(REG_VAR(reg, 0), 0)
 END_MACRO
 
 MACRO1(POP, reg)
     popl REG_VAR(reg,0)
-    .cfi_adjust_cfa_offset -4
-    .cfi_restore REG_VAR(reg,0)
+    CFI_ADJUST_CFA_OFFSET(-4)
+    CFI_RESTORE(REG_VAR(reg,0))
 END_MACRO
 
 MACRO1(UNIMPLEMENTED,name)
-    .type VAR(name, 0), @function
+    FUNCTION_TYPE(\name)
     .globl VAR(name, 0)
     ALIGN_FUNCTION_ENTRY
 VAR(name, 0):
-    .cfi_startproc
+    CFI_STARTPROC
     int3
     int3
-    .cfi_endproc
-    .size \name, .-\name
+    CFI_ENDPROC
+    SIZE(\name)
 END_MACRO
 
 MACRO0(SETUP_GOT_NOSAVE)
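Editorial aside (not from the patch): the CFI_*, FUNCTION_TYPE, and SIZE wrappers introduced above are the usual conditional-macro idiom — one spelling that expands to a real directive under binutils gas and to nothing under the Mac OS assembler. A hypothetical C++-preprocessor analogue, with the directives merely echoed so the sketch stays self-contained:

#include <cstdio>

// Hypothetical analogue of the asm_support_x86.S macros: the same name is a
// no-op on one toolchain and expands to something real on the other.
#if defined(__APPLE__)
  #define CFI_ADJUST_CFA_OFFSET(size)     // Apple's assembler rejects .cfi_* directives
  #define FUNCTION_TYPE(name)             // ...and .type, so both vanish
#else
  #define CFI_ADJUST_CFA_OFFSET(size) std::puts(".cfi_adjust_cfa_offset " #size);
  #define FUNCTION_TYPE(name) std::puts(".type " #name ", @function");
#endif

int main() {
  FUNCTION_TYPE(art_quick_lock_object)    // prints the directive, or nothing on Apple
  CFI_ADJUST_CFA_OFFSET(16)
  return 0;
}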
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 66a51f7..d7dca64 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -23,7 +23,7 @@
 namespace art {
 namespace x86 {
 
-static const uint32_t gZero = 0;
+static const uintptr_t gZero = 0;
 
 void X86Context::Reset() {
   for (int i = 0; i < kNumberOfCpuRegisters; i++) {
@@ -55,8 +55,8 @@
 
 void X86Context::SmashCallerSaves() {
   // This needs to be 0 because we want a null/zero return value.
-  gprs_[EAX] = const_cast<uint32_t*>(&gZero);
-  gprs_[EDX] = const_cast<uint32_t*>(&gZero);
+  gprs_[EAX] = const_cast<uintptr_t*>(&gZero);
+  gprs_[EDX] = const_cast<uintptr_t*>(&gZero);
   gprs_[ECX] = NULL;
   gprs_[EBX] = NULL;
 }
@@ -89,7 +89,7 @@
       : "g"(&gprs[0])  // input.
       :);  // clobber.
 #else
-    UNIMPLEMENTED(FATAL);
+  UNIMPLEMENTED(FATAL);
 #endif
 }
 
diff --git a/runtime/arch/x86/context_x86.h b/runtime/arch/x86/context_x86.h
index d7d2210..598314d 100644
--- a/runtime/arch/x86/context_x86.h
+++ b/runtime/arch/x86/context_x86.h
@@ -33,7 +33,7 @@
 
   virtual void Reset();
 
-  virtual void FillCalleeSaves(const StackVisitor& fr);
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void SetSP(uintptr_t new_sp) {
     SetGPR(ESP, new_sp);
diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S
index e394819..2eb5ada 100644
--- a/runtime/arch/x86/jni_entrypoints_x86.S
+++ b/runtime/arch/x86/jni_entrypoints_x86.S
@@ -17,18 +17,18 @@
 #include "asm_support_x86.S"
 
     /*
-     * Portable resolution trampoline.
+     * JNI dlsym lookup stub.
      */
 DEFINE_FUNCTION art_jni_dlsym_lookup_stub
     subl LITERAL(4), %esp         // align stack
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     SETUP_GOT                     // pushes ebx
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
-    call SYMBOL(artFindNativeMethod)@PLT  // (Thread*)
+    CFI_ADJUST_CFA_OFFSET(4)
+    call PLT_SYMBOL(artFindNativeMethod)  // (Thread*)
     UNDO_SETUP_GOT
     addl LITERAL(8), %esp         // restore the stack
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     cmpl LITERAL(0), %eax         // check if returned method code is null
     je no_native_code_found       // if null, jump to return to handle
     jmp *%eax                     // otherwise, tail call to intended method
diff --git a/runtime/arch/x86/portable_entrypoints_x86.S b/runtime/arch/x86/portable_entrypoints_x86.S
index a1f6b2d..4bd6173 100644
--- a/runtime/arch/x86/portable_entrypoints_x86.S
+++ b/runtime/arch/x86/portable_entrypoints_x86.S
@@ -31,7 +31,7 @@
     PUSH ebp                      // save ebp
     PUSH ebx                      // save ebx
     mov %esp, %ebp                // copy value of stack pointer into base pointer
-    .cfi_def_cfa_register ebp
+    CFI_DEF_CFA_REGISTER(ebp)
     mov 20(%ebp), %ebx            // get arg array size
     addl LITERAL(28), %ebx        // reserve space for return addr, method*, ebx, and ebp in frame
     andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes
@@ -42,11 +42,11 @@
     pushl 20(%ebp)                // push size of region to memcpy
     pushl 16(%ebp)                // push arg array as source of memcpy
     pushl %eax                    // push stack pointer as destination of memcpy
-    call SYMBOL(memcpy)@PLT       // (void*, const void*, size_t)
+    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
     addl LITERAL(12), %esp        // pop arguments to memcpy
     mov 12(%ebp), %eax            // move method pointer into eax
     mov %eax, (%esp)              // push method pointer onto stack
-    call *METHOD_CODE_OFFSET(%eax) // call the method
+    call *METHOD_PORTABLE_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
@@ -69,7 +69,7 @@
 DEFINE_FUNCTION art_portable_proxy_invoke_handler
     PUSH ebp                        // Set up frame.
     movl %esp, %ebp
-    .cfi_def_cfa_register %ebp
+    CFI_DEF_CFA_REGISTER(%ebp)
     subl LITERAL(4), %esp           // Align stack
     SETUP_GOT                       // pushes ebx
     leal 8(%ebp), %edx              // %edx = ArtMethod** called_addr
@@ -79,11 +79,11 @@
     pushl %fs:THREAD_SELF_OFFSET    // Pass thread.
     pushl %ecx                      // Pass receiver.
     pushl %eax                      // Pass called.
-    call SYMBOL(artPortableProxyInvokeHandler)@PLT  // (called, receiver, Thread*, &called)
+    call PLT_SYMBOL(artPortableProxyInvokeHandler)  // (called, receiver, Thread*, &called)
     UNDO_SETUP_GOT
     leave
-    .cfi_restore %ebp
-    .cfi_def_cfa %esp, 4
+    CFI_RESTORE(%ebp)
+    CFI_DEF_CFA(%esp, 4)
     movd %eax, %xmm0              // Place return value also into floating point return value.
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
@@ -93,7 +93,7 @@
 DEFINE_FUNCTION art_portable_resolution_trampoline
   PUSH ebp                        // Set up frame.
   movl %esp, %ebp
-  .cfi_def_cfa_register %ebp
+  CFI_DEF_CFA_REGISTER(%ebp)
   subl LITERAL(4), %esp           // Align stack
   SETUP_GOT                       // pushes ebx
   leal 8(%ebp), %edx              // %edx = ArtMethod** called_addr
@@ -103,11 +103,11 @@
   pushl %fs:THREAD_SELF_OFFSET    // Pass thread.
   pushl %ecx                      // Pass receiver.
   pushl %eax                      // Pass called.
-  call SYMBOL(artPortableResolutionTrampoline)@PLT  // (called, receiver, Thread*, &called)
+  call PLT_SYMBOL(artPortableResolutionTrampoline)  // (called, receiver, Thread*, &called)
   UNDO_SETUP_GOT
   leave
-  .cfi_restore %ebp
-  .cfi_def_cfa %esp, 4
+  CFI_RESTORE(%ebp)
+  CFI_DEF_CFA(%esp, 4)
   testl %eax, %eax
   jz  resolve_fail
   jmp * %eax
@@ -118,7 +118,7 @@
 DEFINE_FUNCTION art_portable_to_interpreter_bridge
   PUSH ebp                        // Set up frame.
   movl %esp, %ebp
-  .cfi_def_cfa_register %ebp
+  CFI_DEF_CFA_REGISTER(%ebp)
   subl LITERAL(8), %esp           // Align stack
   SETUP_GOT
   leal 8(%ebp), %edx              // %edx = ArtMethod** called_addr
@@ -126,10 +126,10 @@
   pushl %edx                      // Pass called_addr.
   pushl %fs:THREAD_SELF_OFFSET    // Pass thread.
   pushl %eax                      // Pass called.
-  call SYMBOL(artPortableToInterpreterBridge)@PLT  // (called, Thread*, &called)
+  call PLT_SYMBOL(artPortableToInterpreterBridge)  // (called, Thread*, &called)
   UNDO_SETUP_GOT
   leave
-  .cfi_restore %ebp
-  .cfi_def_cfa %esp, 4
+  CFI_RESTORE(%ebp)
+  CFI_DEF_CFA(%esp, 4)
   ret
 END_FUNCTION art_portable_to_interpreter_bridge
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 69738ba..9c3eb30 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -16,8 +16,6 @@
 
 #include "asm_support_x86.S"
 
-#include "arch/quick_alloc_entrypoints.S"
-
 // For x86, the CFA is esp+4, the address above the pushed return address on the stack.
 
     /*
@@ -29,7 +27,7 @@
     PUSH esi
     PUSH ebp
     subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
-    .cfi_adjust_cfa_offset 16
+    CFI_ADJUST_CFA_OFFSET(16)
 END_MACRO
 
     /*
@@ -41,7 +39,7 @@
     PUSH esi
     PUSH ebp
     subl  MACRO_LITERAL(16), %esp  // Grow stack by 4 words, bottom word will hold Method*
-    .cfi_adjust_cfa_offset 16
+    CFI_ADJUST_CFA_OFFSET(16)
 END_MACRO
 
 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
@@ -49,7 +47,7 @@
     POP ebp  // Restore callee saves (ebx is saved/restored by the upcall)
     POP esi
     POP edi
-    .cfi_adjust_cfa_offset -28
+    CFI_ADJUST_CFA_OFFSET(-28)
 END_MACRO
 
     /*
@@ -68,7 +66,7 @@
 
 MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
     addl MACRO_LITERAL(4), %esp  // Remove padding
-    .cfi_adjust_cfa_offset -4
+    CFI_ADJUST_CFA_OFFSET(-4)
     POP ecx  // Restore args except eax
     POP edx
     POP ebx
@@ -86,12 +84,12 @@
     mov %esp, %ecx
     // Outgoing argument set up
     subl  MACRO_LITERAL(8), %esp             // Alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH ecx                                 // pass SP
     pushl %fs:THREAD_SELF_OFFSET             // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     SETUP_GOT_NOSAVE                         // clobbers ebx (harmless here)
-    call SYMBOL(artDeliverPendingExceptionFromCode)@PLT  // artDeliverPendingExceptionFromCode(Thread*, SP)
+    call PLT_SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*, SP)
     int3                                     // unreached
 END_MACRO
 
@@ -101,12 +99,12 @@
     mov %esp, %ecx
     // Outgoing argument set up
     subl  MACRO_LITERAL(8), %esp  // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH ecx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
-    call VAR(cxx_name, 1)@PLT     // cxx_name(Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
     int3                          // unreached
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -119,10 +117,10 @@
     PUSH eax                      // alignment padding
     PUSH ecx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass arg1
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
-    call VAR(cxx_name, 1)@PLT     // cxx_name(arg1, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
     int3                          // unreached
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -134,11 +132,11 @@
     // Outgoing argument set up
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
-    call VAR(cxx_name, 1)@PLT     // cxx_name(arg1, arg2, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
     int3                          // unreached
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -206,18 +204,18 @@
     // Outgoing argument set up
     SETUP_GOT_NOSAVE
     subl MACRO_LITERAL(12), %esp  // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     pushl 32(%edx)                // pass caller Method*
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)@PLT     // cxx_name(arg1, arg2, arg3, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, arg3, Thread*, SP)
     movl %edx, %edi               // save code pointer in EDI
     addl MACRO_LITERAL(36), %esp  // Pop arguments skip eax
-    .cfi_adjust_cfa_offset -36
+    CFI_ADJUST_CFA_OFFSET(-36)
     POP ecx  // Restore args except eax
     POP edx
     POP ebx
@@ -231,7 +229,7 @@
     ret
 1:
     addl MACRO_LITERAL(4), %esp   // Pop code pointer off stack
-    .cfi_adjust_cfa_offset -4
+    CFI_ADJUST_CFA_OFFSET(-4)
     DELIVER_PENDING_EXCEPTION
     END_FUNCTION VAR(c_name, 0)
 END_MACRO
@@ -259,7 +257,7 @@
     PUSH ebp                      // save ebp
     PUSH ebx                      // save ebx
     mov %esp, %ebp                // copy value of stack pointer into base pointer
-    .cfi_def_cfa_register ebp
+    CFI_DEF_CFA_REGISTER(ebp)
     mov 20(%ebp), %ebx            // get arg array size
     addl LITERAL(28), %ebx        // reserve space for return addr, method*, ebx, and ebp in frame
     andl LITERAL(0xFFFFFFF0), %ebx    // align frame size to 16 bytes
@@ -270,16 +268,16 @@
     pushl 20(%ebp)                // push size of region to memcpy
     pushl 16(%ebp)                // push arg array as source of memcpy
     pushl %eax                    // push stack pointer as destination of memcpy
-    call SYMBOL(memcpy)@PLT       // (void*, const void*, size_t)
+    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
     addl LITERAL(12), %esp        // pop arguments to memcpy
     movl LITERAL(0), (%esp)       // store NULL for method*
     mov 12(%ebp), %eax            // move method pointer into eax
     mov 4(%esp), %ecx             // copy arg1 into ecx
     mov 8(%esp), %edx             // copy arg2 into edx
     mov 12(%esp), %ebx            // copy arg3 into ebx
-    call *METHOD_CODE_OFFSET(%eax) // call the method
+    call *METHOD_QUICK_CODE_OFFSET(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
-    .cfi_def_cfa_register esp
+    CFI_DEF_CFA_REGISTER(esp)
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
     mov 20(%esp), %ecx            // get result pointer
@@ -303,13 +301,13 @@
     SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
     // Outgoing argument set up
     subl MACRO_LITERAL(8), %esp   // push padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
-    call VAR(cxx_name, 1)@PLT     // cxx_name(Thread*, SP)
+    CFI_ADJUST_CFA_OFFSET(4)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
     addl MACRO_LITERAL(16), %esp  // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
@@ -324,11 +322,11 @@
     PUSH eax                      // push padding
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)@PLT     // cxx_name(arg1, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
     addl MACRO_LITERAL(16), %esp  // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
@@ -342,12 +340,12 @@
     // Outgoing argument set up
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call VAR(cxx_name, 1)@PLT     // cxx_name(arg1, arg2, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
     addl MACRO_LITERAL(16), %esp  // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
@@ -359,17 +357,17 @@
     mov %esp, %ebx                // remember SP
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp  // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call VAR(cxx_name, 1)@PLT     // cxx_name(arg1, arg2, arg3, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, arg3, Thread*, SP)
     addl MACRO_LITERAL(32), %esp  // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     CALL_MACRO(return_macro, 2)   // return or deliver exception
     END_FUNCTION VAR(c_name, 0)
@@ -401,7 +399,108 @@
 END_MACRO
 
 // Generate the allocation entrypoints for each allocator.
-GENERATE_ALL_ALLOC_ENTRYPOINTS
+// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't, as we need to use concatenation
+// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
+// to macros and the VAR macro won't concatenate arguments properly); this also breaks having
+// multi-line macros that use each other (hence one macro per line below).
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
 
 TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
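One more aside (a sketch under stated assumptions, not the patch itself): the reason the per-allocator list above can be generated at all is that the .S files go through the C preprocessor first, so ## token pasting builds each suffixed symbol before either assembler — Apple's as or binutils gas — ever sees it. A hypothetical C++ rendering of the same idiom:

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the generated entrypoint families; the point is
// only that cpp's ## pasting produces one distinct symbol per allocator suffix.
#define DEFINE_ALLOC_STUB(c_suffix, cxx_suffix)                               \
  static void* ArtQuickAllocObject##c_suffix(uint32_t type_idx, void*) {     \
    std::printf("alloc via " #cxx_suffix ", type_idx=%u\n",                  \
                static_cast<unsigned>(type_idx));                            \
    return nullptr;                                                          \
  }

DEFINE_ALLOC_STUB(_dlmalloc, DlMalloc)   // defines ArtQuickAllocObject_dlmalloc
DEFINE_ALLOC_STUB(_rosalloc, RosAlloc)   // defines ArtQuickAllocObject_rosalloc

int main() {
  ArtQuickAllocObject_dlmalloc(7, nullptr);
  ArtQuickAllocObject_rosalloc(7, nullptr);
}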
@@ -445,11 +544,11 @@
     PUSH eax                      // push padding
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass object
-    call artLockObjectFromCode@PLT  // artLockObjectFromCode(object, Thread*, SP)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
-    .cfi_adjust_cfa_offset -16
+    call PLT_SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*, SP)
+    addl LITERAL(16), %esp  // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_lock_object
@@ -479,11 +578,11 @@
     PUSH eax                      // push padding
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass object
-    call artUnlockObjectFromCode@PLT  // artUnlockObjectFromCode(object, Thread*, SP)
-    addl MACRO_LITERAL(16), %esp  // pop arguments
-    .cfi_adjust_cfa_offset -16
+    call PLT_SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*, SP)
+    addl LITERAL(16), %esp  // pop arguments
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object
@@ -493,9 +592,9 @@
     PUSH eax                     // alignment padding
     PUSH ecx                     // pass arg2 - obj->klass
     PUSH eax                     // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)@PLT  // (Class* klass, Class* ref_klass)
+    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     addl LITERAL(12), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_is_assignable
 
@@ -504,26 +603,26 @@
     PUSH eax                     // alignment padding
     PUSH ecx                     // pass arg2 - obj->klass
     PUSH eax                     // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)@PLT  // (Class* klass, Class* ref_klass)
+    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
     testl %eax, %eax
     jz 1f                         // jump forward if not assignable
     addl LITERAL(12), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 1:
     POP eax                       // pop arguments
     POP ecx
     addl LITERAL(4), %esp
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %edx
     // Outgoing argument set up
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call SYMBOL(artThrowClassCastException)@PLT // (Class* a, Class* b, Thread*, SP)
+    call PLT_SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
     int3                          // unreached
 END_FUNCTION art_quick_check_cast
 
@@ -568,14 +667,14 @@
     PUSH ecx
     PUSH edx
     subl LITERAL(8), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl CLASS_OFFSET(%edx)     // pass arg2 - type of the value to be stored
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                     // pass arg1 - component type of the array
     SETUP_GOT_NOSAVE             // clobbers EBX
-    call SYMBOL(artIsAssignableFromCode)@PLT  // (Class* a, Class* b)
+    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
     addl LITERAL(16), %esp       // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     testl %eax, %eax
     jz   throw_array_store_exception
     POP  edx
@@ -595,10 +694,10 @@
     // Outgoing argument set up
     PUSH ecx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass arg2 - value
     PUSH eax                      // pass arg1 - array
-    call SYMBOL(artThrowArrayStoreException)@PLT // (array, value, Thread*, SP)
+    call PLT_SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
     int3                          // unreached
 END_FUNCTION art_quick_aput_obj
 
@@ -607,9 +706,9 @@
     PUSH edx                      // pass arg3
     PUSH ecx                      // pass arg2
     PUSH eax                      // pass arg1
-    call SYMBOL(memcpy)@PLT       // (void*, const void*, size_t)
+    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
     addl LITERAL(12), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_memcpy
 
@@ -617,17 +716,17 @@
 
 DEFINE_FUNCTION art_quick_fmod
     subl LITERAL(12), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass arg4 b.hi
     PUSH edx                      // pass arg3 b.lo
     PUSH ecx                      // pass arg2 a.hi
     PUSH eax                      // pass arg1 a.lo
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(fmod)@PLT         // (jdouble a, jdouble b)
+    call PLT_SYMBOL(fmod)         // (jdouble a, jdouble b)
     fstpl (%esp)                  // pop return value off fp stack
     movsd (%esp), %xmm0           // place into %xmm0
     addl LITERAL(28), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -28
+    CFI_ADJUST_CFA_OFFSET(-28)
     ret
 END_FUNCTION art_quick_fmod
 
@@ -636,11 +735,11 @@
     PUSH ecx                      // pass arg2 b
     PUSH eax                      // pass arg1 a
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(fmodf)@PLT        // (jfloat a, jfloat b)
+    call PLT_SYMBOL(fmodf)        // (jfloat a, jfloat b)
     fstps (%esp)                  // pop return value off fp stack
     movss (%esp), %xmm0           // place into %xmm0
     addl LITERAL(12), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_fmodf
 
@@ -651,7 +750,7 @@
     fstpl (%esp)                  // pop value off fp stack as double
     movsd (%esp), %xmm0           // place into %xmm0
     addl LITERAL(8), %esp         // pop arguments
-    .cfi_adjust_cfa_offset -8
+    CFI_ADJUST_CFA_OFFSET(-8)
     ret
 END_FUNCTION art_quick_l2d
 
@@ -662,7 +761,7 @@
     fstps (%esp)                  // pop value off fp stack as a single
     movss (%esp), %xmm0           // place into %xmm0
     addl LITERAL(8), %esp         // pop argument
-    .cfi_adjust_cfa_offset -8
+    CFI_ADJUST_CFA_OFFSET(-8)
     ret
 END_FUNCTION art_quick_l2f
 
@@ -671,20 +770,20 @@
     PUSH ecx                      // pass arg2 a.hi
     PUSH eax                      // pass arg1 a.lo
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(art_d2l)@PLT      // (jdouble a)
+    call PLT_SYMBOL(art_d2l)      // (jdouble a)
     addl LITERAL(12), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_d2l
 
 DEFINE_FUNCTION art_quick_f2l
     subl LITERAL(8), %esp         // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     SETUP_GOT_NOSAVE              // clobbers EBX
     PUSH eax                      // pass arg1 a
-    call SYMBOL(art_f2l)@PLT      // (jfloat a)
+    call PLT_SYMBOL(art_f2l)      // (jfloat a)
     addl LITERAL(12), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -12
+    CFI_ADJUST_CFA_OFFSET(-12)
     ret
 END_FUNCTION art_quick_f2l
 
@@ -704,29 +803,29 @@
 
 DEFINE_FUNCTION art_quick_ldiv
     subl LITERAL(12), %esp       // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
     PUSH eax                     // pass arg1 a.lo
     SETUP_GOT_NOSAVE             // clobbers EBX
-    call SYMBOL(artLdiv)@PLT     // (jlong a, jlong b)
+    call PLT_SYMBOL(artLdiv)     // (jlong a, jlong b)
     addl LITERAL(28), %esp       // pop arguments
-    .cfi_adjust_cfa_offset -28
+    CFI_ADJUST_CFA_OFFSET(-28)
     ret
 END_FUNCTION art_quick_ldiv
 
 DEFINE_FUNCTION art_quick_lmod
     subl LITERAL(12), %esp       // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                     // pass arg4 b.hi
     PUSH edx                     // pass arg3 b.lo
     PUSH ecx                     // pass arg2 a.hi
     PUSH eax                     // pass arg1 a.lo
     SETUP_GOT_NOSAVE             // clobbers EBX
-    call SYMBOL(artLmod)@PLT     // (jlong a, jlong b)
+    call PLT_SYMBOL(artLmod)     // (jlong a, jlong b)
     addl LITERAL(28), %esp       // pop arguments
-    .cfi_adjust_cfa_offset -28
+    CFI_ADJUST_CFA_OFFSET(-28)
     ret
 END_FUNCTION art_quick_lmod
 
@@ -782,19 +881,19 @@
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME       // save ref containing registers for GC
     mov %esp, %ebx                // remember SP
     subl LITERAL(8), %esp         // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     mov 32(%ebx), %ebx            // get referrer
     PUSH ebx                      // pass referrer
     PUSH edx                      // pass new_val
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artSet32InstanceFromCode)@PLT  // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    call PLT_SYMBOL(artSet32InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set32_instance
@@ -802,19 +901,19 @@
 DEFINE_FUNCTION art_quick_set64_instance
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     subl LITERAL(8), %esp         // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH esp                      // pass SP-8
     addl LITERAL(8), (%esp)       // fix SP on stack by adding 8
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ebx                      // pass high half of new_val
     PUSH edx                      // pass low half of new_val
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artSet64InstanceFromCode)@PLT  // (field_idx, Object*, new_val, Thread*, SP)
+    call PLT_SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_instance
@@ -823,19 +922,19 @@
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %ebx                // remember SP
     subl LITERAL(8), %esp         // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     mov 32(%ebx), %ebx            // get referrer
     PUSH ebx                      // pass referrer
     PUSH edx                      // pass new_val
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artSetObjInstanceFromCode)@PLT // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    call PLT_SYMBOL(artSetObjInstanceFromCode) // (field_idx, Object*, new_val, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set_obj_instance
@@ -845,17 +944,17 @@
     mov %esp, %ebx                // remember SP
     mov 32(%esp), %edx            // get referrer
     subl LITERAL(12), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass referrer
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artGet32InstanceFromCode)@PLT  // (field_idx, Object*, referrer, Thread*, SP)
+    call PLT_SYMBOL(artGet32InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_get32_instance
@@ -865,17 +964,17 @@
     mov %esp, %ebx                // remember SP
     mov 32(%esp), %edx            // get referrer
     subl LITERAL(12), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass referrer
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artGet64InstanceFromCode)@PLT  // (field_idx, Object*, referrer, Thread*, SP)
+    call PLT_SYMBOL(artGet64InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_get64_instance
@@ -885,17 +984,17 @@
     mov %esp, %ebx                // remember SP
     mov 32(%esp), %edx            // get referrer
     subl LITERAL(12), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass referrer
     PUSH ecx                      // pass object
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artGetObjInstanceFromCode)@PLT // (field_idx, Object*, referrer, Thread*, SP)
+    call PLT_SYMBOL(artGetObjInstanceFromCode) // (field_idx, Object*, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_get_obj_instance
@@ -905,17 +1004,17 @@
     mov %esp, %ebx                // remember SP
     mov 32(%esp), %edx            // get referrer
     subl LITERAL(12), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass referrer
     PUSH ecx                      // pass new_val
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artSet32StaticFromCode)@PLT  // (field_idx, new_val, referrer, Thread*, SP)
+    call PLT_SYMBOL(artSet32StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set32_static
@@ -924,19 +1023,19 @@
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
     mov %esp, %ebx                // remember SP
     subl LITERAL(8), %esp         // alignment padding
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     mov 32(%ebx), %ebx            // get referrer
     PUSH edx                      // pass high half of new_val
     PUSH ecx                      // pass low half of new_val
     PUSH ebx                      // pass referrer
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artSet64StaticFromCode)@PLT  // (field_idx, referrer, new_val, Thread*, SP)
+    call PLT_SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
 END_FUNCTION art_quick_set64_static
@@ -946,15 +1045,15 @@
     mov %esp, %ebx                // remember SP
     mov 32(%esp), %edx            // get referrer
     subl LITERAL(12), %esp        // alignment padding
-    .cfi_adjust_cfa_offset 12
+    CFI_ADJUST_CFA_OFFSET(12)
     PUSH ebx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // pass referrer
     PUSH ecx                      // pass new_val
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artSetObjStaticFromCode)@PLT  // (field_idx, new_val, referrer, Thread*, SP)
+    call PLT_SYMBOL(artSetObjStaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
     addl LITERAL(32), %esp        // pop arguments
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     RETURN_IF_EAX_ZERO            // return or deliver exception
@@ -966,13 +1065,13 @@
     mov 32(%esp), %ecx            // get referrer
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass referrer
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artGet32StaticFromCode)@PLT    // (field_idx, referrer, Thread*, SP)
+    call PLT_SYMBOL(artGet32StaticFromCode)    // (field_idx, referrer, Thread*, SP)
     addl LITERAL(16), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_get32_static
@@ -983,13 +1082,13 @@
     mov 32(%esp), %ecx            // get referrer
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass referrer
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artGet64StaticFromCode)@PLT    // (field_idx, referrer, Thread*, SP)
+    call PLT_SYMBOL(artGet64StaticFromCode)    // (field_idx, referrer, Thread*, SP)
     addl LITERAL(16), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_get64_static
@@ -1000,13 +1099,13 @@
     mov 32(%esp), %ecx            // get referrer
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass referrer
     PUSH eax                      // pass field_idx
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artGetObjStaticFromCode)@PLT   // (field_idx, referrer, Thread*, SP)
+    call PLT_SYMBOL(artGetObjStaticFromCode)   // (field_idx, referrer, Thread*, SP)
     addl LITERAL(16), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME     // restore frame up to return address
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_get_obj_static
@@ -1015,16 +1114,16 @@
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // save frame and Method*
     PUSH esp                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass receiver
     PUSH eax                      // pass proxy method
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artQuickProxyInvokeHandler)@PLT // (proxy method, receiver, Thread*, SP)
+    call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
     addl LITERAL(44), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -44
+    CFI_ADJUST_CFA_OFFSET(-44)
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_proxy_invoke_handler
 
@@ -1046,11 +1145,11 @@
     SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
     PUSH esp                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // pass receiver
     PUSH eax                      // pass method
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artQuickResolutionTrampoline)@PLT // (Method* called, receiver, Thread*, SP)
+    call PLT_SYMBOL(artQuickResolutionTrampoline) // (Method* called, receiver, Thread*, SP)
     movl %eax, %edi               // remember code pointer in EDI
     addl LITERAL(16), %esp        // pop arguments
     test %eax, %eax               // if code pointer is NULL goto deliver pending exception
@@ -1074,15 +1173,15 @@
     PUSH eax                      // alignment padding
     PUSH edx                      // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH eax                      // pass  method
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artQuickToInterpreterBridge)@PLT  // (method, Thread*, SP)
+    call PLT_SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
     movd %eax, %xmm0              // place return value also into floating point return value
     movd %edx, %xmm1
     punpckldq %xmm1, %xmm0
     addl LITERAL(16), %esp        // pop arguments
-    .cfi_adjust_cfa_offset -16
+    CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
 END_FUNCTION art_quick_to_interpreter_bridge
@@ -1095,20 +1194,20 @@
     movl  %esp, %edx              // Save SP.
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
     subl LITERAL(8), %esp         // Align stack.
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     pushl 40(%esp)                // Pass LR.
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH edx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // Pass receiver.
     PUSH eax                      // Pass Method*.
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call  SYMBOL(artInstrumentationMethodEntryFromCode)@PLT // (Method*, Object*, Thread*, SP, LR)
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)
     addl  LITERAL(28), %esp       // Pop arguments upto saved Method*.
     movl 28(%esp), %edi           // Restore edi.
     movl %eax, 28(%esp)           // Place code* over edi, just under return pc.
-    movl LITERAL(SYMBOL(art_quick_instrumentation_exit)@PLT), 32(%esp)
+    movl LITERAL(PLT_SYMBOL(art_quick_instrumentation_exit)), 32(%esp)
                                   // Place instrumentation exit as return pc.
     movl (%esp), %eax             // Restore eax.
     movl 8(%esp), %ecx            // Restore ecx.
@@ -1125,32 +1224,32 @@
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
     mov  %esp, %ecx               // Remember SP
     subl LITERAL(8), %esp         // Save float return value.
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     movd %xmm0, (%esp)
     PUSH edx                      // Save gpr return value.
     PUSH eax
     subl LITERAL(8), %esp         // Align stack
     movd %xmm0, (%esp)
     subl LITERAL(8), %esp         // Pass float return value.
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     movd %xmm0, (%esp)
     PUSH edx                      // Pass gpr return value.
     PUSH eax
     PUSH ecx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current.
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call  SYMBOL(artInstrumentationMethodExitFromCode)@PLT  // (Thread*, SP, gpr_result, fpr_result)
+    call PLT_SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result, fpr_result)
     mov   %eax, %ecx              // Move returned link register.
     addl LITERAL(32), %esp        // Pop arguments.
-    .cfi_adjust_cfa_offset -32
+    CFI_ADJUST_CFA_OFFSET(-32)
     movl %edx, %ebx               // Move returned link register for deopt
                                   // (ebx is pretending to be our LR).
     POP eax                       // Restore gpr return value.
     POP edx
     movd (%esp), %xmm0            // Restore fpr return value.
     addl LITERAL(8), %esp
-    .cfi_adjust_cfa_offset -8
+    CFI_ADJUST_CFA_OFFSET(-8)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
     addl LITERAL(4), %esp         // Remove fake return pc.
     jmp   *%ecx                   // Return.
@@ -1165,12 +1264,12 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
     mov  %esp, %ecx               // Remember SP.
     subl LITERAL(8), %esp         // Align stack.
-    .cfi_adjust_cfa_offset 8
+    CFI_ADJUST_CFA_OFFSET(8)
     PUSH ecx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
-    .cfi_adjust_cfa_offset 4
+    CFI_ADJUST_CFA_OFFSET(4)
     SETUP_GOT_NOSAVE              // clobbers EBX
-    call SYMBOL(artDeoptimize)@PLT    // artDeoptimize(Thread*, SP)
+    call PLT_SYMBOL(artDeoptimize)  // artDeoptimize(Thread*, SP)
     int3                          // Unreachable.
 END_FUNCTION art_quick_deoptimize
 
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 39bad58..235da99 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -40,8 +40,9 @@
 
 namespace art {
 
+static Mutex modify_ldt_lock("modify_ldt lock");
+
 void Thread::InitCpu() {
-  static Mutex modify_ldt_lock("modify_ldt lock");
   MutexLock mu(Thread::Current(), modify_ldt_lock);
 
   const uintptr_t base = reinterpret_cast<uintptr_t>(this);
@@ -113,7 +114,6 @@
   uint16_t table_indicator = 1 << 2;  // LDT
   uint16_t rpl = 3;  // Requested privilege level
   uint16_t selector = (entry_number << 3) | table_indicator | rpl;
-  // TODO: use our assembler to generate code
   __asm__ __volatile__("movw %w0, %%fs"
       :    // output
       : "q"(selector)  // input
@@ -124,7 +124,6 @@
 
   // Sanity check that reads from %fs point to this Thread*.
   Thread* self_check;
-  // TODO: use our assembler to generate code
   CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
   __asm__ __volatile__("movl %%fs:(%1), %0"
       : "=r"(self_check)  // output
@@ -138,4 +137,36 @@
   CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
 }
 
+void Thread::CleanupCpu() {
+  MutexLock mu(Thread::Current(), modify_ldt_lock);
+
+  // Sanity check that reads from %fs point to this Thread*.
+  Thread* self_check;
+  __asm__ __volatile__("movl %%fs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Extract the LDT entry number from the FS register.
+  uint16_t selector;
+  __asm__ __volatile__("movw %%fs, %w0"
+      : "=q"(selector)  // output
+      :  // input
+      :);  // clobber
+
+  // Free LDT entry.
+#if defined(__APPLE__)
+  i386_set_ldt(selector >> 3, 0, 1);
+#else
+  user_desc ldt_entry;
+  memset(&ldt_entry, 0, sizeof(ldt_entry));
+  ldt_entry.entry_number = selector >> 3;
+  ldt_entry.contents = MODIFY_LDT_CONTENTS_DATA;
+  ldt_entry.seg_not_present = 1;
+
+  syscall(__NR_modify_ldt, 1, &ldt_entry, sizeof(ldt_entry));
+#endif
+}
+
 }  // namespace art
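Note on the selector arithmetic used by CleanupCpu above: `entry_number = selector >> 3` follows the standard x86 segment selector layout (bits 0-1 RPL, bit 2 table indicator, bits 3-15 index), the same layout InitCpu uses when it builds `(entry_number << 3) | table_indicator | rpl`. A minimal stand-alone sketch of that decoding, with names invented for the example:

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative only: decode an x86 segment selector the same way CleanupCpu
// recovers the LDT entry number (selector >> 3).
struct SelectorFields {
  uint16_t rpl;     // requested privilege level (bits 0-1)
  bool     is_ldt;  // table indicator (bit 2): 0 = GDT, 1 = LDT
  uint16_t index;   // descriptor table index (bits 3-15), the "entry_number"
};

static SelectorFields DecodeSelector(uint16_t selector) {
  SelectorFields f;
  f.rpl = selector & 0x3;
  f.is_ldt = (selector & 0x4) != 0;
  f.index = selector >> 3;
  return f;
}

int main() {
  // Matches how InitCpu builds a selector: (entry_number << 3) | (1 << 2) | 3.
  uint16_t selector = (5 << 3) | (1 << 2) | 3;
  SelectorFields f = DecodeSelector(selector);
  std::printf("index=%u ldt=%d rpl=%u\n",
              static_cast<unsigned>(f.index), f.is_ldt,
              static_cast<unsigned>(f.rpl));  // index=5 ldt=1 rpl=3
  return 0;
}
```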
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
new file mode 100644
index 0000000..b59c0cb
--- /dev/null
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
+#define ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
+
+#include "asm_support_x86_64.h"
+
+#if defined(__APPLE__)
+    // Mac OS' as(1) doesn't let you name macro parameters.
+    #define MACRO0(macro_name) .macro macro_name
+    #define MACRO1(macro_name, macro_arg1) .macro macro_name
+    #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name
+    #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name
+    #define END_MACRO .endmacro
+
+    // Mac OS' as(1) uses $0, $1, and so on for macro arguments, and function names
+    // are mangled with an extra underscore prefix. The use of $x for arguments
+    // means that literals need to be represented with $$x in macros.
+    #define SYMBOL(name) _ ## name
+    #define PLT_SYMBOL(name) _ ## name
+    #define VAR(name,index) SYMBOL($index)
+    #define PLT_VAR(name, index) SYMBOL($index)
+    #define REG_VAR(name,index) %$index
+    #define CALL_MACRO(name,index) $index
+    #define LITERAL(value) $value
+    #define MACRO_LITERAL(value) $$value
+
+    // Mac OS' as(1) doesn't like cfi_* directives.
+    #define CFI_STARTPROC
+    #define CFI_ENDPROC
+    #define CFI_ADJUST_CFA_OFFSET(size)
+    #define CFI_DEF_CFA(reg,size)
+    #define CFI_DEF_CFA_REGISTER(reg)
+    #define CFI_RESTORE(reg)
+    #define CFI_REL_OFFSET(reg,size)
+
+    // Mac OS' as(1) doesn't support certain directives.
+    #define FUNCTION_TYPE(name)
+    #define SIZE(name)
+#else
+    // Regular gas(1) lets you name macro parameters.
+    #define MACRO0(macro_name) .macro macro_name
+    #define MACRO1(macro_name, macro_arg1) .macro macro_name macro_arg1
+    #define MACRO2(macro_name, macro_arg1, macro_arg2) .macro macro_name macro_arg1, macro_arg2
+    #define MACRO3(macro_name, macro_arg1, macro_arg2, macro_arg3) .macro macro_name macro_arg1, macro_arg2, macro_arg3
+    #define END_MACRO .endm
+
+    // Regular gas(1) uses \argument_name for macro arguments.
+    // We need to turn on alternate macro syntax so we can use &; otherwise the preprocessor
+    // would insert a space between the \ and the name. Even in this mode there's no special
+    // meaning to $, so literals are still just $x. The use of altmacro makes % a special
+    // character, so care must be taken when passing registers as macro arguments.
+    .altmacro
+    #define SYMBOL(name) name
+    #define PLT_SYMBOL(name) name@PLT
+    #define VAR(name,index) name&
+    #define PLT_VAR(name, index) name&@PLT
+    #define REG_VAR(name,index) %name
+    #define CALL_MACRO(name,index) name&
+    #define LITERAL(value) $value
+    #define MACRO_LITERAL(value) $value
+
+    // CFI support
+    #define CFI_STARTPROC .cfi_startproc
+    #define CFI_ENDPROC .cfi_endproc
+    #define CFI_ADJUST_CFA_OFFSET(size) .cfi_adjust_cfa_offset size
+    #define CFI_DEF_CFA(reg,size) .cfi_def_cfa reg,size
+    #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
+    #define CFI_RESTORE(reg) .cfi_restore reg
+    #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+
+    #define FUNCTION_TYPE(name) .type name&, @function
+    #define SIZE(name) .size name, .-name
+#endif
+
+    /* Cache alignment for function entry */
+MACRO0(ALIGN_FUNCTION_ENTRY)
+    .balign 16
+END_MACRO
+
+MACRO1(DEFINE_FUNCTION, c_name)
+    FUNCTION_TYPE(\c_name)
+    .globl VAR(c_name, 0)
+    ALIGN_FUNCTION_ENTRY
+VAR(c_name, 0):
+    CFI_STARTPROC
+END_MACRO
+
+MACRO1(END_FUNCTION, c_name)
+    CFI_ENDPROC
+    SIZE(\c_name)
+END_MACRO
+
+MACRO1(PUSH, reg)
+    pushq REG_VAR(reg, 0)
+    CFI_ADJUST_CFA_OFFSET(8)
+    CFI_REL_OFFSET(REG_VAR(reg, 0), 0)
+END_MACRO
+
+MACRO1(POP, reg)
+    popq REG_VAR(reg,0)
+    CFI_ADJUST_CFA_OFFSET(-8)
+    CFI_RESTORE(REG_VAR(reg,0))
+END_MACRO
+
+MACRO1(UNIMPLEMENTED,name)
+    FUNCTION_TYPE(\name)
+    .globl VAR(name, 0)
+    ALIGN_FUNCTION_ENTRY
+VAR(name, 0):
+    CFI_STARTPROC
+    int3
+    int3
+    CFI_ENDPROC
+    SIZE(\name)
+END_MACRO
+
+MACRO0(SETUP_GOT_NOSAVE)
+    call __x86.get_pc_thunk.bx
+    addl $_GLOBAL_OFFSET_TABLE_, %ebx
+END_MACRO
+
+MACRO0(SETUP_GOT)
+    PUSH  ebx
+    SETUP_GOT_NOSAVE
+END_MACRO
+
+MACRO0(UNDO_SETUP_GOT)
+    POP  ebx
+END_MACRO
+
+#endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_S_
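A brief aside on the CFI_ADJUST_CFA_OFFSET/PUSH/POP pairing introduced by this header and used throughout the x86 entrypoints earlier in this change: every stack adjustment must be mirrored in the unwind info, and the adjustments around a call sequence must net to zero. The following is a conceptual C++ model of that bookkeeping, not ART code; it mirrors the art_quick_ldiv sequence above.

```cpp
#include <cassert>

// Conceptual model of what CFI_ADJUST_CFA_OFFSET records: pushes and subls
// grow the distance between the stack pointer and the call frame address,
// pops and addls shrink it, and the sequence must balance so the unwinder
// can still locate the return address.
struct CfaTracker {
  int offset = 0;                 // extra bytes currently below the CFA
  void Adjust(int delta) { offset += delta; }
};

int main() {
  CfaTracker cfa;
  // Mirrors the 32-bit art_quick_ldiv prologue/epilogue in this change:
  cfa.Adjust(12);   // subl LITERAL(12), %esp  -> CFI_ADJUST_CFA_OFFSET(12)
  cfa.Adjust(4);    // PUSH ebx
  cfa.Adjust(4);    // PUSH edx
  cfa.Adjust(4);    // PUSH ecx
  cfa.Adjust(4);    // PUSH eax
  cfa.Adjust(-28);  // addl LITERAL(28), %esp  -> CFI_ADJUST_CFA_OFFSET(-28)
  assert(cfa.offset == 0);        // balanced: unwind info stays consistent
  return 0;
}
```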
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
new file mode 100644
index 0000000..d425ed8
--- /dev/null
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
+
+#include "asm_support.h"
+
+// Offset of field Thread::self_ verified in InitCpu
+#define THREAD_SELF_OFFSET 72
+// Offset of field Thread::card_table_ verified in InitCpu
+#define THREAD_CARD_TABLE_OFFSET 8
+// Offset of field Thread::exception_ verified in InitCpu
+#define THREAD_EXCEPTION_OFFSET 16
+// Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
+#define THREAD_ID_OFFSET 112
+
+#endif  // ART_RUNTIME_ARCH_X86_64_ASM_SUPPORT_X86_64_H_
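The offsets above are consumed by hand-written assembly, so they must track the C++ field layout of Thread; in this change that is enforced at runtime by the CHECK_EQ(..., OFFSETOF_MEMBER(...)) calls in Thread::InitCpu. A stand-alone sketch of the same idea using a hypothetical layout (not ART's Thread) and a compile-time check:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical example: keep a #define that assembly reads in sync with the
// C++ field offset, in the spirit of the CHECK_EQ(THREAD_SELF_OFFSET,
// OFFSETOF_MEMBER(Thread, self_)) verification done in Thread::InitCpu.
#define EXAMPLE_THREAD_SELF_OFFSET 8

struct ExampleThread {
  uint64_t state_and_flags;  // offset 0
  ExampleThread* self;       // offset 8 on a 64-bit target (assumed here)
};

static_assert(offsetof(ExampleThread, self) == EXAMPLE_THREAD_SELF_OFFSET,
              "assembly offset out of sync with C++ layout");

int main() { return 0; }
```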
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
new file mode 100644
index 0000000..4d1131c
--- /dev/null
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "context_x86_64.h"
+
+#include "mirror/art_method.h"
+#include "mirror/object-inl.h"
+#include "stack.h"
+
+namespace art {
+namespace x86_64 {
+
+static const uintptr_t gZero = 0;
+
+void X86_64Context::Reset() {
+  for (int i = 0; i < kNumberOfCpuRegisters; i++) {
+    gprs_[i] = NULL;
+  }
+  gprs_[RSP] = &rsp_;
+  // Initialize registers with easy to spot debug values.
+  rsp_ = X86_64Context::kBadGprBase + RSP;
+  rip_ = X86_64Context::kBadGprBase + kNumberOfCpuRegisters;
+}
+
+void X86_64Context::FillCalleeSaves(const StackVisitor& fr) {
+  mirror::ArtMethod* method = fr.GetMethod();
+  uint32_t core_spills = method->GetCoreSpillMask();
+  size_t spill_count = __builtin_popcount(core_spills);
+  DCHECK_EQ(method->GetFpSpillMask(), 0u);
+  size_t frame_size = method->GetFrameSizeInBytes();
+  if (spill_count > 0) {
+    // Lowest number spill is farthest away, walk registers and fill into context.
+    int j = 2;  // Offset j to skip return address spill.
+    for (int i = 0; i < kNumberOfCpuRegisters; i++) {
+      if (((core_spills >> i) & 1) != 0) {
+        gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size);
+        j++;
+      }
+    }
+  }
+}
+
+void X86_64Context::SmashCallerSaves() {
+  // This needs to be 0 because we want a null/zero return value.
+  gprs_[RAX] = const_cast<uintptr_t*>(&gZero);
+  gprs_[RDX] = const_cast<uintptr_t*>(&gZero);
+  gprs_[RCX] = nullptr;
+  gprs_[RBX] = nullptr;
+}
+
+void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
+  CHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+  CHECK_NE(gprs_[reg], &gZero);
+  CHECK(gprs_[reg] != NULL);
+  *gprs_[reg] = value;
+}
+
+void X86_64Context::DoLongJump() {
+  UNIMPLEMENTED(FATAL);
+}
+
+}  // namespace x86_64
+}  // namespace art
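For readers unfamiliar with the spill-mask walk in FillCalleeSaves above: the lowest-numbered spilled register is farthest from the top of the frame, and the slot index counts down from spill_count minus an offset that skips the return-address slot (the `int j = 2` above). A small stand-alone C++ illustration with a made-up spill mask:

```cpp
#include <cstdint>
#include <cstdio>

// Stand-alone illustration of the X86_64Context::FillCalleeSaves walk.
// The register numbers and sample mask are invented for the example.
int main() {
  uint32_t core_spills = (1u << 3) | (1u << 5) | (1u << 12);  // hypothetical mask
  int spill_count = __builtin_popcount(core_spills);
  int j = 2;  // skip the return address spill, as in FillCalleeSaves
  for (int i = 0; i < 16; i++) {
    if (((core_spills >> i) & 1) != 0) {
      std::printf("reg %d -> callee-save slot %d\n", i, spill_count - j);
      j++;
    }
  }
  return 0;
}
```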
diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h
new file mode 100644
index 0000000..3e49165
--- /dev/null
+++ b/runtime/arch/x86_64/context_x86_64.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
+
+#include "arch/context.h"
+#include "base/logging.h"
+#include "registers_x86_64.h"
+
+namespace art {
+namespace x86_64 {
+
+class X86_64Context : public Context {
+ public:
+  X86_64Context() {
+    Reset();
+  }
+  virtual ~X86_64Context() {}
+
+  virtual void Reset();
+
+  virtual void FillCalleeSaves(const StackVisitor& fr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  virtual void SetSP(uintptr_t new_sp) {
+    SetGPR(RSP, new_sp);
+  }
+
+  virtual void SetPC(uintptr_t new_pc) {
+    rip_ = new_pc;
+  }
+
+  virtual uintptr_t GetGPR(uint32_t reg) {
+    // Bounds-check against the x86-64 register count rather than a hard-coded 8.
+    DCHECK_LT(reg, static_cast<uint32_t>(kNumberOfCpuRegisters));
+    return *gprs_[reg];
+  }
+
+  virtual void SetGPR(uint32_t reg, uintptr_t value);
+
+  virtual void SmashCallerSaves();
+  virtual void DoLongJump();
+
+ private:
+  // Pointers to register locations; floating point registers are all caller save. Values are
+  // initialized to NULL or the special registers below.
+  uintptr_t* gprs_[kNumberOfCpuRegisters];
+  // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat
+  // special in that it cannot be encoded normally as a register operand to an instruction (except
+  // in 64bit addressing modes).
+  uintptr_t rsp_, rip_;
+};
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_64_CONTEXT_X86_64_H_
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
new file mode 100644
index 0000000..589c7d9
--- /dev/null
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "entrypoints/portable/portable_entrypoints.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "entrypoints/entrypoint_utils.h"
+
+namespace art {
+
+// Interpreter entrypoints.
+extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper& mh,
+                                                  const DexFile::CodeItem* code_item,
+                                                  ShadowFrame* shadow_frame, JValue* result);
+extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper& mh,
+                                           const DexFile::CodeItem* code_item,
+                                           ShadowFrame* shadow_frame, JValue* result);
+
+// Portable entrypoints.
+extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
+extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);
+
+// Cast entrypoints.
+extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
+                                                const mirror::Class* ref_class);
+extern "C" void art_quick_check_cast(void*, void*);
+
+// DexCache entrypoints.
+extern "C" void* art_quick_initialize_static_storage(uint32_t, void*);
+extern "C" void* art_quick_initialize_type(uint32_t, void*);
+extern "C" void* art_quick_initialize_type_and_verify_access(uint32_t, void*);
+extern "C" void* art_quick_resolve_string(void*, uint32_t);
+
+// Field entrypoints.
+extern "C" int art_quick_set32_instance(uint32_t, void*, int32_t);
+extern "C" int art_quick_set32_static(uint32_t, int32_t);
+extern "C" int art_quick_set64_instance(uint32_t, void*, int64_t);
+extern "C" int art_quick_set64_static(uint32_t, int64_t);
+extern "C" int art_quick_set_obj_instance(uint32_t, void*, void*);
+extern "C" int art_quick_set_obj_static(uint32_t, void*);
+extern "C" int32_t art_quick_get32_instance(uint32_t, void*);
+extern "C" int32_t art_quick_get32_static(uint32_t);
+extern "C" int64_t art_quick_get64_instance(uint32_t, void*);
+extern "C" int64_t art_quick_get64_static(uint32_t);
+extern "C" void* art_quick_get_obj_instance(uint32_t, void*);
+extern "C" void* art_quick_get_obj_static(uint32_t);
+
+// Array entrypoints.
+extern "C" void art_quick_aput_obj_with_null_and_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj_with_bound_check(void*, uint32_t, void*);
+extern "C" void art_quick_aput_obj(void*, uint32_t, void*);
+extern "C" void art_quick_handle_fill_data(void*, void*);
+
+// Lock entrypoints.
+extern "C" void art_quick_lock_object(void*);
+extern "C" void art_quick_unlock_object(void*);
+
+// Math entrypoints.
+extern "C" double art_quick_fmod(double, double);
+extern "C" float art_quick_fmodf(float, float);
+extern "C" double art_quick_l2d(int64_t);
+extern "C" float art_quick_l2f(int64_t);
+extern "C" int64_t art_quick_d2l(double);
+extern "C" int64_t art_quick_f2l(float);
+extern "C" int32_t art_quick_idivmod(int32_t, int32_t);
+extern "C" int64_t art_quick_ldiv(int64_t, int64_t);
+extern "C" int64_t art_quick_lmod(int64_t, int64_t);
+extern "C" int64_t art_quick_lmul(int64_t, int64_t);
+extern "C" uint64_t art_quick_lshl(uint64_t, uint32_t);
+extern "C" uint64_t art_quick_lshr(uint64_t, uint32_t);
+extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
+
+// Intrinsic entrypoints.
+extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
+extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
+extern "C" int32_t art_quick_string_compareto(void*, void*);
+extern "C" void* art_quick_memcpy(void*, const void*, size_t);
+
+// Invoke entrypoints.
+extern "C" void art_quick_imt_conflict_trampoline(mirror::ArtMethod*);
+extern "C" void art_quick_resolution_trampoline(mirror::ArtMethod*);
+extern "C" void art_quick_to_interpreter_bridge(mirror::ArtMethod*);
+extern "C" void art_quick_invoke_direct_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_interface_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_static_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, void*);
+extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
+
+// Thread entrypoints.
+extern void CheckSuspendFromCode(Thread* thread);
+extern "C" void art_quick_test_suspend();
+
+// Throw entrypoints.
+extern "C" void art_quick_deliver_exception(void*);
+extern "C" void art_quick_throw_array_bounds(int32_t index, int32_t limit);
+extern "C" void art_quick_throw_div_zero();
+extern "C" void art_quick_throw_no_such_method(int32_t method_idx);
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_throw_stack_overflow(void*);
+
+extern void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+
+void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
+                     PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
+  // Interpreter
+  ipoints->pInterpreterToInterpreterBridge = artInterpreterToInterpreterBridge;
+  ipoints->pInterpreterToCompiledCodeBridge = artInterpreterToCompiledCodeBridge;
+
+  // JNI
+  jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
+
+  // Portable
+  ppoints->pPortableResolutionTrampoline = art_portable_resolution_trampoline;
+  ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
+
+  // Alloc
+  ResetQuickAllocEntryPoints(qpoints);
+
+  // Cast
+  qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
+  qpoints->pCheckCast = art_quick_check_cast;
+
+  // DexCache
+  qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
+  qpoints->pInitializeTypeAndVerifyAccess = art_quick_initialize_type_and_verify_access;
+  qpoints->pInitializeType = art_quick_initialize_type;
+  qpoints->pResolveString = art_quick_resolve_string;
+
+  // Field
+  qpoints->pSet32Instance = art_quick_set32_instance;
+  qpoints->pSet32Static = art_quick_set32_static;
+  qpoints->pSet64Instance = art_quick_set64_instance;
+  qpoints->pSet64Static = art_quick_set64_static;
+  qpoints->pSetObjInstance = art_quick_set_obj_instance;
+  qpoints->pSetObjStatic = art_quick_set_obj_static;
+  qpoints->pGet32Instance = art_quick_get32_instance;
+  qpoints->pGet64Instance = art_quick_get64_instance;
+  qpoints->pGetObjInstance = art_quick_get_obj_instance;
+  qpoints->pGet32Static = art_quick_get32_static;
+  qpoints->pGet64Static = art_quick_get64_static;
+  qpoints->pGetObjStatic = art_quick_get_obj_static;
+
+  // Array
+  qpoints->pAputObjectWithNullAndBoundCheck = art_quick_aput_obj_with_null_and_bound_check;
+  qpoints->pAputObjectWithBoundCheck = art_quick_aput_obj_with_bound_check;
+  qpoints->pAputObject = art_quick_aput_obj;
+  qpoints->pHandleFillArrayData = art_quick_handle_fill_data;
+
+  // JNI
+  qpoints->pJniMethodStart = JniMethodStart;
+  qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
+  qpoints->pJniMethodEnd = JniMethodEnd;
+  qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
+  qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
+
+  // Locks
+  qpoints->pLockObject = art_quick_lock_object;
+  qpoints->pUnlockObject = art_quick_unlock_object;
+
+  // Math
+  // points->pCmpgDouble = NULL;  // Not needed on x86.
+  // points->pCmpgFloat = NULL;  // Not needed on x86.
+  // points->pCmplDouble = NULL;  // Not needed on x86.
+  // points->pCmplFloat = NULL;  // Not needed on x86.
+  qpoints->pFmod = art_quick_fmod;
+  // qpoints->pSqrt = NULL;  // Not needed on x86.
+  qpoints->pL2d = art_quick_l2d;
+  qpoints->pFmodf = art_quick_fmodf;
+  qpoints->pL2f = art_quick_l2f;
+  // points->pD2iz = NULL;  // Not needed on x86.
+  // points->pF2iz = NULL;  // Not needed on x86.
+  qpoints->pIdivmod = art_quick_idivmod;
+  qpoints->pD2l = art_quick_d2l;
+  qpoints->pF2l = art_quick_f2l;
+  qpoints->pLdiv = art_quick_ldiv;
+  qpoints->pLmod = art_quick_lmod;
+  qpoints->pLmul = art_quick_lmul;
+  qpoints->pShlLong = art_quick_lshl;
+  qpoints->pShrLong = art_quick_lshr;
+  qpoints->pUshrLong = art_quick_lushr;
+
+  // Intrinsics
+  qpoints->pIndexOf = art_quick_indexof;
+  qpoints->pMemcmp16 = art_quick_memcmp16;
+  qpoints->pStringCompareTo = art_quick_string_compareto;
+  qpoints->pMemcpy = art_quick_memcpy;
+
+  // Invocation
+  qpoints->pQuickImtConflictTrampoline = art_quick_imt_conflict_trampoline;
+  qpoints->pQuickResolutionTrampoline = art_quick_resolution_trampoline;
+  qpoints->pQuickToInterpreterBridge = art_quick_to_interpreter_bridge;
+  qpoints->pInvokeDirectTrampolineWithAccessCheck = art_quick_invoke_direct_trampoline_with_access_check;
+  qpoints->pInvokeInterfaceTrampolineWithAccessCheck = art_quick_invoke_interface_trampoline_with_access_check;
+  qpoints->pInvokeStaticTrampolineWithAccessCheck = art_quick_invoke_static_trampoline_with_access_check;
+  qpoints->pInvokeSuperTrampolineWithAccessCheck = art_quick_invoke_super_trampoline_with_access_check;
+  qpoints->pInvokeVirtualTrampolineWithAccessCheck = art_quick_invoke_virtual_trampoline_with_access_check;
+
+  // Thread
+  qpoints->pCheckSuspend = CheckSuspendFromCode;
+  qpoints->pTestSuspend = art_quick_test_suspend;
+
+  // Throws
+  qpoints->pDeliverException = art_quick_deliver_exception;
+  qpoints->pThrowArrayBounds = art_quick_throw_array_bounds;
+  qpoints->pThrowDivZero = art_quick_throw_div_zero;
+  qpoints->pThrowNoSuchMethod = art_quick_throw_no_such_method;
+  qpoints->pThrowNullPointer = art_quick_throw_null_pointer_exception;
+  qpoints->pThrowStackOverflow = art_quick_throw_stack_overflow;
+};
+
+}  // namespace art
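InitEntryPoints above fills per-thread tables of function pointers that compiled and interpreted code later dispatch through. A minimal sketch of that pattern, using invented names rather than ART's actual entry-point structs:

```cpp
#include <cstdint>
#include <cstdio>

// Minimal sketch of the entry-point-table pattern: the runtime populates a
// struct of function pointers once, and callers dispatch indirectly through it.
// The types and names here are hypothetical, not ART's.
struct MathEntryPoints {
  int64_t (*pLdiv)(int64_t, int64_t);
  int64_t (*pLmod)(int64_t, int64_t);
};

static int64_t SlowLdiv(int64_t a, int64_t b) { return a / b; }
static int64_t SlowLmod(int64_t a, int64_t b) { return a % b; }

static void InitMathEntryPoints(MathEntryPoints* points) {
  points->pLdiv = SlowLdiv;
  points->pLmod = SlowLmod;
}

int main() {
  MathEntryPoints points;
  InitMathEntryPoints(&points);
  std::printf("%lld %lld\n",
              static_cast<long long>(points.pLdiv(7, 2)),
              static_cast<long long>(points.pLmod(7, 2)));  // prints: 3 1
  return 0;
}
```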
diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
new file mode 100644
index 0000000..35fcccb
--- /dev/null
+++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86_64.S"
+
+    /*
+     * Jni dlsym lookup stub.
+     */
+UNIMPLEMENTED art_jni_dlsym_lookup_stub
diff --git a/runtime/arch/x86_64/portable_entrypoints_x86_64.S b/runtime/arch/x86_64/portable_entrypoints_x86_64.S
new file mode 100644
index 0000000..2e9d19a
--- /dev/null
+++ b/runtime/arch/x86_64/portable_entrypoints_x86_64.S
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86_64.S"
+
+    /*
+     * Portable invocation stub.
+     */
+UNIMPLEMENTED art_portable_invoke_stub
+
+UNIMPLEMENTED art_portable_proxy_invoke_handler
+
+UNIMPLEMENTED art_portable_resolution_trampoline
+
+UNIMPLEMENTED art_portable_to_interpreter_bridge
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
new file mode 100644
index 0000000..e01a31b
--- /dev/null
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "asm_support_x86_64.S"
+
+// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveAll)
+     */
+MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
+     */
+MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
+     */
+MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+    int3
+    int3
+END_MACRO
+
+MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
+    int3
+END_MACRO
+
+    /*
+     * Macro that sets up a call through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     */
+MACRO0(DELIVER_PENDING_EXCEPTION)
+    int3
+    int3
+END_MACRO
+
+MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+    /*
+     * Called by managed code to create and deliver a NullPointerException.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
+
+    /*
+     * Called by managed code to create and deliver an ArithmeticException.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
+
+    /*
+     * Called by managed code to create and deliver a StackOverflowError.
+     */
+NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
+
+    /*
+     * Called by managed code, saves callee saves and then calls artThrowException
+     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
+
+    /*
+     * Called by managed code to create and deliver a NoSuchMethodError.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
+
+    /*
+     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
+     * index, arg2 holds limit.
+     */
+TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
+
+    /*
+     * All generated callsites for interface invokes and invocation slow paths will load arguments
+     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
+     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
+     * stack and call the appropriate C helper.
+     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
+     *
+     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
+     * of the target Method* in r0 and method->code_ in r1.
+     *
+     * If unsuccessful, the helper will return NULL/NULL. There will be a pending exception in the
+     * thread and we branch to another stub to deliver it.
+     *
+     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
+     * pointing back to the original caller.
+     */
+MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
+INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
+
+INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
+INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
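The trampoline stubs above are still unimplemented (int3), but the contract described in the preceding comment is worth spelling out: the C helper either returns the resolved method together with its code pointer, or a null pair with an exception left pending on the thread, in which case the stub branches to exception delivery. A hedged sketch of that contract in C++, with made-up types:

```cpp
#include <cstdio>

// Sketch only: models the method/code pair described in the INVOKE_TRAMPOLINE
// comment above. FakeMethod and ResolveTarget are invented for the example.
struct FakeMethod { const char* name; };

struct ResolveResult {
  FakeMethod* method;  // null on failure
  void* code;          // null on failure
};

static ResolveResult ResolveTarget(bool found) {
  static FakeMethod target{"target"};
  static char fake_code;                  // stands in for method->code_
  if (!found) {
    return {nullptr, nullptr};            // exception left pending on the thread
  }
  return {&target, static_cast<void*>(&fake_code)};
}

int main() {
  ResolveResult r = ResolveTarget(true);
  if (r.method == nullptr) {
    std::puts("deliver pending exception");
  } else {
    std::printf("restore arguments and jump to code of %s\n", r.method->name);
  }
  return 0;
}
```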
+
+    /*
+     * Quick invocation stub.
+     */
+DEFINE_FUNCTION art_quick_invoke_stub
+    int3
+    int3
+END_FUNCTION art_quick_invoke_stub
+
+MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    int3
+    int3
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
+    int3
+    testl %eax, %eax               // eax == 0 ?
+    jz  1f                         // if eax == 0 goto 1
+    ret                            // return
+1:                                 // deliver exception on current thread
+    DELIVER_PENDING_EXCEPTION
+END_MACRO
+
+MACRO0(RETURN_IF_EAX_ZERO)
+    int3
+    testl %eax, %eax               // eax == 0 ?
+    jnz  1f                        // if eax != 0 goto 1
+    ret                            // return
+1:                                 // deliver exception on current thread
+    DELIVER_PENDING_EXCEPTION
+END_MACRO
+
+MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
+    int3
+    int3
+    DELIVER_PENDING_EXCEPTION
+END_MACRO
+
+// Generate the allocation entrypoints for each allocator.
+// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
+// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
+// to macros and the VAR macro won't concatenate arguments properly); this also breaks having
+// multi-line macros that use each other (hence the one macro invocation per line below).
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
+#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
+  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
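As the TODO comment above explains, each GENERATE_ALLOC_ENTRYPOINTS_* macro relies on C-preprocessor token pasting to stamp out one stub per allocator suffix. A toy C++ demonstration of the same pattern, with names invented for the example:

```cpp
#include <cstdio>

// Toy demonstration of the token-pasting used by GENERATE_ALLOC_ENTRYPOINTS_*:
// one macro invocation per line produces one function per allocator suffix.
// The alloc_object* names below are made up for illustration.
#define GENERATE_ALLOC_STUB(c_suffix)          \
  static void alloc_object##c_suffix() {       \
    std::puts("alloc_object" #c_suffix);       \
  }

GENERATE_ALLOC_STUB(_dlmalloc)
GENERATE_ALLOC_STUB(_rosalloc)
GENERATE_ALLOC_STUB(_tlab)

int main() {
  alloc_object_dlmalloc();
  alloc_object_rosalloc();
  alloc_object_tlab();
  return 0;
}
```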
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
+GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
+
+TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO
+
+TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
+
+DEFINE_FUNCTION art_quick_lock_object
+    int3
+    int3
+END_FUNCTION art_quick_lock_object
+
+DEFINE_FUNCTION art_quick_unlock_object
+    int3
+    int3
+END_FUNCTION art_quick_unlock_object
+
+DEFINE_FUNCTION art_quick_is_assignable
+    int3
+    int3
+END_FUNCTION art_quick_is_assignable
+
+DEFINE_FUNCTION art_quick_check_cast
+    int3
+    int3
+END_FUNCTION art_quick_check_cast
+
+    /*
+     * Entry from managed code for array put operations of objects where the value being stored
+     * needs to be checked for compatibility.
+     * eax = array, ecx = index, edx = value
+     */
+UNIMPLEMENTED art_quick_aput_obj_with_null_and_bound_check
+UNIMPLEMENTED art_quick_aput_obj_with_bound_check
+UNIMPLEMENTED art_quick_aput_obj
+UNIMPLEMENTED art_quick_memcpy
+
+NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
+
+UNIMPLEMENTED art_quick_fmod
+UNIMPLEMENTED art_quick_fmodf
+UNIMPLEMENTED art_quick_l2d
+UNIMPLEMENTED art_quick_l2f
+UNIMPLEMENTED art_quick_d2l
+UNIMPLEMENTED art_quick_f2l
+UNIMPLEMENTED art_quick_idivmod
+UNIMPLEMENTED art_quick_ldiv
+UNIMPLEMENTED art_quick_lmod
+UNIMPLEMENTED art_quick_lmul
+UNIMPLEMENTED art_quick_lshl
+UNIMPLEMENTED art_quick_lshr
+UNIMPLEMENTED art_quick_lushr
+UNIMPLEMENTED art_quick_set32_instance
+UNIMPLEMENTED art_quick_set64_instance
+UNIMPLEMENTED art_quick_set_obj_instance
+UNIMPLEMENTED art_quick_get32_instance
+UNIMPLEMENTED art_quick_get64_instance
+UNIMPLEMENTED art_quick_get_obj_instance
+UNIMPLEMENTED art_quick_set32_static
+UNIMPLEMENTED art_quick_set64_static
+UNIMPLEMENTED art_quick_set_obj_static
+UNIMPLEMENTED art_quick_get32_static
+UNIMPLEMENTED art_quick_get64_static
+UNIMPLEMENTED art_quick_get_obj_static
+UNIMPLEMENTED art_quick_proxy_invoke_handler
+
+    /*
+     * Called to resolve an imt conflict.
+     */
+UNIMPLEMENTED art_quick_imt_conflict_trampoline
+UNIMPLEMENTED art_quick_resolution_trampoline
+UNIMPLEMENTED art_quick_to_interpreter_bridge
+
+    /*
+     * Routine that intercepts method calls and returns.
+     */
+UNIMPLEMENTED art_quick_instrumentation_entry
+UNIMPLEMENTED art_quick_instrumentation_exit
+
+    /*
+     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
+     * will long jump to the upcall with a special exception of -1.
+     */
+UNIMPLEMENTED art_quick_deoptimize
+
+UNIMPLEMENTED art_quick_indexof
+UNIMPLEMENTED art_quick_string_compareto
+UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc
new file mode 100644
index 0000000..38f3494
--- /dev/null
+++ b/runtime/arch/x86_64/registers_x86_64.cc
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "registers_x86_64.h"
+
+#include <ostream>
+
+namespace art {
+namespace x86_64 {
+
+static const char* kRegisterNames[] = {
+  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
+  "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+};
+std::ostream& operator<<(std::ostream& os, const Register& rhs) {
+  if (rhs >= RAX && rhs <= R15) {
+    os << kRegisterNames[rhs];
+  } else {
+    os << "Register[" << static_cast<int>(rhs) << "]";
+  }
+  return os;
+}
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h
new file mode 100644
index 0000000..9808d91
--- /dev/null
+++ b/runtime/arch/x86_64/registers_x86_64.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_REGISTERS_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_REGISTERS_X86_64_H_
+
+#include <iosfwd>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "globals.h"
+
+namespace art {
+namespace x86_64 {
+
+enum Register {
+  RAX = 0,
+  RCX = 1,
+  RDX = 2,
+  RBX = 3,
+  RSP = 4,
+  RBP = 5,
+  RSI = 6,
+  RDI = 7,
+  R8  = 8,
+  R9  = 9,
+  R10 = 10,
+  R11 = 11,
+  R12 = 12,
+  R13 = 13,
+  R14 = 14,
+  R15 = 15,
+  kNumberOfCpuRegisters = 16,
+  kNoRegister = -1  // Signals an illegal register.
+};
+std::ostream& operator<<(std::ostream& os, const Register& rhs);
+
+}  // namespace x86_64
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_64_REGISTERS_X86_64_H_
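
The new Register enum and its stream operator (defined in registers_x86_64.cc above) print the canonical name for RAX..R15 and fall back to a bracketed integer for anything out of range. A minimal usage sketch, assuming the ART include paths are configured:

    #include <sstream>
    #include <string>
    #include "arch/x86_64/registers_x86_64.h"

    // Produces "rdi Register[17]": a named register plus an out-of-range value.
    std::string DescribeRegisters() {
      std::ostringstream os;
      os << art::x86_64::RDI << " " << static_cast<art::x86_64::Register>(17);
      return os.str();
    }
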
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
new file mode 100644
index 0000000..9e45a72
--- /dev/null
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "thread.h"
+
+#include "asm_support_x86_64.h"
+#include "base/macros.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+namespace art {
+
+static void arch_prctl(int code, void* val) {
+  syscall(__NR_arch_prctl, code, val);
+}
+void Thread::InitCpu() {
+  static Mutex modify_ldt_lock("modify_ldt lock");
+  MutexLock mu(Thread::Current(), modify_ldt_lock);
+  arch_prctl(ARCH_SET_GS, this);
+
+  // Allow easy indirection back to Thread*.
+  self_ = this;
+
+  // Sanity check that reads from %gs point to this Thread*.
+  Thread* self_check;
+  CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
+  __asm__ __volatile__("movq %%gs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Sanity check other offsets.
+  CHECK_EQ(THREAD_EXCEPTION_OFFSET, OFFSETOF_MEMBER(Thread, exception_));
+  CHECK_EQ(THREAD_CARD_TABLE_OFFSET, OFFSETOF_MEMBER(Thread, card_table_));
+  CHECK_EQ(THREAD_ID_OFFSET, OFFSETOF_MEMBER(Thread, thin_lock_thread_id_));
+}
+
+void Thread::CleanupCpu() {
+  // Sanity check that reads from %gs point to this Thread*.
+  Thread* self_check;
+  CHECK_EQ(THREAD_SELF_OFFSET, OFFSETOF_MEMBER(Thread, self_));
+  __asm__ __volatile__("movq %%gs:(%1), %0"
+      : "=r"(self_check)  // output
+      : "r"(THREAD_SELF_OFFSET)  // input
+      :);  // clobber
+  CHECK_EQ(self_check, this);
+
+  // Do nothing.
+}
+
+}  // namespace art
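
InitCpu() points the GS segment base at the Thread object via arch_prctl(ARCH_SET_GS, this), so assembly stubs and compiled code can recover the current thread with a single %gs-relative load; the CHECK_EQ calls keep the hand-written offsets in sync with the C++ layout. A self-contained Linux/x86-64 sketch of the same round trip, independent of ART's Thread layout:

    #include <asm/prctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cassert>

    struct FakeThread { FakeThread* self; };  // stand-in for art::Thread; offset 0 plays the role of THREAD_SELF_OFFSET

    int main() {
      FakeThread t;
      t.self = &t;
      syscall(__NR_arch_prctl, ARCH_SET_GS, &t);                    // point %gs at the thread object
      FakeThread* self_check;
      __asm__ __volatile__("movq %%gs:0, %0" : "=r"(self_check));   // read it back through the segment
      assert(self_check == &t);
      return 0;
    }
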
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 06c7b53..4c42099 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -40,6 +40,7 @@
 
 // Offsets within java.lang.Method.
 #define METHOD_DEX_CACHE_METHODS_OFFSET 12
-#define METHOD_CODE_OFFSET 36
+#define METHOD_PORTABLE_CODE_OFFSET 40
+#define METHOD_QUICK_CODE_OFFSET 48
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index bac0a99..63f2cf8 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -24,7 +24,7 @@
 std::vector<Mutex*>* QuasiAtomic::gSwapMutexes = nullptr;
 
 Mutex* QuasiAtomic::GetSwapMutex(const volatile int64_t* addr) {
-  return (*gSwapMutexes)[(reinterpret_cast<unsigned>(addr) >> 3U) % kSwapMutexCount];
+  return (*gSwapMutexes)[(reinterpret_cast<uintptr_t>(addr) >> 3U) % kSwapMutexCount];
 }
 
 void QuasiAtomic::Startup() {
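
The switch from reinterpret_cast<unsigned> to reinterpret_cast<uintptr_t> is what makes GetSwapMutex() build for 64-bit targets: unsigned is 32 bits there, so the old cast from a pointer does not even compile, and a forced narrowing would discard the high half of the address. The corrected bucketing, with the mutex count assumed for illustration:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kSwapMutexCount = 32;  // assumed value, for illustration only

    // Hash an address to a mutex bucket, ignoring the low 3 bits (int64_t alignment).
    size_t SwapMutexIndex(const volatile int64_t* addr) {
      return (reinterpret_cast<uintptr_t>(addr) >> 3U) % kSwapMutexCount;
    }
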
diff --git a/runtime/atomic.h b/runtime/atomic.h
index b1e9870..2a47e46 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -26,6 +26,69 @@
 
 class Mutex;
 
+template<typename T>
+class Atomic {
+ public:
+  Atomic<T>() : value_(0) { }
+
+  explicit Atomic<T>(T value) : value_(value) { }
+
+  Atomic<T>& operator=(T desired) {
+    Store(desired);
+    return *this;
+  }
+
+  T Load() const {
+    return value_;
+  }
+
+  operator T() const {
+    return Load();
+  }
+
+  T FetchAndAdd(const T value) {
+    return __sync_fetch_and_add(&value_, value);  // Return old_value.
+  }
+
+  T FetchAndSub(const T value) {
+    return __sync_fetch_and_sub(&value_, value);  // Return old value.
+  }
+
+  T operator++() {  // Prefix operator.
+    return __sync_add_and_fetch(&value_, 1);  // Return new value.
+  }
+
+  T operator++(int) {  // Postfix operator.
+    return __sync_fetch_and_add(&value_, 1);  // Return old value.
+  }
+
+  T operator--() {  // Prefix operator.
+    return __sync_sub_and_fetch(&value_, 1);  // Return new value.
+  }
+
+  T operator--(int) {  // Postfix operator.
+    return __sync_fetch_and_sub(&value_, 1);  // Return old value.
+  }
+
+  bool CompareAndSwap(T expected_value, T desired_value) {
+    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
+  }
+
+  volatile T* Address() {
+    return &value_;
+  }
+
+ private:
+  // Unsafe = operator for non-atomic operations on the integer.
+  void Store(T desired) {
+    value_ = desired;
+  }
+
+  volatile T value_;
+};
+
+typedef Atomic<int32_t> AtomicInteger;
+
 // NOTE: Two "quasiatomic" operations on the exact same memory address
 // are guaranteed to operate atomically with respect to each other,
 // but no guarantees are made about quasiatomic operations mixed with
@@ -80,7 +143,7 @@
   static void MembarLoadStore() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ish" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
@@ -92,7 +155,7 @@
   static void MembarLoadLoad() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ish" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
@@ -104,7 +167,7 @@
   static void MembarStoreStore() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ishst" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
@@ -116,7 +179,7 @@
   static void MembarStoreLoad() {
   #if defined(__arm__)
     __asm__ __volatile__("dmb ish" : : : "memory");
-  #elif defined(__i386__)
+  #elif defined(__i386__) || defined(__x86_64__)
     __asm__ __volatile__("mfence" : : : "memory");
   #elif defined(__mips__)
     __asm__ __volatile__("sync" : : : "memory");
diff --git a/runtime/atomic_integer.h b/runtime/atomic_integer.h
deleted file mode 100644
index 651ca4a..0000000
--- a/runtime/atomic_integer.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_ATOMIC_INTEGER_H_
-#define ART_RUNTIME_ATOMIC_INTEGER_H_
-
-#include <stdint.h>
-
-namespace art {
-
-class AtomicInteger {
- public:
-  AtomicInteger() : value_(0) { }
-
-  explicit AtomicInteger(int32_t value) : value_(value) { }
-
-  AtomicInteger& operator=(int32_t desired) {
-    Store(desired);
-    return *this;
-  }
-
-  int32_t Load() const {
-    return value_;
-  }
-
-  operator int32_t() const {
-    return Load();
-  }
-
-  int32_t FetchAndAdd(const int32_t value) {
-    return __sync_fetch_and_add(&value_, value);  // Return old_value.
-  }
-
-  int32_t FetchAndSub(const int32_t value) {
-    return __sync_fetch_and_sub(&value_, value);  // Return old value.
-  }
-
-  int32_t operator++() {  // Prefix operator.
-    return __sync_add_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  int32_t operator++(int32_t) {  // Postfix operator.
-    return __sync_fetch_and_add(&value_, 1);  // Return old value.
-  }
-
-  int32_t operator--() {  // Prefix operator.
-    return __sync_sub_and_fetch(&value_, 1);  // Return new value.
-  }
-
-  int32_t operator--(int32_t) {  // Postfix operator.
-    return __sync_fetch_and_sub(&value_, 1);  // Return old value.
-  }
-
-  bool CompareAndSwap(int32_t expected_value, int32_t desired_value) {
-    return __sync_bool_compare_and_swap(&value_, expected_value, desired_value);
-  }
-
-  volatile int32_t* Address() {
-    return &value_;
-  }
-
- private:
-  // Unsafe = operator for non atomic operations on the integer.
-  void Store(int32_t desired) {
-    value_ = desired;
-  }
-
-  volatile int32_t value_;
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_ATOMIC_INTEGER_H_
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index 91fc143..69951c5 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -18,7 +18,7 @@
 
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "common_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
diff --git a/runtime/base/bit_vector_test.cc b/runtime/base/bit_vector_test.cc
index d99d059..3fc9b86 100644
--- a/runtime/base/bit_vector_test.cc
+++ b/runtime/base/bit_vector_test.cc
@@ -25,7 +25,7 @@
 
   BitVector bv(kBits, false, Allocator::GetMallocAllocator());
   EXPECT_EQ(1U, bv.GetStorageSize());
-  EXPECT_EQ(kWordSize, bv.GetSizeOf());
+  EXPECT_EQ(sizeof(uint32_t), bv.GetSizeOf());
   EXPECT_FALSE(bv.IsExpandable());
 
   EXPECT_EQ(0U, bv.NumSetBits());
@@ -70,7 +70,7 @@
 
   BitVector bv(0U, false, Allocator::GetNoopAllocator(), kWords, bits);
   EXPECT_EQ(kWords, bv.GetStorageSize());
-  EXPECT_EQ(kWords * kWordSize, bv.GetSizeOf());
+  EXPECT_EQ(kWords * sizeof(uint32_t), bv.GetSizeOf());
   EXPECT_EQ(bits, bv.GetRawStorage());
   EXPECT_EQ(0U, bv.NumSetBits());
 
diff --git a/runtime/base/hex_dump.cc b/runtime/base/hex_dump.cc
new file mode 100644
index 0000000..936c52b
--- /dev/null
+++ b/runtime/base/hex_dump.cc
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hex_dump.h"
+
+#include "globals.h"
+
+#include <string.h>
+
+namespace art {
+
+void HexDump::Dump(std::ostream& os) const {
+  if (byte_count_ == 0) {
+    return;
+  }
+
+  if (address_ == NULL) {
+    os << "00000000:";
+    return;
+  }
+
+  static const char gHexDigit[] = "0123456789abcdef";
+  const unsigned char* addr = reinterpret_cast<const unsigned char*>(address_);
+  // 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+  char out[(kBitsPerWord / 4) + /* offset */
+           1 + /* colon */
+           (16 * 3) + /* 16 hex digits and space */
+           2 + /* white space */
+           16 + /* 16 characters */
+           1 /* \0 */ ];
+  size_t offset;    /* offset to show while printing */
+
+  if (show_actual_addresses_) {
+    offset = reinterpret_cast<size_t>(addr);
+  } else {
+    offset = 0;
+  }
+  memset(out, ' ', sizeof(out)-1);
+  out[kBitsPerWord / 4] = ':';
+  out[sizeof(out)-1] = '\0';
+
+  size_t byte_count = byte_count_;
+  size_t gap = offset & 0x0f;
+  while (byte_count > 0) {
+    size_t line_offset = offset & ~0x0f;
+
+    char* hex = out;
+    char* asc = out + (kBitsPerWord / 4) + /* offset */ 1 + /* colon */
+        (16 * 3) + /* 16 hex digits and space */ 2 /* white space */;
+
+    for (int i = 0; i < (kBitsPerWord / 4); i++) {
+      *hex++ = gHexDigit[line_offset >> (kBitsPerWord - 4)];
+      line_offset <<= 4;
+    }
+    hex++;
+    hex++;
+
+    size_t count = std::min(byte_count, 16 - gap);
+    // CHECK_NE(count, 0U);
+    // CHECK_LE(count + gap, 16U);
+
+    if (gap) {
+      /* only on first line */
+      hex += gap * 3;
+      asc += gap;
+    }
+
+    size_t i;
+    for (i = gap ; i < count + gap; i++) {
+      *hex++ = gHexDigit[*addr >> 4];
+      *hex++ = gHexDigit[*addr & 0x0f];
+      hex++;
+      if (*addr >= 0x20 && *addr < 0x7f /*isprint(*addr)*/) {
+        *asc++ = *addr;
+      } else {
+        *asc++ = '.';
+      }
+      addr++;
+    }
+    for (; i < 16; i++) {
+      /* erase extra stuff; only happens on last line */
+      *hex++ = ' ';
+      *hex++ = ' ';
+      hex++;
+      *asc++ = ' ';
+    }
+
+    os << prefix_ << out;
+
+    gap = 0;
+    byte_count -= count;
+    offset += count;
+    if (byte_count > 0) {
+      os << "\n";
+    }
+  }
+}
+
+}  // namespace art
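
The output buffer is now sized from kBitsPerWord (the pointer width in bits, per globals.h), so the offset column widens with the word size: 8 + 1 + 48 + 2 + 16 + 1 = 76 characters on a 32-bit build, matching the "exact fit" 76 hard-coded in the logging.cc version removed further down, and 84 on a 64-bit build. A compile-time restatement of that arithmetic:

    #include <cstddef>

    // offset digits + ':' + 16 "xx " triples + two spaces + 16 ASCII chars + NUL
    constexpr size_t HexDumpLineSize(size_t bits_per_word) {
      return (bits_per_word / 4) + 1 + (16 * 3) + 2 + 16 + 1;
    }
    static_assert(HexDumpLineSize(32) == 76, "matches the old fixed-size buffer");
    static_assert(HexDumpLineSize(64) == 84, "wider offset column on 64-bit hosts");
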
diff --git a/runtime/base/hex_dump.h b/runtime/base/hex_dump.h
new file mode 100644
index 0000000..8769ece
--- /dev/null
+++ b/runtime/base/hex_dump.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_HEX_DUMP_H_
+#define ART_RUNTIME_BASE_HEX_DUMP_H_
+
+#include "macros.h"
+
+#include <ostream>
+
+namespace art {
+
+// Prints a hex dump in this format:
+//
+// 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+// 01234568: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
+class HexDump {
+ public:
+  HexDump(const void* address, size_t byte_count, bool show_actual_addresses, const char* prefix)
+      : address_(address), byte_count_(byte_count), show_actual_addresses_(show_actual_addresses),
+        prefix_(prefix) {
+  }
+
+  void Dump(std::ostream& os) const;
+
+ private:
+  const void* const address_;
+  const size_t byte_count_;
+  const bool show_actual_addresses_;
+  const char* const prefix_;
+
+  DISALLOW_COPY_AND_ASSIGN(HexDump);
+};
+
+inline std::ostream& operator<<(std::ostream& os, const HexDump& rhs) {
+  rhs.Dump(os);
+  return os;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_HEX_DUMP_H_
diff --git a/runtime/base/hex_dump_test.cc b/runtime/base/hex_dump_test.cc
new file mode 100644
index 0000000..d950961
--- /dev/null
+++ b/runtime/base/hex_dump_test.cc
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "hex_dump.h"
+
+#include "globals.h"
+
+#include "gtest/gtest.h"
+
+#include <stdint.h>
+
+namespace art {
+
+TEST(HexDump, OneLine) {
+  const char* test_text = "0123456789abcdef";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "");
+  EXPECT_STREQ(oss.str().c_str(),
+               "00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  0123456789abcdef");
+}
+
+TEST(HexDump, MultiLine) {
+  const char* test_text = "0123456789abcdef0123456789ABCDEF";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "");
+  EXPECT_STREQ(oss.str().c_str(),
+               "00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  0123456789abcdef\n"
+               "00000010: 30 31 32 33 34 35 36 37 38 39 41 42 43 44 45 46  0123456789ABCDEF");
+}
+
+uint64_t g16byte_aligned_number __attribute__ ((aligned(16)));  // NOLINT(whitespace/parens)
+TEST(HexDump, ShowActualAddresses) {
+  g16byte_aligned_number = 0x6162636465666768;
+  std::ostringstream oss;
+  oss << HexDump(&g16byte_aligned_number, 8, true, "");
+  // Compare ignoring pointer.
+  EXPECT_STREQ(oss.str().c_str() + (kBitsPerWord / 4),
+               ": 68 67 66 65 64 63 62 61                          hgfedcba        ");
+}
+
+TEST(HexDump, Prefix) {
+  const char* test_text = "0123456789abcdef";
+  std::ostringstream oss;
+  oss << HexDump(test_text, strlen(test_text), false, "test prefix: ");
+  EXPECT_STREQ(oss.str().c_str(),
+               "test prefix: 00000000: 30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66  "
+               "0123456789abcdef");
+}
+
+}  // namespace art
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 3aabc8d..46b8ff2 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -161,90 +161,4 @@
   }
 }
 
-HexDump::HexDump(const void* address, size_t byte_count, bool show_actual_addresses)
-    : address_(address), byte_count_(byte_count), show_actual_addresses_(show_actual_addresses) {
-}
-
-void HexDump::Dump(std::ostream& os) const {
-  if (byte_count_ == 0) {
-    return;
-  }
-
-  if (address_ == NULL) {
-    os << "00000000:";
-    return;
-  }
-
-  static const char gHexDigit[] = "0123456789abcdef";
-  const unsigned char* addr = reinterpret_cast<const unsigned char*>(address_);
-  char out[76];           /* exact fit */
-  unsigned int offset;    /* offset to show while printing */
-
-  if (show_actual_addresses_) {
-    offset = reinterpret_cast<int>(addr);
-  } else {
-    offset = 0;
-  }
-  memset(out, ' ', sizeof(out)-1);
-  out[8] = ':';
-  out[sizeof(out)-1] = '\0';
-
-  size_t byte_count = byte_count_;
-  int gap = static_cast<int>(offset & 0x0f);
-  while (byte_count) {
-    unsigned int line_offset = offset & ~0x0f;
-
-    char* hex = out;
-    char* asc = out + 59;
-
-    for (int i = 0; i < 8; i++) {
-      *hex++ = gHexDigit[line_offset >> 28];
-      line_offset <<= 4;
-    }
-    hex++;
-    hex++;
-
-    int count = std::min(static_cast<int>(byte_count), 16 - gap);
-    CHECK_NE(count, 0);
-    CHECK_LE(count + gap, 16);
-
-    if (gap) {
-      /* only on first line */
-      hex += gap * 3;
-      asc += gap;
-    }
-
-    int i;
-    for (i = gap ; i < count+gap; i++) {
-      *hex++ = gHexDigit[*addr >> 4];
-      *hex++ = gHexDigit[*addr & 0x0f];
-      hex++;
-      if (*addr >= 0x20 && *addr < 0x7f /*isprint(*addr)*/) {
-        *asc++ = *addr;
-      } else {
-        *asc++ = '.';
-      }
-      addr++;
-    }
-    for (; i < 16; i++) {
-      /* erase extra stuff; only happens on last line */
-      *hex++ = ' ';
-      *hex++ = ' ';
-      hex++;
-      *asc++ = ' ';
-    }
-
-    os << out;
-
-    gap = 0;
-    byte_count -= count;
-    offset += count;
-  }
-}
-
-std::ostream& operator<<(std::ostream& os, const HexDump& rhs) {
-  rhs.Dump(os);
-  return os;
-}
-
 }  // namespace art
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 8e40da0..075d571 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -208,24 +208,6 @@
   DISALLOW_COPY_AND_ASSIGN(LogMessage);
 };
 
-// Prints a hex dump in this format:
-//
-// 01234560: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-// 01234568: 00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff  0123456789abcdef
-class HexDump {
- public:
-  HexDump(const void* address, size_t byte_count, bool show_actual_addresses = false);
-  void Dump(std::ostream& os) const;
-
- private:
-  const void* address_;
-  size_t byte_count_;
-  bool show_actual_addresses_;
-
-  DISALLOW_COPY_AND_ASSIGN(HexDump);
-};
-std::ostream& operator<<(std::ostream& os, const HexDump& rhs);
-
 // A convenience to allow any class with a "Dump(std::ostream& os)" member function
 // but without an operator<< to be used as if it had an operator<<. Use like this:
 //
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 29b3981..a7e25cb 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_BASE_MUTEX_INL_H_
 #define ART_RUNTIME_BASE_MUTEX_INL_H_
 
+#include <inttypes.h>
+
 #include "mutex.h"
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
@@ -96,7 +98,7 @@
         blocked_tid_(kLogLockContentions ? blocked_tid : 0),
         owner_tid_(kLogLockContentions ? owner_tid : 0),
         start_nano_time_(kLogLockContentions ? NanoTime() : 0) {
-    std::string msg = StringPrintf("Lock contention on %s (owner tid: %llu)",
+    std::string msg = StringPrintf("Lock contention on %s (owner tid: %" PRIu64 ")",
                                    mutex->GetName(), owner_tid);
     ATRACE_BEGIN(msg.c_str());
   }
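
owner_tid is a uint64_t, so the portable way to feed it to StringPrintf is the <inttypes.h> PRIu64 macro rather than a hard-coded %llu, whose meaning depends on how the ABI defines long long versus uint64_t. The same idea with plain printf:

    #include <cinttypes>
    #include <cstdio>

    void PrintOwner(uint64_t owner_tid) {
      // PRIu64 expands to the correct length modifier for this platform's uint64_t.
      printf("Lock contention (owner tid: %" PRIu64 ")\n", owner_tid);
    }
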
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 05e3a83..ff72d16 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -47,7 +47,7 @@
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
-  AtomicInteger all_mutexes_guard;
+  Atomic<const BaseMutex*> all_mutexes_guard;
   // All created mutexes guarded by all_mutexes_guard_.
   std::set<BaseMutex*>* all_mutexes;
   AllMutexData() : all_mutexes(NULL) {}
@@ -57,12 +57,12 @@
 class ScopedAllMutexesLock {
  public:
   explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, reinterpret_cast<int32_t>(mutex))) {
+    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(0, mutex)) {
       NanoSleep(100);
     }
   }
   ~ScopedAllMutexesLock() {
-    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(reinterpret_cast<int32_t>(mutex_), 0)) {
+    while (!gAllMutexData->all_mutexes_guard.CompareAndSwap(mutex_, 0)) {
       NanoSleep(100);
     }
   }
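
ScopedAllMutexesLock treats the guard as a tiny spin lock: acquire by CAS-ing it from null to the current mutex pointer, release by CAS-ing it back, sleeping 100ns between attempts. Storing a pointer in Atomic<const BaseMutex*> removes the old reinterpret_cast<int32_t> that truncated pointers on 64-bit hosts. The same idiom, sketched with std::atomic so it stands alone:

    #include <atomic>
    #include <chrono>
    #include <thread>

    std::atomic<const void*> g_guard{nullptr};

    void AcquireGuard(const void* owner) {
      const void* expected = nullptr;
      while (!g_guard.compare_exchange_weak(expected, owner)) {
        expected = nullptr;  // compare_exchange overwrites 'expected' on failure
        std::this_thread::sleep_for(std::chrono::nanoseconds(100));
      }
    }

    void ReleaseGuard(const void* owner) {
      const void* expected = owner;
      g_guard.compare_exchange_strong(expected, nullptr);
    }
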
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 1c1dcaf..63ed6cb 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -23,7 +23,7 @@
 #include <iosfwd>
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index f48c76d..87d1c06 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -102,11 +102,11 @@
   return fd_ >= 0;
 }
 
-bool FdFile::ReadFully(void* buffer, int64_t byte_count) {
+bool FdFile::ReadFully(void* buffer, size_t byte_count) {
   char* ptr = static_cast<char*>(buffer);
   while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(read(fd_, ptr, byte_count));
-    if (bytes_read <= 0) {
+    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd_, ptr, byte_count));
+    if (bytes_read == -1) {
       return false;
     }
     byte_count -= bytes_read;  // Reduce the number of remaining bytes.
@@ -115,15 +115,15 @@
   return true;
 }
 
-bool FdFile::WriteFully(const void* buffer, int64_t byte_count) {
+bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
   const char* ptr = static_cast<const char*>(buffer);
   while (byte_count > 0) {
-    int bytes_read = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
-    if (bytes_read < 0) {
+    ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
+    if (bytes_written == -1) {
       return false;
     }
-    byte_count -= bytes_read;  // Reduce the number of remaining bytes.
-    ptr += bytes_read;  // Move the buffer forward.
+    byte_count -= bytes_written;  // Reduce the number of remaining bytes.
+    ptr += bytes_written;  // Move the buffer forward.
   }
   return true;
 }
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 19e3511..01f4ca2 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -61,8 +61,8 @@
     return file_path_;
   }
   void DisableAutoClose();
-  bool ReadFully(void* buffer, int64_t byte_count);
-  bool WriteFully(const void* buffer, int64_t byte_count);
+  bool ReadFully(void* buffer, size_t byte_count);
+  bool WriteFully(const void* buffer, size_t byte_count);
 
  private:
   int fd_;
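
ReadFully() and WriteFully() now take a size_t count and keep the read(2)/write(2) result in an ssize_t, instead of funnelling both through int/int64_t. A standalone helper with the same loop shape (no FdFile dependency); unlike the FdFile version, EOF is also treated as failure here so the sketch cannot spin on a file shorter than byte_count:

    #include <unistd.h>
    #include <cerrno>
    #include <cstddef>

    bool ReadFullyFromFd(int fd, void* buffer, size_t byte_count) {
      char* ptr = static_cast<char*>(buffer);
      while (byte_count > 0) {
        ssize_t bytes_read;
        do {  // equivalent of TEMP_FAILURE_RETRY
          bytes_read = read(fd, ptr, byte_count);
        } while (bytes_read == -1 && errno == EINTR);
        if (bytes_read <= 0) {
          return false;  // -1 is an error; 0 means EOF before 'byte_count' bytes arrived
        }
        byte_count -= bytes_read;
        ptr += bytes_read;
      }
      return true;
    }
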
diff --git a/runtime/base/unix_file/mapped_file.cc b/runtime/base/unix_file/mapped_file.cc
index b63fdd3..bc23a74 100644
--- a/runtime/base/unix_file/mapped_file.cc
+++ b/runtime/base/unix_file/mapped_file.cc
@@ -101,7 +101,8 @@
       errno = EINVAL;
       return -errno;
     }
-    int64_t read_size = std::max(0LL, std::min(byte_count, file_size_ - offset));
+    int64_t read_size = std::max(static_cast<int64_t>(0),
+                                 std::min(byte_count, file_size_ - offset));
     if (read_size > 0) {
       memcpy(buf, data() + offset, read_size);
     }
@@ -136,7 +137,8 @@
       errno = EINVAL;
       return -errno;
     }
-    int64_t write_size = std::max(0LL, std::min(byte_count, file_size_ - offset));
+    int64_t write_size = std::max(static_cast<int64_t>(0),
+                                  std::min(byte_count, file_size_ - offset));
     if (write_size > 0) {
       memcpy(data() + offset, buf, write_size);
     }
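
The std::max(0LL, ...) calls had to go because std::max deduces a single type from both arguments: on LP64 hosts int64_t is plain long, so mixing it with a long long literal fails to compile. Casting the zero to int64_t keeps both arguments the same type:

    #include <algorithm>
    #include <cstdint>

    int64_t ClampTransferSize(int64_t byte_count, int64_t file_size, int64_t offset) {
      // std::max(0LL, ...) would mix 'long long' and 'long' on LP64 and fail template deduction.
      return std::max(static_cast<int64_t>(0), std::min(byte_count, file_size - offset));
    }
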
diff --git a/runtime/base/unix_file/mapped_file_test.cc b/runtime/base/unix_file/mapped_file_test.cc
index 3dda02f..49750f4 100644
--- a/runtime/base/unix_file/mapped_file_test.cc
+++ b/runtime/base/unix_file/mapped_file_test.cc
@@ -65,7 +65,7 @@
   ASSERT_TRUE(file.Open(good_path_, MappedFile::kReadOnlyMode));
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
-  EXPECT_EQ(kContent.size(), file.size());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.size()));
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
@@ -86,7 +86,7 @@
   EXPECT_FALSE(file.IsMapped());
   EXPECT_TRUE(file.MapReadOnly());
   EXPECT_TRUE(file.IsMapped());
-  EXPECT_EQ(kContent.size(), file.size());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.size()));
   ASSERT_TRUE(file.data());
   EXPECT_EQ(0, memcmp(kContent.c_str(), file.data(), file.size()));
   EXPECT_EQ(0, file.Flush());
@@ -113,7 +113,7 @@
   ASSERT_TRUE(file.Open(new_path, MappedFile::kReadWriteMode));
   EXPECT_TRUE(file.MapReadWrite(kContent.size()));
   EXPECT_TRUE(file.IsMapped());
-  EXPECT_EQ(kContent.size(), file.size());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.size()));
   ASSERT_TRUE(file.data());
   memcpy(file.data(), kContent.c_str(), kContent.size());
   EXPECT_EQ(0, file.Close());
@@ -200,15 +200,16 @@
   // A zero-length write is a no-op.
   EXPECT_EQ(0, file.Write(kContent.c_str(), 0, 0));
   // But the file size is as given when mapped.
-  EXPECT_EQ(kContent.size(), file.GetLength());
+  EXPECT_EQ(kContent.size(), static_cast<uint64_t>(file.GetLength()));
 
   // Data written past the end are discarded.
   EXPECT_EQ(kContent.size() - 1,
-            file.Write(kContent.c_str(), kContent.size(), 1));
+            static_cast<uint64_t>(file.Write(kContent.c_str(), kContent.size(), 1)));
   EXPECT_EQ(0, memcmp(kContent.c_str(), file.data() + 1, kContent.size() - 1));
 
   // Data can be overwritten.
-  EXPECT_EQ(kContent.size(), file.Write(kContent.c_str(), kContent.size(), 0));
+  EXPECT_EQ(kContent.size(),
+            static_cast<uint64_t>(file.Write(kContent.c_str(), kContent.size(), 0)));
   EXPECT_EQ(0, memcmp(kContent.c_str(), file.data(), kContent.size()));
 }
 
diff --git a/runtime/base/unix_file/null_file_test.cc b/runtime/base/unix_file/null_file_test.cc
index 0f20acd..410fdfc 100644
--- a/runtime/base/unix_file/null_file_test.cc
+++ b/runtime/base/unix_file/null_file_test.cc
@@ -48,7 +48,7 @@
   NullFile f;
   // The length is always 0.
   ASSERT_EQ(0, f.GetLength());
-  ASSERT_EQ(content.size(), f.Write(content.data(), content.size(), 0));
+  ASSERT_EQ(content.size(), static_cast<uint64_t>(f.Write(content.data(), content.size(), 0)));
   ASSERT_EQ(0, f.GetLength());
 }
 
@@ -58,8 +58,8 @@
   // You can't write at a negative offset...
   ASSERT_EQ(-EINVAL, f.Write(content.data(), content.size(), -128));
   // But you can write anywhere else...
-  ASSERT_EQ(content.size(), f.Write(content.data(), content.size(), 0));
-  ASSERT_EQ(content.size(), f.Write(content.data(), content.size(), 128));
+  ASSERT_EQ(content.size(), static_cast<uint64_t>(f.Write(content.data(), content.size(), 0)));
+  ASSERT_EQ(content.size(), static_cast<uint64_t>(f.Write(content.data(), content.size(), 128)));
   // ...though the file will remain empty.
   ASSERT_EQ(0, f.GetLength());
 }
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 9d8550d..3152788 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -71,7 +71,7 @@
     ASSERT_EQ(0, file->Read(buf, 123, 0));
 
     const std::string content("hello");
-    ASSERT_EQ(content.size(), file->Write(content.data(), content.size(), 0));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
 
     TestReadContent(content, file.get());
   }
@@ -83,21 +83,21 @@
     ASSERT_EQ(-EINVAL, file->Read(buf.get(), 0, -123));
 
     // Reading too much gets us just what's in the file.
-    ASSERT_EQ(content.size(), file->Read(buf.get(), buf_size, 0));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Read(buf.get(), buf_size, 0)));
     ASSERT_EQ(std::string(buf.get(), content.size()), content);
 
     // We only get as much as we ask for.
     const size_t short_request = 2;
     ASSERT_LT(short_request, content.size());
-    ASSERT_EQ(short_request, file->Read(buf.get(), short_request, 0));
+    ASSERT_EQ(short_request, static_cast<uint64_t>(file->Read(buf.get(), short_request, 0)));
     ASSERT_EQ(std::string(buf.get(), short_request),
               content.substr(0, short_request));
 
     // We don't have to start at the beginning.
     const int non_zero_offset = 2;
     ASSERT_GT(non_zero_offset, 0);
-    ASSERT_EQ(short_request,
-              file->Read(buf.get(), short_request, non_zero_offset));
+    ASSERT_EQ(short_request, static_cast<uint64_t>(file->Read(buf.get(), short_request,
+                                                              non_zero_offset)));
     ASSERT_EQ(std::string(buf.get(), short_request),
               content.substr(non_zero_offset, short_request));
 
@@ -109,8 +109,8 @@
   void TestSetLength() {
     const std::string content("hello");
     UniquePtr<RandomAccessFile> file(MakeTestFile());
-    ASSERT_EQ(content.size(), file->Write(content.data(), content.size(), 0));
-    ASSERT_EQ(content.size(), file->GetLength());
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->GetLength()));
 
     // Can't give a file a negative length.
     ASSERT_EQ(-EINVAL, file->SetLength(-123));
@@ -143,20 +143,20 @@
     ASSERT_EQ(0, file->GetLength());
 
     // We can write data.
-    ASSERT_EQ(content.size(), file->Write(content.data(), content.size(), 0));
-    ASSERT_EQ(content.size(), file->GetLength());
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->GetLength()));
     std::string new_content;
     ASSERT_TRUE(ReadString(file.get(), &new_content));
     ASSERT_EQ(new_content, content);
 
     // We can read it back.
     char buf[256];
-    ASSERT_EQ(content.size(), file->Read(buf, sizeof(buf), 0));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Read(buf, sizeof(buf), 0)));
     ASSERT_EQ(std::string(buf, content.size()), content);
 
     // We can append data past the end.
-    ASSERT_EQ(content.size(),
-    file->Write(content.data(), content.size(), file->GetLength() + 1));
+    ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(),
+                                                                file->GetLength() + 1)));
     int64_t new_length = 2*content.size() + 1;
     ASSERT_EQ(file->GetLength(), new_length);
     ASSERT_TRUE(ReadString(file.get(), &new_content));
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 1b79ee0..960c26d 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -40,23 +40,23 @@
 static void JniAbort(const char* jni_function_name, const char* msg) {
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
-  mirror::ArtMethod* current_method = self->GetCurrentMethod(NULL);
+  mirror::ArtMethod* current_method = self->GetCurrentMethod(nullptr);
 
   std::ostringstream os;
   os << "JNI DETECTED ERROR IN APPLICATION: " << msg;
 
-  if (jni_function_name != NULL) {
+  if (jni_function_name != nullptr) {
     os << "\n    in call to " << jni_function_name;
   }
   // TODO: is this useful given that we're about to dump the calling thread's stack?
-  if (current_method != NULL) {
+  if (current_method != nullptr) {
     os << "\n    from " << PrettyMethod(current_method);
   }
   os << "\n";
   self->Dump(os);
 
   JavaVMExt* vm = Runtime::Current()->GetJavaVM();
-  if (vm->check_jni_abort_hook != NULL) {
+  if (vm->check_jni_abort_hook != nullptr) {
     vm->check_jni_abort_hook(vm->check_jni_abort_hook_data, os.str());
   } else {
     // Ensure that we get a native stack trace for this thread.
@@ -118,10 +118,10 @@
   "Ljavax/",
   "Llibcore/",
   "Lorg/apache/harmony/",
-  NULL
+  nullptr
 };
 
-static bool ShouldTrace(JavaVMExt* vm, const mirror::ArtMethod* method)
+static bool ShouldTrace(JavaVMExt* vm, mirror::ArtMethod* method)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // If both "-Xcheck:jni" and "-Xjnitrace:" are enabled, we print trace messages
   // when a native method that matches the -Xjnitrace argument calls a JNI function
@@ -135,7 +135,7 @@
   if (VLOG_IS_ON(third_party_jni)) {
     // Return true if we're trying to log all third-party JNI activity and 'method' doesn't look
     // like part of Android.
-    for (size_t i = 0; gBuiltInPrefixes[i] != NULL; ++i) {
+    for (size_t i = 0; gBuiltInPrefixes[i] != nullptr; ++i) {
       if (StartsWith(class_name, gBuiltInPrefixes[i])) {
         return false;
       }
@@ -192,15 +192,16 @@
    *
    * Works for both static and instance fields.
    */
-  void CheckFieldType(jobject java_object, jfieldID fid, char prim, bool isStatic)
+  void CheckFieldType(jvalue value, jfieldID fid, char prim, bool isStatic)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtField* f = CheckFieldID(fid);
-    if (f == NULL) {
+    if (f == nullptr) {
       return;
     }
     mirror::Class* field_type = FieldHelper(f).GetType();
     if (!field_type->IsPrimitive()) {
-      if (java_object != NULL) {
+      jobject java_object = value.l;
+      if (java_object != nullptr) {
         mirror::Object* obj = soa_.Decode<mirror::Object*>(java_object);
         // If java_object is a weak global ref whose referent has been cleared,
         // obj will be NULL.  Otherwise, obj should always be non-NULL
@@ -242,7 +243,7 @@
   void CheckInstanceFieldID(jobject java_object, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
-    if (o == NULL || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
+    if (o == nullptr || !Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "field operation on invalid %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(), java_object);
@@ -250,12 +251,12 @@
     }
 
     mirror::ArtField* f = CheckFieldID(fid);
-    if (f == NULL) {
+    if (f == nullptr) {
       return;
     }
     mirror::Class* c = o->GetClass();
     FieldHelper fh(f);
-    if (c->FindInstanceField(fh.GetName(), fh.GetTypeDescriptor()) == NULL) {
+    if (c->FindInstanceField(fh.GetName(), fh.GetTypeDescriptor()) == nullptr) {
       JniAbortF(function_name_, "jfieldID %s not valid for an object of class %s",
                 PrettyField(f).c_str(), PrettyTypeOf(o).c_str());
     }
@@ -265,7 +266,7 @@
    * Verify that the pointer value is non-NULL.
    */
   void CheckNonNull(const void* ptr) {
-    if (ptr == NULL) {
+    if (ptr == nullptr) {
       JniAbortF(function_name_, "non-nullable argument was NULL");
     }
   }
@@ -277,7 +278,7 @@
   void CheckSig(jmethodID mid, const char* expectedType, bool isStatic)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = CheckMethodID(mid);
-    if (m == NULL) {
+    if (m == nullptr) {
       return;
     }
     if (*expectedType != MethodHelper(m).GetShorty()[0]) {
@@ -303,8 +304,8 @@
   void CheckStaticFieldID(jclass java_class, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Class* c = soa_.Decode<mirror::Class*>(java_class);
-    const mirror::ArtField* f = CheckFieldID(fid);
-    if (f == NULL) {
+    mirror::ArtField* f = CheckFieldID(fid);
+    if (f == nullptr) {
       return;
     }
     if (f->GetDeclaringClass() != c) {
@@ -324,8 +325,8 @@
    */
   void CheckStaticMethod(jclass java_class, jmethodID mid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const mirror::ArtMethod* m = CheckMethodID(mid);
-    if (m == NULL) {
+    mirror::ArtMethod* m = CheckMethodID(mid);
+    if (m == nullptr) {
       return;
     }
     mirror::Class* c = soa_.Decode<mirror::Class*>(java_class);
@@ -344,8 +345,8 @@
    */
   void CheckVirtualMethod(jobject java_object, jmethodID mid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const mirror::ArtMethod* m = CheckMethodID(mid);
-    if (m == NULL) {
+    mirror::ArtMethod* m = CheckMethodID(mid);
+    if (m == nullptr) {
       return;
     }
     mirror::Object* o = soa_.Decode<mirror::Object*>(java_object);
@@ -394,17 +395,18 @@
   void Check(bool entry, const char* fmt0, ...) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     va_list ap;
 
-    const mirror::ArtMethod* traceMethod = NULL;
+    mirror::ArtMethod* traceMethod = nullptr;
     if (has_method_ && (!soa_.Vm()->trace.empty() || VLOG_IS_ON(third_party_jni))) {
       // We need to guard some of the invocation interface's calls: a bad caller might
       // use DetachCurrentThread or GetEnv on a thread that's not yet attached.
       Thread* self = Thread::Current();
-      if ((flags_ & kFlag_Invocation) == 0 || self != NULL) {
-        traceMethod = self->GetCurrentMethod(NULL);
+      if ((flags_ & kFlag_Invocation) == 0 || self != nullptr) {
+        traceMethod = self->GetCurrentMethod(nullptr);
       }
     }
 
-    if (((flags_ & kFlag_ForceTrace) != 0) || (traceMethod != NULL && ShouldTrace(soa_.Vm(), traceMethod))) {
+    if (((flags_ & kFlag_ForceTrace) != 0) ||
+        (traceMethod != nullptr && ShouldTrace(soa_.Vm(), traceMethod))) {
       va_start(ap, fmt0);
       std::string msg;
       for (const char* fmt = fmt0; *fmt;) {
@@ -428,7 +430,7 @@
         } else if (ch == 'I' || ch == 'S') {  // jint, jshort
           StringAppendF(&msg, "%d", va_arg(ap, int));
         } else if (ch == 'J') {  // jlong
-          StringAppendF(&msg, "%lld", va_arg(ap, jlong));
+          StringAppendF(&msg, "%" PRId64, va_arg(ap, jlong));
         } else if (ch == 'Z') {  // jboolean
           StringAppendF(&msg, "%s", va_arg(ap, int) ? "true" : "false");
         } else if (ch == 'V') {  // void
@@ -442,7 +444,7 @@
         } else if (ch == 'L' || ch == 'a' || ch == 's') {  // jobject, jarray, jstring
           // For logging purposes, these are identical.
           jobject o = va_arg(ap, jobject);
-          if (o == NULL) {
+          if (o == nullptr) {
             msg += "NULL";
           } else {
             StringAppendF(&msg, "%p", o);
@@ -453,7 +455,7 @@
         } else if (ch == 'c') {  // jclass
           jclass jc = va_arg(ap, jclass);
           mirror::Class* c = reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(jc));
-          if (c == NULL) {
+          if (c == nullptr) {
             msg += "NULL";
           } else if (c == kInvalidIndirectRefObject ||
               !Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
@@ -488,7 +490,7 @@
           }
         } else if (ch == 'p') {  // void* ("pointer")
           void* p = va_arg(ap, void*);
-          if (p == NULL) {
+          if (p == nullptr) {
             msg += "NULL";
           } else {
             StringAppendF(&msg, "(void*) %p", p);
@@ -506,7 +508,7 @@
           }
         } else if (ch == 'u') {  // const char* (Modified UTF-8)
           const char* utf = va_arg(ap, const char*);
-          if (utf == NULL) {
+          if (utf == nullptr) {
             msg += "NULL";
           } else {
             StringAppendF(&msg, "\"%s\"", utf);
@@ -563,7 +565,7 @@
           }
         } else if (ch == 'z') {
           CheckLengthPositive(va_arg(ap, jsize));
-        } else if (strchr("BCISZbfmpEv", ch) != NULL) {
+        } else if (strchr("BCISZbfmpEv", ch) != nullptr) {
           va_arg(ap, uint32_t);  // Skip this argument.
         } else if (ch == 'D' || ch == 'F') {
           va_arg(ap, double);  // Skip this argument.
@@ -595,7 +597,7 @@
    */
   bool CheckInstance(InstanceKind kind, jobject java_object)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* what = NULL;
+    const char* what = nullptr;
     switch (kind) {
     case kClass:
       what = "jclass";
@@ -616,7 +618,7 @@
       LOG(FATAL) << "Unknown kind " << static_cast<int>(kind);
     }
 
-    if (java_object == NULL) {
+    if (java_object == nullptr) {
       JniAbortF(function_name_, "%s received null %s", function_name_, what);
       return false;
     }
@@ -670,7 +672,7 @@
    * Since we're dealing with objects, switch to "running" mode.
    */
   void CheckArray(jarray java_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (java_array == NULL) {
+    if (java_array == nullptr) {
       JniAbortF(function_name_, "jarray was NULL");
       return;
     }
@@ -692,29 +694,29 @@
   }
 
   mirror::ArtField* CheckFieldID(jfieldID fid) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (fid == NULL) {
+    if (fid == nullptr) {
       JniAbortF(function_name_, "jfieldID was NULL");
-      return NULL;
+      return nullptr;
     }
     mirror::ArtField* f = soa_.DecodeField(fid);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f) || !f->IsArtField()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jfieldID: %p", fid);
-      return NULL;
+      return nullptr;
     }
     return f;
   }
 
   mirror::ArtMethod* CheckMethodID(jmethodID mid) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (mid == NULL) {
+    if (mid == nullptr) {
       JniAbortF(function_name_, "jmethodID was NULL");
-      return NULL;
+      return nullptr;
     }
     mirror::ArtMethod* m = soa_.DecodeMethod(mid);
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m) || !m->IsArtMethod()) {
       Runtime::Current()->GetHeap()->DumpSpaces();
       JniAbortF(function_name_, "invalid jmethodID: %p", mid);
-      return NULL;
+      return nullptr;
     }
     return m;
   }
@@ -727,7 +729,7 @@
    */
   void CheckObject(jobject java_object)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (java_object == NULL) {
+    if (java_object == nullptr) {
       return;
     }
 
@@ -752,7 +754,7 @@
 
   void CheckThread(int flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Thread* self = Thread::Current();
-    if (self == NULL) {
+    if (self == nullptr) {
       JniAbortF(function_name_, "a thread (tid %d) is making JNI calls without being attached", GetTid());
       return;
     }
@@ -813,7 +815,7 @@
 
   // Verifies that "bytes" points to valid Modified UTF-8 data.
   void CheckUtfString(const char* bytes, bool nullable) {
-    if (bytes == NULL) {
+    if (bytes == nullptr) {
       if (!nullable) {
         JniAbortF(function_name_, "non-nullable const char* was NULL");
         return;
@@ -821,9 +823,9 @@
       return;
     }
 
-    const char* errorKind = NULL;
+    const char* errorKind = nullptr;
     uint8_t utf8 = CheckUtfBytes(bytes, &errorKind);
-    if (errorKind != NULL) {
+    if (errorKind != nullptr) {
       JniAbortF(function_name_,
                 "input is not valid Modified UTF-8: illegal %s byte %#x\n"
                 "    string: '%s'", errorKind, utf8, bytes);
@@ -998,7 +1000,7 @@
     const uint16_t* pat = reinterpret_cast<const uint16_t*>(fullBuf);
     for (size_t i = sizeof(GuardedCopy) / 2; i < (kGuardLen / 2 - sizeof(GuardedCopy)) / 2; i++) {
       if (pat[i] != kGuardPattern) {
-        JniAbortF(functionName, "guard pattern(1) disturbed at %p +%d", fullBuf, i*2);
+        JniAbortF(functionName, "guard pattern(1) disturbed at %p +%zd", fullBuf, i*2);
       }
     }
 
@@ -1018,7 +1020,7 @@
     pat = reinterpret_cast<const uint16_t*>(fullBuf + offset);
     for (size_t i = 0; i < kGuardLen / 4; i++) {
       if (pat[i] != kGuardPattern) {
-        JniAbortF(functionName, "guard pattern(2) disturbed at %p +%d", fullBuf, offset + i*2);
+        JniAbortF(functionName, "guard pattern(2) disturbed at %p +%zd", fullBuf, offset + i*2);
       }
     }
 
@@ -1037,7 +1039,7 @@
 
  private:
   static uint8_t* DebugAlloc(size_t len) {
-    void* result = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    void* result = mmap(nullptr, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
     if (result == MAP_FAILED) {
       PLOG(FATAL) << "GuardedCopy::create mmap(" << len << ") failed";
     }
@@ -1081,8 +1083,8 @@
   mirror::Array* a = soa.Decode<mirror::Array*>(java_array);
   size_t component_size = a->GetClass()->GetComponentSize();
   size_t byte_count = a->GetLength() * component_size;
-  void* result = GuardedCopy::Create(a->GetRawData(component_size), byte_count, true);
-  if (isCopy != NULL) {
+  void* result = GuardedCopy::Create(a->GetRawData(component_size, 0), byte_count, true);
+  if (isCopy != nullptr) {
     *isCopy = JNI_TRUE;
   }
   return result;
@@ -1100,7 +1102,7 @@
 
   if (mode != JNI_ABORT) {
     size_t len = GuardedCopy::FromData(dataBuf)->original_length;
-    memcpy(a->GetRawData(a->GetClass()->GetComponentSize()), dataBuf, len);
+    memcpy(a->GetRawData(a->GetClass()->GetComponentSize(), 0), dataBuf, len);
   }
   if (mode != JNI_COMMIT) {
     GuardedCopy::Destroy(dataBuf);
@@ -1223,7 +1225,7 @@
 
   static void DeleteGlobalRef(JNIEnv* env, jobject globalRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, globalRef);
-    if (globalRef != NULL && GetIndirectRefKind(globalRef) != kGlobal) {
+    if (globalRef != nullptr && GetIndirectRefKind(globalRef) != kGlobal) {
       JniAbortF(__FUNCTION__, "DeleteGlobalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(globalRef)).c_str(), globalRef);
     } else {
@@ -1234,7 +1236,7 @@
 
   static void DeleteWeakGlobalRef(JNIEnv* env, jweak weakGlobalRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, weakGlobalRef);
-    if (weakGlobalRef != NULL && GetIndirectRefKind(weakGlobalRef) != kWeakGlobal) {
+    if (weakGlobalRef != nullptr && GetIndirectRefKind(weakGlobalRef) != kWeakGlobal) {
       JniAbortF(__FUNCTION__, "DeleteWeakGlobalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(weakGlobalRef)).c_str(), weakGlobalRef);
     } else {
@@ -1245,7 +1247,7 @@
 
   static void DeleteLocalRef(JNIEnv* env, jobject localRef) {
     CHECK_JNI_ENTRY(kFlag_Default | kFlag_ExcepOkay, "EL", env, localRef);
-    if (localRef != NULL && GetIndirectRefKind(localRef) != kLocal && !IsSirtLocalRef(env, localRef)) {
+    if (localRef != nullptr && GetIndirectRefKind(localRef) != kLocal && !IsSirtLocalRef(env, localRef)) {
       JniAbortF(__FUNCTION__, "DeleteLocalRef on %s: %p",
                 ToStr<IndirectRefKind>(GetIndirectRefKind(localRef)).c_str(), localRef);
     } else {
@@ -1318,7 +1320,7 @@
     return CHECK_JNI_EXIT("f", baseEnv(env)->GetStaticFieldID(env, c, name, sig));
   }
 
-#define FIELD_ACCESSORS(_ctype, _jname, _type) \
+#define FIELD_ACCESSORS(_ctype, _jname, _jvalue_type, _type) \
     static _ctype GetStatic##_jname##Field(JNIEnv* env, jclass c, jfieldID fid) { \
         CHECK_JNI_ENTRY(kFlag_Default, "Ecf", env, c, fid); \
         sc.CheckStaticFieldID(c, fid); \
@@ -1333,7 +1335,9 @@
         CHECK_JNI_ENTRY(kFlag_Default, "Ecf" _type, env, c, fid, value); \
         sc.CheckStaticFieldID(c, fid); \
         /* "value" arg only used when type == ref */ \
-        sc.CheckFieldType((jobject)(uint32_t)value, fid, _type[0], true); \
+        jvalue java_type_value; \
+        java_type_value._jvalue_type = value; \
+        sc.CheckFieldType(java_type_value, fid, _type[0], true); \
         baseEnv(env)->SetStatic##_jname##Field(env, c, fid, value); \
         CHECK_JNI_EXIT_VOID(); \
     } \
@@ -1341,20 +1345,22 @@
         CHECK_JNI_ENTRY(kFlag_Default, "ELf" _type, env, obj, fid, value); \
         sc.CheckInstanceFieldID(obj, fid); \
         /* "value" arg only used when type == ref */ \
-        sc.CheckFieldType((jobject)(uint32_t) value, fid, _type[0], false); \
+        jvalue java_type_value; \
+        java_type_value._jvalue_type = value; \
+        sc.CheckFieldType(java_type_value, fid, _type[0], false); \
         baseEnv(env)->Set##_jname##Field(env, obj, fid, value); \
         CHECK_JNI_EXIT_VOID(); \
     }
 
-FIELD_ACCESSORS(jobject, Object, "L");
-FIELD_ACCESSORS(jboolean, Boolean, "Z");
-FIELD_ACCESSORS(jbyte, Byte, "B");
-FIELD_ACCESSORS(jchar, Char, "C");
-FIELD_ACCESSORS(jshort, Short, "S");
-FIELD_ACCESSORS(jint, Int, "I");
-FIELD_ACCESSORS(jlong, Long, "J");
-FIELD_ACCESSORS(jfloat, Float, "F");
-FIELD_ACCESSORS(jdouble, Double, "D");
+FIELD_ACCESSORS(jobject, Object, l, "L");
+FIELD_ACCESSORS(jboolean, Boolean, z, "Z");
+FIELD_ACCESSORS(jbyte, Byte, b, "B");
+FIELD_ACCESSORS(jchar, Char, c, "C");
+FIELD_ACCESSORS(jshort, Short, s, "S");
+FIELD_ACCESSORS(jint, Int, i, "I");
+FIELD_ACCESSORS(jlong, Long, j, "J");
+FIELD_ACCESSORS(jfloat, Float, f, "F");
+FIELD_ACCESSORS(jdouble, Double, d, "D");
 
 #define CALL(_ctype, _jname, _retdecl, _retasgn, _retok, _retsig) \
     /* Virtual... */ \
@@ -1484,11 +1490,11 @@
   static const jchar* GetStringChars(JNIEnv* env, jstring java_string, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritOkay, "Esp", env, java_string, isCopy);
     const jchar* result = baseEnv(env)->GetStringChars(env, java_string, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       mirror::String* s = sc.soa().Decode<mirror::String*>(java_string);
       int byteCount = s->GetLength() * 2;
       result = (const jchar*) GuardedCopy::Create(result, byteCount, false);
-      if (isCopy != NULL) {
+      if (isCopy != nullptr) {
         *isCopy = JNI_TRUE;
       }
     }
@@ -1519,9 +1525,9 @@
   static const char* GetStringUTFChars(JNIEnv* env, jstring string, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritOkay, "Esp", env, string, isCopy);
     const char* result = baseEnv(env)->GetStringUTFChars(env, string, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       result = (const char*) GuardedCopy::Create(result, strlen(result) + 1, false);
-      if (isCopy != NULL) {
+      if (isCopy != nullptr) {
         *isCopy = JNI_TRUE;
       }
     }
@@ -1578,7 +1584,7 @@
   ForceCopyGetChecker(ScopedCheck& sc, jboolean* isCopy) {
     force_copy = sc.ForceCopy();
     no_copy = 0;
-    if (force_copy && isCopy != NULL) {
+    if (force_copy && isCopy != nullptr) {
       // Capture this before the base call tramples on it.
       no_copy = *reinterpret_cast<uint32_t*>(isCopy);
     }
@@ -1586,7 +1592,7 @@
 
   template<typename ResultT>
   ResultT Check(JNIEnv* env, jarray array, jboolean* isCopy, ResultT result) {
-    if (force_copy && result != NULL) {
+    if (force_copy && result != nullptr) {
       result = reinterpret_cast<ResultT>(CreateGuardedPACopy(env, array, isCopy));
     }
     return result;
@@ -1690,7 +1696,7 @@
   static void* GetPrimitiveArrayCritical(JNIEnv* env, jarray array, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritGet, "Eap", env, array, isCopy);
     void* result = baseEnv(env)->GetPrimitiveArrayCritical(env, array, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       result = CreateGuardedPACopy(env, array, isCopy);
     }
     return CHECK_JNI_EXIT("p", result);
@@ -1709,11 +1715,11 @@
   static const jchar* GetStringCritical(JNIEnv* env, jstring java_string, jboolean* isCopy) {
     CHECK_JNI_ENTRY(kFlag_CritGet, "Esp", env, java_string, isCopy);
     const jchar* result = baseEnv(env)->GetStringCritical(env, java_string, isCopy);
-    if (sc.ForceCopy() && result != NULL) {
+    if (sc.ForceCopy() && result != nullptr) {
       mirror::String* s = sc.soa().Decode<mirror::String*>(java_string);
       int byteCount = s->GetLength() * 2;
       result = (const jchar*) GuardedCopy::Create(result, byteCount, false);
-      if (isCopy != NULL) {
+      if (isCopy != nullptr) {
         *isCopy = JNI_TRUE;
       }
     }
@@ -1751,11 +1757,11 @@
 
   static jobject NewDirectByteBuffer(JNIEnv* env, void* address, jlong capacity) {
     CHECK_JNI_ENTRY(kFlag_Default, "EpJ", env, address, capacity);
-    if (address == NULL) {
+    if (address == nullptr) {
       JniAbortF(__FUNCTION__, "non-nullable address is NULL");
     }
     if (capacity < 0) {
-      JniAbortF(__FUNCTION__, "capacity must be non-negative: %lld", capacity);
+      JniAbortF(__FUNCTION__, "capacity must be non-negative: %" PRId64, capacity);
     }
     return CHECK_JNI_EXIT("L", baseEnv(env)->NewDirectByteBuffer(env, address, capacity));
   }
@@ -1779,10 +1785,10 @@
 };
 
 const JNINativeInterface gCheckNativeInterface = {
-  NULL,  // reserved0.
-  NULL,  // reserved1.
-  NULL,  // reserved2.
-  NULL,  // reserved3.
+  nullptr,  // reserved0.
+  nullptr,  // reserved1.
+  nullptr,  // reserved2.
+  nullptr,  // reserved3.
   CheckJNI::GetVersion,
   CheckJNI::DefineClass,
   CheckJNI::FindClass,
@@ -2057,9 +2063,9 @@
 };
 
 const JNIInvokeInterface gCheckInvokeInterface = {
-  NULL,  // reserved0
-  NULL,  // reserved1
-  NULL,  // reserved2
+  nullptr,  // reserved0
+  nullptr,  // reserved1
+  nullptr,  // reserved2
   CheckJII::DestroyJavaVM,
   CheckJII::AttachCurrentThread,
   CheckJII::DetachCurrentThread,
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 0436435..66c24b5 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -28,7 +28,7 @@
 namespace art {
 
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx,
-                                                  const mirror::ArtMethod* referrer) {
+                                                  mirror::ArtMethod* referrer) {
   mirror::String* resolved_string = referrer->GetDexCacheStrings()->Get(string_idx);
   if (UNLIKELY(resolved_string == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
@@ -40,7 +40,7 @@
 }
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx,
-                                               const mirror::ArtMethod* referrer) {
+                                               mirror::ArtMethod* referrer) {
   mirror::Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
   if (UNLIKELY(resolved_type == NULL)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
@@ -53,7 +53,7 @@
   return resolved_type;
 }
 
-inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, const mirror::ArtField* referrer) {
+inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, mirror::ArtField* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
   mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
@@ -68,7 +68,7 @@
 }
 
 inline mirror::ArtMethod* ClassLinker::ResolveMethod(uint32_t method_idx,
-                                                     const mirror::ArtMethod* referrer,
+                                                     mirror::ArtMethod* referrer,
                                                      InvokeType type) {
   mirror::ArtMethod* resolved_method =
       referrer->GetDexCacheResolvedMethods()->Get(method_idx);
@@ -84,7 +84,7 @@
 }
 
 inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx,
-                                                   const mirror::ArtMethod* referrer,
+                                                   mirror::ArtMethod* referrer,
                                                    bool is_static) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::ArtField* resolved_field =
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index b1117a2..978c99b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -199,14 +199,14 @@
   Thread* self = Thread::Current();
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // The GC can't handle an object with a null class since we can't get the size of this object.
-  heap->IncrementDisableGC(self);
+  heap->IncrementDisableMovingGC(self);
   SirtRef<mirror::Class> java_lang_Class(self, down_cast<mirror::Class*>(
       heap->AllocNonMovableObject<true>(self, nullptr, sizeof(mirror::ClassClass))));
   CHECK(java_lang_Class.get() != NULL);
   mirror::Class::SetClassClass(java_lang_Class.get());
   java_lang_Class->SetClass(java_lang_Class.get());
   java_lang_Class->SetClassSize(sizeof(mirror::ClassClass));
-  heap->DecrementDisableGC(self);
+  heap->DecrementDisableMovingGC(self);
   // AllocClass(mirror::Class*) can now be used
 
   // Class[] is used for reflection support.
@@ -649,15 +649,9 @@
 
 const OatFile* ClassLinker::RegisterOatFile(const OatFile* oat_file) {
   WriterMutexLock mu(Thread::Current(), dex_lock_);
-  for (size_t i = 0; i < oat_files_.size(); ++i) {
-    if (UNLIKELY(oat_file->GetLocation() == oat_files_[i]->GetLocation())) {
-      VLOG(class_linker) << "Attempt to register oat file that's already registered: "
-          << oat_file->GetLocation();
-      for (size_t j = i; j < oat_files_.size(); ++j) {
-        CHECK_NE(oat_file, oat_files_[j]) << "Attempt to re-register dex file.";
-      }
-      delete oat_file;
-      return oat_files_[i];
+  if (kIsDebugBuild) {
+    for (size_t i = 0; i < oat_files_.size(); ++i) {
+      CHECK_NE(oat_file, oat_files_[i]) << oat_file->GetLocation();
     }
   }
   VLOG(class_linker) << "Registering " << oat_file->GetLocation();
@@ -718,11 +712,11 @@
     return nullptr;
   }
 
-  uint32_t expected_image_oat_offset = reinterpret_cast<uint32_t>(image_header.GetOatDataBegin());
+  uintptr_t expected_image_oat_offset = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
   uint32_t actual_image_oat_offset = oat_file->GetOatHeader().GetImageFileLocationOatDataBegin();
   if (expected_image_oat_offset != actual_image_oat_offset) {
-    *error_msg = StringPrintf("Failed to find oat file at '%s' with expected image oat offset %ud, "
-                              "found %ud", oat_location, expected_image_oat_offset,
+    *error_msg = StringPrintf("Failed to find oat file at '%s' with expected image oat offset %"
+                              PRIuPTR ", found %ud", oat_location, expected_image_oat_offset,
                               actual_image_oat_offset);
     return nullptr;
   }
@@ -826,20 +820,6 @@
       << oat_location << "': " << *error_msg;
   error_msg->clear();
 
-  {
-    // We might have registered an outdated OatFile in FindDexFileInOatLocation().
-    // Get rid of it as its MAP_PRIVATE mapping may not reflect changes we're about to do.
-    WriterMutexLock mu(Thread::Current(), dex_lock_);
-    for (size_t i = 0; i < oat_files_.size(); ++i) {
-      if (oat_location == oat_files_[i]->GetLocation()) {
-        VLOG(class_linker) << "De-registering old OatFile: " << oat_location;
-        delete oat_files_[i];
-        oat_files_.erase(oat_files_.begin() + i);
-        break;
-      }
-    }
-  }
-
   // Generate the output oat file for the dex file
   VLOG(class_linker) << "Generating oat file " << oat_location << " for " << dex_location;
   if (!GenerateOatFile(dex_location, scoped_flock.GetFile().Fd(), oat_location, error_msg)) {
@@ -878,7 +858,7 @@
   Runtime* runtime = Runtime::Current();
   const ImageHeader& image_header = runtime->GetHeap()->GetImageSpace()->GetImageHeader();
   uint32_t image_oat_checksum = image_header.GetOatChecksum();
-  uint32_t image_oat_data_begin = reinterpret_cast<uint32_t>(image_header.GetOatDataBegin());
+  uintptr_t image_oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin());
   bool image_check = ((oat_file->GetOatHeader().GetImageFileLocationOatChecksum() == image_oat_checksum)
                       && (oat_file->GetOatHeader().GetImageFileLocationOatDataBegin() == image_oat_data_begin));
 
@@ -905,7 +885,7 @@
     ScopedObjectAccess soa(Thread::Current());
     mirror::String* oat_location = image_header.GetImageRoot(ImageHeader::kOatLocation)->AsString();
     std::string image_file(oat_location->ToModifiedUtf8());
-    *error_msg = StringPrintf("oat file '%s' mismatch (0x%x, %d) with '%s' (0x%x, %d)",
+    *error_msg = StringPrintf("oat file '%s' mismatch (0x%x, %d) with '%s' (0x%x, %" PRIdPTR ")",
                               oat_file->GetLocation().c_str(),
                               oat_file->GetOatHeader().GetImageFileLocationOatChecksum(),
                               oat_file->GetOatHeader().GetImageFileLocationOatDataBegin(),
@@ -1043,7 +1023,8 @@
     if (!method->IsNative()) {
       method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
       if (method != Runtime::Current()->GetResolutionMethod()) {
-        method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
+        method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+        method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge());
       }
     }
   }
@@ -1592,7 +1573,7 @@
   return 0;
 }
 
-const OatFile::OatMethod ClassLinker::GetOatMethodFor(const mirror::ArtMethod* method) {
+const OatFile::OatMethod ClassLinker::GetOatMethodFor(mirror::ArtMethod* method) {
   // Although we overwrite the trampoline of non-static methods, we may get here via the resolution
   // method for direct methods (or virtual methods made direct).
   mirror::Class* declaring_class = method->GetDeclaringClass();
@@ -1628,35 +1609,68 @@
 }
 
 // Special case to get oat code without overwriting a trampoline.
-const void* ClassLinker::GetOatCodeFor(const mirror::ArtMethod* method) {
+const void* ClassLinker::GetQuickOatCodeFor(mirror::ArtMethod* method) {
   CHECK(!method->IsAbstract()) << PrettyMethod(method);
   if (method->IsProxyMethod()) {
-#if !defined(ART_USE_PORTABLE_COMPILER)
-    return reinterpret_cast<void*>(art_quick_proxy_invoke_handler);
-#else
-    return reinterpret_cast<void*>(art_portable_proxy_invoke_handler);
-#endif
+    return GetQuickProxyInvokeHandler();
   }
-  const void* result = GetOatMethodFor(method).GetCode();
-  if (result == NULL) {
-    // No code? You must mean to go into the interpreter.
-    result = GetCompiledCodeToInterpreterBridge();
+  const void* result = GetOatMethodFor(method).GetQuickCode();
+  if (result == nullptr) {
+    if (method->IsPortableCompiled()) {
+      // No quick code, but portable code is expected, so use a bridge.
+      result = GetQuickToPortableBridge();
+    } else {
+      // No code? You must mean to go into the interpreter.
+      result = GetQuickToInterpreterBridge();
+    }
   }
   return result;
 }
 
-const void* ClassLinker::GetOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
-                                       uint32_t method_idx) {
+const void* ClassLinker::GetPortableOatCodeFor(mirror::ArtMethod* method,
+                                               bool* have_portable_code) {
+  CHECK(!method->IsAbstract()) << PrettyMethod(method);
+  *have_portable_code = false;
+  if (method->IsProxyMethod()) {
+    return GetPortableProxyInvokeHandler();
+  }
+  const void* result = GetOatMethodFor(method).GetPortableCode();
+  if (result == nullptr) {
+    if (GetOatMethodFor(method).GetQuickCode() == nullptr) {
+      // No code? You must mean to go into the interpreter.
+      result = GetPortableToInterpreterBridge();
+    } else {
+      // No portable code, but there is quick code, so use a bridge.
+      result = GetPortableToQuickBridge();
+    }
+  } else {
+    *have_portable_code = true;
+  }
+  return result;
+}
+
+const void* ClassLinker::GetQuickOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
+                                            uint32_t method_idx) {
   UniquePtr<const OatFile::OatClass> oat_class(GetOatClass(dex_file, class_def_idx));
   CHECK(oat_class.get() != nullptr);
   uint32_t oat_method_idx = GetOatMethodIndexFromMethodIndex(dex_file, class_def_idx, method_idx);
-  return oat_class->GetOatMethod(oat_method_idx).GetCode();
+  return oat_class->GetOatMethod(oat_method_idx).GetQuickCode();
+}
+
+const void* ClassLinker::GetPortableOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx,
+                                               uint32_t method_idx) {
+  UniquePtr<const OatFile::OatClass> oat_class(GetOatClass(dex_file, class_def_idx));
+  CHECK(oat_class.get() != nullptr);
+  uint32_t oat_method_idx = GetOatMethodIndexFromMethodIndex(dex_file, class_def_idx, method_idx);
+  return oat_class->GetOatMethod(oat_method_idx).GetPortableCode();
 }
 
 // Returns true if the method must run with interpreter, false otherwise.
-static bool NeedsInterpreter(const mirror::ArtMethod* method, const void* code) {
-  if (code == NULL) {
+static bool NeedsInterpreter(mirror::ArtMethod* method, const void* quick_code,
+                             const void* portable_code) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if ((quick_code == nullptr) && (portable_code == nullptr)) {
     // No code: need interpreter.
+    DCHECK(!method->IsNative());
     return true;
   }
 #ifdef ART_SEA_IR_MODE
@@ -1704,13 +1718,26 @@
       // Only update static methods.
       continue;
     }
-    const void* code = oat_class->GetOatMethod(method_index).GetCode();
-    const bool enter_interpreter = NeedsInterpreter(method, code);
+    const void* portable_code = oat_class->GetOatMethod(method_index).GetPortableCode();
+    const void* quick_code = oat_class->GetOatMethod(method_index).GetQuickCode();
+    const bool enter_interpreter = NeedsInterpreter(method, quick_code, portable_code);
+    bool have_portable_code = false;
     if (enter_interpreter) {
       // Use interpreter entry point.
-      code = GetCompiledCodeToInterpreterBridge();
+      portable_code = GetPortableToInterpreterBridge();
+      quick_code = GetQuickToInterpreterBridge();
+    } else {
+      if (portable_code == nullptr) {
+        portable_code = GetPortableToQuickBridge();
+      } else {
+        have_portable_code = true;
+      }
+      if (quick_code == nullptr) {
+        quick_code = GetQuickToPortableBridge();
+      }
     }
-    runtime->GetInstrumentation()->UpdateMethodsCode(method, code);
+    runtime->GetInstrumentation()->UpdateMethodsCode(method, quick_code, portable_code,
+                                                     have_portable_code);
   }
   // Ignore virtual methods on the iterator.
 }
@@ -1719,7 +1746,8 @@
                      uint32_t method_index)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Method shouldn't have already been linked.
-  DCHECK(method->GetEntryPointFromCompiledCode() == NULL);
+  DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+  DCHECK(method->GetEntryPointFromPortableCompiledCode() == nullptr);
   // Every kind of method should at least get an invoke stub from the oat_method.
   // non-abstract methods also get their code pointers.
   const OatFile::OatMethod oat_method = oat_class->GetOatMethod(method_index);
@@ -1727,7 +1755,9 @@
 
   // Install entry point from interpreter.
   Runtime* runtime = Runtime::Current();
-  bool enter_interpreter = NeedsInterpreter(method.get(), method->GetEntryPointFromCompiledCode());
+  bool enter_interpreter = NeedsInterpreter(method.get(),
+                                            method->GetEntryPointFromQuickCompiledCode(),
+                                            method->GetEntryPointFromPortableCompiledCode());
   if (enter_interpreter) {
     method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
   } else {
@@ -1735,18 +1765,29 @@
   }
 
   if (method->IsAbstract()) {
-    method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge());
     return;
   }
 
+  bool have_portable_code = false;
   if (method->IsStatic() && !method->IsConstructor()) {
     // For static methods excluding the class initializer, install the trampoline.
     // It will be replaced by the proper entry point by ClassLinker::FixupStaticTrampolines
     // after initializing class (see ClassLinker::InitializeClass method).
-    method->SetEntryPointFromCompiledCode(GetResolutionTrampoline(runtime->GetClassLinker()));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(runtime->GetClassLinker()));
+    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(runtime->GetClassLinker()));
   } else if (enter_interpreter) {
     // Set entry point from compiled code if there's no code or in interpreter only mode.
-    method->SetEntryPointFromCompiledCode(GetCompiledCodeToInterpreterBridge());
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge());
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge());
+  } else if (method->GetEntryPointFromPortableCompiledCode() != nullptr) {
+    DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+    have_portable_code = true;
+    method->SetEntryPointFromQuickCompiledCode(GetQuickToPortableBridge());
+  } else {
+    DCHECK(method->GetEntryPointFromQuickCompiledCode() != nullptr);
+    method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
   }
 
   if (method->IsNative()) {
@@ -1756,7 +1797,9 @@
 
   // Allow instrumentation its chance to hijack code.
   runtime->GetInstrumentation()->UpdateMethodsCode(method.get(),
-                                                   method->GetEntryPointFromCompiledCode());
+                                                   method->GetEntryPointFromQuickCompiledCode(),
+                                                   method->GetEntryPointFromPortableCompiledCode(),
+                                                   have_portable_code);
 }
 
 void ClassLinker::LoadClass(const DexFile& dex_file,
@@ -2096,10 +2139,11 @@
                                              const SirtRef<mirror::ClassLoader>& class_loader) {
   // Identify the underlying component type
   CHECK_EQ('[', descriptor[0]);
-  mirror::Class* component_type = FindClass(descriptor + 1, class_loader);
-  if (component_type == NULL) {
-    DCHECK(Thread::Current()->IsExceptionPending());
-    return NULL;
+  Thread* self = Thread::Current();
+  SirtRef<mirror::Class> component_type(self, FindClass(descriptor + 1, class_loader));
+  if (component_type.get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
   }
 
   // See if the component type is already loaded.  Array classes are
@@ -2134,7 +2178,6 @@
   //
   // Array classes are simple enough that we don't need to do a full
   // link step.
-  Thread* self = Thread::Current();
   SirtRef<mirror::Class> new_class(self, NULL);
   if (UNLIKELY(!init_done_)) {
     // Classes that were hand created, ie not by FindSystemClass
@@ -2156,12 +2199,12 @@
       new_class.reset(GetClassRoot(kIntArrayClass));
     }
   }
-  if (new_class.get() == NULL) {
+  if (new_class.get() == nullptr) {
     new_class.reset(AllocClass(self, sizeof(mirror::Class)));
-    if (new_class.get() == NULL) {
-      return NULL;
+    if (new_class.get() == nullptr) {
+      return nullptr;
     }
-    new_class->SetComponentType(component_type);
+    new_class->SetComponentType(component_type.get());
   }
   ObjectLock<mirror::Class> lock(self, &new_class);  // Must hold lock on object when initializing.
   DCHECK(new_class->GetComponentType() != NULL);
@@ -2187,7 +2230,7 @@
 
   // Use the single, global copies of "interfaces" and "iftable"
   // (remember not to free them for arrays).
-  CHECK(array_iftable_ != NULL);
+  CHECK(array_iftable_ != nullptr);
   new_class->SetIfTable(array_iftable_);
 
   // Inherit access flags from the component type.
@@ -2202,7 +2245,7 @@
   new_class->SetAccessFlags(access_flags);
 
   mirror::Class* existing = InsertClass(descriptor, new_class.get(), Hash(descriptor));
-  if (existing == NULL) {
+  if (existing == nullptr) {
     return new_class.get();
   }
   // Another thread must have loaded the class after we
@@ -2823,15 +2866,15 @@
   return klass.get();
 }
 
-std::string ClassLinker::GetDescriptorForProxy(const mirror::Class* proxy_class) {
+std::string ClassLinker::GetDescriptorForProxy(mirror::Class* proxy_class) {
   DCHECK(proxy_class->IsProxyClass());
   mirror::String* name = proxy_class->GetName();
   DCHECK(name != NULL);
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
 
-mirror::ArtMethod* ClassLinker::FindMethodForProxy(const mirror::Class* proxy_class,
-                                                        const mirror::ArtMethod* proxy_method) {
+mirror::ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class,
+                                                   mirror::ArtMethod* proxy_method) {
   DCHECK(proxy_class->IsProxyClass());
   DCHECK(proxy_method->IsProxyMethod());
   // Locate the dex cache of the original interface/Object
@@ -2912,7 +2955,8 @@
   method->SetCoreSpillMask(refs_and_args->GetCoreSpillMask());
   method->SetFpSpillMask(refs_and_args->GetFpSpillMask());
   method->SetFrameSizeInBytes(refs_and_args->GetFrameSizeInBytes());
-  method->SetEntryPointFromCompiledCode(GetProxyInvokeHandler());
+  method->SetEntryPointFromQuickCompiledCode(GetQuickProxyInvokeHandler());
+  method->SetEntryPointFromPortableCompiledCode(GetPortableProxyInvokeHandler());
   method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
 
   return method;
@@ -3195,7 +3239,7 @@
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
     SirtRef<mirror::Class> super(self, klass->GetSuperClass());
     for (int i = super->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      const mirror::ArtMethod* method = klass->GetVTable()->Get(i);
+      mirror::ArtMethod* method = klass->GetVTable()->Get(i);
       if (method != super->GetVTable()->Get(i) &&
           !IsSameMethodSignatureInDifferentClassContexts(method, super.get(), klass.get())) {
         ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in superclass %s",
@@ -3209,7 +3253,7 @@
     SirtRef<mirror::Class> interface(self, klass->GetIfTable()->GetInterface(i));
     if (klass->GetClassLoader() != interface->GetClassLoader()) {
       for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        const mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
+        mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
         if (!IsSameMethodSignatureInDifferentClassContexts(method, interface.get(),
                                                            method->GetDeclaringClass())) {
           ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in interface %s",
@@ -3226,9 +3270,9 @@
 
 // Returns true if classes referenced by the signature of the method are the
 // same classes in klass1 as they are in klass2.
-bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(const mirror::ArtMethod* method,
-                                                                const mirror::Class* klass1,
-                                                                const mirror::Class* klass2) {
+bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(mirror::ArtMethod* method,
+                                                                mirror::Class* klass1,
+                                                                mirror::Class* klass2) {
   if (klass1 == klass2) {
     return true;
   }
@@ -3810,23 +3854,24 @@
   explicit LinkFieldsComparator() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   }
   // No thread safety analysis as will be called from STL. Checked lock held in constructor.
-  bool operator()(const mirror::ArtField* field1, const mirror::ArtField* field2)
+  bool operator()(mirror::ArtField* field1, mirror::ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, and finally 32-bit
     FieldHelper fh1(field1);
     Primitive::Type type1 = fh1.GetTypeAsPrimitiveType();
     FieldHelper fh2(field2);
     Primitive::Type type2 = fh2.GetTypeAsPrimitiveType();
-    bool isPrimitive1 = type1 != Primitive::kPrimNot;
-    bool isPrimitive2 = type2 != Primitive::kPrimNot;
-    bool is64bit1 = isPrimitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
-    bool is64bit2 = isPrimitive2 && (type2 == Primitive::kPrimLong || type2 == Primitive::kPrimDouble);
-    int order1 = (!isPrimitive1 ? 0 : (is64bit1 ? 1 : 2));
-    int order2 = (!isPrimitive2 ? 0 : (is64bit2 ? 1 : 2));
-    if (order1 != order2) {
-      return order1 < order2;
+    if (type1 != type2) {
+      bool is_primitive1 = type1 != Primitive::kPrimNot;
+      bool is_primitive2 = type2 != Primitive::kPrimNot;
+      bool is64bit1 = is_primitive1 && (type1 == Primitive::kPrimLong || type1 == Primitive::kPrimDouble);
+      bool is64bit2 = is_primitive2 && (type2 == Primitive::kPrimLong || type2 == Primitive::kPrimDouble);
+      int order1 = !is_primitive1 ? 0 : (is64bit1 ? 1 : 2);
+      int order2 = !is_primitive2 ? 0 : (is64bit2 ? 1 : 2);
+      if (order1 != order2) {
+        return order1 < order2;
+      }
     }
-
     // same basic group? then sort by string.
     const char* name1 = fh1.GetName();
     const char* name2 = fh2.GetName();
@@ -4016,14 +4061,14 @@
   size_t num_reference_fields =
       is_static ? klass->NumReferenceStaticFieldsDuringLinking()
                 : klass->NumReferenceInstanceFieldsDuringLinking();
-  const mirror::ObjectArray<mirror::ArtField>* fields =
+  mirror::ObjectArray<mirror::ArtField>* fields =
       is_static ? klass->GetSFields() : klass->GetIFields();
   // All of the fields that contain object references are guaranteed
   // to be at the beginning of the fields list.
   for (size_t i = 0; i < num_reference_fields; ++i) {
     // Note that byte_offset is the offset from the beginning of
     // object, not the offset into instance data
-    const mirror::ArtField* field = fields->Get(i);
+    mirror::ArtField* field = fields->Get(i);
     MemberOffset byte_offset = field->GetOffsetDuringLinking();
     CHECK_EQ(byte_offset.Uint32Value() & (CLASS_OFFSET_ALIGNMENT - 1), 0U);
     if (CLASS_CAN_ENCODE_OFFSET(byte_offset.Uint32Value())) {
@@ -4058,7 +4103,7 @@
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                                        const mirror::Class* referrer) {
+                                        mirror::Class* referrer) {
   Thread* self = Thread::Current();
   SirtRef<mirror::DexCache> dex_cache(self, referrer->GetDexCache());
   SirtRef<mirror::ClassLoader> class_loader(self, referrer->GetClassLoader());
@@ -4101,7 +4146,7 @@
                                               uint32_t method_idx,
                                               const SirtRef<mirror::DexCache>& dex_cache,
                                               const SirtRef<mirror::ClassLoader>& class_loader,
-                                              const mirror::ArtMethod* referrer,
+                                              mirror::ArtMethod* referrer,
                                               InvokeType type) {
   DCHECK(dex_cache.get() != NULL);
   // Check for hit in the dex cache.
@@ -4186,7 +4231,7 @@
       mirror::Class* referring_class = referrer->GetDeclaringClass();
       if (!referring_class->CanAccess(methods_class)) {
         ThrowIllegalAccessErrorClassForMethodDispatch(referring_class, methods_class,
-                                                      referrer, resolved, type);
+                                                      resolved, type);
         return NULL;
       } else if (!referring_class->CanAccessMember(methods_class,
                                                    resolved->GetAccessFlags())) {
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 8722de3..7e31356 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -121,7 +121,7 @@
   // Resolve a String with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::String* ResolveString(uint32_t string_idx, const mirror::ArtMethod* referrer)
+  mirror::String* ResolveString(uint32_t string_idx, mirror::ArtMethod* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a String with the given index from the DexFile, storing the
@@ -133,17 +133,16 @@
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identity the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx,
-                             const mirror::Class* referrer)
+  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(uint16_t type_idx, const mirror::ArtMethod* referrer)
+  mirror::Class* ResolveType(uint16_t type_idx, mirror::ArtMethod* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* ResolveType(uint16_t type_idx, const mirror::ArtField* referrer)
+  mirror::Class* ResolveType(uint16_t type_idx, mirror::ArtField* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a type with the given ID from the DexFile, storing the
@@ -164,15 +163,15 @@
                                    uint32_t method_idx,
                                    const SirtRef<mirror::DexCache>& dex_cache,
                                    const SirtRef<mirror::ClassLoader>& class_loader,
-                                   const mirror::ArtMethod* referrer,
+                                   mirror::ArtMethod* referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* ResolveMethod(uint32_t method_idx, const mirror::ArtMethod* referrer,
+  mirror::ArtMethod* ResolveMethod(uint32_t method_idx, mirror::ArtMethod* referrer,
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtField* ResolveField(uint32_t field_idx, const mirror::ArtMethod* referrer,
+  mirror::ArtField* ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                  bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -319,19 +318,23 @@
   mirror::Class* CreateProxyClass(ScopedObjectAccess& soa, jstring name, jobjectArray interfaces,
                                   jobject loader, jobjectArray methods, jobjectArray throws)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  std::string GetDescriptorForProxy(const mirror::Class* proxy_class)
+  std::string GetDescriptorForProxy(mirror::Class* proxy_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtMethod* FindMethodForProxy(const mirror::Class* proxy_class,
-                                        const mirror::ArtMethod* proxy_method)
+  mirror::ArtMethod* FindMethodForProxy(mirror::Class* proxy_class,
+                                        mirror::ArtMethod* proxy_method)
       LOCKS_EXCLUDED(dex_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the oat code for a method when its class isn't yet initialized
-  const void* GetOatCodeFor(const mirror::ArtMethod* method)
+  const void* GetQuickOatCodeFor(mirror::ArtMethod* method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetPortableOatCodeFor(mirror::ArtMethod* method, bool* have_portable_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the oat code for a method from a method index.
-  const void* GetOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx, uint32_t method_idx)
+  const void* GetQuickOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx, uint32_t method_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  const void* GetPortableOatCodeFor(const DexFile& dex_file, uint16_t class_def_idx, uint32_t method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   pid_t GetClassesLockOwner();  // For SignalCatcher.
@@ -368,7 +371,7 @@
   mirror::ArtMethod* AllocArtMethod(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  const OatFile::OatMethod GetOatMethodFor(const mirror::ArtMethod* method)
+  const OatFile::OatMethod GetOatMethodFor(mirror::ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   OatFile& GetImageOatFile(gc::space::ImageSpace* space)
@@ -451,9 +454,9 @@
                                                 SirtRef<mirror::ClassLoader>& class_loader2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsSameMethodSignatureInDifferentClassContexts(const mirror::ArtMethod* method,
-                                                     const mirror::Class* klass1,
-                                                     const mirror::Class* klass2)
+  bool IsSameMethodSignatureInDifferentClassContexts(mirror::ArtMethod* method,
+                                                     mirror::Class* klass1,
+                                                     mirror::Class* klass2)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool LinkClass(Thread* self, const SirtRef<mirror::Class>& klass,
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 1744050..fb979c2 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -55,7 +55,7 @@
     AssertPrimitiveClass(descriptor, class_linker_->FindSystemClass(descriptor.c_str()));
   }
 
-  void AssertPrimitiveClass(const std::string& descriptor, const mirror::Class* primitive)
+  void AssertPrimitiveClass(const std::string& descriptor, mirror::Class* primitive)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ClassHelper primitive_ch(primitive);
     ASSERT_TRUE(primitive != NULL);
@@ -212,7 +212,7 @@
       }
     }
     EXPECT_EQ(klass->IsInterface(), klass->GetVTable() == NULL);
-    const mirror::IfTable* iftable = klass->GetIfTable();
+    mirror::IfTable* iftable = klass->GetIfTable();
     for (int i = 0; i < klass->GetIfTableCount(); i++) {
       mirror::Class* interface = iftable->GetInterface(i);
       ASSERT_TRUE(interface != NULL);
@@ -469,20 +469,23 @@
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_resolved_types_),             "dexCacheResolvedTypes"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_strings_),                    "dexCacheStrings"));
 
+    // alphabetical 64-bit
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_interpreter_),            "entryPointFromInterpreter"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_jni_),                    "entryPointFromJni"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_portable_compiled_code_), "entryPointFromPortableCompiledCode"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_quick_compiled_code_),    "entryPointFromQuickCompiledCode"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, gc_map_),                                  "gcMap"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_mapping_table_),                     "quickMappingTable"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_vmap_table_),                        "quickVmapTable"));
+
     // alphabetical 32-bit
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, access_flags_),                   "accessFlags"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, code_item_offset_),               "codeItemOffset"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, core_spill_mask_),                "coreSpillMask"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_compiled_code_), "entryPointFromCompiledCode"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_interpreter_),   "entryPointFromInterpreter"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, fp_spill_mask_),                  "fpSpillMask"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, frame_size_in_bytes_),            "frameSizeInBytes"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, gc_map_),                         "gcMap"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, mapping_table_),                  "mappingTable"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, method_dex_index_),               "methodDexIndex"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_code_item_offset_),           "dexCodeItemOffset"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_method_index_),               "dexMethodIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, method_index_),                   "methodIndex"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, native_method_),                  "nativeMethod"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, vmap_table_),                     "vmapTable"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_core_spill_mask_),          "quickCoreSpillMask"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_fp_spill_mask_),            "quickFpSpillMask"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, quick_frame_size_in_bytes_),      "quickFrameSizeInBytes"));
   };
 };
 
@@ -762,10 +765,10 @@
   mirror::Class* array_class = class_linker_->FindSystemClass("[Ljava/lang/String;");
   mirror::ObjectArray<mirror::String>* array =
       mirror::ObjectArray<mirror::String>::Alloc(soa.Self(), array_class, 0);
-  uint32_t array_offset = reinterpret_cast<uint32_t>(array);
-  uint32_t data_offset =
-      array_offset + mirror::ObjectArray<mirror::String>::DataOffset(sizeof(mirror::String*)).Uint32Value();
-  if (sizeof(mirror::String*) == sizeof(int32_t)) {
+  uintptr_t data_offset =
+      reinterpret_cast<uintptr_t>(array->GetRawData(sizeof(mirror::HeapReference<mirror::String>),
+                                                    0));
+  if (sizeof(mirror::HeapReference<mirror::String>) == sizeof(int32_t)) {
     EXPECT_TRUE(IsAligned<4>(data_offset));  // Check 4 byte alignment.
   } else {
     EXPECT_TRUE(IsAligned<8>(data_offset));  // Check 8 byte alignment.
diff --git a/runtime/common_test.h b/runtime/common_test.h
index a75a513..daa2ff1 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -26,7 +26,7 @@
 
 #include "../../external/icu4c/common/unicode/uvernum.h"
 #include "../compiler/dex/quick/dex_file_to_method_inliner_map.h"
-#include "../compiler/dex/verified_methods_data.h"
+#include "../compiler/dex/verification_results.h"
 #include "../compiler/driver/compiler_driver.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
@@ -48,6 +48,7 @@
 #include "scoped_thread_state_change.h"
 #include "ScopedLocalRef.h"
 #include "thread.h"
+#include "utils.h"
 #include "UniquePtr.h"
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
@@ -160,11 +161,7 @@
 
 #if defined(__arm__)
 
-
-#include <signal.h>
-#include <asm/sigcontext.h>
-#include <asm-generic/ucontext.h>
-
+#include <sys/ucontext.h>
 
 // A signal handler called when have an illegal instruction.  We record the fact in
 // a global boolean and then increment the PC in the signal context to return to
@@ -266,11 +263,6 @@
 
 class CommonTest : public testing::Test {
  public:
-  static void MakeExecutable(const mirror::ByteArray* code_array) {
-    CHECK(code_array != NULL);
-    MakeExecutable(code_array->GetData(), code_array->GetLength());
-  }
-
   static void MakeExecutable(const std::vector<uint8_t>& code) {
     CHECK_NE(code.size(), 0U);
     MakeExecutable(&code[0], code.size());
@@ -284,31 +276,39 @@
                                      const uint8_t* mapping_table,
                                      const uint8_t* vmap_table,
                                      const uint8_t* gc_map) {
-      return OatFile::OatMethod(NULL,
-                                reinterpret_cast<uint32_t>(code),
-                                frame_size_in_bytes,
-                                core_spill_mask,
-                                fp_spill_mask,
-                                reinterpret_cast<uint32_t>(mapping_table),
-                                reinterpret_cast<uint32_t>(vmap_table),
-                                reinterpret_cast<uint32_t>(gc_map));
+    const byte* base = nullptr;  // Base of data in oat file, i.e. 0.
+    uint32_t code_offset = PointerToLowMemUInt32(code);
+    uint32_t mapping_table_offset = PointerToLowMemUInt32(mapping_table);
+    uint32_t vmap_table_offset = PointerToLowMemUInt32(vmap_table);
+    uint32_t gc_map_offset = PointerToLowMemUInt32(gc_map);
+    return OatFile::OatMethod(base,
+                              code_offset,
+                              frame_size_in_bytes,
+                              core_spill_mask,
+                              fp_spill_mask,
+                              mapping_table_offset,
+                              vmap_table_offset,
+                              gc_map_offset);
   }
 
   void MakeExecutable(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(method != NULL);
+    CHECK(method != nullptr);
 
-    const CompiledMethod* compiled_method = NULL;
+    const CompiledMethod* compiled_method = nullptr;
     if (!method->IsAbstract()) {
-      const mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
+      mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
       const DexFile& dex_file = *dex_cache->GetDexFile();
       compiled_method =
           compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
                                                               method->GetDexMethodIndex()));
     }
-    if (compiled_method != NULL) {
-      const std::vector<uint8_t>& code = compiled_method->GetCode();
-      MakeExecutable(code);
-      const void* method_code = CompiledMethod::CodePointer(&code[0],
+    if (compiled_method != nullptr) {
+      const std::vector<uint8_t>* code = compiled_method->GetQuickCode();
+      if (code == nullptr) {
+        code = compiled_method->GetPortableCode();
+      }
+      MakeExecutable(*code);
+      const void* method_code = CompiledMethod::CodePointer(&(*code)[0],
                                                             compiled_method->GetInstructionSet());
       LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
       OatFile::OatMethod oat_method = CreateOatMethod(method_code,
@@ -321,9 +321,9 @@
       oat_method.LinkMethod(method);
       method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge);
     } else {
-      const void* method_code;
       // No code? You must mean to go into the interpreter.
-      method_code = GetCompiledCodeToInterpreterBridge();
+      const void* method_code = kUsePortableCompiler ? GetPortableToInterpreterBridge()
+                                                     : GetQuickToInterpreterBridge();
       OatFile::OatMethod oat_method = CreateOatMethod(method_code,
                                                       kStackAlignment,
                                                       0,
@@ -334,6 +334,14 @@
       oat_method.LinkMethod(method);
       method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge);
     }
+    // Create bridges to transition between the different kinds of compiled code.
+    if (method->GetEntryPointFromPortableCompiledCode() == nullptr) {
+      method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge());
+    } else {
+      CHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr);
+      method->SetEntryPointFromQuickCompiledCode(GetQuickToPortableBridge());
+      method->SetIsPortableCompiled();
+    }
   }
 
   static void MakeExecutable(const void* code_start, size_t code_length) {
@@ -419,15 +427,11 @@
     std::string max_heap_string(StringPrintf("-Xmx%zdm", gc::Heap::kDefaultMaximumSize / MB));
 
     // TODO: make selectable
-#if defined(ART_USE_PORTABLE_COMPILER)
-    CompilerBackend compiler_backend = kPortable;
-#else
-    CompilerBackend compiler_backend = kQuick;
-#endif
+    CompilerBackend compiler_backend = kUsePortableCompiler ? kPortable : kQuick;
 
-    verified_methods_data_.reset(new VerifiedMethodsData);
+    verification_results_.reset(new VerificationResults);
     method_inliner_map_.reset(compiler_backend == kQuick ? new DexFileToMethodInlinerMap : nullptr);
-    callbacks_.Reset(verified_methods_data_.get(), method_inliner_map_.get());
+    callbacks_.Reset(verification_results_.get(), method_inliner_map_.get());
     Runtime::Options options;
     options.push_back(std::make_pair("compilercallbacks", static_cast<CompilerCallbacks*>(&callbacks_)));
     options.push_back(std::make_pair("bootclasspath", &boot_class_path_));
@@ -464,6 +468,8 @@
       instruction_set = kMips;
 #elif defined(__i386__)
       instruction_set = kX86;
+#elif defined(__x86_64__)
+      instruction_set = kX86_64;
 #endif
 
       for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
@@ -474,13 +480,12 @@
         }
       }
       class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
-      CumulativeLogger timer("Compilation times");
-      compiler_driver_.reset(new CompilerDriver(verified_methods_data_.get(),
+      compiler_driver_.reset(new CompilerDriver(verification_results_.get(),
                                                 method_inliner_map_.get(),
                                                 compiler_backend, instruction_set,
                                                 instruction_set_features,
                                                 true, new CompilerDriver::DescriptorSet,
-                                                2, true, true, &timer));
+                                                2, true));
     }
     // We typically don't generate an image in unit tests, disable this optimization by default.
     compiler_driver_->SetSupportBootImageFixup(false);
@@ -527,7 +532,7 @@
     compiler_driver_.reset();
     callbacks_.Reset(nullptr, nullptr);
     method_inliner_map_.reset();
-    verified_methods_data_.reset();
+    verification_results_.reset();
     STLDeleteElements(&opened_dex_files_);
 
     Runtime::Current()->GetHeap()->VerifyHeap();  // Check for heap corruption after the test
@@ -645,7 +650,9 @@
     image_reservation_.reset(MemMap::MapAnonymous("image reservation",
                                                   reinterpret_cast<byte*>(ART_BASE_ADDRESS),
                                                   (size_t)100 * 1024 * 1024,  // 100MB
-                                                  PROT_NONE, &error_msg));
+                                                  PROT_NONE,
+                                                  false /* no need for 4gb flag with fixed mmap*/,
+                                                  &error_msg));
     CHECK(image_reservation_.get() != nullptr) << error_msg;
   }
 
@@ -655,31 +662,31 @@
 
   class TestCompilerCallbacks : public CompilerCallbacks {
    public:
-    TestCompilerCallbacks() : verified_methods_data_(nullptr), method_inliner_map_(nullptr) { }
+    TestCompilerCallbacks() : verification_results_(nullptr), method_inliner_map_(nullptr) { }
 
-    void Reset(VerifiedMethodsData* verified_methods_data,
+    void Reset(VerificationResults* verification_results,
                DexFileToMethodInlinerMap* method_inliner_map) {
-        verified_methods_data_ = verified_methods_data;
+        verification_results_ = verification_results;
         method_inliner_map_ = method_inliner_map;
     }
 
     virtual bool MethodVerified(verifier::MethodVerifier* verifier)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-      CHECK(verified_methods_data_);
-      bool result = verified_methods_data_->ProcessVerifiedMethod(verifier);
+      CHECK(verification_results_);
+      bool result = verification_results_->ProcessVerifiedMethod(verifier);
       if (result && method_inliner_map_ != nullptr) {
         MethodReference ref = verifier->GetMethodReference();
         method_inliner_map_->GetMethodInliner(ref.dex_file)
-            ->AnalyseMethodCode(ref.dex_method_index, verifier->CodeItem());
+            ->AnalyseMethodCode(verifier);
       }
       return result;
     }
     virtual void ClassRejected(ClassReference ref) {
-      verified_methods_data_->AddRejectedClass(ref);
+      verification_results_->AddRejectedClass(ref);
     }
 
    private:
-    VerifiedMethodsData* verified_methods_data_;
+    VerificationResults* verification_results_;
     DexFileToMethodInlinerMap* method_inliner_map_;
   };
 
@@ -690,7 +697,7 @@
   UniquePtr<Runtime> runtime_;
   // Owned by the runtime
   ClassLinker* class_linker_;
-  UniquePtr<VerifiedMethodsData> verified_methods_data_;
+  UniquePtr<VerificationResults> verification_results_;
   UniquePtr<DexFileToMethodInlinerMap> method_inliner_map_;
   TestCompilerCallbacks callbacks_;
   UniquePtr<CompilerDriver> compiler_driver_;
@@ -738,11 +745,12 @@
 // MCLinker link LLVM ELF output because we no longer just have code
 // blobs in memory. We'll need to dlopen to load and relocate
 // temporary output to resurrect these tests.
-#if defined(ART_USE_PORTABLE_COMPILER)
-#define TEST_DISABLED_FOR_PORTABLE() printf("WARNING: TEST DISABLED FOR PORTABLE\n"); return
-#else
-#define TEST_DISABLED_FOR_PORTABLE()
-#endif
+#define TEST_DISABLED_FOR_PORTABLE() \
+  if (kUsePortableCompiler) { \
+    printf("WARNING: TEST DISABLED FOR PORTABLE\n"); \
+    return; \
+  }
+
 }  // namespace art
 
 namespace std {
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 0419dab..24d16c4 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -33,7 +33,7 @@
 
 namespace art {
 
-static void AddReferrerLocation(std::ostream& os, const mirror::Class* referrer)
+static void AddReferrerLocation(std::ostream& os, mirror::Class* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (referrer != NULL) {
     ClassHelper kh(referrer);
@@ -46,7 +46,7 @@
 }
 
 static void ThrowException(const ThrowLocation* throw_location, const char* exception_descriptor,
-                           const mirror::Class* referrer, const char* fmt, va_list* args = NULL)
+                           mirror::Class* referrer, const char* fmt, va_list* args = NULL)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::ostringstream msg;
   if (args != NULL) {
@@ -68,7 +68,7 @@
 
 // AbstractMethodError
 
-void ThrowAbstractMethodError(const mirror::ArtMethod* method) {
+void ThrowAbstractMethodError(mirror::ArtMethod* method) {
   ThrowException(NULL, "Ljava/lang/AbstractMethodError;", NULL,
                  StringPrintf("abstract method \"%s\"",
                               PrettyMethod(method).c_str()).c_str());
@@ -89,8 +89,7 @@
 
 // ArrayStoreException
 
-void ThrowArrayStoreException(const mirror::Class* element_class,
-                              const mirror::Class* array_class) {
+void ThrowArrayStoreException(mirror::Class* element_class, mirror::Class* array_class) {
   ThrowException(NULL, "Ljava/lang/ArrayStoreException;", NULL,
                  StringPrintf("%s cannot be stored in an array of type %s",
                               PrettyDescriptor(element_class).c_str(),
@@ -99,7 +98,7 @@
 
 // ClassCastException
 
-void ThrowClassCastException(const mirror::Class* dest_type, const mirror::Class* src_type) {
+void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type) {
   ThrowException(NULL, "Ljava/lang/ClassCastException;", NULL,
                  StringPrintf("%s cannot be cast to %s",
                               PrettyDescriptor(src_type).c_str(),
@@ -120,7 +119,7 @@
 
 // ClassFormatError
 
-void ThrowClassFormatError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/ClassFormatError;", referrer, fmt, &args);
@@ -136,8 +135,7 @@
 }
 
 void ThrowIllegalAccessErrorClassForMethodDispatch(mirror::Class* referrer, mirror::Class* accessed,
-                                                   const mirror::ArtMethod* caller,
-                                                   const mirror::ArtMethod* called,
+                                                   mirror::ArtMethod* called,
                                                    InvokeType type) {
   std::ostringstream msg;
   msg << "Illegal class access ('" << PrettyDescriptor(referrer) << "' attempting to access '"
@@ -160,7 +158,7 @@
   ThrowException(NULL, "Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
-void ThrowIllegalAccessErrorFinalField(const mirror::ArtMethod* referrer,
+void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer,
                                        mirror::ArtField* accessed) {
   std::ostringstream msg;
   msg << "Final field '" << PrettyField(accessed, false) << "' cannot be written to by method '"
@@ -188,7 +186,7 @@
 
 void ThrowIncompatibleClassChangeError(InvokeType expected_type, InvokeType found_type,
                                        mirror::ArtMethod* method,
-                                       const mirror::ArtMethod* referrer) {
+                                       mirror::ArtMethod* referrer) {
   std::ostringstream msg;
   msg << "The method '" << PrettyMethod(method) << "' was expected to be of type "
       << expected_type << " but instead was found to be of type " << found_type;
@@ -197,9 +195,9 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(const mirror::ArtMethod* interface_method,
+void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(mirror::ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
-                                                                const mirror::ArtMethod* referrer) {
+                                                                mirror::ArtMethod* referrer) {
   // Referrer is calling interface_method on this_object, however, the interface_method isn't
   // implemented by this_object.
   CHECK(this_object != NULL);
@@ -213,8 +211,8 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeErrorField(const mirror::ArtField* resolved_field, bool is_static,
-                                            const mirror::ArtMethod* referrer) {
+void ThrowIncompatibleClassChangeErrorField(mirror::ArtField* resolved_field, bool is_static,
+                                            mirror::ArtMethod* referrer) {
   std::ostringstream msg;
   msg << "Expected '" << PrettyField(resolved_field) << "' to be a "
       << (is_static ? "static" : "instance") << " field" << " rather than a "
@@ -223,7 +221,7 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/IncompatibleClassChangeError;", referrer, fmt, &args);
@@ -241,7 +239,7 @@
 
 // LinkageError
 
-void ThrowLinkageError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/LinkageError;", referrer, fmt, &args);
@@ -487,7 +485,7 @@
 
 // VerifyError
 
-void ThrowVerifyError(const mirror::Class* referrer, const char* fmt, ...) {
+void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException(NULL, "Ljava/lang/VerifyError;", referrer, fmt, &args);
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 3164f30..792cdef 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -33,7 +33,7 @@
 
 // AbstractMethodError
 
-void ThrowAbstractMethodError(const mirror::ArtMethod* method)
+void ThrowAbstractMethodError(mirror::ArtMethod* method)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 // ArithmeticException
@@ -47,8 +47,7 @@
 
 // ArrayStoreException
 
-void ThrowArrayStoreException(const mirror::Class* element_class,
-                              const mirror::Class* array_class)
+void ThrowArrayStoreException(mirror::Class* element_class, mirror::Class* array_class)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 // ClassCircularityError
@@ -58,7 +57,7 @@
 
 // ClassCastException
 
-void ThrowClassCastException(const mirror::Class* dest_type, const mirror::Class* src_type)
+void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowClassCastException(const ThrowLocation* throw_location, const char* msg)
@@ -66,7 +65,7 @@
 
 // ClassFormatError
 
-void ThrowClassFormatError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -76,8 +75,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIllegalAccessErrorClassForMethodDispatch(mirror::Class* referrer, mirror::Class* accessed,
-                                                   const mirror::ArtMethod* caller,
-                                                   const mirror::ArtMethod* called,
+                                                   mirror::ArtMethod* called,
                                                    InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -87,8 +85,7 @@
 void ThrowIllegalAccessErrorField(mirror::Class* referrer, mirror::ArtField* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorFinalField(const mirror::ArtMethod* referrer,
-                                       mirror::ArtField* accessed)
+void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer, mirror::ArtField* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIllegalAccessError(mirror::Class* referrer, const char* fmt, ...)
@@ -103,20 +100,19 @@
 // IncompatibleClassChangeError
 
 void ThrowIncompatibleClassChangeError(InvokeType expected_type, InvokeType found_type,
-                                       mirror::ArtMethod* method,
-                                       const mirror::ArtMethod* referrer)
+                                       mirror::ArtMethod* method, mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(const mirror::ArtMethod* interface_method,
+void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(mirror::ArtMethod* interface_method,
                                                                 mirror::Object* this_object,
-                                                                const mirror::ArtMethod* referrer)
+                                                                mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeErrorField(const mirror::ArtField* resolved_field, bool is_static,
-                                            const mirror::ArtMethod* referrer)
+void ThrowIncompatibleClassChangeErrorField(mirror::ArtField* resolved_field, bool is_static,
+                                            mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -127,7 +123,7 @@
 
 // LinkageError
 
-void ThrowLinkageError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -186,7 +182,7 @@
 
 // VerifyError
 
-void ThrowVerifyError(const mirror::Class* referrer, const char* fmt, ...)
+void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 2141997..8280c7c 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -63,6 +63,9 @@
   mirror::ArtMethod* method;
   uint32_t dex_pc;
 
+  AllocRecordStackTraceElement() : method(nullptr), dex_pc(0) {
+  }
+
   int32_t LineNumber() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return MethodHelper(method).GetLineNumFromDexPC(dex_pc);
   }
@@ -81,6 +84,20 @@
     }
     return depth;
   }
+
+  void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (type != nullptr) {
+      type = down_cast<mirror::Class*>(visitor(type, arg));
+    }
+    for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
+      mirror::ArtMethod*& m = stack[stack_frame].method;
+      if (m == nullptr) {
+        break;
+      }
+      m = down_cast<mirror::ArtMethod*>(visitor(m, arg));
+    }
+  }
 };
 
 struct Breakpoint {
@@ -101,7 +118,7 @@
   virtual ~DebugInstrumentationListener() {}
 
   virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method, uint32_t dex_pc)
+                             mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
@@ -111,7 +128,7 @@
   }
 
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method,
+                            mirror::ArtMethod* method,
                             uint32_t dex_pc, const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (method->IsNative()) {
@@ -122,7 +139,7 @@
   }
 
   virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc)
+                            mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
@@ -130,7 +147,7 @@
   }
 
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                          mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc);
   }
@@ -303,7 +320,7 @@
  *
  * Null objects are tagged JT_OBJECT.
  */
-static JDWP::JdwpTag TagFromObject(const mirror::Object* o)
+static JDWP::JdwpTag TagFromObject(mirror::Object* o)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return (o == NULL) ? JDWP::JT_OBJECT : TagFromClass(o->GetClass());
 }
@@ -464,6 +481,8 @@
 }
 
 void Dbg::StopJdwp() {
+  // Prevent the JDWP thread from processing incoming JDWP packets after we close the connection.
+  Disposed();
   delete gJdwpState;
   gJdwpState = NULL;
   delete gRegistry;
@@ -773,6 +792,8 @@
 JDWP::JdwpError Dbg::GetInstanceCounts(const std::vector<JDWP::RefTypeId>& class_ids,
                                        std::vector<uint64_t>& counts)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  heap->CollectGarbage(false);
   std::vector<mirror::Class*> classes;
   counts.clear();
   for (size_t i = 0; i < class_ids.size(); ++i) {
@@ -784,19 +805,20 @@
     classes.push_back(c);
     counts.push_back(0);
   }
-
-  Runtime::Current()->GetHeap()->CountInstances(classes, false, &counts[0]);
+  heap->CountInstances(classes, false, &counts[0]);
   return JDWP::ERR_NONE;
 }
 
 JDWP::JdwpError Dbg::GetInstances(JDWP::RefTypeId class_id, int32_t max_count, std::vector<JDWP::ObjectId>& instances)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // We only want reachable instances, so do a GC.
+  heap->CollectGarbage(false);
   JDWP::JdwpError status;
   mirror::Class* c = DecodeClass(class_id, status);
-  if (c == NULL) {
+  if (c == nullptr) {
     return status;
   }
-
   std::vector<mirror::Object*> raw_instances;
   Runtime::Current()->GetHeap()->GetInstances(c, max_count, raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
@@ -808,13 +830,14 @@
 JDWP::JdwpError Dbg::GetReferringObjects(JDWP::ObjectId object_id, int32_t max_count,
                                          std::vector<JDWP::ObjectId>& referring_objects)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  heap->CollectGarbage(false);
   mirror::Object* o = gRegistry->Get<mirror::Object*>(object_id);
   if (o == NULL || o == ObjectRegistry::kInvalidObject) {
     return JDWP::ERR_INVALID_OBJECT;
   }
-
   std::vector<mirror::Object*> raw_instances;
-  Runtime::Current()->GetHeap()->GetReferringObjects(o, max_count, raw_instances);
+  heap->GetReferringObjects(o, max_count, raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
     referring_objects.push_back(gRegistry->Add(raw_instances[i]));
   }
@@ -1054,16 +1077,16 @@
     size_t width = GetTagWidth(tag);
     uint8_t* dst = expandBufAddSpace(pReply, count * width);
     if (width == 8) {
-      const uint64_t* src8 = reinterpret_cast<uint64_t*>(a->GetRawData(sizeof(uint64_t)));
+      const uint64_t* src8 = reinterpret_cast<uint64_t*>(a->GetRawData(sizeof(uint64_t), 0));
       for (int i = 0; i < count; ++i) JDWP::Write8BE(&dst, src8[offset + i]);
     } else if (width == 4) {
-      const uint32_t* src4 = reinterpret_cast<uint32_t*>(a->GetRawData(sizeof(uint32_t)));
+      const uint32_t* src4 = reinterpret_cast<uint32_t*>(a->GetRawData(sizeof(uint32_t), 0));
       for (int i = 0; i < count; ++i) JDWP::Write4BE(&dst, src4[offset + i]);
     } else if (width == 2) {
-      const uint16_t* src2 = reinterpret_cast<uint16_t*>(a->GetRawData(sizeof(uint16_t)));
+      const uint16_t* src2 = reinterpret_cast<uint16_t*>(a->GetRawData(sizeof(uint16_t), 0));
       for (int i = 0; i < count; ++i) JDWP::Write2BE(&dst, src2[offset + i]);
     } else {
-      const uint8_t* src = reinterpret_cast<uint8_t*>(a->GetRawData(sizeof(uint8_t)));
+      const uint8_t* src = reinterpret_cast<uint8_t*>(a->GetRawData(sizeof(uint8_t), 0));
       memcpy(dst, &src[offset * width], count * width);
     }
   } else {
@@ -1079,10 +1102,13 @@
   return JDWP::ERR_NONE;
 }
 
-template <typename T> void CopyArrayData(mirror::Array* a, JDWP::Request& src, int offset, int count) {
+template <typename T>
+static void CopyArrayData(mirror::Array* a, JDWP::Request& src, int offset, int count)
+    NO_THREAD_SAFETY_ANALYSIS {
+  // TODO: fix when annotalysis correctly handles non-member functions.
   DCHECK(a->GetClass()->IsPrimitiveArray());
 
-  T* dst = &(reinterpret_cast<T*>(a->GetRawData(sizeof(T)))[offset * sizeof(T)]);
+  T* dst = reinterpret_cast<T*>(a->GetRawData(sizeof(T), offset));
   for (int i = 0; i < count; ++i) {
     *dst++ = src.ReadValue(sizeof(T));
   }
@@ -1926,7 +1952,7 @@
         JDWP::FrameId frame_id(GetFrameId());
         JDWP::JdwpLocation location;
         SetLocation(location, GetMethod(), GetDexPc());
-        VLOG(jdwp) << StringPrintf("    Frame %3zd: id=%3lld ", depth_, frame_id) << location;
+        VLOG(jdwp) << StringPrintf("    Frame %3zd: id=%3" PRIu64 " ", depth_, frame_id) << location;
         expandBufAdd8BE(buf_, frame_id);
         expandBufAddLocation(buf_, location);
       }
@@ -2283,7 +2309,7 @@
   visitor.WalkStack();
 }
 
-void Dbg::PostLocationEvent(const mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
+void Dbg::PostLocationEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
                             int event_flags, const JValue* return_value) {
   mirror::Class* c = m->GetDeclaringClass();
 
@@ -2338,7 +2364,7 @@
 }
 
 void Dbg::UpdateDebugger(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* m, uint32_t dex_pc) {
+                         mirror::ArtMethod* m, uint32_t dex_pc) {
   if (!IsDebuggerActive() || dex_pc == static_cast<uint32_t>(-2) /* fake method exit */) {
     return;
   }
@@ -2630,7 +2656,7 @@
       if (!m->IsRuntimeMethod()) {
         ++single_step_control_->stack_depth;
         if (single_step_control_->method == NULL) {
-          const mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
+          mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
           single_step_control_->method = m;
           *line_number_ = -1;
           if (dex_cache != NULL) {
@@ -2699,7 +2725,7 @@
     uint32_t last_pc;
   };
   single_step_control->dex_pcs.clear();
-  const mirror::ArtMethod* m = single_step_control->method;
+  mirror::ArtMethod* m = single_step_control->method;
   if (!m->IsNative()) {
     DebugCallbackContext context(single_step_control, line_number);
     MethodHelper mh(m);
@@ -3062,7 +3088,7 @@
   // Run through and find all chunks.  [Currently just find the first.]
   ScopedByteArrayRO contents(env, dataArray.get());
   if (length != request_length) {
-    LOG(WARNING) << StringPrintf("bad chunk found (len=%u pktLen=%d)", length, request_length);
+    LOG(WARNING) << StringPrintf("bad chunk found (len=%u pktLen=%zd)", length, request_length);
     return false;
   }
 
@@ -3454,7 +3480,7 @@
             Flush();
         }
     }
-    const mirror::Object* obj = reinterpret_cast<const mirror::Object*>(start);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
 
     // Determine the type of this chunk.
     // OLD-TODO: if context.merge, see if this chunk is different from the last chunk.
@@ -3497,8 +3523,8 @@
     *p_++ = length - 1;
   }
 
-  uint8_t ExamineObject(const mirror::Object* o, bool is_native_heap)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
+  uint8_t ExamineObject(mirror::Object* o, bool is_native_heap)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     if (o == NULL) {
       return HPSG_STATE(SOLIDITY_FREE, 0);
     }
@@ -3751,7 +3777,7 @@
               << PrettyClass(record->type);
 
     for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
-      const mirror::ArtMethod* m = record->stack[stack_frame].method;
+      mirror::ArtMethod* m = record->stack[stack_frame].method;
       if (m == NULL) {
         break;
       }
@@ -3767,6 +3793,37 @@
   }
 }
 
+void Dbg::UpdateObjectPointers(RootVisitor* visitor, void* arg) {
+  {
+    MutexLock mu(Thread::Current(), gAllocTrackerLock);
+    if (recent_allocation_records_ != nullptr) {
+      size_t i = HeadIndex();
+      size_t count = gAllocRecordCount;
+      while (count--) {
+        AllocRecord* record = &recent_allocation_records_[i];
+        DCHECK(record != nullptr);
+        record->UpdateObjectPointers(visitor, arg);
+        i = (i + 1) & (gAllocRecordMax - 1);
+      }
+    }
+  }
+  if (gRegistry != nullptr) {
+    gRegistry->UpdateObjectPointers(visitor, arg);
+  }
+}
+
+void Dbg::AllowNewObjectRegistryObjects() {
+  if (gRegistry != nullptr) {
+    gRegistry->AllowNewObjects();
+  }
+}
+
+void Dbg::DisallowNewObjectRegistryObjects() {
+  if (gRegistry != nullptr) {
+    gRegistry->DisallowNewObjects();
+  }
+}
+
 class StringTable {
  public:
   StringTable() {
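// Dbg::UpdateObjectPointers above walks the allocation-record ring buffer
// from HeadIndex() and wraps the index with `& (gAllocRecordMax - 1)`, which
// relies on the capacity being a power of two. A minimal stand-alone sketch
// of that traversal over a plain array (not ART's AllocRecord):

#include <cstddef>
#include <cstdio>

// Visit `count` entries of a ring buffer with power-of-two capacity `max`,
// starting at index `head` and wrapping with a mask instead of a modulo.
static void WalkRing(const int* records, size_t max, size_t head, size_t count) {
  size_t i = head;
  while (count-- > 0) {
    std::printf("%d ", records[i]);
    i = (i + 1) & (max - 1);
  }
  std::printf("\n");
}

int main() {
  const int records[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  WalkRing(records, 8, 6, 4);  // Prints: 6 7 0 1
  return 0;
}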
diff --git a/runtime/debugger.h b/runtime/debugger.h
index a3f8b9c..f1e3f45 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -366,7 +366,7 @@
     kMethodEntry    = 0x04,
     kMethodExit     = 0x08,
   };
-  static void PostLocationEvent(const mirror::ArtMethod* method, int pcOffset,
+  static void PostLocationEvent(mirror::ArtMethod* method, int pcOffset,
                                 mirror::Object* thisPtr, int eventFlags,
                                 const JValue* return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -382,7 +382,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void UpdateDebugger(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                             mirror::ArtMethod* method, uint32_t new_dex_pc)
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -452,6 +452,10 @@
   static jbyteArray GetRecentAllocations() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void DumpRecentAllocations();
 
+  // Updates the stored direct object pointers (called from SweepSystemWeaks).
+  static void UpdateObjectPointers(RootVisitor* visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   enum HpifWhen {
     HPIF_WHEN_NEVER = 0,
     HPIF_WHEN_NOW = 1,
@@ -476,6 +480,9 @@
   static void DdmSendHeapSegments(bool native)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static void AllowNewObjectRegistryObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static void DisallowNewObjectRegistryObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   static void DdmBroadcast(bool connect) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostThreadStartOrStop(Thread*, uint32_t)
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 3b2135c..a459308 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -44,7 +44,7 @@
 inline const DexFile::TryItem* DexFile::GetTryItems(const CodeItem& code_item, uint32_t offset) {
   const uint16_t* insns_end_ = &code_item.insns_[code_item.insns_size_in_code_units_];
   return reinterpret_cast<const TryItem*>
-      (RoundUp(reinterpret_cast<uint32_t>(insns_end_), 4)) + offset;
+      (RoundUp(reinterpret_cast<uintptr_t>(insns_end_), 4)) + offset;
 }
 
 static inline bool DexFileStringEquals(const DexFile* df1, uint32_t sidx1,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 429c516..eaba7eb 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -284,6 +284,27 @@
   }
 }
 
+DexFile::DexFile(const byte* base, size_t size,
+                 const std::string& location,
+                 uint32_t location_checksum,
+                 MemMap* mem_map)
+    : begin_(base),
+      size_(size),
+      location_(location),
+      location_checksum_(location_checksum),
+      mem_map_(mem_map),
+      modification_lock("DEX modification lock"),
+      header_(reinterpret_cast<const Header*>(base)),
+      string_ids_(reinterpret_cast<const StringId*>(base + header_->string_ids_off_)),
+      type_ids_(reinterpret_cast<const TypeId*>(base + header_->type_ids_off_)),
+      field_ids_(reinterpret_cast<const FieldId*>(base + header_->field_ids_off_)),
+      method_ids_(reinterpret_cast<const MethodId*>(base + header_->method_ids_off_)),
+      proto_ids_(reinterpret_cast<const ProtoId*>(base + header_->proto_ids_off_)),
+      class_defs_(reinterpret_cast<const ClassDef*>(base + header_->class_defs_off_)) {
+  CHECK(begin_ != NULL) << GetLocation();
+  CHECK_GT(size_, 0U) << GetLocation();
+}
+
 DexFile::~DexFile() {
   // We don't call DeleteGlobalRef on dex_object_ because we're only called by DestroyJavaVM, and
   // that's only called after DetachCurrentThread, which means there's no JNIEnv. We could
@@ -292,25 +313,12 @@
 }
 
 bool DexFile::Init(std::string* error_msg) {
-  InitMembers();
   if (!CheckMagicAndVersion(error_msg)) {
     return false;
   }
   return true;
 }
 
-void DexFile::InitMembers() {
-  const byte* b = begin_;
-  header_ = reinterpret_cast<const Header*>(b);
-  const Header* h = header_;
-  string_ids_ = reinterpret_cast<const StringId*>(b + h->string_ids_off_);
-  type_ids_ = reinterpret_cast<const TypeId*>(b + h->type_ids_off_);
-  field_ids_ = reinterpret_cast<const FieldId*>(b + h->field_ids_off_);
-  method_ids_ = reinterpret_cast<const MethodId*>(b + h->method_ids_off_);
-  proto_ids_ = reinterpret_cast<const ProtoId*>(b + h->proto_ids_off_);
-  class_defs_ = reinterpret_cast<const ClassDef*>(b + h->class_defs_off_);
-}
-
 bool DexFile::CheckMagicAndVersion(std::string* error_msg) const {
   CHECK(header_->magic_ != NULL) << GetLocation();
   if (!IsMagicValid(header_->magic_)) {
@@ -613,7 +621,7 @@
   return Signature(this, *proto_id);
 }
 
-int32_t DexFile::GetLineNumFromPC(const mirror::ArtMethod* method, uint32_t rel_pc) const {
+int32_t DexFile::GetLineNumFromPC(mirror::ArtMethod* method, uint32_t rel_pc) const {
   // For native method, lineno should be -2 to indicate it is native. Note that
   // "line number == -2" is how libcore tells from StackTraceElement.
   if (method->GetCodeItemOffset() == 0) {
@@ -856,6 +864,13 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) {
+  os << StringPrintf("[DexFile: %s dex-checksum=%08x location-checksum=%08x %p-%p]",
+                     dex_file.GetLocation().c_str(),
+                     dex_file.GetHeader().checksum_, dex_file.GetLocationChecksum(),
+                     dex_file.Begin(), dex_file.Begin() + dex_file.Size());
+  return os;
+}
 std::string Signature::ToString() const {
   if (dex_file_ == nullptr) {
     CHECK(proto_id_ == nullptr);
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 69593cd..46df455 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -794,7 +794,7 @@
   // Returns -2 for native methods (as expected in exception traces).
   //
   // This is used by runtime; therefore use art::Method not art::DexFile::Method.
-  int32_t GetLineNumFromPC(const mirror::ArtMethod* method, uint32_t rel_pc) const
+  int32_t GetLineNumFromPC(mirror::ArtMethod* method, uint32_t rel_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DecodeDebugInfo(const CodeItem* code_item, bool is_static, uint32_t method_idx,
@@ -849,30 +849,11 @@
   DexFile(const byte* base, size_t size,
           const std::string& location,
           uint32_t location_checksum,
-          MemMap* mem_map)
-      : begin_(base),
-        size_(size),
-        location_(location),
-        location_checksum_(location_checksum),
-        mem_map_(mem_map),
-        modification_lock("DEX modification lock"),
-        header_(0),
-        string_ids_(0),
-        type_ids_(0),
-        field_ids_(0),
-        method_ids_(0),
-        proto_ids_(0),
-        class_defs_(0) {
-    CHECK(begin_ != NULL) << GetLocation();
-    CHECK_GT(size_, 0U) << GetLocation();
-  }
+          MemMap* mem_map);
 
   // Top-level initializer that calls other Init methods.
   bool Init(std::string* error_msg);
 
-  // Caches pointers into to the various file sections.
-  void InitMembers();
-
   // Returns true if the header magic and version numbers are of the expected values.
   bool CheckMagicAndVersion(std::string* error_msg) const;
 
@@ -903,26 +884,27 @@
   Mutex modification_lock;
 
   // Points to the header section.
-  const Header* header_;
+  const Header* const header_;
 
   // Points to the base of the string identifier list.
-  const StringId* string_ids_;
+  const StringId* const string_ids_;
 
   // Points to the base of the type identifier list.
-  const TypeId* type_ids_;
+  const TypeId* const type_ids_;
 
   // Points to the base of the field identifier list.
-  const FieldId* field_ids_;
+  const FieldId* const field_ids_;
 
   // Points to the base of the method identifier list.
-  const MethodId* method_ids_;
+  const MethodId* const method_ids_;
 
   // Points to the base of the prototype identifier list.
-  const ProtoId* proto_ids_;
+  const ProtoId* const proto_ids_;
 
   // Points to the base of the class definition list.
-  const ClassDef* class_defs_;
+  const ClassDef* const class_defs_;
 };
+std::ostream& operator<<(std::ostream& os, const DexFile& dex_file);
 
 // Iterate over a dex file's ProtoId's paramters
 class DexFileParameterIterator {
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index dc9d337..528e112 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -113,13 +113,13 @@
 }
 
 bool DexFileVerifier::CheckPointerRange(const void* start, const void* end, const char* label) {
-  uint32_t range_start = reinterpret_cast<uint32_t>(start);
-  uint32_t range_end = reinterpret_cast<uint32_t>(end);
-  uint32_t file_start = reinterpret_cast<uint32_t>(begin_);
-  uint32_t file_end = file_start + size_;
+  const byte* range_start = reinterpret_cast<const byte*>(start);
+  const byte* range_end = reinterpret_cast<const byte*>(end);
+  const byte* file_start = reinterpret_cast<const byte*>(begin_);
+  const byte* file_end = file_start + size_;
   if (UNLIKELY((range_start < file_start) || (range_start > file_end) ||
                (range_end < file_start) || (range_end > file_end))) {
-    ErrorStringPrintf("Bad range for %s: %x to %x", label,
+    ErrorStringPrintf("Bad range for %s: %zx to %zx", label,
                       range_start - file_start, range_end - file_start);
     return false;
   }
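// The rewrite above switches CheckPointerRange from uint32_t casts to byte
// pointers and %zx offsets, so the bounds check stays correct on 64-bit
// hosts. A stand-alone sketch of the same check over a local buffer (not the
// verifier's begin_/size_ fields):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Returns true if [start, end) lies within [file_start, file_start + size).
static bool InRange(const void* start, const void* end,
                    const uint8_t* file_start, size_t size) {
  const uint8_t* range_start = static_cast<const uint8_t*>(start);
  const uint8_t* range_end = static_cast<const uint8_t*>(end);
  const uint8_t* file_end = file_start + size;
  if (range_start < file_start || range_start > file_end ||
      range_end < file_start || range_end > file_end) {
    std::printf("bad range: %zx to %zx\n",
                static_cast<size_t>(range_start - file_start),
                static_cast<size_t>(range_end - file_start));
    return false;
  }
  return true;
}

int main() {
  uint8_t buffer[128] = {};
  const size_t file_size = 64;  // Only the first 64 bytes count as the "file".
  std::printf("%d\n", InRange(buffer + 8, buffer + 16, buffer, file_size));   // 1
  std::printf("%d\n", InRange(buffer + 8, buffer + 100, buffer, file_size));  // 0
  return 0;
}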
@@ -284,7 +284,7 @@
 
   for (uint32_t i = 0; i < handlers_size; i++) {
     bool catch_all;
-    uint32_t offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(handlers_base);
+    size_t offset = ptr_ - handlers_base;
     int32_t size = DecodeSignedLeb128(&ptr_);
 
     if (UNLIKELY((size < -65536) || (size > 65536))) {
@@ -299,7 +299,7 @@
       catch_all = false;
     }
 
-    handler_offsets[i] = offset;
+    handler_offsets[i] = static_cast<uint32_t>(offset);
 
     while (size-- > 0) {
       uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
@@ -386,14 +386,14 @@
   return true;
 }
 
-bool DexFileVerifier::CheckPadding(uint32_t offset, uint32_t aligned_offset) {
+bool DexFileVerifier::CheckPadding(size_t offset, uint32_t aligned_offset) {
   if (offset < aligned_offset) {
     if (!CheckPointerRange(begin_ + offset, begin_ + aligned_offset, "section")) {
       return false;
     }
     while (offset < aligned_offset) {
       if (UNLIKELY(*ptr_ != '\0')) {
-        ErrorStringPrintf("Non-zero padding %x before section start at %x", *ptr_, offset);
+        ErrorStringPrintf("Non-zero padding %x before section start at %zx", *ptr_, offset);
         return false;
       }
       ptr_++;
@@ -634,7 +634,7 @@
   }
 
   // try_items are 4-byte aligned. Verify the spacer is 0.
-  if ((((uint32_t) &insns[insns_size] & 3) != 0) && (insns[insns_size] != 0)) {
+  if (((reinterpret_cast<uintptr_t>(&insns[insns_size]) & 3) != 0) && (insns[insns_size] != 0)) {
     ErrorStringPrintf("Non-zero padding: %x", insns[insns_size]);
     return false;
   }
@@ -975,9 +975,9 @@
   return true;
 }
 
-bool DexFileVerifier::CheckIntraSectionIterate(uint32_t offset, uint32_t count, uint16_t type) {
+bool DexFileVerifier::CheckIntraSectionIterate(size_t offset, uint32_t count, uint16_t type) {
   // Get the right alignment mask for the type of section.
-  uint32_t alignment_mask;
+  size_t alignment_mask;
   switch (type) {
     case DexFile::kDexTypeClassDataItem:
     case DexFile::kDexTypeStringDataItem:
@@ -993,7 +993,7 @@
 
   // Iterate through the items in the section.
   for (uint32_t i = 0; i < count; i++) {
-    uint32_t aligned_offset = (offset + alignment_mask) & ~alignment_mask;
+    size_t aligned_offset = (offset + alignment_mask) & ~alignment_mask;
 
     // Check the padding between items.
     if (!CheckPadding(offset, aligned_offset)) {
@@ -1134,7 +1134,7 @@
       offset_to_type_map_.Put(aligned_offset, type);
     }
 
-    aligned_offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+    aligned_offset = ptr_ - begin_;
     if (UNLIKELY(aligned_offset > size_)) {
       ErrorStringPrintf("Item %d at ends out of bounds", i);
       return false;
@@ -1146,7 +1146,7 @@
   return true;
 }
 
-bool DexFileVerifier::CheckIntraIdSection(uint32_t offset, uint32_t count, uint16_t type) {
+bool DexFileVerifier::CheckIntraIdSection(size_t offset, uint32_t count, uint16_t type) {
   uint32_t expected_offset;
   uint32_t expected_size;
 
@@ -1183,7 +1183,7 @@
 
   // Check that the offset and size are what were expected from the header.
   if (UNLIKELY(offset != expected_offset)) {
-    ErrorStringPrintf("Bad offset for section: got %x, expected %x", offset, expected_offset);
+    ErrorStringPrintf("Bad offset for section: got %zx, expected %x", offset, expected_offset);
     return false;
   }
   if (UNLIKELY(count != expected_size)) {
@@ -1194,13 +1194,13 @@
   return CheckIntraSectionIterate(offset, count, type);
 }
 
-bool DexFileVerifier::CheckIntraDataSection(uint32_t offset, uint32_t count, uint16_t type) {
-  uint32_t data_start = header_->data_off_;
-  uint32_t data_end = data_start + header_->data_size_;
+bool DexFileVerifier::CheckIntraDataSection(size_t offset, uint32_t count, uint16_t type) {
+  size_t data_start = header_->data_off_;
+  size_t data_end = data_start + header_->data_size_;
 
   // Sanity check the offset of the section.
   if (UNLIKELY((offset < data_start) || (offset > data_end))) {
-    ErrorStringPrintf("Bad offset for data subsection: %x", offset);
+    ErrorStringPrintf("Bad offset for data subsection: %zx", offset);
     return false;
   }
 
@@ -1208,9 +1208,9 @@
     return false;
   }
 
-  uint32_t next_offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+  size_t next_offset = ptr_ - begin_;
   if (next_offset > data_end) {
-    ErrorStringPrintf("Out-of-bounds end of data subsection: %x", next_offset);
+    ErrorStringPrintf("Out-of-bounds end of data subsection: %zx", next_offset);
     return false;
   }
 
@@ -1222,7 +1222,7 @@
   const DexFile::MapItem* item = map->list_;
 
   uint32_t count = map->size_;
-  uint32_t offset = 0;
+  size_t offset = 0;
   ptr_ = begin_;
 
   // Check the items listed in the map.
@@ -1235,7 +1235,7 @@
     if (!CheckPadding(offset, section_offset)) {
       return false;
     } else if (UNLIKELY(offset > section_offset)) {
-      ErrorStringPrintf("Section overlap or out-of-order map: %x, %x", offset, section_offset);
+      ErrorStringPrintf("Section overlap or out-of-order map: %zx, %x", offset, section_offset);
       return false;
     }
 
@@ -1262,7 +1262,7 @@
         if (!CheckIntraIdSection(section_offset, section_count, type)) {
           return false;
         }
-        offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+        offset = ptr_ - begin_;
         break;
       case DexFile::kDexTypeMapList:
         if (UNLIKELY(section_count != 1)) {
@@ -1290,7 +1290,7 @@
         if (!CheckIntraDataSection(section_offset, section_count, type)) {
           return false;
         }
-        offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+        offset = ptr_ - begin_;
         break;
       default:
         ErrorStringPrintf("Unknown map item type %x", type);
@@ -1303,14 +1303,14 @@
   return true;
 }
 
-bool DexFileVerifier::CheckOffsetToTypeMap(uint32_t offset, uint16_t type) {
+bool DexFileVerifier::CheckOffsetToTypeMap(size_t offset, uint16_t type) {
   auto it = offset_to_type_map_.find(offset);
   if (UNLIKELY(it == offset_to_type_map_.end())) {
-    ErrorStringPrintf("No data map entry found @ %x; expected %x", offset, type);
+    ErrorStringPrintf("No data map entry found @ %zx; expected %x", offset, type);
     return false;
   }
   if (UNLIKELY(it->second != type)) {
-    ErrorStringPrintf("Unexpected data map entry @ %x; expected %x, found %x",
+    ErrorStringPrintf("Unexpected data map entry @ %zx; expected %x, found %x",
                       offset, type, it->second);
     return false;
   }
@@ -1784,9 +1784,9 @@
   return true;
 }
 
-bool DexFileVerifier::CheckInterSectionIterate(uint32_t offset, uint32_t count, uint16_t type) {
+bool DexFileVerifier::CheckInterSectionIterate(size_t offset, uint32_t count, uint16_t type) {
   // Get the right alignment mask for the type of section.
-  uint32_t alignment_mask;
+  size_t alignment_mask;
   switch (type) {
     case DexFile::kDexTypeClassDataItem:
       alignment_mask = sizeof(uint8_t) - 1;
@@ -1871,7 +1871,7 @@
     }
 
     previous_item_ = prev_ptr;
-    offset = reinterpret_cast<uint32_t>(ptr_) - reinterpret_cast<uint32_t>(begin_);
+    offset = ptr_ - begin_;
   }
 
   return true;
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index 4b8b80a..3337785 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -53,7 +53,7 @@
   bool CheckClassDataItemField(uint32_t idx, uint32_t access_flags, bool expect_static);
   bool CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, uint32_t code_offset,
                                 bool expect_direct);
-  bool CheckPadding(uint32_t offset, uint32_t aligned_offset);
+  bool CheckPadding(size_t offset, uint32_t aligned_offset);
   bool CheckEncodedValue();
   bool CheckEncodedArray();
   bool CheckEncodedAnnotation();
@@ -65,12 +65,12 @@
   bool CheckIntraAnnotationItem();
   bool CheckIntraAnnotationsDirectoryItem();
 
-  bool CheckIntraSectionIterate(uint32_t offset, uint32_t count, uint16_t type);
-  bool CheckIntraIdSection(uint32_t offset, uint32_t count, uint16_t type);
-  bool CheckIntraDataSection(uint32_t offset, uint32_t count, uint16_t type);
+  bool CheckIntraSectionIterate(size_t offset, uint32_t count, uint16_t type);
+  bool CheckIntraIdSection(size_t offset, uint32_t count, uint16_t type);
+  bool CheckIntraDataSection(size_t offset, uint32_t count, uint16_t type);
   bool CheckIntraSection();
 
-  bool CheckOffsetToTypeMap(uint32_t offset, uint16_t type);
+  bool CheckOffsetToTypeMap(size_t offset, uint16_t type);
   uint16_t FindFirstClassDataDefiner(const byte* ptr) const;
   uint16_t FindFirstAnnotationsDirectoryDefiner(const byte* ptr) const;
 
@@ -85,7 +85,7 @@
   bool CheckInterClassDataItem();
   bool CheckInterAnnotationsDirectoryItem();
 
-  bool CheckInterSectionIterate(uint32_t offset, uint32_t count, uint16_t type);
+  bool CheckInterSectionIterate(size_t offset, uint32_t count, uint16_t type);
   bool CheckInterSection();
 
   void ErrorStringPrintf(const char* fmt, ...)
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 6e8736a..8fccd6d 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -16,9 +16,12 @@
 
 #include "dex_instruction-inl.h"
 
+#include <inttypes.h>
+
+#include <iomanip>
+
 #include "dex_file-inl.h"
 #include "utils.h"
-#include <iomanip>
 
 namespace art {
 
@@ -403,7 +406,8 @@
           os << StringPrintf("%s v%d, #int %+d // 0x%x", opcode, VRegA_21h(), value, value);
         } else {
           uint64_t value = static_cast<uint64_t>(VRegB_21h()) << 48;
-          os << StringPrintf("%s v%d, #long %+lld // 0x%llx", opcode, VRegA_21h(), value, value);
+          os << StringPrintf("%s v%d, #long %+" PRId64 " // 0x%" PRIx64, opcode, VRegA_21h(),
+                             value, value);
         }
       }
       break;
@@ -611,7 +615,7 @@
       }
       break;
     }
-    case k51l: os << StringPrintf("%s v%d, #%+lld", opcode, VRegA_51l(), VRegB_51l()); break;
+    case k51l: os << StringPrintf("%s v%d, #%+" PRId64, opcode, VRegA_51l(), VRegB_51l()); break;
     default: os << " unknown format (" << DumpHex(5) << ")"; break;
   }
   return os.str();
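// The %lld/%llx to PRId64/PRIx64 changes above make 64-bit format strings
// portable between 32-bit and 64-bit hosts. A minimal stand-alone
// illustration with plain printf rather than ART's StringPrintf:

#include <cinttypes>
#include <cstdio>

int main() {
  int64_t value = INT64_C(0x0123456789abcdef);
  // PRId64/PRIx64 expand to the correct length modifier for this platform,
  // instead of hard-coding "ll", which is wrong where int64_t is "long".
  std::printf("#long %+" PRId64 " // 0x%" PRIx64 "\n",
              value, static_cast<uint64_t>(value));
  return 0;
}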
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index b3b24ba..261c217 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -64,15 +64,16 @@
     prot = PROT_READ;
     flags = MAP_PRIVATE;
   }
-  int64_t file_length = file_->GetLength();
-  if (file_length < 0) {
-    errno = -file_length;
+  int64_t temp_file_length = file_->GetLength();
+  if (temp_file_length < 0) {
+    errno = -temp_file_length;
     *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
                               file_->GetPath().c_str(), file_->Fd(), strerror(errno));
     return false;
   }
+  size_t file_length = static_cast<size_t>(temp_file_length);
   if (file_length < sizeof(llvm::ELF::Elf32_Ehdr)) {
-    *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF header of "
+    *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF header of "
                               "%zd bytes: '%s'", file_length, sizeof(llvm::ELF::Elf32_Ehdr),
                               file_->GetPath().c_str());
     return false;
@@ -89,7 +90,7 @@
     // then remap to cover program header
     size_t program_header_size = header_->e_phoff + (header_->e_phentsize * header_->e_phnum);
     if (file_length < program_header_size) {
-      *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF program "
+      *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF program "
                                 "header of %zd bytes: '%s'", file_length,
                                 sizeof(llvm::ELF::Elf32_Ehdr), file_->GetPath().c_str());
       return false;
@@ -632,13 +633,21 @@
     // non-zero, the segments require the specific address specified,
     // which either was specified in the file because we already set
     // base_address_ after the first zero segment).
-    int64_t file_length = file_->GetLength();
+    int64_t temp_file_length = file_->GetLength();
+    if (temp_file_length < 0) {
+      errno = -temp_file_length;
+      *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
+                                file_->GetPath().c_str(), file_->Fd(), strerror(errno));
+      return false;
+    }
+    size_t file_length = static_cast<size_t>(temp_file_length);
     if (program_header.p_vaddr == 0) {
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
       std::string error_msg;
       UniquePtr<MemMap> reserve(MemMap::MapAnonymous(reservation_name.c_str(),
-                                                     NULL, GetLoadedSize(), PROT_NONE, &error_msg));
+                                                     NULL, GetLoadedSize(), PROT_NONE, false,
+                                                     &error_msg));
       CHECK(reserve.get() != NULL) << file_->GetPath() << ": " << error_msg;
       base_address_ = reserve->Begin();
       segments_.push_back(reserve.release());
@@ -666,7 +675,7 @@
       flags |= MAP_PRIVATE;
     }
     if (file_length < (program_header.p_offset + program_header.p_memsz)) {
-      *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF segment "
+      *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF segment "
                                 "%d of %d bytes: '%s'", file_length, i,
                                 program_header.p_offset + program_header.p_memsz,
                                 file_->GetPath().c_str());
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 8304229..20532f4 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -74,21 +74,48 @@
   }
   if (UNLIKELY(!klass->IsInitialized())) {
     SirtRef<mirror::Class> sirt_klass(self, klass);
-    // The class initializer might cause a GC.
+    // EnsureInitialized (the class initializer) might cause a GC and
+    // may suspend us, meaning that another thread may try to change
+    // the allocator while we are stuck in the entrypoints of an old
+    // allocator. Also, the class initialization may fail. To handle
+    // these cases we mark the slow path boolean as true so that the
+    // caller knows to check the allocator type to see if it has
+    // changed and to null-check the return value in case the
+    // initialization fails.
+    *slow_path = true;
     if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_klass, true, true)) {
       DCHECK(self->IsExceptionPending());
       return nullptr;  // Failure
     }
-    // TODO: EnsureInitialized may cause us to suspend meaning that another thread may try to
-    // change the allocator while we are stuck in the entrypoints of an old allocator. To handle
-    // this case we mark the slow path boolean as true so that the caller knows to check the
-    // allocator type to see if it has changed.
-    *slow_path = true;
     return sirt_klass.get();
   }
   return klass;
 }
 
+// TODO: Fix no thread safety analysis when annotalysis is smarter.
+ALWAYS_INLINE static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
+                                                                               Thread* self, bool* slow_path)
+    NO_THREAD_SAFETY_ANALYSIS {
+  if (UNLIKELY(!klass->IsInitialized())) {
+    SirtRef<mirror::Class> sirt_class(self, klass);
+    // EnsureInitialized (the class initializer) might cause a GC and
+    // may suspend us, meaning that another thread may try to change
+    // the allocator while we are stuck in the entrypoints of an old
+    // allocator. Also, the class initialization may fail. To handle
+    // these cases we mark the slow path boolean as true so that the
+    // caller knows to check the allocator type to see if it has
+    // changed and to null-check the return value in case the
+    // initialization fails.
+    *slow_path = true;
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(sirt_class, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    }
+    return sirt_class.get();
+  }
+  return klass;
+}
+
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
@@ -112,6 +139,40 @@
   return klass->Alloc<kInstrumented>(self, allocator_type);
 }
 
+// Given the context of a calling Method and a resolved class, create an instance.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
+                                                                        mirror::ArtMethod* method,
+                                                                        Thread* self,
+                                                                        gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(klass != nullptr);
+  bool slow_path = false;
+  klass = CheckClassInitializedForObjectAlloc(klass, self, &slow_path);
+  if (UNLIKELY(slow_path)) {
+    if (klass == nullptr) {
+      return nullptr;
+    }
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    return klass->Alloc<kInstrumented>(self, heap->GetCurrentAllocator());
+  }
+  return klass->Alloc<kInstrumented>(self, allocator_type);
+}
+
+// Given the context of a calling Method and an initialized class, create an instance.
+// TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
+template <bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
+                                                                           mirror::ArtMethod* method,
+                                                                           Thread* self,
+                                                                           gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(klass != nullptr);
+  return klass->Alloc<kInstrumented>(self, allocator_type);
+}
+
+
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck>
 ALWAYS_INLINE static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
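// A stand-alone sketch of the slow-path contract the allocation helpers above
// rely on: when the class-initialization check sets the flag, the caller must
// null-check the class and re-read the current allocator before allocating.
// The types below are hypothetical stand-ins, not ART's mirror::Class or
// gc::Heap:

#include <cstdio>

enum Allocator { kAllocatorA, kAllocatorB };
struct FakeClass { bool initialized; };
static Allocator gCurrentAllocator = kAllocatorB;

static FakeClass* EnsureInitializedSketch(FakeClass* klass, bool* slow_path) {
  if (!klass->initialized) {
    *slow_path = true;           // We may have suspended; the allocator may change.
    klass->initialized = true;   // Pretend initialization succeeded.
  }
  return klass;
}

int main() {
  FakeClass c{false};
  Allocator allocator = kAllocatorA;   // Allocator cached by the fast path.
  bool slow_path = false;
  FakeClass* klass = EnsureInitializedSketch(&c, &slow_path);
  if (slow_path) {
    if (klass == nullptr) {
      return 1;                        // Initialization failed; unwind.
    }
    allocator = gCurrentAllocator;     // Re-read: it may have changed while suspended.
  }
  std::printf("allocating with allocator %d\n", allocator);
  return 0;
}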
@@ -171,6 +232,30 @@
   return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
 }
 
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
+                                                                      mirror::ArtMethod* method,
+                                                                      int32_t component_count,
+                                                                      Thread* self,
+                                                                      gc::AllocatorType allocator_type)
+    NO_THREAD_SAFETY_ANALYSIS {
+  DCHECK(klass != nullptr);
+  if (UNLIKELY(component_count < 0)) {
+    ThrowNegativeArraySizeException(component_count);
+    return nullptr;  // Failure
+  }
+  if (kAccessCheck) {
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      return nullptr;  // Failure
+    }
+  }
+  // No need to retry a slow-path allocation as the above code won't
+  // cause a GC or thread suspension.
+  return mirror::Array::Alloc<kInstrumented>(self, klass, component_count, allocator_type);
+}
+
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
                                                  int32_t component_count, Thread* self,
                                                  bool access_check,
@@ -197,7 +282,7 @@
 };
 
 template<FindFieldType type, bool access_check>
-static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, const mirror::ArtMethod* referrer,
+static inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                                   Thread* self, size_t expected_size) {
   bool is_primitive;
   bool is_set;
@@ -226,32 +311,17 @@
       return nullptr;
     }
     mirror::Class* referring_class = referrer->GetDeclaringClass();
-    if (UNLIKELY(!referring_class->CanAccess(fields_class) ||
-                 !referring_class->CanAccessMember(fields_class,
-                                                   resolved_field->GetAccessFlags()))) {
-      // The referring class can't access the resolved field, this may occur as a result of a
-      // protected field being made public by a sub-class. Resort to the dex file to determine
-      // the correct class for the access check.
-      const DexFile& dex_file = *referring_class->GetDexCache()->GetDexFile();
-      fields_class = class_linker->ResolveType(dex_file,
-                                               dex_file.GetFieldId(field_idx).class_idx_,
-                                               referring_class);
-      if (UNLIKELY(!referring_class->CanAccess(fields_class))) {
-        ThrowIllegalAccessErrorClass(referring_class, fields_class);
-        return nullptr;  // failure
-      } else if (UNLIKELY(!referring_class->CanAccessMember(fields_class,
-                                                            resolved_field->GetAccessFlags()))) {
-        ThrowIllegalAccessErrorField(referring_class, resolved_field);
-        return nullptr;  // failure
-      }
+    if (UNLIKELY(!referring_class->CheckResolvedFieldAccess(fields_class, resolved_field,
+                                                            field_idx))) {
+      DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
+      return nullptr;  // Failure.
     }
     if (UNLIKELY(is_set && resolved_field->IsFinal() && (fields_class != referring_class))) {
       ThrowIllegalAccessErrorFinalField(referrer, resolved_field);
-      return nullptr;  // failure
+      return nullptr;  // Failure.
     } else {
       FieldHelper fh(resolved_field);
-      if (UNLIKELY(fh.IsPrimitiveType() != is_primitive ||
-                   fh.FieldSize() != expected_size)) {
+      if (UNLIKELY(fh.IsPrimitiveType() != is_primitive || fh.FieldSize() != expected_size)) {
         ThrowLocation throw_location = self->GetCurrentLocationForThrow();
         DCHECK(throw_location.GetMethod() == referrer);
         self->ThrowNewExceptionF(throw_location, "Ljava/lang/NoSuchFieldError;",
@@ -259,7 +329,7 @@
                                  expected_size * (32 / sizeof(int32_t)),
                                  is_primitive ? "primitive" : "non-primitive",
                                  PrettyField(resolved_field, true).c_str());
-        return nullptr;  // failure
+        return nullptr;  // Failure.
       }
     }
   }
@@ -277,7 +347,7 @@
         return resolved_field;
       } else {
         DCHECK(self->IsExceptionPending());  // Throw exception and unwind
-        return nullptr;  // failure
+        return nullptr;  // Failure.
       }
     }
   }
@@ -287,7 +357,7 @@
 #define EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, _access_check) \
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE \
 mirror::ArtField* FindFieldFromCode<_type, _access_check>(uint32_t field_idx, \
-                                                          const mirror::ArtMethod* referrer, \
+                                                          mirror::ArtMethod* referrer, \
                                                           Thread* self, size_t expected_size) \
 
 #define EXPLICIT_FIND_FIELD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
@@ -310,11 +380,12 @@
 static inline mirror::ArtMethod* FindMethodFromCode(uint32_t method_idx, mirror::Object* this_object,
                                                     mirror::ArtMethod* referrer, Thread* self) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  SirtRef<mirror::Object> sirt_this(self, this_object);
   mirror::ArtMethod* resolved_method = class_linker->ResolveMethod(method_idx, referrer, type);
   if (UNLIKELY(resolved_method == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
-  } else if (UNLIKELY(this_object == nullptr && type != kStatic)) {
+  } else if (UNLIKELY(sirt_this.get() == nullptr && type != kStatic)) {
     // Maintain interpreter-like semantics where NullPointerException is thrown
     // after potential NoSuchMethodError from class linker.
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
@@ -330,26 +401,12 @@
     }
     mirror::Class* methods_class = resolved_method->GetDeclaringClass();
     mirror::Class* referring_class = referrer->GetDeclaringClass();
-    if (UNLIKELY(!referring_class->CanAccess(methods_class) ||
-                 !referring_class->CanAccessMember(methods_class,
-                                                   resolved_method->GetAccessFlags()))) {
-      // The referring class can't access the resolved method, this may occur as a result of a
-      // protected method being made public by implementing an interface that re-declares the
-      // method public. Resort to the dex file to determine the correct class for the access check
-      const DexFile& dex_file = *referring_class->GetDexCache()->GetDexFile();
-      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-      methods_class = class_linker->ResolveType(dex_file,
-                                                dex_file.GetMethodId(method_idx).class_idx_,
-                                                referring_class);
-      if (UNLIKELY(!referring_class->CanAccess(methods_class))) {
-        ThrowIllegalAccessErrorClassForMethodDispatch(referring_class, methods_class,
-                                                      referrer, resolved_method, type);
-        return nullptr;  // Failure.
-      } else if (UNLIKELY(!referring_class->CanAccessMember(methods_class,
-                                                            resolved_method->GetAccessFlags()))) {
-        ThrowIllegalAccessErrorMethod(referring_class, resolved_method);
-        return nullptr;  // Failure.
-      }
+    bool can_access_resolved_method =
+        referring_class->CheckResolvedMethodAccess<type>(methods_class, resolved_method,
+                                                         method_idx);
+    if (UNLIKELY(!can_access_resolved_method)) {
+      DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
+      return nullptr;  // Failure.
     }
   }
   switch (type) {
@@ -357,7 +414,7 @@
     case kDirect:
       return resolved_method;
     case kVirtual: {
-      mirror::ObjectArray<mirror::ArtMethod>* vtable = this_object->GetClass()->GetVTable();
+      mirror::ObjectArray<mirror::ArtMethod>* vtable = sirt_this->GetClass()->GetVTable();
       uint16_t vtable_index = resolved_method->GetMethodIndex();
       if (access_check &&
           (vtable == nullptr || vtable_index >= static_cast<uint32_t>(vtable->GetLength()))) {
@@ -394,16 +451,16 @@
     }
     case kInterface: {
       uint32_t imt_index = resolved_method->GetDexMethodIndex() % ClassLinker::kImtSize;
-      mirror::ObjectArray<mirror::ArtMethod>* imt_table = this_object->GetClass()->GetImTable();
+      mirror::ObjectArray<mirror::ArtMethod>* imt_table = sirt_this->GetClass()->GetImTable();
       mirror::ArtMethod* imt_method = imt_table->Get(imt_index);
       if (!imt_method->IsImtConflictMethod()) {
         return imt_method;
       } else {
         mirror::ArtMethod* interface_method =
-            this_object->GetClass()->FindVirtualMethodForInterface(resolved_method);
+            sirt_this->GetClass()->FindVirtualMethodForInterface(resolved_method);
         if (UNLIKELY(interface_method == nullptr)) {
-          ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method, this_object,
-                                                                     referrer);
+          ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(resolved_method,
+                                                                     sirt_this.get(), referrer);
           return nullptr;  // Failure.
         } else {
           return interface_method;
@@ -438,7 +495,7 @@
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
 static inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
-                                              const mirror::ArtMethod* referrer,
+                                              mirror::ArtMethod* referrer,
                                               FindFieldType type, size_t expected_size)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* resolved_field =
@@ -494,7 +551,7 @@
 // Fast path method resolution that can't throw exceptions.
 static inline mirror::ArtMethod* FindMethodFast(uint32_t method_idx,
                                                 mirror::Object* this_object,
-                                                const mirror::ArtMethod* referrer,
+                                                mirror::ArtMethod* referrer,
                                                 bool access_check, InvokeType type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   bool is_direct = type == kStatic || type == kDirect;
@@ -535,7 +592,7 @@
 }
 
 static inline mirror::Class* ResolveVerifyAndClinit(uint32_t type_idx,
-                                                    const mirror::ArtMethod* referrer,
+                                                    mirror::ArtMethod* referrer,
                                                     Thread* self, bool can_run_clinit,
                                                     bool verify_access)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -572,7 +629,7 @@
 
 extern void ThrowStackOverflowError(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline mirror::String* ResolveStringFromCode(const mirror::ArtMethod* referrer,
+static inline mirror::String* ResolveStringFromCode(mirror::ArtMethod* referrer,
                                                     uint32_t string_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -661,21 +718,21 @@
   return reinterpret_cast<void*>(art_portable_to_interpreter_bridge);
 }
 
+static inline const void* GetPortableToQuickBridge() {
+  // TODO: portable to quick bridge. Bug: 8196384
+  return GetPortableToInterpreterBridge();
+}
+
 extern "C" void art_quick_to_interpreter_bridge(mirror::ArtMethod*);
 static inline const void* GetQuickToInterpreterBridge() {
   return reinterpret_cast<void*>(art_quick_to_interpreter_bridge);
 }
 
-// Return address of interpreter stub.
-static inline const void* GetCompiledCodeToInterpreterBridge() {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableToInterpreterBridge();
-#else
+static inline const void* GetQuickToPortableBridge() {
+  // TODO: quick to portable bridge. Bug: 8196384
   return GetQuickToInterpreterBridge();
-#endif
 }
 
-
 static inline const void* GetPortableResolutionTrampoline(ClassLinker* class_linker) {
   return class_linker->GetPortableResolutionTrampoline();
 }
@@ -684,15 +741,6 @@
   return class_linker->GetQuickResolutionTrampoline();
 }
 
-// Return address of resolution trampoline stub for defined compiler.
-static inline const void* GetResolutionTrampoline(ClassLinker* class_linker) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableResolutionTrampoline(class_linker);
-#else
-  return GetQuickResolutionTrampoline(class_linker);
-#endif
-}
-
 static inline const void* GetPortableImtConflictTrampoline(ClassLinker* class_linker) {
   return class_linker->GetPortableImtConflictTrampoline();
 }
@@ -701,15 +749,6 @@
   return class_linker->GetQuickImtConflictTrampoline();
 }
 
-// Return address of imt conflict trampoline stub for defined compiler.
-static inline const void* GetImtConflictTrampoline(ClassLinker* class_linker) {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableImtConflictTrampoline(class_linker);
-#else
-  return GetQuickImtConflictTrampoline(class_linker);
-#endif
-}
-
 extern "C" void art_portable_proxy_invoke_handler();
 static inline const void* GetPortableProxyInvokeHandler() {
   return reinterpret_cast<void*>(art_portable_proxy_invoke_handler);
@@ -720,14 +759,6 @@
   return reinterpret_cast<void*>(art_quick_proxy_invoke_handler);
 }
 
-static inline const void* GetProxyInvokeHandler() {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  return GetPortableProxyInvokeHandler();
-#else
-  return GetQuickProxyInvokeHandler();
-#endif
-}
-
 extern "C" void* art_jni_dlsym_lookup_stub(JNIEnv*, jobject);
 static inline void* GetJniDlsymLookupStub() {
   return reinterpret_cast<void*>(art_jni_dlsym_lookup_stub);
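
The FindMethodFromCode hunk above wraps this_object in a SirtRef before calling ResolveMethod, so the reference stays visible to (and can be updated by) the garbage collector if resolution suspends the thread. Below is a minimal, self-contained sketch of that scoped-root idea; RootTable, Handle, ResolveMethodMayGc, and CallSite are illustrative names, not ART's actual SirtRef/Thread API.

#include <deque>

struct Object {};

// A per-thread table of GC roots (condensed stand-in for ART's SIRT). A moving
// collector would scan these slots and rewrite them when objects move.
class RootTable {
 public:
  Object** Register(Object* obj) {
    roots_.push_back(obj);
    return &roots_.back();  // deque keeps slot addresses stable across later push_backs
  }
 private:
  std::deque<Object*> roots_;
};

// Handle: reads through the root slot, so it observes any update the collector
// made during a call that may suspend the thread.
class Handle {
 public:
  Handle(RootTable* table, Object* obj) : slot_(table->Register(obj)) {}
  Object* get() const { return *slot_; }
 private:
  Object** slot_;
};

// Stand-in for a resolution call that may trigger GC and move objects.
void ResolveMethodMayGc() {}

void CallSite(RootTable* table, Object* this_object) {
  Handle sirt_this(table, this_object);  // protect before the risky call
  ResolveMethodMayGc();                  // the raw this_object may now be stale
  Object* updated = sirt_this.get();     // the handle reads the current slot value
  (void)updated;
}

int main() {
  RootTable table;
  Object obj;
  CallSite(&table, &obj);
}
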
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 0df00c2..8a2ce51 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -45,15 +45,15 @@
     }
   }
   uint16_t arg_offset = (code_item == NULL) ? 0 : code_item->registers_size_ - code_item->ins_size_;
-#if defined(ART_USE_PORTABLE_COMPILER)
-  ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
-  arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
-  method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
-#else
-  method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
-                 (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                 result, mh.GetShorty()[0]);
-#endif
+  if (kUsePortableCompiler) {
+    ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
+    arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
+    method->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result, mh.GetShorty()[0]);
+  } else {
+    method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
+                   (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
+                   result, mh.GetShorty()[0]);
+  }
 }
 
 }  // namespace art
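
The interpreter entrypoint change above swaps a preprocessor #if on ART_USE_PORTABLE_COMPILER for a plain if on a compile-time constant, so both arms are always parsed and type-checked while the untaken arm is still optimized away. A minimal sketch of the pattern; kUsePortableCompiler is defined elsewhere in ART, so the local constant and the two Invoke* stubs here are only for illustration.

#include <iostream>

// In ART this constant comes from the build configuration; the value here is
// only for illustration.
static constexpr bool kUsePortableCompiler = false;

void InvokePortable() { std::cout << "portable path\n"; }
void InvokeQuick()    { std::cout << "quick path\n"; }

void Invoke() {
  if (kUsePortableCompiler) {  // both branches compile; the unused one is elided
    InvokePortable();
  } else {
    InvokeQuick();
  }
}

int main() { Invoke(); }
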
diff --git a/runtime/entrypoints/portable/portable_cast_entrypoints.cc b/runtime/entrypoints/portable/portable_cast_entrypoints.cc
index d343c5d..a553a22 100644
--- a/runtime/entrypoints/portable/portable_cast_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_cast_entrypoints.cc
@@ -20,16 +20,16 @@
 
 namespace art {
 
-extern "C" int32_t art_portable_is_assignable_from_code(const mirror::Class* dest_type,
-                                                        const mirror::Class* src_type)
+extern "C" int32_t art_portable_is_assignable_from_code(mirror::Class* dest_type,
+                                                        mirror::Class* src_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(dest_type != NULL);
   DCHECK(src_type != NULL);
   return dest_type->IsAssignableFrom(src_type) ? 1 : 0;
 }
 
-extern "C" void art_portable_check_cast_from_code(const mirror::Class* dest_type,
-                                                  const mirror::Class* src_type)
+extern "C" void art_portable_check_cast_from_code(mirror::Class* dest_type,
+                                                  mirror::Class* src_type)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(dest_type->IsClass()) << PrettyClass(dest_type);
   DCHECK(src_type->IsClass()) << PrettyClass(src_type);
@@ -38,8 +38,8 @@
   }
 }
 
-extern "C" void art_portable_check_put_array_element_from_code(const mirror::Object* element,
-                                                               const mirror::Object* array)
+extern "C" void art_portable_check_put_array_element_from_code(mirror::Object* element,
+                                                               mirror::Object* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (element == NULL) {
     return;
diff --git a/runtime/entrypoints/portable/portable_field_entrypoints.cc b/runtime/entrypoints/portable/portable_field_entrypoints.cc
index 095e99e..0b54b9c 100644
--- a/runtime/entrypoints/portable/portable_field_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_field_entrypoints.cc
@@ -65,13 +65,13 @@
                                                          mirror::Object* new_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(field->GetDeclaringClass(), new_value);
     return 0;
   }
   field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, Thread::Current(),
-                                                     sizeof(mirror::Object*));
+                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(field->GetDeclaringClass(), new_value);
     return 0;
@@ -113,12 +113,12 @@
                                                                  mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
   field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, Thread::Current(),
-                                                    sizeof(mirror::Object*));
+                                                    sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
@@ -167,13 +167,13 @@
                                                            mirror::Object* new_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(obj, new_value);
     return 0;
   }
   field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, Thread::Current(),
-                                                       sizeof(mirror::Object*));
+                                                       sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(obj, new_value);
     return 0;
@@ -218,12 +218,12 @@
                                                                    mirror::Object* obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(obj);
   }
   field = FindFieldFromCode<InstanceObjectRead, true>(field_idx, referrer, Thread::Current(),
-                                                      sizeof(mirror::Object*));
+                                                      sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(obj);
   }
diff --git a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
index 8cf4eed..1005d0e 100644
--- a/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_fillarray_entrypoints.cc
@@ -44,7 +44,7 @@
     return;  // Error
   }
   uint32_t size_in_bytes = payload->element_count * payload->element_width;
-  memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+  memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
index 47ccbb1..d34b097 100644
--- a/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_invoke_entrypoints.cc
@@ -34,7 +34,7 @@
     }
   }
   DCHECK(!thread->IsExceptionPending());
-  const void* code = method->GetEntryPointFromCompiledCode();
+  const void* code = method->GetEntryPointFromPortableCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
   if (UNLIKELY(code == NULL)) {
diff --git a/runtime/entrypoints/portable/portable_throw_entrypoints.cc b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
index 2a0df9b..1fdb832 100644
--- a/runtime/entrypoints/portable/portable_throw_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
@@ -75,7 +75,7 @@
   ThrowLocation throw_location;
   mirror::Throwable* exception = self->GetException(&throw_location);
   // Check for special deoptimization exception.
-  if (UNLIKELY(reinterpret_cast<int32_t>(exception) == -1)) {
+  if (UNLIKELY(reinterpret_cast<intptr_t>(exception) == -1)) {
     return -1;
   }
   mirror::Class* exception_type = exception->GetClass();
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index 2162dcc..55fd301 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -47,6 +47,11 @@
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 0
 #define PORTABLE_STACK_ARG_SKIP 4
+#elif defined(__x86_64__)
+// TODO: implement and check these.
+#define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 16
+#define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 96
+#define PORTABLE_STACK_ARG_SKIP 0
 #else
 #error "Unsupported architecture"
 #define PORTABLE_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
@@ -387,43 +392,42 @@
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type));
   }
-  const void* code = NULL;
+  const void* code = nullptr;
   if (LIKELY(!thread->IsExceptionPending())) {
     // Ensure that the called method's class is initialized.
     SirtRef<mirror::Class> called_class(thread, called->GetDeclaringClass());
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromCompiledCode();
+      code = called->GetEntryPointFromPortableCompiledCode();
       // TODO: remove this after we solve the link issue.
-      {  // for lazy link.
-        if (code == NULL) {
-          code = linker->GetOatCodeFor(called);
-        }
+      if (code == nullptr) {
+        bool have_portable_code;
+        code = linker->GetPortableOatCodeFor(called, &have_portable_code);
       }
     } else if (called_class->IsInitializing()) {
       if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
-        code = linker->GetOatCodeFor(called);
+        bool have_portable_code;
+        code = linker->GetPortableOatCodeFor(called, &have_portable_code);
       } else {
         // No trampoline for non-static methods.
-        code = called->GetEntryPointFromCompiledCode();
+        code = called->GetEntryPointFromPortableCompiledCode();
         // TODO: remove this after we solve the link issue.
-        {  // for lazy link.
-          if (code == NULL) {
-            code = linker->GetOatCodeFor(called);
-          }
+        if (code == nullptr) {
+          bool have_portable_code;
+          code = linker->GetPortableOatCodeFor(called, &have_portable_code);
         }
       }
     } else {
       DCHECK(called_class->IsErroneous());
     }
   }
-  if (LIKELY(code != NULL)) {
+  if (LIKELY(code != nullptr)) {
     // Expect class to at least be initializing.
     DCHECK(called->GetDeclaringClass()->IsInitializing());
     // Don't want infinite recursion.
-    DCHECK(code != GetResolutionTrampoline(linker));
+    DCHECK(code != GetPortableResolutionTrampoline(linker));
     // Set up entry into main method
     *called_addr = called;
   }
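
The portable trampoline hunk above prefers the method's cached portable entry point and falls back to a class-linker lookup only when it is still null (the lazy-link TODO). A condensed sketch of that decision, with ArtMethodStub and LinkerStub standing in for ART's ArtMethod and ClassLinker; the have_portable_code out-parameter mirrors the new GetPortableOatCodeFor signature shown above.

struct ArtMethodStub {
  const void* portable_entry_point = nullptr;  // cached code entry, may be null
};

struct LinkerStub {
  // Stand-in for ClassLinker::GetPortableOatCodeFor(method, &have_portable_code).
  const void* GetPortableOatCodeFor(ArtMethodStub* /*m*/, bool* have_portable_code) {
    static const char kPlaceholderCode = 0;
    *have_portable_code = true;
    return &kPlaceholderCode;
  }
};

// Prefer the cached entry point; fall back to an oat lookup only when it is
// still null (the "lazy link" case noted in the hunk above).
const void* PortableCodeFor(ArtMethodStub* called, LinkerStub* linker) {
  const void* code = called->portable_entry_point;
  if (code == nullptr) {
    bool have_portable_code;
    code = linker->GetPortableOatCodeFor(called, &have_portable_code);
  }
  return code;
}

int main() {
  ArtMethodStub method;
  LinkerStub linker;
  return PortableCodeFor(&method, &linker) != nullptr ? 0 : 1;
}
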
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index b1dca77..2e1b69d 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -30,6 +30,18 @@
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
+extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
+} \
+extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
+    mirror::Class* klass, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
+} \
 extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, Thread* self, mirror::ArtMethod** sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
@@ -44,6 +56,14 @@
   return AllocArrayFromCode<false, instrumented_bool>(type_idx, method, component_count, self, \
                                                       allocator_type); \
 } \
+extern "C" mirror::Array* artAllocArrayFromCodeResolved##suffix##suffix2( \
+    mirror::Class* klass, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
+    mirror::ArtMethod** sp) \
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
+  return AllocArrayFromCodeResolved<false, instrumented_bool>(klass, method, component_count, self, \
+                                                              allocator_type); \
+} \
 extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheck##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, int32_t component_count, Thread* self, \
     mirror::ArtMethod** sp) \
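
The allocation entrypoints above are stamped out by a macro so that every variant (plain, Resolved, Initialized, WithAccessCheck) exists for each allocator and for the instrumented and uninstrumented cases. A reduced sketch of that token-pasting pattern; the function names and bodies here are placeholders, not the real artAlloc* entrypoints.

#include <cstdint>
#include <cstdio>

// Generates a family of allocation stubs for one allocator configuration.
// suffix selects the allocator and suffix2 the instrumentation variant, as in
// the hunk above; the bodies are placeholders.
#define GENERATE_ALLOC_ENTRYPOINTS(suffix, suffix2, instrumented_bool) \
extern "C" void* AllocObject##suffix##suffix2(uint32_t type_idx) { \
  std::printf("AllocObject%s type=%u instrumented=%d\n", \
              #suffix #suffix2, type_idx, static_cast<int>(instrumented_bool)); \
  return nullptr; \
} \
extern "C" void* AllocObjectResolved##suffix##suffix2(void* klass) { \
  std::printf("AllocObjectResolved%s klass=%p\n", #suffix #suffix2, klass); \
  return nullptr; \
}

GENERATE_ALLOC_ENTRYPOINTS(DlMalloc, , false)
GENERATE_ALLOC_ENTRYPOINTS(DlMalloc, Instrumented, true)

int main() {
  AllocObjectDlMalloc(1);
  AllocObjectResolvedDlMallocInstrumented(nullptr);
}
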
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index ae53d6c..a6ab69b 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -20,8 +20,7 @@
 namespace art {
 
 // Assignable test for code, won't throw.  Null and equality tests already performed
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
-                                            const mirror::Class* ref_class)
+extern "C" uint32_t artIsAssignableFromCode(mirror::Class* klass, mirror::Class* ref_class)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(klass != NULL);
   DCHECK(ref_class != NULL);
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 003047a..ab428a5 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx,
-                                                             const mirror::ArtMethod* referrer,
+                                                             mirror::ArtMethod* referrer,
                                                              Thread* self,
                                                              mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -38,7 +38,7 @@
 }
 
 extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx,
-                                                    const mirror::ArtMethod* referrer,
+                                                    mirror::ArtMethod* referrer,
                                                     Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
@@ -47,7 +47,7 @@
 }
 
 extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type_idx,
-                                                                   const mirror::ArtMethod* referrer,
+                                                                   mirror::ArtMethod* referrer,
                                                                    Thread* self,
                                                                    mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 1ba2066..011e926 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -40,8 +40,11 @@
 struct PACKED(4) QuickEntryPoints {
   // Alloc
   void* (*pAllocArray)(uint32_t, void*, int32_t);
+  void* (*pAllocArrayResolved)(void*, void*, int32_t);
   void* (*pAllocArrayWithAccessCheck)(uint32_t, void*, int32_t);
   void* (*pAllocObject)(uint32_t, void*);
+  void* (*pAllocObjectResolved)(void*, void*);
+  void* (*pAllocObjectInitialized)(void*, void*);
   void* (*pAllocObjectWithAccessCheck)(uint32_t, void*);
   void* (*pCheckAndAllocArray)(uint32_t, void*, int32_t);
   void* (*pCheckAndAllocArrayWithAccessCheck)(uint32_t, void*, int32_t);
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 0a533bd..93ff7aa 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -26,7 +26,7 @@
 namespace art {
 
 extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
-                                           const mirror::ArtMethod* referrer,
+                                           mirror::ArtMethod* referrer,
                                            Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
@@ -43,7 +43,7 @@
 }
 
 extern "C" uint64_t artGet64StaticFromCode(uint32_t field_idx,
-                                           const mirror::ArtMethod* referrer,
+                                           mirror::ArtMethod* referrer,
                                            Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
@@ -60,17 +60,17 @@
 }
 
 extern "C" mirror::Object* artGetObjStaticFromCode(uint32_t field_idx,
-                                                   const mirror::ArtMethod* referrer,
+                                                   mirror::ArtMethod* referrer,
                                                    Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
-                                       sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticObjectRead, true>(field_idx, referrer, self,
-                                                    sizeof(mirror::Object*));
+                                                    sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     return field->GetObj(field->GetDeclaringClass());
   }
@@ -78,7 +78,7 @@
 }
 
 extern "C" uint32_t artGet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             const mirror::ArtMethod* referrer, Thread* self,
+                                             mirror::ArtMethod* referrer, Thread* self,
                                              mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
@@ -101,7 +101,7 @@
 }
 
 extern "C" uint64_t artGet64InstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                             const mirror::ArtMethod* referrer, Thread* self,
+                                             mirror::ArtMethod* referrer, Thread* self,
                                              mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
@@ -124,18 +124,18 @@
 }
 
 extern "C" mirror::Object* artGetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
-                                                     const mirror::ArtMethod* referrer,
+                                                     mirror::ArtMethod* referrer,
                                                      Thread* self,
                                                      mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL && obj != NULL)) {
     return field->GetObj(obj);
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstanceObjectRead, true>(field_idx, referrer, self,
-                                                      sizeof(mirror::Object*));
+                                                      sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     if (UNLIKELY(obj == NULL)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
@@ -148,7 +148,7 @@
 }
 
 extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
-                                      const mirror::ArtMethod* referrer, Thread* self,
+                                      mirror::ArtMethod* referrer, Thread* self,
                                       mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
@@ -166,7 +166,7 @@
   return -1;  // failure
 }
 
-extern "C" int artSet64StaticFromCode(uint32_t field_idx, const mirror::ArtMethod* referrer,
+extern "C" int artSet64StaticFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                       uint64_t new_value, Thread* self, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
@@ -185,11 +185,11 @@
 }
 
 extern "C" int artSetObjStaticFromCode(uint32_t field_idx, mirror::Object* new_value,
-                                       const mirror::ArtMethod* referrer, Thread* self,
+                                       mirror::ArtMethod* referrer, Thread* self,
                                        mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     if (LIKELY(!FieldHelper(field).IsPrimitiveType())) {
       field->SetObj(field->GetDeclaringClass(), new_value);
@@ -198,7 +198,7 @@
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<StaticObjectWrite, true>(field_idx, referrer, self,
-                                                     sizeof(mirror::Object*));
+                                                     sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     field->SetObj(field->GetDeclaringClass(), new_value);
     return 0;  // success
@@ -207,7 +207,7 @@
 }
 
 extern "C" int artSet32InstanceFromCode(uint32_t field_idx, mirror::Object* obj, uint32_t new_value,
-                                        const mirror::ArtMethod* referrer, Thread* self,
+                                        mirror::ArtMethod* referrer, Thread* self,
                                         mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
@@ -261,18 +261,18 @@
 
 extern "C" int artSetObjInstanceFromCode(uint32_t field_idx, mirror::Object* obj,
                                          mirror::Object* new_value,
-                                         const mirror::ArtMethod* referrer, Thread* self,
+                                         mirror::ArtMethod* referrer, Thread* self,
                                          mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
-                                          sizeof(mirror::Object*));
+                                          sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL && obj != NULL)) {
     field->SetObj(obj, new_value);
     return 0;  // success
   }
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
   field = FindFieldFromCode<InstanceObjectWrite, true>(field_idx, referrer, self,
-                                                       sizeof(mirror::Object*));
+                                                       sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != NULL)) {
     if (UNLIKELY(obj == NULL)) {
       ThrowLocation throw_location = self->GetCurrentLocationForThrow();
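
Throughout the field entrypoints above, the expected size for object fields changes from sizeof(mirror::Object*) to sizeof(mirror::HeapReference<mirror::Object>): a reference field occupies a fixed 32-bit slot in the object layout even when native pointers are 64-bit. A minimal sketch of such a compressed-reference wrapper, assuming heap objects live in the low 4 GiB; ART's real HeapReference handles more than this.

#include <cstdint>
#include <cstdio>

struct Object {};

// Condensed stand-in for mirror::HeapReference<T>: the stored value is 32 bits
// regardless of pointer width.
template <typename T>
class HeapReference {
 public:
  void Assign(T* ptr) {
    // Assumes the referent's address fits in 32 bits.
    reference_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ptr));
  }
  T* AsMirrorPtr() const {
    return reinterpret_cast<T*>(static_cast<uintptr_t>(reference_));
  }
 private:
  uint32_t reference_ = 0;
};

static_assert(sizeof(HeapReference<Object>) == 4,
              "a reference field is 4 bytes, unlike an 8-byte Object* on LP64");

int main() {
  std::printf("expected_size for object fields: %zu (sizeof(Object*) is %zu)\n",
              sizeof(HeapReference<Object>), sizeof(Object*));
}
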
diff --git a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
index ca0c92e..8dac750 100644
--- a/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_fillarray_entrypoints.cc
@@ -56,7 +56,7 @@
     return -1;  // Error
   }
   uint32_t size_in_bytes = payload->element_count * payload->element_width;
-  memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+  memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
   return 0;  // Success
 }
 
diff --git a/runtime/entrypoints/quick/quick_invoke_entrypoints.cc b/runtime/entrypoints/quick/quick_invoke_entrypoints.cc
index 5a1b3e8..c081768 100644
--- a/runtime/entrypoints/quick/quick_invoke_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_invoke_entrypoints.cc
@@ -124,21 +124,23 @@
       return 0;  // Failure.
     }
   }
-  const void* code = method->GetEntryPointFromCompiledCode();
+  const void* code = method->GetEntryPointFromQuickCompiledCode();
 
-#ifndef NDEBUG
   // When we return, the caller will branch to this address, so it had better not be 0!
-  if (UNLIKELY(code == NULL)) {
+  if (kIsDebugBuild && UNLIKELY(code == nullptr)) {
       MethodHelper mh(method);
       LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
                  << " location: " << mh.GetDexFile().GetLocation();
   }
-#endif
-
+#ifdef __LP64__
+  UNIMPLEMENTED(FATAL);
+  return 0;
+#else
   uint32_t method_uint = reinterpret_cast<uint32_t>(method);
   uint64_t code_uint = reinterpret_cast<uint32_t>(code);
   uint64_t result = ((code_uint << 32) | method_uint);
   return result;
+#endif
 }
 
 template<InvokeType type, bool access_check>
@@ -156,21 +158,23 @@
     }
   }
   DCHECK(!self->IsExceptionPending());
-  const void* code = method->GetEntryPointFromCompiledCode();
+  const void* code = method->GetEntryPointFromQuickCompiledCode();
 
-#ifndef NDEBUG
   // When we return, the caller will branch to this address, so it had better not be 0!
-  if (UNLIKELY(code == NULL)) {
+  if (kIsDebugBuild && UNLIKELY(code == NULL)) {
       MethodHelper mh(method);
       LOG(FATAL) << "Code was NULL in method: " << PrettyMethod(method)
                  << " location: " << mh.GetDexFile().GetLocation();
   }
-#endif
-
+#ifdef __LP64__
+  UNIMPLEMENTED(FATAL);
+  return 0;
+#else
   uint32_t method_uint = reinterpret_cast<uint32_t>(method);
   uint64_t code_uint = reinterpret_cast<uint32_t>(code);
   uint64_t result = ((code_uint << 32) | method_uint);
   return result;
+#endif
 }
 
 // Explicit template declarations of artInvokeCommon for all invoke types.
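
The invoke entrypoints above return the resolved method and its code pointer packed into one uint64_t (code in the high word, method in the low word), which only works with 32-bit pointers; hence the new __LP64__ UNIMPLEMENTED(FATAL) guard. A sketch of the packing and of why it cannot carry 64-bit pointers; ArtMethod here is a placeholder type.

#include <cstdint>
#include <cstdio>

struct ArtMethod {};

// 32-bit-only trick: the assembly stub splits the returned uint64_t into two
// registers, one for the code pointer and one for the method pointer.
uint64_t PackCodeAndMethod(const void* code, ArtMethod* method) {
  uint64_t code_uint = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(code));
  uint64_t method_uint = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method));
  return (code_uint << 32) | method_uint;
}

int main() {
#ifdef __LP64__
  // A 64-bit pointer cannot share a uint64_t with a second pointer, so this
  // scheme needs a different calling convention (the hunk above makes it FATAL).
  std::printf("packing unavailable on LP64\n");
#else
  ArtMethod method;
  int fake_code;
  uint64_t packed = PackCodeAndMethod(&fake_code, &method);
  std::printf("method=%p code=%p packed=%llx\n",
              reinterpret_cast<void*>(static_cast<uintptr_t>(packed & 0xffffffffu)),
              reinterpret_cast<void*>(static_cast<uintptr_t>(packed >> 32)),
              static_cast<unsigned long long>(packed));
#endif
}
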
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index b589384..9f30190 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -97,6 +97,12 @@
 #define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 28
 #define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 32
 #define QUICK_STACK_ARG_SKIP 16
+#elif defined(__x86_64__)
+// TODO: implement and check these.
+#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 8
+#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__LR_OFFSET 56
+#define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__FRAME_SIZE 64
+#define QUICK_STACK_ARG_SKIP 32
 #else
 #error "Unsupported architecture"
 #define QUICK_CALLEE_SAVE_FRAME__REF_AND_ARGS__R1_OFFSET 0
@@ -567,15 +573,15 @@
     SirtRef<mirror::Class> called_class(soa.Self(), called->GetDeclaringClass());
     linker->EnsureInitialized(called_class, true, true);
     if (LIKELY(called_class->IsInitialized())) {
-      code = called->GetEntryPointFromCompiledCode();
+      code = called->GetEntryPointFromQuickCompiledCode();
     } else if (called_class->IsInitializing()) {
       if (invoke_type == kStatic) {
         // Class is still initializing, go to oat and grab code (trampoline must be left in place
         // until class is initialized to stop races between threads).
-        code = linker->GetOatCodeFor(called);
+        code = linker->GetQuickOatCodeFor(called);
       } else {
         // No trampoline for non-static methods.
-        code = called->GetEntryPointFromCompiledCode();
+        code = called->GetEntryPointFromQuickCompiledCode();
       }
     } else {
       DCHECK(called_class->IsErroneous());
diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc
index 978faeb..f7b621f 100644
--- a/runtime/exception_test.cc
+++ b/runtime/exception_test.cc
@@ -76,7 +76,7 @@
     method_f_ = my_klass_->FindVirtualMethod("f", "()I");
     ASSERT_TRUE(method_f_ != NULL);
     method_f_->SetFrameSizeInBytes(kStackAlignment);
-    method_f_->SetEntryPointFromCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
+    method_f_->SetEntryPointFromQuickCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
     method_f_->SetMappingTable(&fake_mapping_data_.GetData()[0]);
     method_f_->SetVmapTable(&fake_vmap_table_data_.GetData()[0]);
     method_f_->SetNativeGcMap(&fake_gc_map_[0]);
@@ -84,7 +84,7 @@
     method_g_ = my_klass_->FindVirtualMethod("g", "(I)V");
     ASSERT_TRUE(method_g_ != NULL);
     method_g_->SetFrameSizeInBytes(kStackAlignment);
-    method_g_->SetEntryPointFromCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
+    method_g_->SetEntryPointFromQuickCompiledCode(CompiledMethod::CodePointer(&fake_code_[sizeof(code_size)], kThumb2));
     method_g_->SetMappingTable(&fake_mapping_data_.GetData()[0]);
     method_g_->SetVmapTable(&fake_vmap_table_data_.GetData()[0]);
     method_g_->SetNativeGcMap(&fake_gc_map_[0]);
@@ -105,6 +105,7 @@
 };
 
 TEST_F(ExceptionTest, FindCatchHandler) {
+  ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = dex_->GetCodeItem(method_f_->GetCodeItemOffset());
 
   ASSERT_TRUE(code_item != NULL);
@@ -151,51 +152,51 @@
   ASSERT_EQ(kStackAlignment, 16U);
   ASSERT_EQ(sizeof(uintptr_t), sizeof(uint32_t));
 
-#if !defined(ART_USE_PORTABLE_COMPILER)
-  // Create two fake stack frames with mapping data created in SetUp. We map offset 3 in the code
-  // to dex pc 3.
-  const uint32_t dex_pc = 3;
+  if (!kUsePortableCompiler) {
+    // Create two fake stack frames with mapping data created in SetUp. We map offset 3 in the code
+    // to dex pc 3.
+    const uint32_t dex_pc = 3;
 
-  // Create/push fake 16byte stack frame for method g
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(method_f_->ToNativePc(dex_pc));  // return pc
+    // Create/push fake 16byte stack frame for method g
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(method_f_->ToNativePc(dex_pc));  // return pc
 
-  // Create/push fake 16byte stack frame for method f
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(0xEBAD6070);  // return pc
+    // Create/push fake 16byte stack frame for method f
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(0xEBAD6070);  // return pc
 
-  // Pull Method* of NULL to terminate the trace
-  fake_stack.push_back(0);
+    // Pull Method* of NULL to terminate the trace
+    fake_stack.push_back(0);
 
-  // Push null values which will become null incoming arguments.
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
+    // Push null values which will become null incoming arguments.
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
 
-  // Set up thread to appear as if we called out of method_g_ at pc dex 3
-  thread->SetTopOfStack(&fake_stack[0], method_g_->ToNativePc(dex_pc));  // return pc
-#else
-  // Create/push fake 20-byte shadow frame for method g
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
-  fake_stack.push_back(3);
-  fake_stack.push_back(0);
+    // Set up thread to appear as if we called out of method_g_ at pc dex 3
+    thread->SetTopOfStack(&fake_stack[0], method_g_->ToNativePc(dex_pc));  // return pc
+  } else {
+    // Create/push fake 20-byte shadow frame for method g
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_));
+    fake_stack.push_back(3);
+    fake_stack.push_back(0);
 
-  // Create/push fake 20-byte shadow frame for method f
-  fake_stack.push_back(0);
-  fake_stack.push_back(0);
-  fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
-  fake_stack.push_back(3);
-  fake_stack.push_back(0);
+    // Create/push fake 20-byte shadow frame for method f
+    fake_stack.push_back(0);
+    fake_stack.push_back(0);
+    fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_));
+    fake_stack.push_back(3);
+    fake_stack.push_back(0);
 
-  thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[5]));
-  thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[0]));
-#endif
+    thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[5]));
+    thread->PushShadowFrame(reinterpret_cast<ShadowFrame*>(&fake_stack[0]));
+  }
 
   jobject internal = thread->CreateInternalStackTrace(soa);
   ASSERT_TRUE(internal != NULL);
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 02e01b8..ea8f89c 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -19,7 +19,7 @@
 
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "UniquePtr.h"
@@ -165,7 +165,7 @@
   void Init() {
     std::string error_msg;
     mem_map_.reset(MemMap::MapAnonymous(name_.c_str(), NULL, capacity_ * sizeof(T),
-                                        PROT_READ | PROT_WRITE, &error_msg));
+                                        PROT_READ | PROT_WRITE, false, &error_msg));
     CHECK(mem_map_.get() != NULL) << "couldn't allocate mark stack.\n" << error_msg;
     byte* addr = mem_map_->Begin();
     CHECK(addr != NULL);
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index 7bd53df..f0c4d0d 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -121,7 +121,7 @@
 inline void CardTable::ModifyCardsAtomic(byte* scan_begin, byte* scan_end, const Visitor& visitor,
                                          const ModifiedVisitor& modified) {
   byte* card_cur = CardFromAddr(scan_begin);
-  byte* card_end = CardFromAddr(scan_end);
+  byte* card_end = CardFromAddr(AlignUp(scan_end, kCardSize));
   CheckCardValid(card_cur);
   CheckCardValid(card_end);
 
@@ -147,7 +147,7 @@
       new_value = visitor(expected);
     } while (expected != new_value && UNLIKELY(!byte_cas(expected, new_value, card_end)));
     if (expected != new_value) {
-      modified(card_cur, expected, new_value);
+      modified(card_end, expected, new_value);
     }
   }
 
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index e099137..714e6f7 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -57,7 +57,7 @@
   std::string error_msg;
   UniquePtr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
                                                  capacity + 256, PROT_READ | PROT_WRITE,
-                                                 &error_msg));
+                                                 false, &error_msg));
   CHECK(mem_map.get() != NULL) << "couldn't allocate card table: " << error_msg;
   // All zeros is the correct initial value; all clean. Anonymous mmaps are initialized to zero, we
   // don't clear the card table to avoid unnecessary pages being allocated
@@ -72,11 +72,11 @@
   byte* biased_begin = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(cardtable_begin) -
       (reinterpret_cast<uintptr_t>(heap_begin) >> kCardShift));
   if (((uintptr_t)biased_begin & 0xff) != kCardDirty) {
-    int delta = kCardDirty - (reinterpret_cast<int>(biased_begin) & 0xff);
+    int delta = kCardDirty - (reinterpret_cast<uintptr_t>(biased_begin) & 0xff);
     offset = delta + (delta < 0 ? 0x100 : 0);
     biased_begin += offset;
   }
-  CHECK_EQ(reinterpret_cast<int>(biased_begin) & 0xff, kCardDirty);
+  CHECK_EQ(reinterpret_cast<uintptr_t>(biased_begin) & 0xff, kCardDirty);
 
   return new CardTable(mem_map.release(), biased_begin, offset);
 }
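
The card-table change above makes the bias arithmetic use uintptr_t so it stays correct on 64-bit hosts. The bias itself picks biased_begin so that its low byte equals the dirty-card value, letting the write barrier mark a card by storing that byte at biased_begin + (addr >> kCardShift). A self-contained sketch of the computation; the kCardShift and kCardDirty values are illustrative assumptions, and the 256-byte slack matches the capacity + 256 mapping above.

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative constants; ART's real values live in card_table.h.
static constexpr size_t kCardShift = 7;      // 128-byte cards (assumption)
static constexpr uint8_t kCardDirty = 0x70;  // byte value of a dirty card

// Choose biased_begin so that card_for(addr) == biased_begin[addr >> kCardShift]
// and (biased_begin & 0xff) == kCardDirty. Relies on the table being mapped
// with 256 bytes of slack to absorb the offset.
uint8_t* ComputeBiasedBegin(uint8_t* cardtable_begin, uintptr_t heap_begin) {
  uint8_t* biased_begin = reinterpret_cast<uint8_t*>(
      reinterpret_cast<uintptr_t>(cardtable_begin) - (heap_begin >> kCardShift));
  int low_byte = static_cast<int>(reinterpret_cast<uintptr_t>(biased_begin) & 0xff);
  if (low_byte != kCardDirty) {
    int delta = kCardDirty - low_byte;
    biased_begin += delta + (delta < 0 ? 0x100 : 0);  // shift into the slack
  }
  assert((reinterpret_cast<uintptr_t>(biased_begin) & 0xff) == kCardDirty);
  return biased_begin;
}

// Write barrier: mark the card covering addr as dirty.
void MarkCard(uint8_t* biased_begin, uintptr_t addr) {
  biased_begin[addr >> kCardShift] = kCardDirty;
}

int main() {
  std::vector<uint8_t> table(1024 + 256, 0);  // card table plus bias slack
  uintptr_t heap_begin = uintptr_t{1} << 28;  // fake, card-aligned heap base
  uint8_t* biased = ComputeBiasedBegin(table.data(), heap_begin);
  MarkCard(biased, heap_begin + 100);         // dirties the heap's first card
  for (uint8_t byte : table) {
    if (byte == kCardDirty) return 0;         // found the dirtied card
  }
  return 1;
}
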
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 6d9dde7..0225f29 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -82,9 +82,9 @@
     if (ref != nullptr) {
       Object* new_ref = visitor_(ref, arg_);
       if (new_ref != ref) {
-        // Use SetFieldPtr to avoid card mark as an optimization which reduces dirtied pages and
-        // improves performance.
-        obj->SetFieldPtr(offset, new_ref, true);
+        // Use SetFieldObjectWithoutWriteBarrier to avoid card mark as an optimization which
+        // reduces dirtied pages and improves performance.
+        obj->SetFieldObjectWithoutWriteBarrier(offset, new_ref, true);
       }
     }
   }
@@ -122,9 +122,8 @@
 class AddToReferenceArrayVisitor {
  public:
   explicit AddToReferenceArrayVisitor(ModUnionTableReferenceCache* mod_union_table,
-                                      std::vector<Object**>* references)
-    : mod_union_table_(mod_union_table),
-      references_(references) {
+                                      std::vector<mirror::HeapReference<Object>*>* references)
+    : mod_union_table_(mod_union_table), references_(references) {
   }
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
@@ -133,19 +132,19 @@
     // Only add the reference if it is non null and fits our criteria.
     if (ref != nullptr && mod_union_table_->AddReference(obj, ref)) {
       // Push the address of the reference.
-      references_->push_back(obj->GetFieldObjectAddr(offset));
+      references_->push_back(obj->GetFieldObjectReferenceAddr(offset));
     }
   }
 
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  std::vector<Object**>* const references_;
+  std::vector<mirror::HeapReference<Object>*>* const references_;
 };
 
 class ModUnionReferenceVisitor {
  public:
   explicit ModUnionReferenceVisitor(ModUnionTableReferenceCache* const mod_union_table,
-                                    std::vector<Object**>* references)
+                                    std::vector<mirror::HeapReference<Object>*>* references)
     : mod_union_table_(mod_union_table),
       references_(references) {
   }
@@ -160,7 +159,7 @@
   }
  private:
   ModUnionTableReferenceCache* const mod_union_table_;
-  std::vector<Object**>* const references_;
+  std::vector<mirror::HeapReference<Object>*>* const references_;
 };
 
 class CheckReferenceVisitor {
@@ -173,7 +172,7 @@
 
   // Extra parameters are required since we use this same visitor signature for checking objects.
   // TODO: Fixme when annotalysis works with visitors.
-  void operator()(const Object* obj, const Object* ref,
+  void operator()(Object* obj, Object* ref,
                   const MemberOffset& /* offset */, bool /* is_static */) const
       SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
     Heap* heap = mod_union_table_->GetHeap();
@@ -219,8 +218,8 @@
 void ModUnionTableReferenceCache::Verify() {
   // Start by checking that everything in the mod union table is marked.
   for (const auto& ref_pair : references_) {
-    for (Object** ref : ref_pair.second) {
-      CHECK(heap_->IsLiveObjectLocked(*ref));
+    for (mirror::HeapReference<Object>* ref : ref_pair.second) {
+      CHECK(heap_->IsLiveObjectLocked(ref->AsMirrorPtr()));
     }
   }
 
@@ -231,8 +230,8 @@
     const byte* card = ref_pair.first;
     if (*card == CardTable::kCardClean) {
       std::set<const Object*> reference_set;
-      for (Object** obj_ptr : ref_pair.second) {
-        reference_set.insert(*obj_ptr);
+      for (mirror::HeapReference<Object>* obj_ptr : ref_pair.second) {
+        reference_set.insert(obj_ptr->AsMirrorPtr());
       }
       ModUnionCheckReferences visitor(this, reference_set);
       uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card));
@@ -255,8 +254,8 @@
     uintptr_t start = reinterpret_cast<uintptr_t>(card_table->AddrFromCard(card_addr));
     uintptr_t end = start + CardTable::kCardSize;
     os << reinterpret_cast<void*>(start) << "-" << reinterpret_cast<void*>(end) << "->{";
-    for (Object** ref : ref_pair.second) {
-      os << reinterpret_cast<const void*>(*ref) << ",";
+    for (mirror::HeapReference<Object>* ref : ref_pair.second) {
+      os << reinterpret_cast<const void*>(ref->AsMirrorPtr()) << ",";
     }
     os << "},";
   }
@@ -266,7 +265,7 @@
   Heap* heap = GetHeap();
   CardTable* card_table = heap->GetCardTable();
 
-  std::vector<Object**> cards_references;
+  std::vector<mirror::HeapReference<Object>*> cards_references;
   ModUnionReferenceVisitor add_visitor(this, &cards_references);
 
   for (const auto& card : cleared_cards_) {
@@ -294,13 +293,13 @@
   cleared_cards_.clear();
   size_t count = 0;
   for (const auto& ref : references_) {
-    for (const auto& obj_ptr : ref.second) {
-      Object* obj = *obj_ptr;
+    for (mirror::HeapReference<Object>* obj_ptr : ref.second) {
+      Object* obj = obj_ptr->AsMirrorPtr();
       if (obj != nullptr) {
         Object* new_obj = visitor(obj, arg);
         // Avoid dirtying pages in the image unless necessary.
         if (new_obj != obj) {
-          *obj_ptr = new_obj;
+          obj_ptr->Assign(new_obj);
         }
       }
     }
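
With the mod-union table now holding mirror::HeapReference<Object>* slots, UpdateAndMarkReferences rewrites a slot via Assign() only when the collector actually moved the referent, keeping untouched (for example image) pages clean. A condensed sketch of that update-only-if-moved pass; RefSlot and RootVisitor are simplified stand-ins for the types used above.

#include <vector>

struct Object {};

// A recorded reference slot inside some other object (simplified stand-in for
// a mirror::HeapReference<Object>* entry in the table above).
struct RefSlot {
  Object* ref;
};

// Collector callback: returns the (possibly new) location of obj.
using RootVisitor = Object* (*)(Object* obj, void* arg);

// Update every remembered slot, but write only when the referent moved, so
// pages that do not change stay clean.
void UpdateAndMarkReferences(std::vector<RefSlot*>& references,
                             RootVisitor visitor, void* arg) {
  for (RefSlot* slot : references) {
    Object* obj = slot->ref;
    if (obj != nullptr) {
      Object* new_obj = visitor(obj, arg);
      if (new_obj != obj) {
        slot->ref = new_obj;  // write only on actual movement
      }
    }
  }
}

int main() {
  Object old_location, new_location;
  RefSlot slot{&old_location};
  std::vector<RefSlot*> refs{&slot};
  // A visitor that reports every object as having moved to new_location.
  UpdateAndMarkReferences(refs,
      [](Object* /*obj*/, void* arg) { return static_cast<Object*>(arg); },
      &new_location);
  return slot.ref == &new_location ? 0 : 1;
}
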
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index 5a99f1b..a89dbd1 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -112,20 +112,23 @@
 
   // Exclusive lock is required since verify uses SpaceBitmap::VisitMarkedRange and
   // VisitMarkedRange can't know if the callback will modify the bitmap or not.
-  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void Verify()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
   // Function that tells whether or not to add a reference to the table.
   virtual bool AddReference(const mirror::Object* obj, const mirror::Object* ref) = 0;
 
-  void Dump(std::ostream& os);
+  void Dump(std::ostream& os) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  protected:
   // Cleared card array, used to update the mod-union table.
   ModUnionTable::CardSet cleared_cards_;
 
   // Maps from dirty cards to their corresponding alloc space references.
-  SafeMap<const byte*, std::vector<mirror::Object**>, std::less<const byte*>,
-    GcAllocator<std::pair<const byte*, std::vector<mirror::Object**> > > > references_;
+  SafeMap<const byte*, std::vector<mirror::HeapReference<mirror::Object>*>, std::less<const byte*>,
+      GcAllocator<std::pair<const byte*, std::vector<mirror::HeapReference<mirror::Object>*> > > >
+      references_;
 };
 
 // Card caching implementation. Keeps track of which cards we cleared and only this information.
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 1dde18d..d6d1b3e 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -18,7 +18,6 @@
 #define ART_RUNTIME_GC_ACCOUNTING_SPACE_BITMAP_INL_H_
 
 #include "base/logging.h"
-#include "cutils/atomic-inline.h"
 #include "utils.h"
 
 namespace art {
@@ -38,9 +37,11 @@
     old_word = *address;
     // Fast path: The bit is already set.
     if ((old_word & mask) != 0) {
+      DCHECK(Test(obj));
       return true;
     }
-  } while (UNLIKELY(android_atomic_cas(old_word, old_word | mask, address) != 0));
+  } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
+  DCHECK(Test(obj));
   return false;
 }
 
@@ -57,6 +58,15 @@
 void SpaceBitmap::VisitMarkedRange(uintptr_t visit_begin, uintptr_t visit_end,
                                    const Visitor& visitor) const {
   DCHECK_LT(visit_begin, visit_end);
+#ifdef __LP64__
+  // TODO: make the optimized code below work in the 64bit case.
+  for (uintptr_t i = visit_begin; i < visit_end; i += kAlignment) {
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(i);
+    if (Test(obj)) {
+      visitor(obj);
+    }
+  }
+#else
   const size_t bit_index_start = (visit_begin - heap_begin_) / kAlignment;
   const size_t bit_index_end = (visit_end - heap_begin_ - 1) / kAlignment;
 
@@ -115,6 +125,7 @@
     visitor(obj);
     edge_word ^= static_cast<size_t>(kWordHighBitMask) >> shift;
   }
+#endif
 }
 
 inline bool SpaceBitmap::Modify(const mirror::Object* obj, bool do_set) {
@@ -131,6 +142,7 @@
   } else {
     *address = old_word & ~mask;
   }
+  DCHECK_EQ(Test(obj), do_set);
   return (old_word & mask) != 0;
 }
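
The Set() fast path above swaps the cutils android_atomic_cas helper for the GCC/Clang __sync_bool_compare_and_swap builtin. A self-contained sketch of that lock-free test-and-set loop, assuming a simple low-to-high bit layout rather than the CLZ-friendly high-to-low packing the real bitmap uses:

    #include <cstddef>
    #include <cstdint>

    // Atomically sets one bit in a word-indexed bitmap and returns the bit's
    // previous value (true if it was already set).
    inline bool AtomicTestAndSetBit(uintptr_t* bitmap, size_t bit_index) {
      const size_t kBitsPerWord = sizeof(uintptr_t) * 8;
      uintptr_t* address = &bitmap[bit_index / kBitsPerWord];
      const uintptr_t mask = static_cast<uintptr_t>(1) << (bit_index % kBitsPerWord);
      uintptr_t old_word;
      do {
        old_word = *address;
        if ((old_word & mask) != 0) {
          return true;  // Fast path: already set, nothing to do.
        }
        // Retry if another thread changed the word between the load and the CAS.
      } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
      return false;
    }
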
 
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index 99800fc..a080bee 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -64,7 +64,7 @@
   size_t bitmap_size = OffsetToIndex(RoundUp(heap_capacity, kAlignment * kBitsPerWord)) * kWordSize;
   std::string error_msg;
   UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), NULL, bitmap_size,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
+                                                 PROT_READ | PROT_WRITE, false, &error_msg));
   if (UNLIKELY(mem_map.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate bitmap " << name << ": " << error_msg;
     return NULL;
@@ -128,9 +128,9 @@
 //
 // The callback is not permitted to increase the max of either bitmap.
 void SpaceBitmap::SweepWalk(const SpaceBitmap& live_bitmap,
-                           const SpaceBitmap& mark_bitmap,
-                           uintptr_t sweep_begin, uintptr_t sweep_end,
-                           SpaceBitmap::SweepCallback* callback, void* arg) {
+                            const SpaceBitmap& mark_bitmap,
+                            uintptr_t sweep_begin, uintptr_t sweep_end,
+                            SpaceBitmap::SweepCallback* callback, void* arg) {
   CHECK(live_bitmap.bitmap_begin_ != NULL);
   CHECK(mark_bitmap.bitmap_begin_ != NULL);
   CHECK_EQ(live_bitmap.heap_begin_, mark_bitmap.heap_begin_);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 2d6cde5..aa074eb 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -72,8 +72,8 @@
   }
 
   // Pack the bits in backwards so they come out in address order when using CLZ.
-  static word OffsetToMask(uintptr_t offset_) {
-    return static_cast<uintptr_t>(kWordHighBitMask) >> ((offset_ / kAlignment) % kBitsPerWord);
+  static word OffsetToMask(uintptr_t offset) {
+    return static_cast<uintptr_t>(kWordHighBitMask) >> ((offset / kAlignment) % kBitsPerWord);
   }
 
   inline bool Set(const mirror::Object* obj) {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 8ae61a3..65d4c441 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -15,6 +15,9 @@
  */
 
 #include "base/mutex-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/object.h"
+#include "mirror/object-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "rosalloc.h"
@@ -749,21 +752,35 @@
   }
 }
 
-void RosAlloc::Run::Dump() {
-  size_t idx = size_bracket_idx_;
-  size_t num_slots = numOfSlots[idx];
-  size_t num_vec = RoundUp(num_slots, 32) / 32;
+std::string RosAlloc::Run::BitMapToStr(uint32_t* bit_map_base, size_t num_vec) {
   std::string bit_map_str;
   for (size_t v = 0; v < num_vec; v++) {
-    uint32_t vec = alloc_bit_map_[v];
+    uint32_t vec = bit_map_base[v];
     if (v != num_vec - 1) {
       bit_map_str.append(StringPrintf("%x-", vec));
     } else {
       bit_map_str.append(StringPrintf("%x", vec));
     }
   }
-  LOG(INFO) << "Run : " << std::hex << reinterpret_cast<intptr_t>(this)
-            << std::dec << ", idx=" << idx << ", bit_map=" << bit_map_str;
+  return bit_map_str;
+}
+
+std::string RosAlloc::Run::Dump() {
+  size_t idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  std::ostringstream stream;
+  stream << "RosAlloc Run = " << reinterpret_cast<void*>(this)
+         << "{ magic_num=" << static_cast<int>(magic_num_)
+         << " size_bracket_idx=" << idx
+         << " is_thread_local=" << static_cast<int>(is_thread_local_)
+         << " to_be_bulk_freed=" << static_cast<int>(to_be_bulk_freed_)
+         << " top_slot_idx=" << top_slot_idx_
+         << " alloc_bit_map=" << BitMapToStr(alloc_bit_map_, num_vec)
+         << " bulk_free_bit_map=" << BitMapToStr(BulkFreeBitMap(), num_vec)
+         << " thread_local_bit_map=" << BitMapToStr(ThreadLocalFreeBitMap(), num_vec)
+         << " }" << std::endl;
+  return stream.str();
 }
 
 void* RosAlloc::Run::AllocSlot() {
@@ -849,7 +866,7 @@
   size_t num_vec = RoundUp(num_slots, 32) / 32;
   bool changed = false;
   uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* tl_free_vecp = &thread_local_free_bit_map()[0];
+  uint32_t* tl_free_vecp = &ThreadLocalFreeBitMap()[0];
   bool is_all_free_after = true;
   for (size_t v = 0; v < num_vec; v++, vecp++, tl_free_vecp++) {
     uint32_t tl_free_vec = *tl_free_vecp;
@@ -881,7 +898,7 @@
   size_t num_slots = numOfSlots[idx];
   size_t num_vec = RoundUp(num_slots, 32) / 32;
   uint32_t* vecp = &alloc_bit_map_[0];
-  uint32_t* free_vecp = &bulk_free_bit_map()[0];
+  uint32_t* free_vecp = &BulkFreeBitMap()[0];
   for (size_t v = 0; v < num_vec; v++, vecp++, free_vecp++) {
     uint32_t free_vec = *free_vecp;
     if (free_vec != 0) {
@@ -898,13 +915,13 @@
   byte idx = size_bracket_idx_;
   size_t num_slots = numOfSlots[idx];
   size_t num_vec = RoundUp(num_slots, 32) / 32;
-  uint32_t* to_vecp = &thread_local_free_bit_map()[0];
-  uint32_t* from_vecp = &bulk_free_bit_map()[0];
+  uint32_t* to_vecp = &ThreadLocalFreeBitMap()[0];
+  uint32_t* from_vecp = &BulkFreeBitMap()[0];
   for (size_t v = 0; v < num_vec; v++, to_vecp++, from_vecp++) {
     uint32_t from_vec = *from_vecp;
     if (from_vec != 0) {
       *to_vecp |= from_vec;
-      *from_vecp = 0;  // clear the from free bit map.
+      *from_vecp = 0;  // clear the bulk free bit map.
     }
     DCHECK_EQ(*from_vecp, static_cast<uint32_t>(0));
   }
@@ -912,11 +929,11 @@
 
 inline void RosAlloc::Run::MarkThreadLocalFreeBitMap(void* ptr) {
   DCHECK_NE(is_thread_local_, 0);
-  MarkFreeBitMapShared(ptr, thread_local_free_bit_map(), "MarkThreadLocalFreeBitMap");
+  MarkFreeBitMapShared(ptr, ThreadLocalFreeBitMap(), "MarkThreadLocalFreeBitMap");
 }
 
 inline void RosAlloc::Run::MarkBulkFreeBitMap(void* ptr) {
-  MarkFreeBitMapShared(ptr, bulk_free_bit_map(), "MarkFreeBitMap");
+  MarkFreeBitMapShared(ptr, BulkFreeBitMap(), "MarkFreeBitMap");
 }
 
 inline void RosAlloc::Run::MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base,
@@ -975,6 +992,32 @@
   return true;
 }
 
+inline bool RosAlloc::Run::IsBulkFreeBitmapClean() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = BulkFreeBitMap()[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+inline bool RosAlloc::Run::IsThreadLocalFreeBitmapClean() {
+  byte idx = size_bracket_idx_;
+  size_t num_slots = numOfSlots[idx];
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  for (size_t v = 0; v < num_vec; v++) {
+    uint32_t vec = ThreadLocalFreeBitMap()[v];
+    if (vec != 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
 inline void RosAlloc::Run::ClearBitMaps() {
   byte idx = size_bracket_idx_;
   size_t num_slots = numOfSlots[idx];
@@ -1196,8 +1239,10 @@
   }
 }
 
-void RosAlloc::DumpPageMap(Thread* self) {
-  MutexLock mu(self, lock_);
+std::string RosAlloc::DumpPageMap() {
+  std::ostringstream stream;
+  stream << "RosAlloc PageMap: " << std::endl;
+  lock_.AssertHeld(Thread::Current());
   size_t end = page_map_.size();
   FreePageRun* curr_fpr = NULL;
   size_t curr_fpr_size = 0;
@@ -1218,15 +1263,15 @@
           curr_fpr_size = fpr->ByteSize(this);
           DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
           remaining_curr_fpr_size = curr_fpr_size - kPageSize;
-          LOG(INFO) << "[" << i << "]=Empty (FPR start)"
-                    << " fpr_size=" << curr_fpr_size
-                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          stream << "[" << i << "]=Empty (FPR start)"
+                 << " fpr_size=" << curr_fpr_size
+                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
             curr_fpr = NULL;
             curr_fpr_size = 0;
           }
-          LOG(INFO) << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr);
+          stream << "curr_fpr=0x" << std::hex << reinterpret_cast<intptr_t>(curr_fpr) << std::endl;
           DCHECK_EQ(num_running_empty_pages, static_cast<size_t>(0));
         } else {
           // Still part of the current free page run.
@@ -1235,8 +1280,8 @@
           DCHECK_EQ(remaining_curr_fpr_size % kPageSize, static_cast<size_t>(0));
           DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(kPageSize));
           remaining_curr_fpr_size -= kPageSize;
-          LOG(INFO) << "[" << i << "]=Empty (FPR part)"
-                    << " remaining_fpr_size=" << remaining_curr_fpr_size;
+          stream << "[" << i << "]=Empty (FPR part)"
+                 << " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
           if (remaining_curr_fpr_size == 0) {
             // Reset at the end of the current free page run.
             curr_fpr = NULL;
@@ -1249,36 +1294,38 @@
       case kPageMapLargeObject: {
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Large (start)";
+        stream << "[" << i << "]=Large (start)" << std::endl;
         break;
       }
       case kPageMapLargeObjectPart:
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Large (part)";
+        stream << "[" << i << "]=Large (part)" << std::endl;
         break;
       case kPageMapRun: {
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
         Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
         size_t idx = run->size_bracket_idx_;
-        LOG(INFO) << "[" << i << "]=Run (start)"
-                  << " idx=" << idx
-                  << " numOfPages=" << numOfPages[idx]
-                  << " thread_local=" << static_cast<int>(run->is_thread_local_)
-                  << " is_all_free=" << (run->IsAllFree() ? 1 : 0);
+        stream << "[" << i << "]=Run (start)"
+               << " idx=" << idx
+               << " numOfPages=" << numOfPages[idx]
+               << " thread_local=" << static_cast<int>(run->is_thread_local_)
+               << " is_all_free=" << (run->IsAllFree() ? 1 : 0)
+               << std::endl;
         break;
       }
       case kPageMapRunPart:
         DCHECK_EQ(remaining_curr_fpr_size, static_cast<size_t>(0));
         num_running_empty_pages = 0;
-        LOG(INFO) << "[" << i << "]=Run (part)";
+        stream << "[" << i << "]=Run (part)" << std::endl;
         break;
       default:
-        LOG(FATAL) << "Unreachable - page map type: " << pm;
+        stream << "[" << i << "]=Unrecognizable page map type: " << pm;
         break;
     }
   }
+  return stream.str();
 }
 
 size_t RosAlloc::UsableSize(void* ptr) {
@@ -1468,6 +1515,8 @@
 
 void RosAlloc::RevokeThreadLocalRuns(Thread* thread) {
   Thread* self = Thread::Current();
+  // Avoid race conditions on the bulk free bit maps with BulkFree() (GC).
+  WriterMutexLock wmu(self, bulk_free_lock_);
   for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) {
     MutexLock mu(self, *size_bracket_locks_[idx]);
     Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[idx]);
@@ -1629,6 +1678,223 @@
   ++(*objects_allocated);
 }
 
+void RosAlloc::Verify() {
+  Thread* self = Thread::Current();
+  CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
+      << "The mutator lock isn't exclusively held at RosAlloc::Verify()";
+  MutexLock mu(self, *Locks::thread_list_lock_);
+  WriterMutexLock wmu(self, bulk_free_lock_);
+  std::vector<Run*> runs;
+  {
+    MutexLock mu(self, lock_);
+    size_t pm_end = page_map_.size();
+    size_t i = 0;
+    while (i < pm_end) {
+      byte pm = page_map_[i];
+      switch (pm) {
+        case kPageMapEmpty: {
+          // The start of a free page run.
+          FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+          DCHECK(fpr->magic_num_ == kMagicNumFree) << "Bad magic number : " << fpr->magic_num_;
+          CHECK(free_page_runs_.find(fpr) != free_page_runs_.end())
+              << "An empty page must belong to the free page run set";
+          size_t fpr_size = fpr->ByteSize(this);
+          CHECK(IsAligned<kPageSize>(fpr_size))
+              << "A free page run size isn't page-aligned : " << fpr_size;
+          size_t num_pages = fpr_size / kPageSize;
+          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
+              << "A free page run size must be > 0 : " << fpr_size;
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            CHECK_EQ(page_map_[j], kPageMapEmpty)
+                << "A mismatch between the page map table for kPageMapEmpty "
+                << " at page index " << j
+                << " and the free page run size : page index range : "
+                << i << " to " << (i + num_pages) << std::endl << DumpPageMap();
+          }
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapLargeObject: {
+          // The start of a large object.
+          size_t num_pages = 1;
+          size_t idx = i + 1;
+          while (idx < pm_end && page_map_[idx] == kPageMapLargeObjectPart) {
+            num_pages++;
+            idx++;
+          }
+          void* start = base_ + i * kPageSize;
+          mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
+          size_t obj_size = obj->SizeOf();
+          CHECK(obj_size > kLargeSizeThreshold)
+              << "A rosalloc large object size must be > " << kLargeSizeThreshold;
+          CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
+              << "A rosalloc large object size " << obj_size
+              << " does not match the page map table " << (num_pages * kPageSize)
+              << std::endl << DumpPageMap();
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapLargeObjectPart:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+        case kPageMapRun: {
+          // The start of a run.
+          Run* run = reinterpret_cast<Run*>(base_ + i * kPageSize);
+          DCHECK(run->magic_num_ == kMagicNum) << "Bad magic number : " << run->magic_num_;
+          size_t idx = run->size_bracket_idx_;
+          CHECK(idx < kNumOfSizeBrackets) << "Out of range size bracket index : " << idx;
+          size_t num_pages = numOfPages[idx];
+          CHECK_GT(num_pages, static_cast<uintptr_t>(0))
+              << "Run size must be > 0 : " << num_pages;
+          for (size_t j = i + 1; j < i + num_pages; ++j) {
+            CHECK_EQ(page_map_[j], kPageMapRunPart)
+                << "A mismatch between the page map table for kPageMapRunPart "
+                << " at page index " << j
+                << " and the run size : page index range " << i << " to " << (i + num_pages)
+                << std::endl << DumpPageMap();
+          }
+          runs.push_back(run);
+          i += num_pages;
+          CHECK_LE(i, pm_end) << "Page map index " << i << " out of range < " << pm_end
+                              << std::endl << DumpPageMap();
+          break;
+        }
+        case kPageMapRunPart:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+        default:
+          LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap();
+          break;
+      }
+    }
+  }
+
+  // Call Verify() here for the lock order.
+  for (auto& run : runs) {
+    run->Verify(self, this);
+  }
+}
+
+void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
+  DCHECK(magic_num_ == kMagicNum) << "Bad magic number : " << Dump();
+  size_t idx = size_bracket_idx_;
+  CHECK(idx < kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
+  byte* slot_base = reinterpret_cast<byte*>(this) + headerSizes[idx];
+  size_t num_slots = numOfSlots[idx];
+  size_t bracket_size = IndexToBracketSize(idx);
+  CHECK_EQ(slot_base + num_slots * bracket_size,
+           reinterpret_cast<byte*>(this) + numOfPages[idx] * kPageSize)
+      << "Mismatch in the end address of the run " << Dump();
+  // Check that the bulk free bitmap is clean. It's only used during BulkFree().
+  CHECK(IsBulkFreeBitmapClean()) << "The bulk free bit map isn't clean " << Dump();
+  // Check the bump index mode, if it's on.
+  if (top_slot_idx_ < num_slots) {
+    // If the bump index mode is on (top_slot_idx_ < num_slots), then
+    // all of the slots after the top index must be free.
+    for (size_t i = top_slot_idx_; i < num_slots; ++i) {
+      size_t vec_idx = i / 32;
+      size_t vec_off = i % 32;
+      uint32_t vec = alloc_bit_map_[vec_idx];
+      CHECK_EQ((vec & (1 << vec_off)), static_cast<uint32_t>(0))
+          << "A slot >= top_slot_idx_ isn't free " << Dump();
+    }
+  } else {
+    CHECK_EQ(top_slot_idx_, num_slots)
+        << "If the bump index mode is off, the top index == the number of slots "
+        << Dump();
+  }
+  // Check the thread local runs, the current runs, and the run sets.
+  if (is_thread_local_) {
+    // If it's a thread local run, then it must be pointed to by an owner thread.
+    bool owner_found = false;
+    std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList();
+    for (auto it = thread_list.begin(); it != thread_list.end(); ++it) {
+      Thread* thread = *it;
+      for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+        MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
+        Run* thread_local_run = reinterpret_cast<Run*>(thread->rosalloc_runs_[i]);
+        if (thread_local_run == this) {
+          CHECK(!owner_found)
+              << "A thread local run has more than one owner thread " << Dump();
+          CHECK_EQ(i, idx)
+              << "A mismatching size bracket index in a thread local run " << Dump();
+          owner_found = true;
+        }
+      }
+    }
+    CHECK(owner_found) << "A thread local run has no owner thread " << Dump();
+  } else {
+    // If it's not thread local, check that the thread local free bitmap is clean.
+    CHECK(IsThreadLocalFreeBitmapClean())
+        << "A non-thread-local run's thread local free bitmap isn't clean "
+        << Dump();
+    // Check if it's a current run for the size bracket.
+    bool is_current_run = false;
+    for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
+      MutexLock mu(self, *rosalloc->size_bracket_locks_[i]);
+      Run* current_run = rosalloc->current_runs_[i];
+      if (idx == i) {
+        if (this == current_run) {
+          is_current_run = true;
+        }
+      } else {
+        // If the size bracket index does not match, then it must not
+        // be a current run.
+        CHECK_NE(this, current_run)
+            << "A current run points to a run with a wrong size bracket index " << Dump();
+      }
+    }
+    // If it's neither a thread-local run nor a current run, then it must be
+    // in a run set.
+    if (!is_current_run) {
+      MutexLock mu(self, rosalloc->lock_);
+      std::set<Run*>& non_full_runs = rosalloc->non_full_runs_[idx];
+      // If it's all free, it must be a free page run rather than a run.
+      CHECK(!IsAllFree()) << "A free run must be in a free page run set " << Dump();
+      if (!IsFull()) {
+        // If it's not full, it must be in the non-full run set.
+        CHECK(non_full_runs.find(this) != non_full_runs.end())
+            << "A non-full run isn't in the non-full run set " << Dump();
+      } else {
+        // If it's full, it must be in the full run set (debug build only).
+        if (kIsDebugBuild) {
+          hash_set<Run*, hash_run, eq_run>& full_runs = rosalloc->full_runs_[idx];
+          CHECK(full_runs.find(this) != full_runs.end())
+              << " A full run isn't in the full run set " << Dump();
+        }
+      }
+    }
+  }
+  // Check each slot.
+  size_t num_vec = RoundUp(num_slots, 32) / 32;
+  size_t slots = 0;
+  for (size_t v = 0; v < num_vec; v++, slots += 32) {
+    DCHECK(num_slots >= slots) << "Out of bounds";
+    uint32_t vec = alloc_bit_map_[v];
+    uint32_t thread_local_free_vec = ThreadLocalFreeBitMap()[v];
+    size_t end = std::min(num_slots - slots, static_cast<size_t>(32));
+    for (size_t i = 0; i < end; ++i) {
+      bool is_allocated = ((vec >> i) & 0x1) != 0;
+      // If a thread local run, slots may be marked freed in the
+      // thread local free bitmap.
+      bool is_thread_local_freed = is_thread_local_ && ((thread_local_free_vec >> i) & 0x1) != 0;
+      if (is_allocated && !is_thread_local_freed) {
+        byte* slot_addr = slot_base + (slots + i) * bracket_size;
+        mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
+        size_t obj_size = obj->SizeOf();
+        CHECK_LE(obj_size, kLargeSizeThreshold)
+            << "A run slot contains a large object " << Dump();
+        CHECK_EQ(SizeToIndex(obj_size), idx)
+            << "A run slot contains an object with wrong size " << Dump();
+      }
+    }
+  }
+}
+
 }  // namespace allocator
 }  // namespace gc
 }  // namespace art
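
For reference, the hex dump format produced by Run::Dump()/BitMapToStr above (one 32-bit vector per 32 slots, vectors joined with '-') can be reproduced with a small standalone sketch; the names below are illustrative, not the runtime's API:

    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Each 32-bit vector covers 32 slots; vectors print in hex separated by '-'.
    std::string BitMapVectorsToString(const uint32_t* vectors, size_t num_vec) {
      std::ostringstream stream;
      for (size_t v = 0; v < num_vec; ++v) {
        stream << std::hex << vectors[v];
        if (v + 1 != num_vec) {
          stream << "-";
        }
      }
      return stream.str();
    }

    int main() {
      // A run with 40 slots needs RoundUp(40, 32) / 32 == 2 vectors.
      const uint32_t alloc_bit_map[2] = {0x0000000fu, 0x00000001u};
      std::cout << "alloc_bit_map=" << BitMapVectorsToString(alloc_bit_map, 2)
                << std::endl;  // Prints "alloc_bit_map=f-1".
      return 0;
    }
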
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 4eb13315..c4238c7 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -212,11 +212,11 @@
       return size;
     }
     // Returns the base address of the free bit map.
-    uint32_t* bulk_free_bit_map() {
+    uint32_t* BulkFreeBitMap() {
       return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
     }
     // Returns the base address of the thread local free bit map.
-    uint32_t* thread_local_free_bit_map() {
+    uint32_t* ThreadLocalFreeBitMap() {
       return reinterpret_cast<uint32_t*>(reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
     }
     void* End() {
@@ -248,16 +248,26 @@
     bool IsAllFree();
     // Returns true if all the slots in the run are in use.
     bool IsFull();
+    // Returns true if the bulk free bit map is clean.
+    bool IsBulkFreeBitmapClean();
+    // Returns true if the thread local free bit map is clean.
+    bool IsThreadLocalFreeBitmapClean();
     // Clear all the bit maps.
     void ClearBitMaps();
     // Iterate over all the slots and apply the given function.
     void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);
     // Dump the run metadata for debugging.
-    void Dump();
+    std::string Dump();
+    // Verify for debugging.
+    void Verify(Thread* self, RosAlloc* rosalloc)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
 
    private:
     // The common part of MarkFreeBitMap() and MarkThreadLocalFreeBitMap().
     void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+    // Turns the bit map into a string for debugging.
+    static std::string BitMapToStr(uint32_t* bit_map_base, size_t num_vec);
   };
 
   // The magic number for a run.
@@ -456,7 +466,9 @@
   // and the footprint.
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // The reader-writer lock to allow one bulk free at a time while
-  // allowing multiple individual frees at the same time.
+  // allowing multiple individual frees at the same time. Also, this
+  // is used to avoid race conditions between BulkFree() and
+  // RevokeThreadLocalRuns() on the bulk free bitmaps.
   ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   // The page release mode.
@@ -529,7 +541,7 @@
   // Releases the thread-local runs assigned to all the threads back to the common set of runs.
   void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
   // Dumps the page map for debugging.
-  void DumpPageMap(Thread* self);
+  std::string DumpPageMap() EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
   // Callbacks for InspectAll that will count the number of bytes
   // allocated and objects allocated, respectively.
@@ -539,6 +551,9 @@
   bool DoesReleaseAllPages() const {
     return page_release_mode_ == kPageReleaseModeAll;
   }
+
+  // Verify for debugging.
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
 };
 
 }  // namespace allocator
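
The EXCLUSIVE_LOCKS_REQUIRED / SHARED_LOCKS_REQUIRED annotations added throughout this change are checked by Clang's -Wthread-safety analysis. A rough sketch of how such macros are commonly mapped onto the compiler attributes is below; the macro spellings and the SketchMutex type are assumptions for illustration, not the runtime's own definitions:

    // Map annotation macros onto Clang's thread-safety attributes; other
    // compilers see empty macros.
    #if defined(__clang__)
    #define TS_ATTR(x) __attribute__((x))
    #else
    #define TS_ATTR(x)
    #endif

    #define EXCLUSIVE_LOCKS_REQUIRED_SKETCH(...) TS_ATTR(exclusive_locks_required(__VA_ARGS__))
    #define SHARED_LOCKS_REQUIRED_SKETCH(...)    TS_ATTR(shared_locks_required(__VA_ARGS__))

    // A toy lockable type so the example is self-contained.
    class TS_ATTR(lockable) SketchMutex {
     public:
      void Lock() TS_ATTR(exclusive_lock_function) {}
      void Unlock() TS_ATTR(unlock_function) {}
    };

    SketchMutex verify_lock;

    // With -Wthread-safety, calling Verify() without holding verify_lock is
    // reported at compile time instead of failing at runtime.
    void Verify() EXCLUSIVE_LOCKS_REQUIRED_SKETCH(verify_lock);
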
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 25e8966..094e274 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -88,14 +88,18 @@
     // Mutator lock may be already exclusively held when we do garbage collections for changing the
     // current collector / allocator during process state updates.
     if (Locks::mutator_lock_->IsExclusiveHeld(self)) {
+      // PreGcRosAllocVerification() is called in Heap::TransitionCollector().
       GetHeap()->RevokeAllThreadLocalBuffers();
       MarkingPhase();
       ReclaimPhase();
+      // PostGcRosAllocVerification() is called in Heap::TransitionCollector().
     } else {
       thread_list->SuspendAll();
+      GetHeap()->PreGcRosAllocVerification(&timings_);
       GetHeap()->RevokeAllThreadLocalBuffers();
       MarkingPhase();
       ReclaimPhase();
+      GetHeap()->PostGcRosAllocVerification(&timings_);
       thread_list->ResumeAll();
     }
     ATRACE_END();
@@ -114,10 +118,12 @@
       thread_list->SuspendAll();
       ATRACE_END();
       ATRACE_BEGIN("All mutator threads suspended");
+      GetHeap()->PreGcRosAllocVerification(&timings_);
       done = HandleDirtyObjectsPhase();
       if (done) {
         GetHeap()->RevokeAllThreadLocalBuffers();
       }
+      GetHeap()->PostGcRosAllocVerification(&timings_);
       ATRACE_END();
       uint64_t pause_end = NanoTime();
       ATRACE_BEGIN("Resuming mutator threads");
@@ -151,10 +157,11 @@
          space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) {
       accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
       accounting::SpaceBitmap* mark_bitmap = space->GetMarkBitmap();
-      if (live_bitmap != mark_bitmap) {
+      if (live_bitmap != nullptr && live_bitmap != mark_bitmap) {
         heap_->GetLiveBitmap()->ReplaceBitmap(live_bitmap, mark_bitmap);
         heap_->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-        space->AsMallocSpace()->SwapBitmaps();
+        CHECK(space->IsContinuousMemMapAllocSpace());
+        space->AsContinuousMemMapAllocSpace()->SwapBitmaps();
       }
     }
   }
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 9c1c5dc..d148ae5 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -118,7 +118,7 @@
     while (ref_offsets != 0) {
       size_t right_shift = CLZ(ref_offsets);
       MemberOffset field_offset = CLASS_OFFSET_FROM_CLZ(right_shift);
-      mirror::Object* ref = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+      mirror::Object* ref = obj->GetFieldObject<mirror::Object>(field_offset, false);
       visitor(obj, ref, field_offset, is_static);
       ref_offsets &= ~(CLASS_HIGH_BIT >> right_shift);
     }
@@ -127,17 +127,17 @@
     // walk up the class inheritance hierarchy and find reference
     // offsets the hard way. In the static case, just consider this
     // class.
-    for (const mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
-         klass != NULL;
-         klass = is_static ? NULL : klass->GetSuperClass()) {
+    for (mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
+         klass != nullptr;
+         klass = is_static ? nullptr : klass->GetSuperClass()) {
       size_t num_reference_fields = (is_static
                                      ? klass->NumReferenceStaticFields()
                                      : klass->NumReferenceInstanceFields());
       for (size_t i = 0; i < num_reference_fields; ++i) {
         mirror::ArtField* field = (is_static ? klass->GetStaticField(i)
-                                   : klass->GetInstanceField(i));
+                                             : klass->GetInstanceField(i));
         MemberOffset field_offset = field->GetOffset();
-        mirror::Object* ref = obj->GetFieldObject<mirror::Object*>(field_offset, false);
+        mirror::Object* ref = obj->GetFieldObject<mirror::Object>(field_offset, false);
         visitor(obj, ref, field_offset, is_static);
       }
     }
@@ -150,7 +150,7 @@
   const size_t length = static_cast<size_t>(array->GetLength());
   for (size_t i = 0; i < length; ++i) {
     mirror::Object* element = array->GetWithoutChecks(static_cast<int32_t>(i));
-    const size_t width = sizeof(mirror::Object*);
+    const size_t width = sizeof(mirror::HeapReference<mirror::Object>);
     MemberOffset offset(i * width + mirror::Array::DataOffset(width).Int32Value());
     visitor(array, element, offset, false);
   }
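
The loop above walks a 32-bit reference-offset bitmap: each set bit marks one reference field, bits are consumed from the most significant end with count-leading-zeros, and each bit maps to a field offset. A standalone sketch of that walk, where the 4-byte slot size and the 8-byte base offset are assumptions for the example rather than the runtime's actual object layout:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t kHighBit = 0x80000000u;
      uint32_t ref_offsets = 0xA0000004u;  // Example bitmap with three reference fields.
      while (ref_offsets != 0) {
        int right_shift = __builtin_clz(ref_offsets);
        // Bit i (counting from the MSB) corresponds to the i-th 4-byte reference slot.
        uint32_t field_offset = 8 + 4 * static_cast<uint32_t>(right_shift);
        std::printf("reference field at offset %u\n", field_offset);
        ref_offsets &= ~(kHighBit >> right_shift);  // Clear the bit we just handled.
      }
      return 0;
    }
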
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 5d450a7..862d06f 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -89,7 +89,8 @@
 void MarkSweep::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
   if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    BindLiveToMarkBitmap(space);
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
   }
 
   // Add the space to the immune region.
@@ -143,11 +144,6 @@
       mark_stack_(NULL),
       immune_begin_(NULL),
       immune_end_(NULL),
-      soft_reference_list_(NULL),
-      weak_reference_list_(NULL),
-      finalizer_reference_list_(NULL),
-      phantom_reference_list_(NULL),
-      cleared_reference_list_(NULL),
       live_stack_freeze_size_(0),
       gc_barrier_(new Barrier(0)),
       large_object_lock_("mark sweep large object lock", kMarkSweepLargeObjectLock),
@@ -161,11 +157,6 @@
   mark_stack_ = heap_->mark_stack_.get();
   DCHECK(mark_stack_ != nullptr);
   SetImmuneRange(nullptr, nullptr);
-  soft_reference_list_ = nullptr;
-  weak_reference_list_ = nullptr;
-  finalizer_reference_list_ = nullptr;
-  phantom_reference_list_ = nullptr;
-  cleared_reference_list_ = nullptr;
   class_count_ = 0;
   array_count_ = 0;
   other_count_ = 0;
@@ -347,7 +338,8 @@
     timings_.EndSplit();
 
     // Unbind the live and mark bitmaps.
-    UnBindBitmaps();
+    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    GetHeap()->UnBindBitmaps();
   }
 }
 
@@ -589,14 +581,6 @@
   timings_.EndSplit();
 }
 
-void MarkSweep::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsMallocSpace());
-  space::MallocSpace* alloc_space = space->AsMallocSpace();
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
-  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-}
-
 class ScanObjectVisitor {
  public:
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
@@ -893,14 +877,8 @@
 // recursively marks until the mark stack is emptied.
 void MarkSweep::RecursiveMark() {
   TimingLogger::ScopedSplit split("RecursiveMark", &timings_);
-  // RecursiveMark will build the lists of known instances of the Reference classes.
-  // See DelayReferenceReferent for details.
-  CHECK(soft_reference_list_ == NULL);
-  CHECK(weak_reference_list_ == NULL);
-  CHECK(finalizer_reference_list_ == NULL);
-  CHECK(phantom_reference_list_ == NULL);
-  CHECK(cleared_reference_list_ == NULL);
-
+  // RecursiveMark will build the lists of known instances of the Reference classes. See
+  // DelayReferenceReferent for details.
   if (kUseRecursiveMark) {
     const bool partial = GetGcType() == kGcTypePartial;
     ScanObjectVisitor scan_visitor(this);
@@ -1146,13 +1124,13 @@
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* malloc_space = space->AsMallocSpace();
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
       TimingLogger::ScopedSplit split(
-          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepMallocSpace", &timings_);
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
-      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
       heap_->RecordFree(freed_objects, freed_bytes);
       freed_objects_.FetchAndAdd(freed_objects);
       freed_bytes_.FetchAndAdd(freed_bytes);
@@ -1278,23 +1256,6 @@
   return heap_->GetMarkBitmap()->Test(object);
 }
 
-void MarkSweep::UnBindBitmaps() {
-  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* alloc_space = space->AsMallocSpace();
-      if (alloc_space->temp_bitmap_.get() != NULL) {
-        // At this point, the temp_bitmap holds our old mark bitmap.
-        accounting::SpaceBitmap* new_bitmap = alloc_space->temp_bitmap_.release();
-        GetHeap()->GetMarkBitmap()->ReplaceBitmap(alloc_space->mark_bitmap_.get(), new_bitmap);
-        CHECK_EQ(alloc_space->mark_bitmap_.release(), alloc_space->live_bitmap_.get());
-        alloc_space->mark_bitmap_.reset(new_bitmap);
-        DCHECK(alloc_space->temp_bitmap_.get() == NULL);
-      }
-    }
-  }
-}
-
 void MarkSweep::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
   // Can't enqueue references if we hold the mutator lock.
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index e2eafb5..bfedac7 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 #define ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "barrier.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -118,12 +118,6 @@
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
-  void UnBindBitmaps()
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   // Builds a mark stack with objects on dirty cards and recursively mark until it empties.
   void RecursiveMarkDirtyObjects(bool paused, byte minimum_age)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -151,10 +145,6 @@
   void SweepArray(accounting::ObjectStack* allocation_stack_, bool swap_bitmaps)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
-  mirror::Object* GetClearedReferences() {
-    return cleared_reference_list_;
-  }
-
   // Blackens an object.
   void ScanObject(mirror::Object* obj)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_)
@@ -346,12 +336,6 @@
   mirror::Object* immune_begin_;
   mirror::Object* immune_end_;
 
-  mirror::Object* soft_reference_list_;
-  mirror::Object* weak_reference_list_;
-  mirror::Object* finalizer_reference_list_;
-  mirror::Object* phantom_reference_list_;
-  mirror::Object* cleared_reference_list_;
-
   // Parallel finger.
   AtomicInteger atomic_finger_;
   // Number of classes scanned, if kCountScannedTypes.
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 99c726d..0c6a938 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -67,7 +67,8 @@
 void SemiSpace::ImmuneSpace(space::ContinuousSpace* space) {
   // Bind live to mark bitmap if necessary.
   if (space->GetLiveBitmap() != space->GetMarkBitmap()) {
-    BindLiveToMarkBitmap(space);
+    CHECK(space->IsContinuousMemMapAllocSpace());
+    space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
   }
   // Add the space to the immune region.
   if (immune_begin_ == nullptr) {
@@ -98,12 +99,13 @@
 
 void SemiSpace::BindBitmaps() {
   timings_.StartSplit("BindBitmaps");
-  WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   // Mark all of the spaces we never collect as immune.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
     if (space->GetLiveBitmap() != nullptr) {
       if (space == to_space_) {
-        BindLiveToMarkBitmap(to_space_);
+        CHECK(to_space_->IsContinuousMemMapAllocSpace());
+        to_space_->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
       } else if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect
                  || space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect
                  // Add the main free list space and the non-moving
@@ -119,7 +121,6 @@
   if (generational_ && !whole_heap_collection_) {
     // We won't collect the large object space if a bump pointer space only collection.
     is_large_object_space_immune_ = true;
-    GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
   }
   timings_.EndSplit();
 }
@@ -180,8 +181,8 @@
       VLOG(heap) << "Bump pointer space only collection";
     }
   }
-  Thread* self = Thread::Current();
-  Locks::mutator_lock_->AssertExclusiveHeld(self);
+  Locks::mutator_lock_->AssertExclusiveHeld(self_);
+
   TimingLogger::ScopedSplit split("MarkingPhase", &timings_);
   // Need to do this with mutators paused so that somebody doesn't accidentally allocate into the
   // wrong space.
@@ -209,7 +210,7 @@
   // the live stack during the recursive mark.
   timings_.NewSplit("SwapStacks");
   heap_->SwapStacks();
-  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
   MarkRoots();
   // Mark roots of immune spaces.
   UpdateAndMarkModUnion();
@@ -287,6 +288,11 @@
 
   if (is_large_object_space_immune_) {
     DCHECK(generational_ && !whole_heap_collection_);
+    // Delay copying the live set to the marked set until here from
+    // BindBitmaps() as the large objects on the allocation stack may
+    // be newly added to the live set above in MarkAllocStackAsLive().
+    GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
+
     // When the large object space is immune, we need to scan the
     // large object space as roots as they contain references to their
     // classes (primitive array classes) that could move though they
@@ -305,10 +311,9 @@
 
 void SemiSpace::ReclaimPhase() {
   TimingLogger::ScopedSplit split("ReclaimPhase", &timings_);
-  Thread* self = Thread::Current();
-  ProcessReferences(self);
+  ProcessReferences(self_);
   {
-    ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    ReaderMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     SweepSystemWeaks();
   }
   // Record freed memory.
@@ -329,7 +334,7 @@
   timings_.EndSplit();
 
   {
-    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    WriterMutexLock mu(self_, *Locks::heap_bitmap_lock_);
     // Reclaim unmarked objects.
     Sweep(false);
     // Swap the live and mark bitmaps for each space which we modified. This is an
@@ -339,7 +344,8 @@
     SwapBitmaps();
     timings_.EndSplit();
     // Unbind the live and mark bitmaps.
-    UnBindBitmaps();
+    TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
+    GetHeap()->UnBindBitmaps();
   }
   // Release the memory used by the from space.
   if (kResetFromSpace) {
@@ -497,7 +503,7 @@
           MarkStackPush(obj);
         }
       } else {
-        DCHECK(!to_space_->HasAddress(obj)) << "Marking object in to_space_";
+        CHECK(!to_space_->HasAddress(obj)) << "Marking object in to_space_";
         if (MarkLargeObject(obj)) {
           MarkStackPush(obj);
         }
@@ -530,14 +536,6 @@
   timings_.EndSplit();
 }
 
-void SemiSpace::BindLiveToMarkBitmap(space::ContinuousSpace* space) {
-  CHECK(space->IsMallocSpace());
-  space::MallocSpace* alloc_space = space->AsMallocSpace();
-  accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = alloc_space->BindLiveToMarkBitmap();
-  GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
-}
-
 mirror::Object* SemiSpace::MarkedForwardingAddressCallback(Object* object, void* arg) {
   return reinterpret_cast<SemiSpace*>(arg)->GetMarkedForwardAddress(object);
 }
@@ -548,7 +546,7 @@
   timings_.EndSplit();
 }
 
-bool SemiSpace::ShouldSweepSpace(space::MallocSpace* space) const {
+bool SemiSpace::ShouldSweepSpace(space::ContinuousSpace* space) const {
   return space != from_space_ && space != to_space_ && !IsImmuneSpace(space);
 }
 
@@ -556,16 +554,16 @@
   DCHECK(mark_stack_->IsEmpty());
   TimingLogger::ScopedSplit("Sweep", &timings_);
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* malloc_space = space->AsMallocSpace();
-      if (!ShouldSweepSpace(malloc_space)) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (!ShouldSweepSpace(alloc_space)) {
         continue;
       }
       TimingLogger::ScopedSplit split(
-          malloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
+          alloc_space->IsZygoteSpace() ? "SweepZygoteSpace" : "SweepAllocSpace", &timings_);
       size_t freed_objects = 0;
       size_t freed_bytes = 0;
-      malloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
+      alloc_space->Sweep(swap_bitmaps, &freed_objects, &freed_bytes);
       heap_->RecordFree(freed_objects, freed_bytes);
       freed_objects_.FetchAndAdd(freed_objects);
       freed_bytes_.FetchAndAdd(freed_bytes);
@@ -603,9 +601,9 @@
     if (new_address != ref) {
       DCHECK(new_address != nullptr);
       // Don't need to mark the card since we are updating the object address and not changing the
-      // actual objects its pointing to. Using SetFieldPtr is better in this case since it does not
-      // dirty cards and use additional memory.
-      obj->SetFieldPtr(offset, new_address, false);
+      // actual object it's pointing to. Using SetFieldObjectWithoutWriteBarrier is better in this
+      // case since it does not dirty cards or use additional memory.
+      obj->SetFieldObjectWithoutWriteBarrier(offset, new_address, false);
     }
   }, kMovingClasses);
   mirror::Class* klass = obj->GetClass();
@@ -660,20 +658,6 @@
   return heap_->GetMarkBitmap()->Test(obj) ? obj : nullptr;
 }
 
-void SemiSpace::UnBindBitmaps() {
-  TimingLogger::ScopedSplit split("UnBindBitmaps", &timings_);
-  for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace()) {
-      space::MallocSpace* alloc_space = space->AsMallocSpace();
-      if (alloc_space->HasBoundBitmaps()) {
-        alloc_space->UnBindBitmaps();
-        heap_->GetMarkBitmap()->ReplaceBitmap(alloc_space->GetLiveBitmap(),
-                                              alloc_space->GetMarkBitmap());
-      }
-    }
-  }
-}
-
 void SemiSpace::SetToSpace(space::ContinuousMemMapAllocSpace* to_space) {
   DCHECK(to_space != nullptr);
   to_space_ = to_space;
@@ -686,7 +670,6 @@
 
 void SemiSpace::FinishPhase() {
   TimingLogger::ScopedSplit split("FinishPhase", &timings_);
-  // Can't enqueue references if we hold the mutator lock.
   Heap* heap = GetHeap();
   timings_.NewSplit("PostGcVerification");
   heap->PostGcVerification(this);
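
The SetFieldObjectWithoutWriteBarrier change above relies on pointer fixup after a moving collection only rewriting addresses, never which logical object a field refers to, so the card table does not need to be dirtied. A standalone sketch of that fixup step, with illustrative types and an explicit forwarding table instead of the collector's in-object forwarding pointers:

    #include <initializer_list>
    #include <unordered_map>

    struct FakeObject {
      FakeObject* field_a = nullptr;
      FakeObject* field_b = nullptr;
    };

    using ForwardingTable = std::unordered_map<const FakeObject*, FakeObject*>;

    // Rewrite every reference field that still points at a moved (from-space)
    // object to its recorded forwarding address.
    void FixupReferences(FakeObject* obj, const ForwardingTable& forwarding) {
      for (FakeObject** slot : {&obj->field_a, &obj->field_b}) {
        FakeObject* ref = *slot;
        if (ref != nullptr) {
          auto it = forwarding.find(ref);
          if (it != forwarding.end() && it->second != ref) {
            *slot = it->second;  // Plain store: no card needs to be dirtied.
          }
        }
      }
    }
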
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index bf129a3..685b33c 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 #define ART_RUNTIME_GC_COLLECTOR_SEMI_SPACE_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "barrier.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -114,9 +114,6 @@
   // the image. Mark that portion of the heap as immune.
   virtual void BindBitmaps() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void BindLiveToMarkBitmap(space::ContinuousSpace* space)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-
   void UnBindBitmaps()
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
@@ -171,7 +168,7 @@
   void ResizeMarkStack(size_t new_size);
 
   // Returns true if we should sweep the space.
-  virtual bool ShouldSweepSpace(space::MallocSpace* space) const;
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const;
 
   // Returns how many threads we should use for the current GC phase based on if we are paused,
   // whether or not we care about pauses.
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index c562e8c..9e3adb4 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -38,12 +38,12 @@
   // know what was allocated since the last GC. A side-effect of binding the allocation space mark
   // and live bitmap is that marking the objects will place them in the live bitmap.
   for (const auto& space : GetHeap()->GetContinuousSpaces()) {
-    if (space->IsMallocSpace() &&
+    if (space->IsContinuousMemMapAllocSpace() &&
         space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
-      BindLiveToMarkBitmap(space);
+      DCHECK(space->IsContinuousMemMapAllocSpace());
+      space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
     }
   }
-
   GetHeap()->GetLargeObjectsSpace()->CopyLiveToMarked();
 }
 
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index 4bc9ad2..98c27fb 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -34,6 +34,8 @@
   kCollectorTypeSS,
   // A generational variant of kCollectorTypeSS.
   kCollectorTypeGSS,
+  // Heap trimming collector, doesn't do any actual collecting.
+  kCollectorTypeHeapTrim,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
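
The declared operator<< lets log statements print a readable collector name instead of a raw enum value. A minimal sketch of such an implementation, with an abbreviated enumerator list and an assumed string mapping:

    #include <iostream>

    enum CollectorTypeSketch {
      kSketchNone,
      kSketchCMS,
      kSketchSS,
      kSketchGSS,
      kSketchHeapTrim,
    };

    std::ostream& operator<<(std::ostream& os, const CollectorTypeSketch& type) {
      switch (type) {
        case kSketchNone:     return os << "None";
        case kSketchCMS:      return os << "ConcurrentMarkSweep";
        case kSketchSS:       return os << "SemiSpace";
        case kSketchGSS:      return os << "GenerationalSemiSpace";
        case kSketchHeapTrim: return os << "HeapTrim";
      }
      return os << "Unknown";
    }

    int main() {
      std::cout << kSketchHeapTrim << std::endl;  // Prints "HeapTrim".
      return 0;
    }
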
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6d30e1c..62567d7 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -44,6 +44,7 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/rosalloc_space-inl.h"
 #include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "heap-inl.h"
 #include "image.h"
 #include "invoke_arg_array_builder.h"
@@ -72,13 +73,16 @@
 static constexpr size_t kGcAlotInterval = KB;
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
+static constexpr size_t kMaxConcurrentRemainingBytes = 512 * KB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& image_file_name,
            CollectorType post_zygote_collector_type, CollectorType background_collector_type,
            size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
            size_t long_pause_log_threshold, size_t long_gc_log_threshold,
-           bool ignore_max_footprint, bool use_tlab)
+           bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
+           bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+           bool verify_post_gc_rosalloc)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
       dlmalloc_space_(nullptr),
@@ -99,7 +103,7 @@
       finalizer_reference_queue_(this),
       phantom_reference_queue_(this),
       cleared_references_(this),
-      is_gc_running_(false),
+      collector_type_running_(kCollectorTypeNone),
       last_gc_type_(collector::kGcTypeNone),
       next_gc_type_(collector::kGcTypePartial),
       capacity_(capacity),
@@ -118,11 +122,11 @@
       gc_memory_overhead_(0),
       verify_missing_card_marks_(false),
       verify_system_weaks_(false),
-      verify_pre_gc_heap_(false),
-      verify_post_gc_heap_(false),
+      verify_pre_gc_heap_(verify_pre_gc_heap),
+      verify_post_gc_heap_(verify_post_gc_heap),
       verify_mod_union_table_(false),
-      min_alloc_space_size_for_sticky_gc_(1112 * MB),
-      min_remaining_space_for_sticky_gc_(1 * MB),
+      verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
+      verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
       last_trim_time_ms_(0),
       allocation_rate_(0),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
@@ -147,7 +151,7 @@
       total_wait_time_(0),
       total_allocation_time_(0),
       verify_object_mode_(kHeapVerificationNotPermitted),
-      gc_disable_count_(0),
+      disable_moving_gc_count_(0),
       running_on_valgrind_(RUNNING_ON_VALGRIND),
       use_tlab_(use_tlab) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
@@ -222,9 +226,19 @@
 
   // Compute heap capacity. Continuous spaces are sorted in order of Begin().
   CHECK(!continuous_spaces_.empty());
+
   // Relies on the spaces being sorted.
   byte* heap_begin = continuous_spaces_.front()->Begin();
   byte* heap_end = continuous_spaces_.back()->Limit();
+  if (Runtime::Current()->IsZygote()) {
+    std::string error_str;
+    post_zygote_non_moving_space_mem_map_.reset(
+        MemMap::MapAnonymous("post zygote non-moving space", nullptr, 64 * MB,
+                             PROT_READ | PROT_WRITE, true, &error_str));
+    CHECK(post_zygote_non_moving_space_mem_map_.get() != nullptr) << error_str;
+    heap_begin = std::min(post_zygote_non_moving_space_mem_map_->Begin(), heap_begin);
+    heap_end = std::max(post_zygote_non_moving_space_mem_map_->End(), heap_end);
+  }
   size_t heap_capacity = heap_end - heap_begin;
 
   // Allocate the card table.
@@ -318,19 +332,21 @@
   return false;
 }
 
-void Heap::IncrementDisableGC(Thread* self) {
+void Heap::IncrementDisableMovingGC(Thread* self) {
   // Need to do this holding the lock to prevent races where the GC is about to run / running when
   // we attempt to disable it.
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
-  WaitForGcToCompleteLocked(self);
-  ++gc_disable_count_;
+  ++disable_moving_gc_count_;
+  if (IsCompactingGC(collector_type_running_)) {
+    WaitForGcToCompleteLocked(self);
+  }
 }
 
-void Heap::DecrementDisableGC(Thread* self) {
+void Heap::DecrementDisableMovingGC(Thread* self) {
   MutexLock mu(self, *gc_complete_lock_);
-  CHECK_GE(gc_disable_count_, 0U);
-  --gc_disable_count_;
+  CHECK_GE(disable_moving_gc_count_, 0U);
+  --disable_moving_gc_count_;
 }
 
 void Heap::UpdateProcessState(ProcessState process_state) {
@@ -475,7 +491,6 @@
     DCHECK(it != alloc_spaces_.end());
     alloc_spaces_.erase(it);
   }
-  delete space;
 }
 
 void Heap::RegisterGCAllocation(size_t bytes) {
@@ -595,7 +610,7 @@
 };
 
 mirror::Object* Heap::PreserveSoftReferenceCallback(mirror::Object* obj, void* arg) {
-  SoftReferenceArgs* args  = reinterpret_cast<SoftReferenceArgs*>(arg);
+  SoftReferenceArgs* args = reinterpret_cast<SoftReferenceArgs*>(arg);
   // TODO: Not preserve all soft references.
   return args->recursive_mark_callback_(obj, args->arg_);
 }
@@ -641,15 +656,15 @@
 bool Heap::IsEnqueued(mirror::Object* ref) const {
   // Since the references are stored as cyclic lists it means that once enqueued, the pending next
   // will always be non-null.
-  return ref->GetFieldObject<mirror::Object*>(GetReferencePendingNextOffset(), false) != nullptr;
+  return ref->GetFieldObject<mirror::Object>(GetReferencePendingNextOffset(), false) != nullptr;
 }
 
-bool Heap::IsEnqueuable(const mirror::Object* ref) const {
+bool Heap::IsEnqueuable(mirror::Object* ref) const {
   DCHECK(ref != nullptr);
   const mirror::Object* queue =
-      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueOffset(), false);
+      ref->GetFieldObject<mirror::Object>(GetReferenceQueueOffset(), false);
   const mirror::Object* queue_next =
-      ref->GetFieldObject<mirror::Object*>(GetReferenceQueueNextOffset(), false);
+      ref->GetFieldObject<mirror::Object>(GetReferenceQueueNextOffset(), false);
   return queue != nullptr && queue_next == nullptr;
 }
 
@@ -708,7 +723,7 @@
 
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) {
   std::ostringstream oss;
-  int64_t total_bytes_free = GetFreeMemory();
+  size_t total_bytes_free = GetFreeMemory();
   oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
       << " free bytes";
   // If the allocation failed due to fragmentation, print out the largest continuous allocation.
@@ -733,13 +748,25 @@
 }
 
 void Heap::Trim() {
+  Thread* self = Thread::Current();
+  {
+    // Need to do this before acquiring the locks since we don't want to get suspended while
+    // holding any locks.
+    ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+    // Pretend we are doing a GC to prevent background compaction from deleting the space we are
+    // trimming.
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(self);
+    collector_type_running_ = kCollectorTypeHeapTrim;
+  }
   uint64_t start_ns = NanoTime();
   // Trim the managed spaces.
   uint64_t total_alloc_space_allocated = 0;
   uint64_t total_alloc_space_size = 0;
   uint64_t managed_reclaimed = 0;
   for (const auto& space : continuous_spaces_) {
-    if (space->IsMallocSpace() && !space->IsZygoteSpace()) {
+    if (space->IsMallocSpace()) {
       gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
       total_alloc_space_size += alloc_space->Size();
       managed_reclaimed += alloc_space->Trim();
@@ -750,6 +777,8 @@
   const float managed_utilization = static_cast<float>(total_alloc_space_allocated) /
       static_cast<float>(total_alloc_space_size);
   uint64_t gc_heap_end_ns = NanoTime();
+  // We never move things in the native heap, so we can finish the GC at this point.
+  FinishGC(self, collector::kGcTypeNone);
   // Trim the native heap.
   dlmalloc_trim(0);
   size_t native_reclaimed = 0;
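
Trim() above now claims the same "a collector is running" slot that real collections use, so background compaction cannot delete a space out from under the trim, and releases it with FinishGC() before touching the native heap. A minimal sketch of that guard using standard mutex and condition-variable primitives, with illustrative names rather than the heap's actual members:

    #include <condition_variable>
    #include <mutex>

    enum class CollectorTypeSketch { kNone, kHeapTrim };

    class GcSlot {
     public:
      // Wait until no collection is in progress, then publish our sentinel type
      // so concurrent collections (and background compaction) wait for us.
      void Acquire(CollectorTypeSketch type) {
        std::unique_lock<std::mutex> lock(gc_complete_lock_);
        gc_complete_cond_.wait(lock, [this] { return running_ == CollectorTypeSketch::kNone; });
        running_ = type;
      }
      // Clear the slot and wake any waiters once the trim is done.
      void Release() {
        {
          std::lock_guard<std::mutex> lock(gc_complete_lock_);
          running_ = CollectorTypeSketch::kNone;
        }
        gc_complete_cond_.notify_all();
      }

     private:
      std::mutex gc_complete_lock_;
      std::condition_variable gc_complete_cond_;
      CollectorTypeSketch running_ = CollectorTypeSketch::kNone;
    };
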
@@ -772,30 +801,37 @@
 }
 
 bool Heap::IsHeapAddress(const mirror::Object* obj) const {
-  if (kMovingCollector && bump_pointer_space_->HasAddress(obj)) {
+  if (kMovingCollector && bump_pointer_space_ && bump_pointer_space_->HasAddress(obj)) {
     return true;
   }
   // TODO: This probably doesn't work for large objects.
   return FindSpaceFromObject(obj, true) != nullptr;
 }
 
-bool Heap::IsLiveObjectLocked(const mirror::Object* obj, bool search_allocation_stack,
+bool Heap::IsLiveObjectLocked(mirror::Object* obj, bool search_allocation_stack,
                               bool search_live_stack, bool sorted) {
-  // Locks::heap_bitmap_lock_->AssertReaderHeld(Thread::Current());
-  if (obj == nullptr || UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
+  if (UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
+    return false;
+  }
+  if (bump_pointer_space_ != nullptr && bump_pointer_space_->HasAddress(obj)) {
+    mirror::Class* klass = obj->GetClass();
+    if (obj == klass) {
+      // This case happens for java.lang.Class.
+      return true;
+    }
+    return VerifyClassClass(klass) && IsLiveObjectLocked(klass);
+  } else if (temp_space_ != nullptr && temp_space_->HasAddress(obj)) {
     return false;
   }
   space::ContinuousSpace* c_space = FindContinuousSpaceFromObject(obj, true);
   space::DiscontinuousSpace* d_space = NULL;
-  if (c_space != NULL) {
+  if (c_space != nullptr) {
     if (c_space->GetLiveBitmap()->Test(obj)) {
       return true;
     }
-  } else if (bump_pointer_space_->Contains(obj) || temp_space_->Contains(obj)) {
-      return true;
   } else {
     d_space = FindDiscontinuousSpaceFromObject(obj, true);
-    if (d_space != NULL) {
+    if (d_space != nullptr) {
       if (d_space->GetLiveObjects()->Test(obj)) {
         return true;
       }
@@ -828,20 +864,20 @@
   }
   // We need to check the bitmaps again since there is a race where we mark something as live and
   // then clear the stack containing it.
-  if (c_space != NULL) {
+  if (c_space != nullptr) {
     if (c_space->GetLiveBitmap()->Test(obj)) {
       return true;
     }
   } else {
     d_space = FindDiscontinuousSpaceFromObject(obj, true);
-    if (d_space != NULL && d_space->GetLiveObjects()->Test(obj)) {
+    if (d_space != nullptr && d_space->GetLiveObjects()->Test(obj)) {
       return true;
     }
   }
   return false;
 }
 
-void Heap::VerifyObjectImpl(const mirror::Object* obj) {
+void Heap::VerifyObjectImpl(mirror::Object* obj) {
   if (Thread::Current() == NULL ||
       Runtime::Current()->GetThreadList()->GetLockOwner() == Thread::Current()->GetTid()) {
     return;
@@ -849,6 +885,17 @@
   VerifyObjectBody(obj);
 }
 
+bool Heap::VerifyClassClass(const mirror::Class* c) const {
+  // Note: we don't use the accessors here as they have internal sanity checks that we don't want
+  // to run
+  const byte* raw_addr =
+      reinterpret_cast<const byte*>(c) + mirror::Object::ClassOffset().Int32Value();
+  mirror::Class* c_c = reinterpret_cast<mirror::HeapReference<mirror::Class> const *>(raw_addr)->AsMirrorPtr();
+  raw_addr = reinterpret_cast<const byte*>(c_c) + mirror::Object::ClassOffset().Int32Value();
+  mirror::Class* c_c_c = reinterpret_cast<mirror::HeapReference<mirror::Class> const *>(raw_addr)->AsMirrorPtr();
+  return c_c == c_c_c;
+}
+
 void Heap::DumpSpaces(std::ostream& stream) {
   for (const auto& space : continuous_spaces_) {
     accounting::SpaceBitmap* live_bitmap = space->GetLiveBitmap();
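
VerifyClassClass() above factors out the class-of-class check that VerifyObjectBody() used to do inline (removed in the next hunk): for any class c, c.getClass() must equal c.getClass().getClass(), because every Class object's class is java.lang.Class, which is its own class. The same invariant over plain structs, with the HeapReference indirection elided (type and field names here are illustrative only):

    #include <cassert>

    struct Class;
    struct Object { Class* klass; };   // mirror::Object stores its class in its first field
    struct Class : Object {};          // a Class is itself an Object

    // Mirrors Heap::VerifyClassClass(): the class of c must equal the class of c's class.
    bool VerifyClassClass(const Class* c) {
      const Class* c_c = c->klass;       // c.getClass()
      const Class* c_c_c = c_c->klass;   // c.getClass().getClass()
      return c_c == c_c_c;
    }

    int main() {
      Class java_lang_Class;
      java_lang_Class.klass = &java_lang_Class;  // java.lang.Class is its own class
      Class string_class;
      string_class.klass = &java_lang_Class;
      assert(VerifyClassClass(&string_class));
      assert(VerifyClassClass(&java_lang_Class));
    }
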
@@ -866,7 +913,7 @@
   }
 }
 
-void Heap::VerifyObjectBody(const mirror::Object* obj) {
+void Heap::VerifyObjectBody(mirror::Object* obj) {
   CHECK(IsAligned<kObjectAlignment>(obj)) << "Object isn't aligned: " << obj;
   // Ignore early dawn of the universe verifications.
   if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.Load()) < 10 * KB)) {
@@ -874,20 +921,13 @@
   }
   const byte* raw_addr = reinterpret_cast<const byte*>(obj) +
       mirror::Object::ClassOffset().Int32Value();
-  const mirror::Class* c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
+  mirror::Class* c = reinterpret_cast<mirror::HeapReference<mirror::Class> const *>(raw_addr)->AsMirrorPtr();
   if (UNLIKELY(c == NULL)) {
     LOG(FATAL) << "Null class in object: " << obj;
   } else if (UNLIKELY(!IsAligned<kObjectAlignment>(c))) {
     LOG(FATAL) << "Class isn't aligned: " << c << " in object: " << obj;
   }
-  // Check obj.getClass().getClass() == obj.getClass().getClass().getClass()
-  // Note: we don't use the accessors here as they have internal sanity checks
-  // that we don't want to run
-  raw_addr = reinterpret_cast<const byte*>(c) + mirror::Object::ClassOffset().Int32Value();
-  const mirror::Class* c_c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
-  raw_addr = reinterpret_cast<const byte*>(c_c) + mirror::Object::ClassOffset().Int32Value();
-  const mirror::Class* c_c_c = *reinterpret_cast<mirror::Class* const *>(raw_addr);
-  CHECK_EQ(c_c, c_c_c);
+  CHECK(VerifyClassClass(c));
 
   if (verify_object_mode_ > kVerifyAllFast) {
     // TODO: the bitmap tests below are racy if VerifyObjectBody is called without the
@@ -912,7 +952,7 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordFree(int64_t freed_objects, int64_t freed_bytes) {
+void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
   DCHECK_LE(freed_bytes, num_bytes_allocated_.Load());
   num_bytes_allocated_.FetchAndSub(freed_bytes);
   if (Runtime::Current()->HasStatsEnabled()) {
@@ -1021,18 +1061,18 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : classes_(classes), use_is_assignable_from_(use_is_assignable_from), counts_(counts) {
   }
-
-  void operator()(const mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    for (size_t i = 0; i < classes_.size(); ++i) {
-      const mirror::Class* instance_class = o->GetClass();
-      if (use_is_assignable_from_) {
-        if (instance_class != NULL && classes_[i]->IsAssignableFrom(instance_class)) {
-          ++counts_[i];
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    InstanceCounter* instance_counter = reinterpret_cast<InstanceCounter*>(arg);
+    mirror::Class* instance_class = obj->GetClass();
+    CHECK(instance_class != nullptr);
+    for (size_t i = 0; i < instance_counter->classes_.size(); ++i) {
+      if (instance_counter->use_is_assignable_from_) {
+        if (instance_counter->classes_[i]->IsAssignableFrom(instance_class)) {
+          ++instance_counter->counts_[i];
         }
-      } else {
-        if (instance_class == classes_[i]) {
-          ++counts_[i];
-        }
+      } else if (instance_class == instance_counter->classes_[i]) {
+        ++instance_counter->counts_[i];
       }
     }
   }
@@ -1041,22 +1081,18 @@
   const std::vector<mirror::Class*>& classes_;
   bool use_is_assignable_from_;
   uint64_t* const counts_;
-
   DISALLOW_COPY_AND_ASSIGN(InstanceCounter);
 };
 
 void Heap::CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
                           uint64_t* counts) {
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look.
+  // Can't do any GC in this function since this may move classes.
   Thread* self = Thread::Current();
-  self->TransitionFromRunnableToSuspended(kNative);
-  CollectGarbage(false);
-  self->TransitionFromSuspendedToRunnable();
-
+  auto* old_cause = self->StartAssertNoThreadSuspension("CountInstances");
   InstanceCounter counter(classes, use_is_assignable_from, counts);
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetLiveBitmap()->Visit(counter);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  VisitObjects(InstanceCounter::Callback, &counter);
+  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 class InstanceCollector {
@@ -1065,12 +1101,15 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : class_(c), max_count_(max_count), instances_(instances) {
   }
-
-  void operator()(const mirror::Object* o) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const mirror::Class* instance_class = o->GetClass();
-    if (instance_class == class_) {
-      if (max_count_ == 0 || instances_.size() < max_count_) {
-        instances_.push_back(const_cast<mirror::Object*>(o));
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    DCHECK(arg != nullptr);
+    InstanceCollector* instance_collector = reinterpret_cast<InstanceCollector*>(arg);
+    mirror::Class* instance_class = obj->GetClass();
+    if (instance_class == instance_collector->class_) {
+      if (instance_collector->max_count_ == 0 ||
+          instance_collector->instances_.size() < instance_collector->max_count_) {
+        instance_collector->instances_.push_back(obj);
       }
     }
   }
@@ -1079,22 +1118,18 @@
   mirror::Class* class_;
   uint32_t max_count_;
   std::vector<mirror::Object*>& instances_;
-
   DISALLOW_COPY_AND_ASSIGN(InstanceCollector);
 };
 
 void Heap::GetInstances(mirror::Class* c, int32_t max_count,
                         std::vector<mirror::Object*>& instances) {
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look.
+  // Can't do any GC in this function since this may move classes.
   Thread* self = Thread::Current();
-  self->TransitionFromRunnableToSuspended(kNative);
-  CollectGarbage(false);
-  self->TransitionFromSuspendedToRunnable();
-
+  auto* old_cause = self->StartAssertNoThreadSuspension("GetInstances");
   InstanceCollector collector(c, max_count, instances);
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetLiveBitmap()->Visit(collector);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  VisitObjects(&InstanceCollector::Callback, &collector);
+  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 class ReferringObjectsFinder {
@@ -1105,6 +1140,11 @@
       : object_(object), max_count_(max_count), referring_objects_(referring_objects) {
   }
 
+  static void Callback(mirror::Object* obj, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+    reinterpret_cast<ReferringObjectsFinder*>(arg)->operator()(obj);
+  }
+
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
@@ -1124,22 +1164,18 @@
   mirror::Object* object_;
   uint32_t max_count_;
   std::vector<mirror::Object*>& referring_objects_;
-
   DISALLOW_COPY_AND_ASSIGN(ReferringObjectsFinder);
 };
 
 void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
                                std::vector<mirror::Object*>& referring_objects) {
-  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
-  // is empty, so the live bitmap is the only place we need to look.
+  // Can't do any GC in this function since this may move classes.
   Thread* self = Thread::Current();
-  self->TransitionFromRunnableToSuspended(kNative);
-  CollectGarbage(false);
-  self->TransitionFromSuspendedToRunnable();
-
+  auto* old_cause = self->StartAssertNoThreadSuspension("GetReferringObjects");
   ReferringObjectsFinder finder(o, max_count, referring_objects);
-  ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
-  GetLiveBitmap()->Visit(finder);
+  WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+  VisitObjects(&ReferringObjectsFinder::Callback, &finder);
+  self->EndAssertNoThreadSuspension(old_cause);
 }
 
 void Heap::CollectGarbage(bool clear_soft_references) {
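
CountInstances(), GetInstances() and GetReferringObjects() now share one pattern: assert that no thread suspension (and therefore no moving GC) can happen, take the heap bitmap lock exclusively, and hand a static Callback(obj, arg) trampoline to VisitObjects() so the visitor state travels through the void* argument instead of a live-bitmap visit. A stripped-down sketch of that trampoline idiom (VisitObjects is modelled here as a plain loop; Counter is illustrative):

    #include <cstddef>
    #include <vector>

    struct Object { /* ... */ };

    // Stand-in for Heap::VisitObjects(callback, arg): walk every object once.
    using ObjectCallback = void (*)(Object* obj, void* arg);
    void VisitObjects(std::vector<Object*>& heap, ObjectCallback callback, void* arg) {
      for (Object* obj : heap) {
        callback(obj, arg);
      }
    }

    // Visitor state is carried through the void* argument, as InstanceCounter does.
    struct Counter {
      size_t count = 0;
      static void Callback(Object* /*obj*/, void* arg) {
        ++reinterpret_cast<Counter*>(arg)->count;
      }
    };

    size_t CountAll(std::vector<Object*>& heap) {
      Counter counter;
      VisitObjects(heap, &Counter::Callback, &counter);
      return counter.count;
    }
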
@@ -1152,21 +1188,39 @@
   if (collector_type == collector_type_) {
     return;
   }
+  VLOG(heap) << "TransitionCollector: " << static_cast<int>(collector_type_)
+             << " -> " << static_cast<int>(collector_type);
   uint64_t start_time = NanoTime();
-  int32_t before_size  = GetTotalMemory();
-  int32_t before_allocated = num_bytes_allocated_.Load();
+  uint32_t before_size  = GetTotalMemory();
+  uint32_t before_allocated = num_bytes_allocated_.Load();
   ThreadList* tl = Runtime::Current()->GetThreadList();
   Thread* self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingPerformingGc);
   Locks::mutator_lock_->AssertNotHeld(self);
-  // Busy wait until we can GC (StartGC can fail if we have a non-zero gc_disable_count_, this
-  // rarely occurs however).
-  while (!StartGC(self)) {
-    usleep(100);
+  const bool copying_transition =
+      IsCompactingGC(background_collector_type_) || IsCompactingGC(post_zygote_collector_type_);
+  // Busy wait until we can GC (the transition can be blocked while there is a non-zero
+  // disable_moving_gc_count_, though this should rarely occur).
+  for (;;) {
+    {
+      ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+      MutexLock mu(self, *gc_complete_lock_);
+      // Ensure there is only one GC at a time.
+      WaitForGcToCompleteLocked(self);
+      // GC can be disabled if someone has used GetPrimitiveArrayCritical but not yet released it.
+      if (!copying_transition || disable_moving_gc_count_ == 0) {
+        // TODO: Avoid hard-coding the semi-space collector here?
+        collector_type_running_ = copying_transition ? kCollectorTypeSS : collector_type;
+        break;
+      }
+    }
+    usleep(1000);
   }
   tl->SuspendAll();
+  PreGcRosAllocVerification(&semi_space_collector_->GetTimings());
   switch (collector_type) {
     case kCollectorTypeSS:
+      // Fall-through.
     case kCollectorTypeGSS: {
       mprotect(temp_space_->Begin(), temp_space_->Capacity(), PROT_READ | PROT_WRITE);
       CHECK(main_space_ != nullptr);
@@ -1174,14 +1228,16 @@
       DCHECK(allocator_mem_map_.get() == nullptr);
       allocator_mem_map_.reset(main_space_->ReleaseMemMap());
       madvise(main_space_->Begin(), main_space_->Size(), MADV_DONTNEED);
-      // RemoveSpace deletes the removed space.
-      RemoveSpace(main_space_);
+      // RemoveSpace does not delete the removed space.
+      space::Space* old_space = main_space_;
+      RemoveSpace(old_space);
+      delete old_space;
       break;
     }
     case kCollectorTypeMS:
       // Fall through.
     case kCollectorTypeCMS: {
-      if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
+      if (IsCompactingGC(collector_type_)) {
         // TODO: Use mem-map from temp space?
         MemMap* mem_map = allocator_mem_map_.release();
         CHECK(mem_map != nullptr);
@@ -1211,6 +1267,7 @@
     }
   }
   ChangeCollector(collector_type);
+  PostGcRosAllocVerification(&semi_space_collector_->GetTimings());
   tl->ResumeAll();
   // Can't call into java code with all threads suspended.
   EnqueueClearedReferences();
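
Both the claim loop at the top of TransitionCollector above and the new CollectGarbageInternal prologue further down gate compacting work on disable_moving_gc_count_, which IncrementDisableMovingGC()/DecrementDisableMovingGC() (declared in the heap.h hunk below) adjust while a primitive array obtained via GetPrimitiveArrayCritical is pinned. A minimal model of that handshake follows; the real code keeps the counter under gc_complete_lock_ rather than in an atomic, so this is an illustration only:

    #include <atomic>

    std::atomic<int> disable_moving_gc_count{0};

    void IncrementDisableMovingGC() { ++disable_moving_gc_count; }  // GetPrimitiveArrayCritical
    void DecrementDisableMovingGC() { --disable_moving_gc_count; }  // ReleasePrimitiveArrayCritical

    bool CanRunMovingGC() {
      // A copying transition or compacting collection only proceeds once no thread
      // has a primitive array pinned; otherwise it retries (or is skipped).
      return disable_moving_gc_count.load() == 0;
    }
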
@@ -1236,6 +1293,7 @@
     gc_plan_.clear();
     switch (collector_type_) {
       case kCollectorTypeSS:
+        // Fall-through.
       case kCollectorTypeGSS: {
         concurrent_gc_ = false;
         gc_plan_.push_back(collector::kGcTypeFull);
@@ -1275,10 +1333,6 @@
   }
 }
 
-static void MarkInBitmapCallback(mirror::Object* obj, void* arg) {
-  reinterpret_cast<accounting::SpaceBitmap*>(arg)->Set(obj);
-}
-
 // Special compacting collector which uses sub-optimal bin packing to reduce zygote space size.
 class ZygoteCompactingCollector : public collector::SemiSpace {
  public:
@@ -1328,7 +1382,7 @@
     }
   }
 
-  virtual bool ShouldSweepSpace(space::MallocSpace* space) const {
+  virtual bool ShouldSweepSpace(space::ContinuousSpace* space) const {
     // Don't sweep any spaces since we probably blasted the internal accounting of the free list
     // allocator.
     return false;
@@ -1347,6 +1401,9 @@
       forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated);
       if (to_space_live_bitmap_ != nullptr) {
         to_space_live_bitmap_->Set(forward_address);
+      } else {
+        GetHeap()->GetNonMovingSpace()->GetLiveBitmap()->Set(forward_address);
+        GetHeap()->GetNonMovingSpace()->GetMarkBitmap()->Set(forward_address);
       }
     } else {
       size_t size = it->first;
@@ -1365,7 +1422,19 @@
   }
 };
 
+void Heap::UnBindBitmaps() {
+  for (const auto& space : GetContinuousSpaces()) {
+    if (space->IsContinuousMemMapAllocSpace()) {
+      space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
+      if (alloc_space->HasBoundBitmaps()) {
+        alloc_space->UnBindBitmaps();
+      }
+    }
+  }
+}
+
 void Heap::PreZygoteFork() {
+  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   static Mutex zygote_creation_lock_("zygote creation lock", kZygoteCreationLock);
   Thread* self = Thread::Current();
   MutexLock mu(self, zygote_creation_lock_);
@@ -1374,7 +1443,6 @@
     return;
   }
   VLOG(heap) << "Starting PreZygoteFork";
-  CollectGarbageInternal(collector::kGcTypeFull, kGcCauseBackground, false);
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
   non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
@@ -1382,6 +1450,9 @@
   ChangeCollector(post_zygote_collector_type_);
   // TODO: Delete bump_pointer_space_ and temp_pointer_space_?
   if (semi_space_collector_ != nullptr) {
+    // Temporarily disable rosalloc verification because the zygote
+    // compaction will mess up the rosalloc internal metadata.
+    ScopedDisableRosAllocVerification disable_rosalloc_verif(this);
     ZygoteCompactingCollector zygote_collector(this);
     zygote_collector.BuildBins(non_moving_space_);
     // Create a new bump pointer space which we will compact into.
@@ -1398,35 +1469,30 @@
     // Update the end and write out image.
     non_moving_space_->SetEnd(target_space.End());
     non_moving_space_->SetLimit(target_space.Limit());
-    accounting::SpaceBitmap* bitmap = non_moving_space_->GetLiveBitmap();
-    // Record the allocations in the bitmap.
     VLOG(heap) << "Zygote size " << non_moving_space_->Size() << " bytes";
-    target_space.Walk(MarkInBitmapCallback, bitmap);
   }
+  // Save the old space so that we can remove it after we complete creating the zygote space.
+  space::MallocSpace* old_alloc_space = non_moving_space_;
   // Turn the current alloc space into a zygote space and obtain the new alloc space composed of
-  // the remaining available heap memory.
-  space::MallocSpace* zygote_space = non_moving_space_;
-  main_space_ = non_moving_space_->CreateZygoteSpace("alloc space", low_memory_mode_);
+  // the remaining available space.
+  // Remove the old space before creating the zygote space since creating the zygote space sets
+  // the old alloc space's bitmaps to nullptr.
+  RemoveSpace(old_alloc_space);
+  space::ZygoteSpace* zygote_space = old_alloc_space->CreateZygoteSpace("alloc space",
+                                                                        low_memory_mode_,
+                                                                        &main_space_);
+  delete old_alloc_space;
+  CHECK(zygote_space != nullptr) << "Failed creating zygote space";
+  AddSpace(zygote_space, false);
+  CHECK(main_space_ != nullptr);
   if (main_space_->IsRosAllocSpace()) {
     rosalloc_space_ = main_space_->AsRosAllocSpace();
   } else if (main_space_->IsDlMallocSpace()) {
     dlmalloc_space_ = main_space_->AsDlMallocSpace();
   }
   main_space_->SetFootprintLimit(main_space_->Capacity());
-  // Change the GC retention policy of the zygote space to only collect when full.
-  zygote_space->SetGcRetentionPolicy(space::kGcRetentionPolicyFullCollect);
   AddSpace(main_space_);
   have_zygote_space_ = true;
-  // Remove the zygote space from alloc_spaces_ array since not doing so causes crashes in
-  // GetObjectsAllocated. This happens because the bin packing blows away the internal accounting
-  // stored in between objects.
-  if (zygote_space->IsAllocSpace()) {
-    // TODO: Refactor zygote spaces to be a new space type to avoid more of these types of issues.
-    auto it = std::find(alloc_spaces_.begin(), alloc_spaces_.end(), zygote_space->AsAllocSpace());
-    CHECK(it != alloc_spaces_.end());
-    alloc_spaces_.erase(it);
-    zygote_space->InvalidateAllocator();
-  }
   // Create the zygote space mod union table.
   accounting::ModUnionTable* mod_union_table =
       new accounting::ModUnionTableCardCache("zygote space mod-union table", this, zygote_space);
@@ -1438,9 +1504,10 @@
   }
   // Can't use RosAlloc for non moving space due to thread local buffers.
   // TODO: Non limited space for non-movable objects?
-  space::MallocSpace* new_non_moving_space
-      = space::DlMallocSpace::Create("Non moving dlmalloc space", 2 * MB, 64 * MB, 64 * MB,
-                                     nullptr);
+  MemMap* mem_map = post_zygote_non_moving_space_mem_map_.release();
+  space::MallocSpace* new_non_moving_space =
+      space::DlMallocSpace::CreateFromMemMap(mem_map, "Non moving dlmalloc space", kPageSize,
+                                             2 * MB, mem_map->Size(), mem_map->Size());
   AddSpace(new_non_moving_space, false);
   CHECK(new_non_moving_space != nullptr) << "Failed to create new non-moving space";
   new_non_moving_space->SetFootprintLimit(new_non_moving_space->Capacity());
@@ -1510,10 +1577,22 @@
   if (self->IsHandlingStackOverflow()) {
     LOG(WARNING) << "Performing GC on a thread that is handling a stack overflow.";
   }
-  gc_complete_lock_->AssertNotHeld(self);
-  if (!StartGC(self)) {
-    return collector::kGcTypeNone;
+  bool compacting_gc;
+  {
+    gc_complete_lock_->AssertNotHeld(self);
+    ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
+    MutexLock mu(self, *gc_complete_lock_);
+    // Ensure there is only one GC at a time.
+    WaitForGcToCompleteLocked(self);
+    compacting_gc = IsCompactingGC(collector_type_);
+    // GC can be disabled if someone has used GetPrimitiveArrayCritical but not yet released it.
+    if (compacting_gc && disable_moving_gc_count_ != 0) {
+      LOG(WARNING) << "Skipping GC due to disable moving GC count " << disable_moving_gc_count_;
+      return collector::kGcTypeNone;
+    }
+    collector_type_running_ = collector_type_;
   }
+
   if (gc_cause == kGcCauseForAlloc && runtime->HasStatsEnabled()) {
     ++runtime->GetStats()->gc_for_alloc_count;
     ++self->GetStats()->gc_for_alloc_count;
@@ -1533,7 +1612,7 @@
 
   collector::GarbageCollector* collector = nullptr;
   // TODO: Clean this up.
-  if (collector_type_ == kCollectorTypeSS || collector_type_ == kCollectorTypeGSS) {
+  if (compacting_gc) {
     DCHECK(current_allocator_ == kAllocatorTypeBumpPointer ||
            current_allocator_ == kAllocatorTypeTLAB);
     gc_type = semi_space_collector_->GetGcType();
@@ -1558,20 +1637,14 @@
   CHECK(collector != nullptr)
       << "Could not find garbage collector with concurrent=" << concurrent_gc_
       << " and type=" << gc_type;
-
   ATRACE_BEGIN(StringPrintf("%s %s GC", PrettyCause(gc_cause), collector->GetName()).c_str());
-
   collector->Run(gc_cause, clear_soft_references);
   total_objects_freed_ever_ += collector->GetFreedObjects();
   total_bytes_freed_ever_ += collector->GetFreedBytes();
-
   // Enqueue cleared references.
-  Locks::mutator_lock_->AssertNotHeld(self);
   EnqueueClearedReferences();
-
   // Grow the heap so that we know when to perform the next GC.
   GrowForUtilization(gc_type, collector->GetDurationNs());
-
   if (CareAboutPauseTimes()) {
     const size_t duration = collector->GetDurationNs();
     std::vector<uint64_t> pauses = collector->GetPauseTimes();
@@ -1613,25 +1686,12 @@
   return gc_type;
 }
 
-bool Heap::StartGC(Thread* self) {
-  MutexLock mu(self, *gc_complete_lock_);
-  // Ensure there is only one GC at a time.
-  WaitForGcToCompleteLocked(self);
-  // TODO: if another thread beat this one to do the GC, perhaps we should just return here?
-  //       Not doing at the moment to ensure soft references are cleared.
-  // GC can be disabled if someone has a used GetPrimitiveArrayCritical.
-  if (gc_disable_count_ != 0) {
-    LOG(WARNING) << "Skipping GC due to disable count " << gc_disable_count_;
-    return false;
-  }
-  is_gc_running_ = true;
-  return true;
-}
-
 void Heap::FinishGC(Thread* self, collector::GcType gc_type) {
   MutexLock mu(self, *gc_complete_lock_);
-  is_gc_running_ = false;
-  last_gc_type_ = gc_type;
+  collector_type_running_ = kCollectorTypeNone;
+  if (gc_type != collector::kGcTypeNone) {
+    last_gc_type_ = gc_type;
+  }
   // Wake anyone who may have been waiting for the GC to complete.
   gc_complete_cond_->Broadcast(self);
 }
@@ -1664,60 +1724,68 @@
 
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for smarter
   // analysis on visitors.
-  void operator()(const mirror::Object* obj, const mirror::Object* ref,
+  void operator()(mirror::Object* obj, mirror::Object* ref,
                   const MemberOffset& offset, bool /* is_static */) const
       NO_THREAD_SAFETY_ANALYSIS {
-    // Verify that the reference is live.
-    if (UNLIKELY(ref != NULL && !IsLive(ref))) {
+    if (ref == nullptr || IsLive(ref)) {
+      // Verify that the reference is live.
+      return;
+    }
+    if (!failed_) {
+      // Print the message only on the first failure to prevent spam.
+      LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
+      failed_ = true;
+    }
+    if (obj != nullptr) {
       accounting::CardTable* card_table = heap_->GetCardTable();
       accounting::ObjectStack* alloc_stack = heap_->allocation_stack_.get();
       accounting::ObjectStack* live_stack = heap_->live_stack_.get();
-      if (!failed_) {
-        // Print message on only on first failure to prevent spam.
-        LOG(ERROR) << "!!!!!!!!!!!!!!Heap corruption detected!!!!!!!!!!!!!!!!!!!";
-        failed_ = true;
+      byte* card_addr = card_table->CardFromAddr(obj);
+      LOG(ERROR) << "Object " << obj << " references dead object " << ref << " at offset "
+                 << offset << "\n card value = " << static_cast<int>(*card_addr);
+      if (heap_->IsValidObjectAddress(obj->GetClass())) {
+        LOG(ERROR) << "Obj type " << PrettyTypeOf(obj);
+      } else {
+        LOG(ERROR) << "Object " << obj << " class(" << obj->GetClass() << ") not a heap address";
       }
-      if (obj != nullptr) {
-        byte* card_addr = card_table->CardFromAddr(obj);
-        LOG(ERROR) << "Object " << obj << " references dead object " << ref << " at offset "
-                   << offset << "\n card value = " << static_cast<int>(*card_addr);
-        if (heap_->IsValidObjectAddress(obj->GetClass())) {
-          LOG(ERROR) << "Obj type " << PrettyTypeOf(obj);
+
+      // Attempt to find the class among the recently freed objects.
+      space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
+      if (ref_space != nullptr && ref_space->IsMallocSpace()) {
+        space::MallocSpace* space = ref_space->AsMallocSpace();
+        mirror::Class* ref_class = space->FindRecentFreedObject(ref);
+        if (ref_class != nullptr) {
+          LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
+                     << PrettyClass(ref_class);
         } else {
-          LOG(ERROR) << "Object " << obj << " class(" << obj->GetClass() << ") not a heap address";
+          LOG(ERROR) << "Reference " << ref << " not found as a recently freed object";
         }
+      }
 
-        // Attmept to find the class inside of the recently freed objects.
-        space::ContinuousSpace* ref_space = heap_->FindContinuousSpaceFromObject(ref, true);
-        if (ref_space != nullptr && ref_space->IsMallocSpace()) {
-          space::MallocSpace* space = ref_space->AsMallocSpace();
-          mirror::Class* ref_class = space->FindRecentFreedObject(ref);
-          if (ref_class != nullptr) {
-            LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
-                       << PrettyClass(ref_class);
-          } else {
-            LOG(ERROR) << "Reference " << ref << " not found as a recently freed object";
-          }
+      if (ref->GetClass() != nullptr && heap_->IsValidObjectAddress(ref->GetClass()) &&
+          ref->GetClass()->IsClass()) {
+        LOG(ERROR) << "Ref type " << PrettyTypeOf(ref);
+      } else {
+        LOG(ERROR) << "Ref " << ref << " class(" << ref->GetClass()
+                   << ") is not a valid heap address";
+      }
+
+      card_table->CheckAddrIsInCardTable(reinterpret_cast<const byte*>(obj));
+      void* cover_begin = card_table->AddrFromCard(card_addr);
+      void* cover_end = reinterpret_cast<void*>(reinterpret_cast<size_t>(cover_begin) +
+          accounting::CardTable::kCardSize);
+      LOG(ERROR) << "Card " << reinterpret_cast<void*>(card_addr) << " covers " << cover_begin
+          << "-" << cover_end;
+      accounting::SpaceBitmap* bitmap = heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(obj);
+
+      if (bitmap == nullptr) {
+        LOG(ERROR) << "Object " << obj << " has no bitmap";
+        if (!heap_->VerifyClassClass(obj->GetClass())) {
+          LOG(ERROR) << "Object " << obj << " failed class verification!";
         }
-
-        if (ref->GetClass() != nullptr && heap_->IsValidObjectAddress(ref->GetClass()) &&
-            ref->GetClass()->IsClass()) {
-          LOG(ERROR) << "Ref type " << PrettyTypeOf(ref);
-        } else {
-          LOG(ERROR) << "Ref " << ref << " class(" << ref->GetClass()
-                     << ") is not a valid heap address";
-        }
-
-        card_table->CheckAddrIsInCardTable(reinterpret_cast<const byte*>(obj));
-        void* cover_begin = card_table->AddrFromCard(card_addr);
-        void* cover_end = reinterpret_cast<void*>(reinterpret_cast<size_t>(cover_begin) +
-            accounting::CardTable::kCardSize);
-        LOG(ERROR) << "Card " << reinterpret_cast<void*>(card_addr) << " covers " << cover_begin
-            << "-" << cover_end;
-        accounting::SpaceBitmap* bitmap = heap_->GetLiveBitmap()->GetContinuousSpaceBitmap(obj);
-
+      } else {
         // Print out how the object is live.
-        if (bitmap != NULL && bitmap->Test(obj)) {
+        if (bitmap->Test(obj)) {
           LOG(ERROR) << "Object " << obj << " found in live bitmap";
         }
         if (alloc_stack->Contains(const_cast<mirror::Object*>(obj))) {
@@ -1737,21 +1805,21 @@
         byte* byte_cover_begin = reinterpret_cast<byte*>(card_table->AddrFromCard(card_addr));
         card_table->Scan(bitmap, byte_cover_begin,
                          byte_cover_begin + accounting::CardTable::kCardSize, scan_visitor);
-
-        // Search to see if any of the roots reference our object.
-        void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj));
-        Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg, false, false);
-
-        // Search to see if any of the roots reference our reference.
-        arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
-        Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg, false, false);
-      } else {
-        LOG(ERROR) << "Root references dead object " << ref << "\nRef type " << PrettyTypeOf(ref);
       }
+
+      // Search to see if any of the roots reference our object.
+      void* arg = const_cast<void*>(reinterpret_cast<const void*>(obj));
+      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg, false, false);
+
+      // Search to see if any of the roots reference our reference.
+      arg = const_cast<void*>(reinterpret_cast<const void*>(ref));
+      Runtime::Current()->VisitRoots(&RootMatchesObjectVisitor, arg, false, false);
+    } else {
+      LOG(ERROR) << "Root " << ref << " is dead with type " << PrettyTypeOf(ref);
     }
   }
 
-  bool IsLive(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
@@ -1836,7 +1904,7 @@
 
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(const mirror::Object* obj, const mirror::Object* ref, const MemberOffset& offset,
+  void operator()(mirror::Object* obj, mirror::Object* ref, const MemberOffset& offset,
                   bool is_static) const NO_THREAD_SAFETY_ANALYSIS {
     // Filter out class references since changing an object's class does not mark the card as dirty.
     // Also handles large objects, since the only reference they hold is a class reference.
@@ -1848,6 +1916,7 @@
         LOG(ERROR) << "Object " << obj << " is not in the address range of the card table";
         *failed_ = true;
       } else if (!card_table->IsDirty(obj)) {
+        // TODO: Check mod-union tables.
         // Card should be either kCardDirty if it got re-dirtied after we aged it, or
         // kCardDirty - 1 if it didn't get touched since we aged it.
         accounting::ObjectStack* live_stack = heap_->live_stack_.get();
@@ -1863,13 +1932,13 @@
 
           // Print which field of the object is dead.
           if (!obj->IsObjectArray()) {
-            const mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
+            mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
             CHECK(klass != NULL);
-            const mirror::ObjectArray<mirror::ArtField>* fields = is_static ? klass->GetSFields()
-                                                                            : klass->GetIFields();
+            mirror::ObjectArray<mirror::ArtField>* fields = is_static ? klass->GetSFields()
+                                                                      : klass->GetIFields();
             CHECK(fields != NULL);
             for (int32_t i = 0; i < fields->GetLength(); ++i) {
-              const mirror::ArtField* cur = fields->Get(i);
+              mirror::ArtField* cur = fields->Get(i);
               if (cur->GetOffset().Int32Value() == offset.Int32Value()) {
                 LOG(ERROR) << (is_static ? "Static " : "") << "field in the live stack is "
                           << PrettyField(cur);
@@ -1877,7 +1946,7 @@
               }
             }
           } else {
-            const mirror::ObjectArray<mirror::Object>* object_array =
+            mirror::ObjectArray<mirror::Object>* object_array =
                 obj->AsObjectArray<mirror::Object>();
             for (int32_t i = 0; i < object_array->GetLength(); ++i) {
               if (object_array->Get(i) == ref) {
@@ -1965,7 +2034,7 @@
       // were dirty before the GC started.
       // TODO: Don't need to use atomic.
       // The races are we either end up with: Aged card, unaged card. Since we have the checkpoint
-      // roots and then we scan / update mod union tables after. We will always scan either card.//
+      // roots and then we scan / update mod union tables after. We will always scan either card.
       // If we end up with the non-aged card, we scan it in the pause.
       card_table_->ModifyCardsAtomic(space->Begin(), space->End(), AgeCardVisitor(), VoidFunctor());
     }
@@ -2045,6 +2114,32 @@
   }
 }
 
+void Heap::PreGcRosAllocVerification(TimingLogger* timings) {
+  if (verify_pre_gc_rosalloc_) {
+    TimingLogger::ScopedSplit split("PreGcRosAllocVerification", timings);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsRosAllocSpace()) {
+        VLOG(heap) << "PreGcRosAllocVerification : " << space->GetName();
+        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
+        rosalloc_space->Verify();
+      }
+    }
+  }
+}
+
+void Heap::PostGcRosAllocVerification(TimingLogger* timings) {
+  if (verify_post_gc_rosalloc_) {
+    TimingLogger::ScopedSplit split("PostGcRosAllocVerification", timings);
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsRosAllocSpace()) {
+        VLOG(heap) << "PostGcRosAllocVerification : " << space->GetName();
+        space::RosAllocSpace* rosalloc_space = space->AsRosAllocSpace();
+        rosalloc_space->Verify();
+      }
+    }
+  }
+}
+
 collector::GcType Heap::WaitForGcToComplete(Thread* self) {
   ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
@@ -2054,7 +2149,7 @@
 collector::GcType Heap::WaitForGcToCompleteLocked(Thread* self) {
   collector::GcType last_gc_type = collector::kGcTypeNone;
   uint64_t wait_start = NanoTime();
-  while (is_gc_running_) {
+  while (collector_type_running_ != kCollectorTypeNone) {
     ATRACE_BEGIN("GC: Wait For Completion");
     // We must wait, change thread state then sleep on gc_complete_cond_;
     gc_complete_cond_->Wait(self);
@@ -2094,7 +2189,12 @@
     if (bump_pointer_space_->HasAddress(obj)) {
       return true;
     }
-    if (main_space_ != nullptr && main_space_->HasAddress(obj)) {
+    // TODO: Refactor this logic into the space itself?
+    // Objects in the main space are only copied during background -> foreground transitions or
+    // vice versa.
+    if (main_space_ != nullptr && main_space_->HasAddress(obj) &&
+        (IsCompactingGC(background_collector_type_) ||
+            IsCompactingGC(post_zygote_collector_type_))) {
       return true;
     }
   }
@@ -2144,7 +2244,7 @@
     if (bytes_allocated + min_free_ <= max_allowed_footprint_) {
       next_gc_type_ = collector::kGcTypeSticky;
     } else {
-      next_gc_type_ = collector::kGcTypePartial;
+      next_gc_type_ = have_zygote_space_ ? collector::kGcTypePartial : collector::kGcTypeFull;
     }
     // If we have freed enough memory, shrink the heap back down.
     if (bytes_allocated + max_free_ < max_allowed_footprint_) {
@@ -2158,24 +2258,23 @@
     if (concurrent_gc_) {
       // Calculate when to perform the next ConcurrentGC.
       // Calculate the estimated GC duration.
-      double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
+      const double gc_duration_seconds = NsToMs(gc_duration) / 1000.0;
       // Estimate how many remaining bytes we will have when we need to start the next GC.
       size_t remaining_bytes = allocation_rate_ * gc_duration_seconds;
+      remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes);
       remaining_bytes = std::max(remaining_bytes, kMinConcurrentRemainingBytes);
       if (UNLIKELY(remaining_bytes > max_allowed_footprint_)) {
         // A situation that should never happen: from the estimated allocation rate we would exceed
         // the application's entire footprint. Schedule
-        // another GC straight away.
-        concurrent_start_bytes_ = bytes_allocated;
-      } else {
-        // Start a concurrent GC when we get close to the estimated remaining bytes. When the
-        // allocation rate is very high, remaining_bytes could tell us that we should start a GC
-        // right away.
-        concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes,
-                                           bytes_allocated);
+        // another GC nearly straight away.
+        remaining_bytes = kMinConcurrentRemainingBytes;
       }
-      DCHECK_LE(concurrent_start_bytes_, max_allowed_footprint_);
+      DCHECK_LE(remaining_bytes, max_allowed_footprint_);
       DCHECK_LE(max_allowed_footprint_, growth_limit_);
+      // Start a concurrent GC when we get close to the estimated remaining bytes. When the
+      // allocation rate is very high, remaining_bytes could tell us that we should start a GC
+      // right away.
+      concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, bytes_allocated);
     }
   }
 }
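
The rework above folds the old two-branch computation into a single clamp: the bytes expected to be allocated during the next collection are bounded into [kMinConcurrentRemainingBytes, kMaxConcurrentRemainingBytes], forced back to the minimum in the pathological over-footprint case, and then subtracted from the footprint limit (never dropping below what is already allocated). A self-contained sketch of that arithmetic, with kMin/kMax passed in because their values are not shown in this diff:

    #include <algorithm>
    #include <cstddef>

    // Mirrors the concurrent_start_bytes_ computation in GrowForUtilization().
    size_t ConcurrentStartBytes(double allocation_rate_bytes_per_sec, double gc_duration_seconds,
                                size_t max_allowed_footprint, size_t bytes_allocated,
                                size_t kMin, size_t kMax) {
      // Estimate how many bytes will be allocated while the next concurrent GC runs.
      size_t remaining_bytes =
          static_cast<size_t>(allocation_rate_bytes_per_sec * gc_duration_seconds);
      remaining_bytes = std::min(remaining_bytes, kMax);
      remaining_bytes = std::max(remaining_bytes, kMin);
      if (remaining_bytes > max_allowed_footprint) {
        remaining_bytes = kMin;  // never expected; schedule another GC nearly straight away
      }
      // Request the next concurrent GC this far below the footprint limit, but never
      // below what is already allocated (which would mean starting right away).
      return std::max(max_allowed_footprint - remaining_bytes, bytes_allocated);
    }
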
@@ -2211,26 +2310,28 @@
 mirror::Object* Heap::GetReferenceReferent(mirror::Object* reference) {
   DCHECK(reference != NULL);
   DCHECK_NE(reference_referent_offset_.Uint32Value(), 0U);
-  return reference->GetFieldObject<mirror::Object*>(reference_referent_offset_, true);
+  return reference->GetFieldObject<mirror::Object>(reference_referent_offset_, true);
 }
 
 void Heap::AddFinalizerReference(Thread* self, mirror::Object* object) {
   ScopedObjectAccess soa(self);
   JValue result;
   ArgArray arg_array(NULL, 0);
-  arg_array.Append(reinterpret_cast<uint32_t>(object));
+  arg_array.Append(object);
   soa.DecodeMethod(WellKnownClasses::java_lang_ref_FinalizerReference_add)->Invoke(self,
       arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
 }
 
 void Heap::EnqueueClearedReferences() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotHeld(self);
   if (!cleared_references_.IsEmpty()) {
     // When a runtime isn't started there are no reference queues to care about so ignore.
     if (LIKELY(Runtime::Current()->IsStarted())) {
-      ScopedObjectAccess soa(Thread::Current());
+      ScopedObjectAccess soa(self);
       JValue result;
       ArgArray arg_array(NULL, 0);
-      arg_array.Append(reinterpret_cast<uint32_t>(cleared_references_.GetList()));
+      arg_array.Append(cleared_references_.GetList());
       soa.DecodeMethod(WellKnownClasses::java_lang_ref_ReferenceQueue_add)->Invoke(soa.Self(),
           arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
     }
@@ -2408,8 +2509,8 @@
   } while (!native_bytes_allocated_.CompareAndSwap(expected_size, new_size));
 }
 
-int64_t Heap::GetTotalMemory() const {
-  int64_t ret = 0;
+size_t Heap::GetTotalMemory() const {
+  size_t ret = 0;
   for (const auto& space : continuous_spaces_) {
     // Currently don't include the image space.
     if (!space->IsImageSpace()) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0c3db86..476ceee 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/timing_logger.h"
 #include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table.h"
@@ -149,7 +149,9 @@
                 CollectorType post_zygote_collector_type, CollectorType background_collector_type,
                 size_t parallel_gc_threads, size_t conc_gc_threads, bool low_memory_mode,
                 size_t long_pause_threshold, size_t long_gc_threshold,
-                bool ignore_max_footprint, bool use_tlab);
+                bool ignore_max_footprint, bool use_tlab, bool verify_pre_gc_heap,
+                bool verify_post_gc_heap, bool verify_pre_gc_rosalloc,
+                bool verify_post_gc_rosalloc);
 
   ~Heap();
 
@@ -203,12 +205,14 @@
   void ChangeCollector(CollectorType collector_type);
 
   // The given reference is believed to be to an object in the Java heap, check the soundness of it.
-  void VerifyObjectImpl(const mirror::Object* o);
-  void VerifyObject(const mirror::Object* o) {
+  void VerifyObjectImpl(mirror::Object* o);
+  void VerifyObject(mirror::Object* o) {
     if (o != nullptr && this != nullptr && verify_object_mode_ > kNoHeapVerification) {
       VerifyObjectImpl(o);
     }
   }
+  // Check that c.getClass() == c.getClass().getClass().
+  bool VerifyClassClass(const mirror::Class* c) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Check sanity of all live references.
   void VerifyHeap() LOCKS_EXCLUDED(Locks::heap_bitmap_lock_);
@@ -229,9 +233,9 @@
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
-  bool IsLiveObjectLocked(const mirror::Object* obj, bool search_allocation_stack = true,
+  bool IsLiveObjectLocked(mirror::Object* obj, bool search_allocation_stack = true,
                           bool search_live_stack = true, bool sorted = false)
-      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Returns true if there is any chance that the object (obj) will move.
   bool IsMovableObject(const mirror::Object* obj) const;
@@ -240,12 +244,12 @@
   // compaction related errors.
   bool IsInTempSpace(const mirror::Object* obj) const;
 
-  // Enables us to prevent GC until objects are released.
-  void IncrementDisableGC(Thread* self);
-  void DecrementDisableGC(Thread* self);
+  // Enables us to prevent compacting GC until objects are released.
+  void IncrementDisableMovingGC(Thread* self);
+  void DecrementDisableMovingGC(Thread* self);
 
   // Initiates an explicit garbage collection.
-  void CollectGarbage(bool clear_soft_references) LOCKS_EXCLUDED(Locks::mutator_lock_);
+  void CollectGarbage(bool clear_soft_references);
 
   // Does a concurrent GC, should only be called by the GC daemon thread
   // through runtime.
@@ -355,7 +359,7 @@
 
   // Freed bytes can be negative in cases where we copy objects from a compacted space to a
   // free-list backed space.
-  void RecordFree(int64_t freed_objects, int64_t freed_bytes);
+  void RecordFree(size_t freed_objects, size_t freed_bytes);
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if NULL is stored in the field.
@@ -408,16 +412,16 @@
   // consume. For a regular VM this would relate to the -Xmx option and would return -1 if no Xmx
   // were specified. Android apps start with a growth limit (small heap size) which is
   // cleared/extended for large apps.
-  int64_t GetMaxMemory() const {
+  size_t GetMaxMemory() const {
     return growth_limit_;
   }
 
   // Implements java.lang.Runtime.totalMemory, returning the amount of memory consumed by an
   // application.
-  int64_t GetTotalMemory() const;
+  size_t GetTotalMemory() const;
 
   // Implements java.lang.Runtime.freeMemory.
-  int64_t GetFreeMemory() const {
+  size_t GetFreeMemory() const {
     return GetTotalMemory() - num_bytes_allocated_;
   }
 
@@ -437,6 +441,11 @@
   void RevokeThreadLocalBuffers(Thread* thread);
   void RevokeAllThreadLocalBuffers();
 
+  void PreGcRosAllocVerification(TimingLogger* timings)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PostGcRosAllocVerification(TimingLogger* timings)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   accounting::HeapBitmap* GetLiveBitmap() SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) {
     return live_bitmap_.get();
   }
@@ -464,6 +473,9 @@
   void MarkAllocStackAsLive(accounting::ObjectStack* stack)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
 
+  // Unbind any bound bitmaps.
+  void UnBindBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+
   // DEPRECATED: Should remove in "near" future when support for multiple image spaces is added.
   // Assumes there is only one image space.
   space::ImageSpace* GetImageSpace() const;
@@ -531,7 +543,6 @@
   void Compact(space::ContinuousMemMapAllocSpace* target_space,
                space::ContinuousMemMapAllocSpace* source_space);
 
-  bool StartGC(Thread* self) LOCKS_EXCLUDED(gc_complete_lock_);
   void FinishGC(Thread* self, collector::GcType gc_type) LOCKS_EXCLUDED(gc_complete_lock_);
 
   static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) {
@@ -542,7 +553,11 @@
   static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) {
     return AllocatorHasAllocationStack(allocator_type);
   }
-  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const;
+  static bool IsCompactingGC(CollectorType collector_type) {
+    return collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS;
+  }
+  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   ALWAYS_INLINE void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated,
                                        mirror::Object* obj);
 
@@ -588,8 +603,8 @@
   }
   void EnqueueClearedReferences();
   // Returns true if the reference object has not yet been enqueued.
-  bool IsEnqueuable(const mirror::Object* ref) const;
-  bool IsEnqueued(mirror::Object* ref) const;
+  bool IsEnqueuable(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsEnqueued(mirror::Object* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DelayReferenceReferent(mirror::Class* klass, mirror::Object* obj, RootVisitor mark_visitor,
                               void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -605,14 +620,6 @@
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   bool IsGCRequestPending() const;
 
-  size_t RecordAllocationInstrumented(size_t size, mirror::Object* object)
-      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  size_t RecordAllocationUninstrumented(size_t size, mirror::Object* object)
-      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Sometimes CollectGarbageInternal decides to run a different Gc than you requested. Returns
   // which type of Gc was actually ran.
   collector::GcType CollectGarbageInternal(collector::GcType gc_plan, GcCause gc_cause,
@@ -644,7 +651,7 @@
 
   // No thread safety analysis since we call this everywhere and it is impossible to find a proper
   // lock ordering for it.
-  void VerifyObjectBody(const mirror::Object *obj) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyObjectBody(mirror::Object *obj) NO_THREAD_SAFETY_ANALYSIS;
 
   static void VerificationCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(GlobalSynchronization::heap_bitmap_lock_);
@@ -691,6 +698,9 @@
   // don't have to worry about virtual address space fragmentation.
   UniquePtr<MemMap> allocator_mem_map_;
 
+  // The mem-map which we will use for the non-moving space after the zygote is done forking:
+  UniquePtr<MemMap> post_zygote_non_moving_space_mem_map_;
+
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.
   bool concurrent_gc_;
@@ -725,11 +735,6 @@
   // If we have a zygote space.
   bool have_zygote_space_;
 
-  // Number of pinned primitive arrays in the movable space.
-  // Block all GC until this hits zero, or we hit the timeout!
-  size_t number_gc_blockers_;
-  static constexpr size_t KGCBlockTimeout = 30000;
-
   // Guards access to the state of GC, associated conditional variable is used to signal when a GC
   // completes.
   Mutex* gc_complete_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -743,7 +748,7 @@
   ReferenceQueue cleared_references_;
 
   // True while the garbage collector is running.
-  volatile bool is_gc_running_ GUARDED_BY(gc_complete_lock_);
+  volatile CollectorType collector_type_running_ GUARDED_BY(gc_complete_lock_);
 
   // Last Gc type we ran. Used by WaitForConcurrentGc to know which Gc was waited on.
   volatile collector::GcType last_gc_type_ GUARDED_BY(gc_complete_lock_);
@@ -783,13 +788,13 @@
   size_t total_objects_freed_ever_;
 
   // Number of bytes allocated.  Adjusted after each allocation and free.
-  AtomicInteger num_bytes_allocated_;
+  Atomic<size_t> num_bytes_allocated_;
 
   // Bytes which are allocated and managed by native code but still need to be accounted for.
-  AtomicInteger native_bytes_allocated_;
+  Atomic<size_t> native_bytes_allocated_;
 
   // Data structure GC overhead.
-  AtomicInteger gc_memory_overhead_;
+  Atomic<size_t> gc_memory_overhead_;
 
   // Heap verification flags.
   const bool verify_missing_card_marks_;
@@ -797,18 +802,33 @@
   const bool verify_pre_gc_heap_;
   const bool verify_post_gc_heap_;
   const bool verify_mod_union_table_;
+  bool verify_pre_gc_rosalloc_;
+  bool verify_post_gc_rosalloc_;
+
+  // RAII that temporarily disables the rosalloc verification during
+  // the zygote fork.
+  class ScopedDisableRosAllocVerification {
+   private:
+    Heap* heap_;
+    bool orig_verify_pre_gc_;
+    bool orig_verify_post_gc_;
+   public:
+    explicit ScopedDisableRosAllocVerification(Heap* heap)
+        : heap_(heap),
+          orig_verify_pre_gc_(heap_->verify_pre_gc_rosalloc_),
+          orig_verify_post_gc_(heap_->verify_post_gc_rosalloc_) {
+      heap_->verify_pre_gc_rosalloc_ = false;
+      heap_->verify_post_gc_rosalloc_ = false;
+    }
+    ~ScopedDisableRosAllocVerification() {
+      heap_->verify_pre_gc_rosalloc_ = orig_verify_pre_gc_;
+      heap_->verify_post_gc_rosalloc_ = orig_verify_post_gc_;
+    }
+  };
 
   // Parallel GC data structures.
   UniquePtr<ThreadPool> thread_pool_;
 
-  // Sticky mark bits GC has some overhead, so if we have less a few megabytes of AllocSpace then
-  // it's probably better to just do a partial GC.
-  const size_t min_alloc_space_size_for_sticky_gc_;
-
-  // Minimum remaining size for sticky GC. Since sticky GC doesn't free up as much memory as a
-  // normal GC, it is important to not use it when we are almost out of memory.
-  const size_t min_remaining_space_for_sticky_gc_;
-
   // The last time a heap trim occurred.
   uint64_t last_trim_time_ms_;
 
@@ -879,8 +899,8 @@
   // The current state of heap verification, may be enabled or disabled.
   HeapVerificationMode verify_object_mode_;
 
-  // GC disable count, error on GC if > 0.
-  size_t gc_disable_count_ GUARDED_BY(gc_complete_lock_);
+  // Compacting GC disable count, prevents compacting GC from running iff > 0.
+  size_t disable_moving_gc_count_ GUARDED_BY(gc_complete_lock_);
 
   std::vector<collector::GarbageCollector*> garbage_collectors_;
   collector::SemiSpace* semi_space_collector_;
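
ScopedDisableRosAllocVerification, declared above, is a plain save/clear/restore RAII guard: it records both rosalloc verification flags at construction, forces them off, and restores the saved values in its destructor, so the zygote compaction (which scrambles rosalloc's internal accounting) cannot trip the verifier even on an early exit. The same pattern reduced to a standalone sketch:

    // Save a flag, clear it for the current scope, restore it on exit.
    class ScopedFlagClear {
     public:
      explicit ScopedFlagClear(bool* flag) : flag_(flag), original_(*flag) { *flag_ = false; }
      ~ScopedFlagClear() { *flag_ = original_; }
     private:
      bool* const flag_;
      const bool original_;
    };

    bool verify_rosalloc = true;

    void CompactForZygoteSketch() {
      ScopedFlagClear disable(&verify_rosalloc);  // verification is off in this scope
      // ... run the zygote compacting collector ...
    }                                             // flag restored here
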
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index d006349..2d73a71 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -52,8 +52,7 @@
     ref->SetFieldObject(pending_next_offset, ref, false);
     list_ = ref;
   } else {
-    mirror::Object* head =
-        list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    mirror::Object* head = list_->GetFieldObject<mirror::Object>(pending_next_offset, false);
     ref->SetFieldObject(pending_next_offset, head, false);
     list_->SetFieldObject(pending_next_offset, ref, false);
   }
@@ -62,7 +61,7 @@
 mirror::Object* ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
   MemberOffset pending_next_offset = heap_->GetReferencePendingNextOffset();
-  mirror::Object* head = list_->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+  mirror::Object* head = list_->GetFieldObject<mirror::Object>(pending_next_offset, false);
   DCHECK(head != nullptr);
   mirror::Object* ref;
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
@@ -71,7 +70,7 @@
     ref = list_;
     list_ = nullptr;
   } else {
-    mirror::Object* next = head->GetFieldObject<mirror::Object*>(pending_next_offset, false);
+    mirror::Object* next = head->GetFieldObject<mirror::Object>(pending_next_offset, false);
     list_->SetFieldObject(pending_next_offset, next, false);
     ref = head;
   }
@@ -84,11 +83,11 @@
   os << "Reference starting at list_=" << list_ << "\n";
   while (cur != nullptr) {
     mirror::Object* pending_next =
-        cur->GetFieldObject<mirror::Object*>(heap_->GetReferencePendingNextOffset(), false);
+        cur->GetFieldObject<mirror::Object>(heap_->GetReferencePendingNextOffset(), false);
     os << "PendingNext=" << pending_next;
     if (cur->GetClass()->IsFinalizerReferenceClass()) {
       os << " Zombie=" <<
-          cur->GetFieldObject<mirror::Object*>(heap_->GetFinalizerReferenceZombieOffset(), false);
+          cur->GetFieldObject<mirror::Object>(heap_->GetFinalizerReferenceZombieOffset(), false);
     }
     os << "\n";
     cur = pending_next;
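
The GetFieldObject changes in this file only track the new template signature (GetFieldObject<mirror::Object> instead of GetFieldObject<mirror::Object*>); the queue logic itself is unchanged: list_ is the tail of a circular singly linked list threaded through each reference's pendingNext field, the head is tail->pendingNext, enqueue splices the new reference in as the head, and dequeue pops the head. A standalone sketch of that circular-tail queue (type and field names here are illustrative):

    #include <cassert>

    struct Reference {
      Reference* pending_next = nullptr;  // plays the role of the pendingNext field
    };

    struct ReferenceQueueSketch {
      Reference* list = nullptr;  // tail of the circular list (list_ in the diff)

      void Enqueue(Reference* ref) {
        if (list == nullptr) {
          ref->pending_next = ref;    // a single element points at itself
          list = ref;
        } else {
          Reference* head = list->pending_next;
          ref->pending_next = head;   // the new element becomes the head
          list->pending_next = ref;
        }
      }

      Reference* Dequeue() {
        assert(list != nullptr);
        Reference* head = list->pending_next;
        Reference* ref;
        if (head == list) {           // last element
          ref = list;
          list = nullptr;
        } else {
          list->pending_next = head->pending_next;  // unlink the head
          ref = head;
        }
        return ref;
      }
    };
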
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 89589c3..3f3069e 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/timing_logger.h"
 #include "globals.h"
 #include "gtest/gtest.h"
@@ -83,7 +83,7 @@
  private:
   // Lock, used for parallel GC reference enqueuing. It allows for multiple threads simultaneously
   // calling AtomicEnqueueIfNotEnqueued.
-  Mutex lock_;
+  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // The heap contains the reference offsets.
   Heap* const heap_;
   // The actual reference list. Not a root since it will be nullptr when the GC is not running.
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index ac20972..74a0274 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -34,10 +34,9 @@
     if (UNLIKELY(new_end > growth_end_)) {
       return nullptr;
     }
-    // TODO: Use a cas which always equals the size of pointers.
-  } while (android_atomic_cas(reinterpret_cast<int32_t>(old_end),
-                              reinterpret_cast<int32_t>(new_end),
-                              reinterpret_cast<volatile int32_t*>(&end_)) != 0);
+  } while (!__sync_bool_compare_and_swap(reinterpret_cast<volatile intptr_t*>(&end_),
+                                         reinterpret_cast<intptr_t>(old_end),
+                                         reinterpret_cast<intptr_t>(new_end)));
   return reinterpret_cast<mirror::Object*>(old_end);
 }
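
The allocation fast path above swaps the 32-bit android_atomic_cas for a pointer-width __sync_bool_compare_and_swap, answering the removed TODO so the cursor also advances correctly where pointers are wider than 32 bits, and several threads can race to bump it without a lock. The same loop expressed with std::atomic (a sketch; the surrounding accounting of the real space is elided):

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Sketch of the lock-free bump-pointer allocation in BumpPointerSpace.
    struct BumpPointerSpaceSketch {
      std::atomic<uint8_t*> end;  // allocation cursor (end_ in the diff)
      uint8_t* growth_end;        // hard limit of the space (growth_end_)

      void* Alloc(size_t num_bytes) {
        uint8_t* old_end = end.load(std::memory_order_relaxed);
        uint8_t* new_end;
        do {
          new_end = old_end + num_bytes;
          if (new_end > growth_end) {
            return nullptr;       // out of space
          }
          // On failure compare_exchange_weak reloads old_end with the current cursor,
          // so the loop retries exactly like the __sync_bool_compare_and_swap loop.
        } while (!end.compare_exchange_weak(old_end, new_end, std::memory_order_relaxed));
        return old_end;           // the caller owns [old_end, new_end)
      }
    };
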
 
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 4dc17df..a314d74 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -29,7 +29,7 @@
   capacity = RoundUp(capacity, kPageSize);
   std::string error_msg;
   UniquePtr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                 PROT_READ | PROT_WRITE, &error_msg));
+                                                 PROT_READ | PROT_WRITE, true, &error_msg));
   if (mem_map.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
         << PrettySize(capacity) << " with message " << error_msg;
@@ -69,7 +69,7 @@
   return ret;
 }
 
-size_t BumpPointerSpace::AllocationSize(const mirror::Object* obj) {
+size_t BumpPointerSpace::AllocationSize(mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
 }
 
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index 3e25b6b..d73fe3b 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -49,8 +49,7 @@
   mirror::Object* AllocNonvirtualWithoutAccounting(size_t num_bytes);
 
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(const mirror::Object* obj)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual size_t AllocationSize(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // NOPS unless we support free lists.
   virtual size_t Free(Thread*, mirror::Object*) {
@@ -60,7 +59,7 @@
     return 0;
   }
 
-  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+  size_t AllocationSizeNonvirtual(mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return obj->SizeOf();
   }
@@ -135,7 +134,6 @@
   byte* AllocBlock(size_t bytes) EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
   void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(block_lock_);
 
-  size_t InternalAllocationSize(const mirror::Object* obj);
   mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 981af53..931ed21 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -228,7 +228,7 @@
   return dlmalloc_space->MoreCore(increment);
 }
 
-size_t DlMallocSpace::AllocationSize(const mirror::Object* obj) {
+size_t DlMallocSpace::AllocationSize(mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
 }
 
@@ -287,6 +287,7 @@
 }
 
 void DlMallocSpace::Clear() {
+  // TODO: Delete and create new mspace here.
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
   GetMarkBitmap()->Clear();
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index 671d2b2..4507c36 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -48,13 +48,15 @@
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  virtual size_t AllocationSize(const mirror::Object* obj);
-  virtual size_t Free(Thread* self, mirror::Object* ptr);
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+  virtual size_t AllocationSize(mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
-  size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
+  size_t AllocationSizeNonvirtual(mirror::Object* obj) {
     void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
     return mspace_usable_size(obj_ptr) + kChunkOverhead;
   }
@@ -97,10 +99,6 @@
 
   virtual void Clear();
 
-  virtual void InvalidateAllocator() {
-    mspace_for_alloc_ = nullptr;
-  }
-
   virtual bool IsDlMallocSpace() const {
     return true;
   }
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 4777cc6..ebad8dd 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -35,7 +35,7 @@
 namespace gc {
 namespace space {
 
-AtomicInteger ImageSpace::bitmap_index_(0);
+Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
 ImageSpace::ImageSpace(const std::string& name, MemMap* mem_map,
                        accounting::SpaceBitmap* live_bitmap)
@@ -171,7 +171,7 @@
   byte* current = Begin() + RoundUp(sizeof(ImageHeader), kObjectAlignment);
   while (current < End()) {
     DCHECK_ALIGNED(current, kObjectAlignment);
-    const mirror::Object* obj = reinterpret_cast<const mirror::Object*>(current);
+    mirror::Object* obj = reinterpret_cast<mirror::Object*>(current);
     CHECK(live_bitmap_->Test(obj));
     CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
     current += RoundUp(obj->SizeOf(), kObjectAlignment);
@@ -227,7 +227,7 @@
     *error_msg = StringPrintf("Failed to map image bitmap: %s", error_msg->c_str());
     return nullptr;
   }
-  size_t bitmap_index = bitmap_index_.FetchAndAdd(1);
+  uint32_t bitmap_index = bitmap_index_.FetchAndAdd(1);
   std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_file_name,
                                        bitmap_index));
   UniquePtr<accounting::SpaceBitmap> bitmap(
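
FetchAndAdd on the new Atomic<uint32_t> above hands every image space a distinct bitmap index even when several spaces are mapped concurrently. The same idea against the standard atomics, as a sketch; UniqueBitmapName is a made-up helper, not an ART function:

    #include <atomic>
    #include <cstdint>
    #include <string>

    // Each call returns a distinct, monotonically increasing suffix, so two
    // threads naming bitmaps at the same time can never collide.
    std::string UniqueBitmapName(const std::string& base) {
      static std::atomic<std::uint32_t> next_index{0};
      const std::uint32_t index = next_index.fetch_add(1, std::memory_order_relaxed);
      return base + " live-bitmap " + std::to_string(index);
    }
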
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index 78a83c9..9e19774 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -29,10 +29,6 @@
 // An image space is a space backed with a memory mapped image.
 class ImageSpace : public MemMapSpace {
  public:
-  bool CanAllocateInto() const {
-    return false;
-  }
-
   SpaceType GetType() const {
     return kSpaceTypeImageSpace;
   }
@@ -75,6 +71,10 @@
 
   void Dump(std::ostream& os) const;
 
+  // Sweeping image spaces is a NOP.
+  void Sweep(bool /* swap_bitmaps */, size_t* /* freed_objects */, size_t* /* freed_bytes */) {
+  }
+
  private:
   // Tries to initialize an ImageSpace from the given image path,
   // returning NULL on error.
@@ -94,7 +94,7 @@
 
   friend class Space;
 
-  static AtomicInteger bitmap_index_;
+  static Atomic<uint32_t> bitmap_index_;
 
   UniquePtr<accounting::SpaceBitmap> live_bitmap_;
 
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 7fcfed4..987a655 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -60,7 +60,7 @@
                                            size_t* bytes_allocated) {
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", NULL, num_bytes,
-                                         PROT_READ | PROT_WRITE, &error_msg);
+                                         PROT_READ | PROT_WRITE, true, &error_msg);
   if (UNLIKELY(mem_map == NULL)) {
     LOG(WARNING) << "Large object allocation failed: " << error_msg;
     return NULL;
@@ -92,9 +92,9 @@
   return allocation_size;
 }
 
-size_t LargeObjectMapSpace::AllocationSize(const mirror::Object* obj) {
+size_t LargeObjectMapSpace::AllocationSize(mirror::Object* obj) {
   MutexLock mu(Thread::Current(), lock_);
-  MemMaps::iterator found = mem_maps_.find(const_cast<mirror::Object*>(obj));
+  MemMaps::iterator found = mem_maps_.find(obj);
   CHECK(found != mem_maps_.end()) << "Attempted to get size of a large object which is not live";
   return found->second->Size();
 }
@@ -134,7 +134,7 @@
   CHECK_EQ(size % kAlignment, 0U);
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, size,
-                                         PROT_READ | PROT_WRITE, &error_msg);
+                                         PROT_READ | PROT_WRITE, true, &error_msg);
   CHECK(mem_map != NULL) << "Failed to allocate large object space mem map: " << error_msg;
   return new FreeListSpace(name, mem_map, mem_map->Begin(), mem_map->End());
 }
@@ -244,7 +244,7 @@
   return mem_map_->HasAddress(obj);
 }
 
-size_t FreeListSpace::AllocationSize(const mirror::Object* obj) {
+size_t FreeListSpace::AllocationSize(mirror::Object* obj) {
   AllocationHeader* header = GetAllocationHeader(obj);
   DCHECK(Contains(obj));
   DCHECK(!header->IsFree());
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index cd7c383..5274c8d 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -92,7 +92,7 @@
   static LargeObjectMapSpace* Create(const std::string& name);
 
   // Return the storage space required by obj.
-  size_t AllocationSize(const mirror::Object* obj);
+  size_t AllocationSize(mirror::Object* obj);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* ptr);
   void Walk(DlMallocSpace::WalkCallback, void* arg) LOCKS_EXCLUDED(lock_);
@@ -118,8 +118,7 @@
   virtual ~FreeListSpace();
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
-  size_t AllocationSize(const mirror::Object* obj)
-      EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  size_t AllocationSize(mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 2b2b26e..f90e6c7 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -19,6 +19,8 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
+#include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
@@ -33,22 +35,27 @@
 size_t MallocSpace::bitmap_index_ = 0;
 
 MallocSpace::MallocSpace(const std::string& name, MemMap* mem_map,
-                         byte* begin, byte* end, byte* limit, size_t growth_limit)
+                         byte* begin, byte* end, byte* limit, size_t growth_limit,
+                         bool create_bitmaps)
     : ContinuousMemMapAllocSpace(name, mem_map, begin, end, limit, kGcRetentionPolicyAlwaysCollect),
       recent_free_pos_(0), lock_("allocation space lock", kAllocSpaceLock),
       growth_limit_(growth_limit) {
-  size_t bitmap_index = bitmap_index_++;
-  static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
-  CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
-  live_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #" << bitmap_index;
-  mark_bitmap_.reset(accounting::SpaceBitmap::Create(
-      StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
-      Begin(), Capacity()));
-  DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #" << bitmap_index;
+  if (create_bitmaps) {
+    size_t bitmap_index = bitmap_index_++;
+    static const uintptr_t kGcCardSize = static_cast<uintptr_t>(accounting::CardTable::kCardSize);
+    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->Begin())));
+    CHECK(IsAligned<kGcCardSize>(reinterpret_cast<uintptr_t>(mem_map->End())));
+    live_bitmap_.reset(accounting::SpaceBitmap::Create(
+        StringPrintf("allocspace %s live-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+        Begin(), Capacity()));
+    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace live bitmap #"
+        << bitmap_index;
+    mark_bitmap_.reset(accounting::SpaceBitmap::Create(
+        StringPrintf("allocspace %s mark-bitmap %d", name.c_str(), static_cast<int>(bitmap_index)),
+        Begin(), Capacity()));
+    DCHECK(live_bitmap_.get() != NULL) << "could not create allocspace mark bitmap #"
+        << bitmap_index;
+  }
   for (auto& freed : recent_freed_objects_) {
     freed.first = nullptr;
     freed.second = nullptr;
@@ -80,7 +87,7 @@
 
   std::string error_msg;
   MemMap* mem_map = MemMap::MapAnonymous(name.c_str(), requested_begin, *capacity,
-                                         PROT_READ | PROT_WRITE, &error_msg);
+                                         PROT_READ | PROT_WRITE, true, &error_msg);
   if (mem_map == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
                << PrettySize(*capacity) << ": " << error_msg;
@@ -88,14 +95,6 @@
   return mem_map;
 }
 
-void MallocSpace::SwapBitmaps() {
-  live_bitmap_.swap(mark_bitmap_);
-  // Swap names to get more descriptive diagnostics.
-  std::string temp_name(live_bitmap_->GetName());
-  live_bitmap_->SetName(mark_bitmap_->GetName());
-  mark_bitmap_->SetName(temp_name);
-}
-
 mirror::Class* MallocSpace::FindRecentFreedObject(const mirror::Object* obj) {
   size_t pos = recent_free_pos_;
   // Start at the most recently freed object and work our way back since there may be duplicates
@@ -154,29 +153,8 @@
   return original_end;
 }
 
-// Returns the old mark bitmap.
-accounting::SpaceBitmap* MallocSpace::BindLiveToMarkBitmap() {
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
-  temp_bitmap_.reset(mark_bitmap);
-  mark_bitmap_.reset(live_bitmap);
-  return mark_bitmap;
-}
-
-bool MallocSpace::HasBoundBitmaps() const {
-  return temp_bitmap_.get() != nullptr;
-}
-
-void MallocSpace::UnBindBitmaps() {
-  CHECK(HasBoundBitmaps());
-  // At this point, the temp_bitmap holds our old mark bitmap.
-  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
-  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
-  mark_bitmap_.reset(new_bitmap);
-  DCHECK(temp_bitmap_.get() == NULL);
-}
-
-MallocSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode) {
+ZygoteSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode,
+                                            MallocSpace** out_malloc_space) {
   // For RosAlloc, revoke thread local runs before creating a new
   // alloc space so that we won't mix thread local runs from different
   // alloc spaces.
@@ -220,15 +198,23 @@
   if (capacity - initial_size > 0) {
     CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size, PROT_NONE), alloc_space_name);
   }
-  MallocSpace* alloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator,
-                                            end_, end, limit_, growth_limit);
+  *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
+                                     limit_, growth_limit);
   SetLimit(End());
   live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(live_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
   mark_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
   CHECK_EQ(mark_bitmap_->HeapLimit(), reinterpret_cast<uintptr_t>(End()));
-  VLOG(heap) << "zygote space creation done";
-  return alloc_space;
+
+  // Create the actual zygote space.
+  ZygoteSpace* zygote_space = ZygoteSpace::Create("Zygote space", ReleaseMemMap(),
+                                                  live_bitmap_.release(), mark_bitmap_.release());
+  if (UNLIKELY(zygote_space == nullptr)) {
+    VLOG(heap) << "Failed creating zygote space from space " << GetName();
+  } else {
+    VLOG(heap) << "zygote space creation done";
+  }
+  return zygote_space;
 }
 
 void MallocSpace::Dump(std::ostream& os) const {
@@ -239,24 +225,16 @@
       << ",name=\"" << GetName() << "\"]";
 }
 
-struct SweepCallbackContext {
-  bool swap_bitmaps;
-  Heap* heap;
-  space::MallocSpace* space;
-  Thread* self;
-  size_t freed_objects;
-  size_t freed_bytes;
-};
-
-static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+void MallocSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
   SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  space::AllocSpace* space = context->space;
+  DCHECK(context->space->IsMallocSpace());
+  space::MallocSpace* space = context->space->AsMallocSpace();
   Thread* self = context->self;
   Locks::heap_bitmap_lock_->AssertExclusiveHeld(self);
   // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
   // the bitmaps as an optimization.
   if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
+    accounting::SpaceBitmap* bitmap = space->GetLiveBitmap();
     for (size_t i = 0; i < num_ptrs; ++i) {
       bitmap->Clear(ptrs[i]);
     }
@@ -268,54 +246,6 @@
   context->freed_bytes += space->FreeList(self, num_ptrs, ptrs);
 }
 
-static void ZygoteSweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
-  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
-  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
-  accounting::CardTable* card_table = context->heap->GetCardTable();
-  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
-  // the bitmaps as an optimization.
-  if (!context->swap_bitmaps) {
-    accounting::SpaceBitmap* bitmap = context->space->GetLiveBitmap();
-    for (size_t i = 0; i < num_ptrs; ++i) {
-      bitmap->Clear(ptrs[i]);
-    }
-  }
-  // We don't free any actual memory to avoid dirtying the shared zygote pages.
-  for (size_t i = 0; i < num_ptrs; ++i) {
-    // Need to mark the card since this will update the mod-union table next GC cycle.
-    card_table->MarkCard(ptrs[i]);
-  }
-}
-
-void MallocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
-  DCHECK(freed_objects != nullptr);
-  DCHECK(freed_bytes != nullptr);
-  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
-  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
-  // If the bitmaps are bound then sweeping this space clearly won't do anything.
-  if (live_bitmap == mark_bitmap) {
-    return;
-  }
-  SweepCallbackContext scc;
-  scc.swap_bitmaps = swap_bitmaps;
-  scc.heap = Runtime::Current()->GetHeap();
-  scc.self = Thread::Current();
-  scc.space = this;
-  scc.freed_objects = 0;
-  scc.freed_bytes = 0;
-  if (swap_bitmaps) {
-    std::swap(live_bitmap, mark_bitmap);
-  }
-  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
-  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
-                                     reinterpret_cast<uintptr_t>(Begin()),
-                                     reinterpret_cast<uintptr_t>(End()),
-                                     IsZygoteSpace() ? &ZygoteSweepCallback : &SweepCallback,
-                                     reinterpret_cast<void*>(&scc));
-  *freed_objects += scc.freed_objects;
-  *freed_bytes += scc.freed_bytes;
-}
-
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 7681b6d..f17bcd2 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -31,6 +31,8 @@
 
 namespace space {
 
+class ZygoteSpace;
+
 // TODO: Remove define macro
 #define CHECK_MEMORY_CALL(call, args, what) \
   do { \
@@ -41,19 +43,13 @@
     } \
   } while (false)
 
-// const bool kUseRosAlloc = true;
-
 // A common parent of DlMallocSpace and RosAllocSpace.
 class MallocSpace : public ContinuousMemMapAllocSpace {
  public:
   typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
 
   SpaceType GetType() const {
-    if (GetGcRetentionPolicy() == kGcRetentionPolicyFullCollect) {
-      return kSpaceTypeZygoteSpace;
-    } else {
-      return kSpaceTypeAllocSpace;
-    }
+    return kSpaceTypeMallocSpace;
   }
 
   // Allocate num_bytes without allowing the underlying space to grow.
@@ -62,9 +58,11 @@
   // Allocate num_bytes allowing the underlying space to grow.
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
-  virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) = 0;
+  virtual size_t AllocationSize(mirror::Object* obj) = 0;
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
 #ifndef NDEBUG
   virtual void CheckMoreCoreForPrecondition() {}  // to be overridden in the debug build.
@@ -109,51 +107,27 @@
     return GetMemMap()->Size();
   }
 
-  accounting::SpaceBitmap* GetLiveBitmap() const {
-    return live_bitmap_.get();
-  }
-
-  accounting::SpaceBitmap* GetMarkBitmap() const {
-    return mark_bitmap_.get();
-  }
-
   void Dump(std::ostream& os) const;
 
   void SetGrowthLimit(size_t growth_limit);
 
-  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
-  void SwapBitmaps();
-
   virtual MallocSpace* CreateInstance(const std::string& name, MemMap* mem_map, void* allocator,
                                       byte* begin, byte* end, byte* limit, size_t growth_limit) = 0;
 
-  // Turn ourself into a zygote space and return a new alloc space
-  // which has our unused memory.  When true, the low memory mode
-  // argument specifies that the heap wishes the created space to be
-  // more aggressive in releasing unused pages.
-  MallocSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode);
-
+  // Splits ourself into a zygote space and new malloc space which has our unused memory. When true,
+  // the low memory mode argument specifies that the heap wishes the created space to be more
+  // aggressive in releasing unused pages. Invalidates the space it's called on.
+  ZygoteSpace* CreateZygoteSpace(const char* alloc_space_name, bool low_memory_mode,
+                                 MallocSpace** out_malloc_space) NO_THREAD_SAFETY_ANALYSIS;
   virtual uint64_t GetBytesAllocated() = 0;
   virtual uint64_t GetObjectsAllocated() = 0;
 
-  // Returns the old mark bitmap.
-  accounting::SpaceBitmap* BindLiveToMarkBitmap();
-  bool HasBoundBitmaps() const;
-  void UnBindBitmaps();
-
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  // Used to ensure that failure happens when you free / allocate into an invalidated space. If we
-  // don't do this we may get heap corruption instead of a segfault at null.
-  virtual void InvalidateAllocator() = 0;
-
-  // Sweep the references in the malloc space.
-  void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
-
  protected:
   MallocSpace(const std::string& name, MemMap* mem_map, byte* begin, byte* end,
-              byte* limit, size_t growth_limit);
+              byte* limit, size_t growth_limit, bool create_bitmaps = true);
 
   static MemMap* CreateMemMap(const std::string& name, size_t starting_size, size_t* initial_size,
                               size_t* growth_limit, size_t* capacity, byte* requested_begin);
@@ -164,11 +138,13 @@
   virtual void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size,
                                 bool low_memory_mode) = 0;
 
-  void RegisterRecentFree(mirror::Object* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  void RegisterRecentFree(mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
-  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    return &SweepCallback;
+  }
 
   // Recent allocation buffer.
   static constexpr size_t kRecentFreeCount = kDebugSpaces ? (1 << 16) : 0;
@@ -190,9 +166,10 @@
   // one time by a call to ClearGrowthLimit.
   size_t growth_limit_;
 
-  friend class collector::MarkSweep;
-
  private:
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   DISALLOW_COPY_AND_ASSIGN(MallocSpace);
 };
 
@@ -232,13 +209,14 @@
     return result;
   }
 
-  virtual size_t AllocationSize(const mirror::Object* obj) {
-    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<const mirror::Object*>(
-        reinterpret_cast<const byte*>(obj) - kValgrindRedZoneBytes));
+  virtual size_t AllocationSize(mirror::Object* obj) {
+    size_t result = BaseMallocSpaceType::AllocationSize(reinterpret_cast<mirror::Object*>(
+        reinterpret_cast<byte*>(obj) - kValgrindRedZoneBytes));
     return result - 2 * kValgrindRedZoneBytes;
   }
 
-  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     void* obj_after_rdz = reinterpret_cast<void*>(ptr);
     void* obj_with_rdz = reinterpret_cast<byte*>(obj_after_rdz) - kValgrindRedZoneBytes;
     // Make redzones undefined.
@@ -249,7 +227,8 @@
     return freed - 2 * kValgrindRedZoneBytes;
   }
 
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     size_t freed = 0;
     for (size_t i = 0; i < num_ptrs; i++) {
       freed += Free(self, ptrs[i]);
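
The ValgrindMallocSpace shim above keeps a red zone on each side of every object, so sizes and pointers handed to the client have to be shifted by kValgrindRedZoneBytes in both directions. A small sketch of that address arithmetic, assuming a fixed red-zone size; the helper names are made up for illustration:

    #include <cstddef>
    #include <cstdint>

    constexpr std::size_t kRedZoneBytes = 8;  // Assumed size; ART defines its own constant.

    // Underlying layout: [red zone][object payload][red zone]. The wrapped
    // allocator only ever sees the outer pointer and the outer size.
    inline void* OuterFromPayload(void* payload) {
      return reinterpret_cast<std::uint8_t*>(payload) - kRedZoneBytes;
    }

    inline void* PayloadFromOuter(void* outer) {
      return reinterpret_cast<std::uint8_t*>(outer) + kRedZoneBytes;
    }

    inline std::size_t PayloadSizeFromOuter(std::size_t outer_size) {
      return outer_size - 2 * kRedZoneBytes;  // Strip both red zones.
    }
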
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index e5993f6..86e441e 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -220,7 +220,7 @@
   return rosalloc_space->MoreCore(increment);
 }
 
-size_t RosAllocSpace::AllocationSize(const mirror::Object* obj) {
+size_t RosAllocSpace::AllocationSize(mirror::Object* obj) {
   return AllocationSizeNonvirtual(obj);
 }
 
@@ -312,6 +312,7 @@
 }
 
 void RosAllocSpace::Clear() {
+  // TODO: Delete and create new mspace here.
   madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
   GetLiveBitmap()->Clear();
   GetMarkBitmap()->Clear();
diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h
index 6720976..2377423 100644
--- a/runtime/gc/space/rosalloc_space.h
+++ b/runtime/gc/space/rosalloc_space.h
@@ -47,13 +47,15 @@
   virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
                                           size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
-  virtual size_t AllocationSize(const mirror::Object* obj);
-  virtual size_t Free(Thread* self, mirror::Object* ptr);
-  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
+  virtual size_t AllocationSize(mirror::Object* obj);
+  virtual size_t Free(Thread* self, mirror::Object* ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
-  size_t AllocationSizeNonvirtual(const mirror::Object* obj)
+  size_t AllocationSizeNonvirtual(mirror::Object* obj)
       NO_THREAD_SAFETY_ANALYSIS {
     // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held.
     void* obj_ptr = const_cast<void*>(reinterpret_cast<const void*>(obj));
@@ -95,10 +97,6 @@
   // Returns the class of a recently freed object.
   mirror::Class* FindRecentFreedObject(const mirror::Object* obj);
 
-  virtual void InvalidateAllocator() {
-    rosalloc_for_alloc_ = NULL;
-  }
-
   virtual bool IsRosAllocSpace() const {
     return true;
   }
@@ -106,6 +104,10 @@
     return this;
   }
 
+  void Verify() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    rosalloc_->Verify();
+  }
+
  protected:
   RosAllocSpace(const std::string& name, MemMap* mem_map, allocator::RosAlloc* rosalloc,
                 byte* begin, byte* end, byte* limit, size_t growth_limit);
diff --git a/runtime/gc/space/space-inl.h b/runtime/gc/space/space-inl.h
index 0c1d7a2..02a63f6 100644
--- a/runtime/gc/space/space-inl.h
+++ b/runtime/gc/space/space-inl.h
@@ -32,7 +32,7 @@
 }
 
 inline MallocSpace* Space::AsMallocSpace() {
-  DCHECK(GetType() == kSpaceTypeAllocSpace || GetType() == kSpaceTypeZygoteSpace);
+  DCHECK(IsMallocSpace());
   DCHECK(IsDlMallocSpace() || IsRosAllocSpace());
   return down_cast<MallocSpace*>(down_cast<MemMapSpace*>(this));
 }
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index f8ba6b3..32a00bc 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -17,6 +17,9 @@
 #include "space.h"
 
 #include "base/logging.h"
+#include "gc/accounting/heap_bitmap.h"
+#include "runtime.h"
+#include "thread-inl.h"
 
 namespace art {
 namespace gc {
@@ -41,6 +44,69 @@
     mark_objects_(new accounting::ObjectSet("large marked objects")) {
 }
 
+void ContinuousMemMapAllocSpace::Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes) {
+  DCHECK(freed_objects != nullptr);
+  DCHECK(freed_bytes != nullptr);
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  accounting::SpaceBitmap* mark_bitmap = GetMarkBitmap();
+  // If the bitmaps are bound then sweeping this space clearly won't do anything.
+  if (live_bitmap == mark_bitmap) {
+    return;
+  }
+  SweepCallbackContext scc;
+  scc.swap_bitmaps = swap_bitmaps;
+  scc.heap = Runtime::Current()->GetHeap();
+  scc.self = Thread::Current();
+  scc.space = this;
+  scc.freed_objects = 0;
+  scc.freed_bytes = 0;
+  if (swap_bitmaps) {
+    std::swap(live_bitmap, mark_bitmap);
+  }
+  // Bitmaps are pre-swapped for optimization which enables sweeping with the heap unlocked.
+  accounting::SpaceBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
+                                     reinterpret_cast<uintptr_t>(Begin()),
+                                     reinterpret_cast<uintptr_t>(End()),
+                                     GetSweepCallback(),
+                                     reinterpret_cast<void*>(&scc));
+  *freed_objects += scc.freed_objects;
+  *freed_bytes += scc.freed_bytes;
+}
+
+// Binds the live bitmap to the mark bitmap; the old mark bitmap is stashed in temp_bitmap_.
+void ContinuousMemMapAllocSpace::BindLiveToMarkBitmap() {
+  CHECK(!HasBoundBitmaps());
+  accounting::SpaceBitmap* live_bitmap = GetLiveBitmap();
+  if (live_bitmap != mark_bitmap_.get()) {
+    accounting::SpaceBitmap* mark_bitmap = mark_bitmap_.release();
+    Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap, live_bitmap);
+    temp_bitmap_.reset(mark_bitmap);
+    mark_bitmap_.reset(live_bitmap);
+  }
+}
+
+bool ContinuousMemMapAllocSpace::HasBoundBitmaps() const {
+  return temp_bitmap_.get() != nullptr;
+}
+
+void ContinuousMemMapAllocSpace::UnBindBitmaps() {
+  CHECK(HasBoundBitmaps());
+  // At this point, the temp_bitmap holds our old mark bitmap.
+  accounting::SpaceBitmap* new_bitmap = temp_bitmap_.release();
+  Runtime::Current()->GetHeap()->GetMarkBitmap()->ReplaceBitmap(mark_bitmap_.get(), new_bitmap);
+  CHECK_EQ(mark_bitmap_.release(), live_bitmap_.get());
+  mark_bitmap_.reset(new_bitmap);
+  DCHECK(temp_bitmap_.get() == nullptr);
+}
+
+void ContinuousMemMapAllocSpace::SwapBitmaps() {
+  live_bitmap_.swap(mark_bitmap_);
+  // Swap names to get more descriptive diagnostics.
+  std::string temp_name(live_bitmap_->GetName());
+  live_bitmap_->SetName(mark_bitmap_->GetName());
+  mark_bitmap_->SetName(temp_name);
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
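
Sweep() above walks the live bitmap against the mark bitmap and frees everything that was live last cycle but is unmarked in this one; because the GC pre-swaps the bitmaps, the walk can run with the heap unlocked. A simplified sketch of that walk over plain std::set "bitmaps"; this is purely illustrative, not the SpaceBitmap::SweepWalk API:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <set>

    // Toy stand-in for SweepWalk: every address present in `live` but absent
    // from `mark` is dead and is handed to the free callback.
    std::size_t SweepWalk(const std::set<std::uintptr_t>& live,
                          const std::set<std::uintptr_t>& mark,
                          const std::function<void(std::uintptr_t)>& free_dead) {
      std::size_t freed = 0;
      for (std::uintptr_t addr : live) {
        if (mark.find(addr) == mark.end()) {
          free_dead(addr);  // Dead object: reclaim it.
          ++freed;
        }
      }
      return freed;
    }
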
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 5292344..98e6f65 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -44,6 +44,7 @@
 
 class AllocSpace;
 class BumpPointerSpace;
+class ContinuousMemMapAllocSpace;
 class ContinuousSpace;
 class DiscontinuousSpace;
 class MallocSpace;
@@ -51,6 +52,7 @@
 class RosAllocSpace;
 class ImageSpace;
 class LargeObjectSpace;
+class ZygoteSpace;
 
 static constexpr bool kDebugSpaces = kIsDebugBuild;
 
@@ -68,7 +70,7 @@
 
 enum SpaceType {
   kSpaceTypeImageSpace,
-  kSpaceTypeAllocSpace,
+  kSpaceTypeMallocSpace,
   kSpaceTypeZygoteSpace,
   kSpaceTypeBumpPointerSpace,
   kSpaceTypeLargeObjectSpace,
@@ -91,11 +93,6 @@
     return gc_retention_policy_;
   }
 
-  // Does the space support allocation?
-  virtual bool CanAllocateInto() const {
-    return true;
-  }
-
   // Is the given object contained within this space?
   virtual bool Contains(const mirror::Object* obj) const = 0;
 
@@ -111,7 +108,7 @@
   // Is this a dlmalloc backed allocation space?
   bool IsMallocSpace() const {
     SpaceType type = GetType();
-    return type == kSpaceTypeAllocSpace || type == kSpaceTypeZygoteSpace;
+    return type == kSpaceTypeMallocSpace;
   }
   MallocSpace* AsMallocSpace();
 
@@ -120,20 +117,24 @@
   }
   virtual DlMallocSpace* AsDlMallocSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
   virtual bool IsRosAllocSpace() const {
     return false;
   }
   virtual RosAllocSpace* AsRosAllocSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
 
   // Is this the space allocated into by the Zygote and no-longer in use?
   bool IsZygoteSpace() const {
     return GetType() == kSpaceTypeZygoteSpace;
   }
+  virtual ZygoteSpace* AsZygoteSpace() {
+    LOG(FATAL) << "Unreachable";
+    return nullptr;
+  }
 
   // Is this space a bump pointer space?
   bool IsBumpPointerSpace() const {
@@ -141,7 +142,7 @@
   }
   virtual BumpPointerSpace* AsBumpPointerSpace() {
     LOG(FATAL) << "Unreachable";
-    return NULL;
+    return nullptr;
   }
 
   // Does this space hold large objects and implement the large object space abstraction?
@@ -168,6 +169,14 @@
     return nullptr;
   }
 
+  virtual bool IsContinuousMemMapAllocSpace() const {
+    return false;
+  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
   virtual ~Space() {}
 
  protected:
@@ -181,6 +190,15 @@
   std::string name_;
 
  protected:
+  struct SweepCallbackContext {
+    bool swap_bitmaps;
+    Heap* heap;
+    space::Space* space;
+    Thread* self;
+    size_t freed_objects;
+    size_t freed_bytes;
+  };
+
   // When should objects within this space be reclaimed? Not constant as we vary it in the case
   // of Zygote forking.
   GcRetentionPolicy gc_retention_policy_;
@@ -205,7 +223,7 @@
   virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
 
   // Return the storage space required by obj.
-  virtual size_t AllocationSize(const mirror::Object* obj) = 0;
+  virtual size_t AllocationSize(mirror::Object* obj) = 0;
 
   // Returns how many bytes were freed.
   virtual size_t Free(Thread* self, mirror::Object* ptr) = 0;
@@ -378,22 +396,53 @@
   virtual bool IsAllocSpace() const {
     return true;
   }
-
   virtual AllocSpace* AsAllocSpace() {
     return this;
   }
 
+  virtual bool IsContinuousMemMapAllocSpace() const {
+    return true;
+  }
+  virtual ContinuousMemMapAllocSpace* AsContinuousMemMapAllocSpace() {
+    return this;
+  }
+
+  bool HasBoundBitmaps() const EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void BindLiveToMarkBitmap()
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  void UnBindBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
+  // Swap the live and mark bitmaps of this space. This is used by the GC for concurrent sweeping.
+  void SwapBitmaps();
+
   virtual void Clear() {
     LOG(FATAL) << "Unimplemented";
   }
 
+  virtual accounting::SpaceBitmap* GetLiveBitmap() const {
+    return live_bitmap_.get();
+  }
+  virtual accounting::SpaceBitmap* GetMarkBitmap() const {
+    return mark_bitmap_.get();
+  }
+
+  virtual void Sweep(bool swap_bitmaps, size_t* freed_objects, size_t* freed_bytes);
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+
  protected:
+  UniquePtr<accounting::SpaceBitmap> live_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> mark_bitmap_;
+  UniquePtr<accounting::SpaceBitmap> temp_bitmap_;
+
   ContinuousMemMapAllocSpace(const std::string& name, MemMap* mem_map, byte* begin,
                              byte* end, byte* limit, GcRetentionPolicy gc_retention_policy)
       : MemMapSpace(name, mem_map, begin, end, limit, gc_retention_policy) {
   }
 
  private:
+  friend class gc::Heap;
   DISALLOW_COPY_AND_ASSIGN(ContinuousMemMapAllocSpace);
 };
 
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index b1be9d8..9989ffe 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -16,6 +16,7 @@
 
 #include "dlmalloc_space.h"
 #include "large_object_space.h"
+#include "zygote_space.h"
 
 #include "common_test.h"
 #include "globals.h"
@@ -162,6 +163,7 @@
   EXPECT_TRUE(ptr5 == NULL);
 
   // Release some memory.
+  ScopedObjectAccess soa(self);
   size_t free3 = space->AllocationSize(ptr3);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
   EXPECT_EQ(free3, space->Free(self, ptr3));
@@ -179,7 +181,16 @@
 
   // Make sure that the zygote space isn't directly at the start of the space.
   space->Alloc(self, 1U * MB, &dummy);
-  space = space->CreateZygoteSpace("alloc space", Runtime::Current()->GetHeap()->IsLowMemoryMode());
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  space::Space* old_space = space;
+  heap->RemoveSpace(old_space);
+  space::ZygoteSpace* zygote_space = space->CreateZygoteSpace("alloc space",
+                                                              heap->IsLowMemoryMode(),
+                                                              &space);
+  delete old_space;
+  // Add the zygote space.
+  AddSpace(zygote_space);
 
   // Make space findable to the heap, will also delete space when runtime is cleaned up
   AddSpace(space);
@@ -247,6 +258,7 @@
   EXPECT_TRUE(ptr5 == NULL);
 
   // Release some memory.
+  ScopedObjectAccess soa(self);
   size_t free3 = space->AllocationSize(ptr3);
   EXPECT_EQ(free3, ptr3_bytes_allocated);
   space->Free(self, ptr3);
@@ -344,30 +356,36 @@
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->Alloc(self, 16, &allocation_size);
-    EXPECT_TRUE(lots_of_objects[i] != NULL);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
     InstallClass(lots_of_objects[i], 16);
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
-  // Release memory and check pointers are NULL
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    EXPECT_TRUE(lots_of_objects[i] == NULL);
+  // Release memory and check pointers are NULL.
+  {
+    ScopedObjectAccess soa(self);
+    space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+    for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+      EXPECT_TRUE(lots_of_objects[i] == nullptr);
+    }
   }
 
   // Succeeds, fits by adjusting the max allowed footprint.
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
     size_t allocation_size = 0;
     lots_of_objects[i] = space->AllocWithGrowth(self, 1024, &allocation_size);
-    EXPECT_TRUE(lots_of_objects[i] != NULL);
+    EXPECT_TRUE(lots_of_objects[i] != nullptr);
     InstallClass(lots_of_objects[i], 1024);
     EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i]));
   }
 
   // Release memory and check pointers are NULL
-  space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
-  for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    EXPECT_TRUE(lots_of_objects[i] == NULL);
+  {
+    ScopedObjectAccess soa(self);
+    space->FreeList(self, arraysize(lots_of_objects), lots_of_objects);
+    for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
+      EXPECT_TRUE(lots_of_objects[i] == nullptr);
+    }
   }
 }
 
@@ -481,28 +499,30 @@
       break;
     }
 
-    // Free some objects
-    for (size_t i = 0; i < last_object; i += free_increment) {
-      mirror::Object* object = lots_of_objects.get()[i];
-      if (object == NULL) {
-        continue;
+    {
+      // Free some objects
+      ScopedObjectAccess soa(self);
+      for (size_t i = 0; i < last_object; i += free_increment) {
+        mirror::Object* object = lots_of_objects.get()[i];
+        if (object == NULL) {
+          continue;
+        }
+        size_t allocation_size = space->AllocationSize(object);
+        if (object_size > 0) {
+          EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
+        } else {
+          EXPECT_GE(allocation_size, 8u);
+        }
+        space->Free(self, object);
+        lots_of_objects.get()[i] = NULL;
+        amount_allocated -= allocation_size;
+        footprint = space->GetFootprint();
+        EXPECT_GE(space->Size(), footprint);  // invariant
       }
-      size_t allocation_size = space->AllocationSize(object);
-      if (object_size > 0) {
-        EXPECT_GE(allocation_size, static_cast<size_t>(object_size));
-      } else {
-        EXPECT_GE(allocation_size, 8u);
-      }
-      space->Free(self, object);
-      lots_of_objects.get()[i] = NULL;
-      amount_allocated -= allocation_size;
-      footprint = space->GetFootprint();
-      EXPECT_GE(space->Size(), footprint);  // invariant
+
+      free_increment >>= 1;
     }
-
-    free_increment >>= 1;
   }
-
   // The space has become empty here before allocating a large object
   // below. For RosAlloc, revoke thread-local runs, which are kept
   // even when empty for a performance reason, so that they won't
@@ -530,8 +550,10 @@
   EXPECT_LE(space->Size(), growth_limit);
 
   // Clean up
-  space->Free(self, large_object);
-
+  {
+    ScopedObjectAccess soa(self);
+    space->Free(self, large_object);
+  }
   // Sanity check footprint
   footprint = space->GetFootprint();
   EXPECT_LE(footprint, growth_limit);
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
new file mode 100644
index 0000000..a303765
--- /dev/null
+++ b/runtime/gc/space/zygote_space.cc
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "zygote_space.h"
+
+#include "gc/accounting/card_table-inl.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
+#include "thread-inl.h"
+#include "utils.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+class CountObjectsAllocated {
+ public:
+  explicit CountObjectsAllocated(size_t* objects_allocated)
+      : objects_allocated_(objects_allocated) {}
+
+  void operator()(mirror::Object* obj) const {
+    ++*objects_allocated_;
+  }
+
+ private:
+  size_t* const objects_allocated_;
+};
+
+ZygoteSpace* ZygoteSpace::Create(const std::string& name, MemMap* mem_map,
+                                 accounting::SpaceBitmap* live_bitmap,
+                                 accounting::SpaceBitmap* mark_bitmap) {
+  DCHECK(live_bitmap != nullptr);
+  DCHECK(mark_bitmap != nullptr);
+  size_t objects_allocated = 0;
+  CountObjectsAllocated visitor(&objects_allocated);
+  ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(mem_map->Begin()),
+                                reinterpret_cast<uintptr_t>(mem_map->End()), visitor);
+  ZygoteSpace* zygote_space = new ZygoteSpace(name, mem_map, objects_allocated);
+  CHECK(zygote_space->live_bitmap_.get() == nullptr);
+  CHECK(zygote_space->mark_bitmap_.get() == nullptr);
+  zygote_space->live_bitmap_.reset(live_bitmap);
+  zygote_space->mark_bitmap_.reset(mark_bitmap);
+  return zygote_space;
+}
+
+ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
+    : ContinuousMemMapAllocSpace(name, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                                 kGcRetentionPolicyFullCollect),
+      objects_allocated_(objects_allocated) {
+}
+
+void ZygoteSpace::Dump(std::ostream& os) const {
+  os << GetType()
+      << " begin=" << reinterpret_cast<void*>(Begin())
+      << ",end=" << reinterpret_cast<void*>(End())
+      << ",size=" << PrettySize(Size())
+      << ",name=\"" << GetName() << "\"]";
+}
+
+void ZygoteSpace::SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg) {
+  SweepCallbackContext* context = static_cast<SweepCallbackContext*>(arg);
+  DCHECK(context->space->IsZygoteSpace());
+  ZygoteSpace* zygote_space = context->space->AsZygoteSpace();
+  Locks::heap_bitmap_lock_->AssertExclusiveHeld(context->self);
+  accounting::CardTable* card_table = context->heap->GetCardTable();
+  // If the bitmaps aren't swapped we need to clear the bits since the GC isn't going to re-swap
+  // the bitmaps as an optimization.
+  if (!context->swap_bitmaps) {
+    accounting::SpaceBitmap* bitmap = zygote_space->GetLiveBitmap();
+    for (size_t i = 0; i < num_ptrs; ++i) {
+      bitmap->Clear(ptrs[i]);
+    }
+  }
+  // We don't free any actual memory to avoid dirtying the shared zygote pages.
+  for (size_t i = 0; i < num_ptrs; ++i) {
+    // Need to mark the card since this will update the mod-union table next GC cycle.
+    card_table->MarkCard(ptrs[i]);
+  }
+  zygote_space->objects_allocated_.FetchAndSub(num_ptrs);
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
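
ZygoteSpace::Create above counts the objects already present by passing a small functor to the live bitmap's VisitMarkedRange, and its sweep callback only dirties cards instead of freeing, so the shared zygote pages stay clean. The visitor shape in isolation, with a trivial walker standing in for the bitmap; the types here are placeholders, not ART's:

    #include <cstddef>
    #include <vector>

    struct Object {};

    // Same shape as CountObjectsAllocated: bump an external counter for every
    // object the walker hands us.
    class CountVisitor {
     public:
      explicit CountVisitor(std::size_t* count) : count_(count) {}
      void operator()(Object* /*obj*/) const { ++*count_; }
     private:
      std::size_t* const count_;
    };

    template <typename Visitor>
    void VisitAll(const std::vector<Object*>& objects, const Visitor& visitor) {
      for (Object* obj : objects) {
        visitor(obj);  // Stand-in for SpaceBitmap::VisitMarkedRange.
      }
    }
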
diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h
new file mode 100644
index 0000000..e0035b3
--- /dev/null
+++ b/runtime/gc/space/zygote_space.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
+#define ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
+
+#include "malloc_space.h"
+#include "mem_map.h"
+
+namespace art {
+namespace gc {
+
+namespace accounting {
+class SpaceBitmap;
+}
+
+namespace space {
+
+// A zygote space is a space which you cannot allocate into or free from.
+class ZygoteSpace : public ContinuousMemMapAllocSpace {
+ public:
+  // Creates a zygote space from mem_map, taking ownership of the given live and mark bitmaps.
+  static ZygoteSpace* Create(const std::string& name, MemMap* mem_map,
+                             accounting::SpaceBitmap* live_bitmap,
+                             accounting::SpaceBitmap* mark_bitmap)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void Dump(std::ostream& os) const;
+  virtual SpaceType GetType() const {
+    return kSpaceTypeZygoteSpace;
+  }
+  virtual ZygoteSpace* AsZygoteSpace() {
+    return this;
+  }
+  virtual mirror::Object* AllocWithGrowth(Thread* /*self*/, size_t /*num_bytes*/,
+                                          size_t* /*bytes_allocated*/) {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    LOG(FATAL) << "Unimplemented";
+    return nullptr;
+  }
+  virtual size_t AllocationSize(mirror::Object* obj) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual size_t Free(Thread* self, mirror::Object* ptr) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) {
+    LOG(FATAL) << "Unimplemented";
+    return 0;
+  }
+  virtual uint64_t GetBytesAllocated() {
+    return Size();
+  }
+  virtual uint64_t GetObjectsAllocated() {
+    return objects_allocated_;
+  }
+
+ protected:
+  virtual accounting::SpaceBitmap::SweepCallback* GetSweepCallback() {
+    return &SweepCallback;
+  }
+
+ private:
+  ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated);
+  static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg);
+
+  AtomicInteger objects_allocated_;
+
+  friend class Space;
+  DISALLOW_COPY_AND_ASSIGN(ZygoteSpace);
+};
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_ZYGOTE_SPACE_H_
diff --git a/runtime/globals.h b/runtime/globals.h
index a0d7e48..8c3ae56 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -36,7 +36,7 @@
 static constexpr size_t kBitsPerByte = 8;
 static constexpr size_t kBitsPerByteLog2 = 3;
 static constexpr int kBitsPerWord = kWordSize * kBitsPerByte;
-static constexpr size_t kWordHighBitMask = 1 << (kBitsPerWord - 1);
+static constexpr size_t kWordHighBitMask = static_cast<size_t>(1) << (kBitsPerWord - 1);
 
 // Required stack alignment
 static constexpr size_t kStackAlignment = 16;
@@ -88,6 +88,10 @@
 // True if we allow moving methods.
 static constexpr bool kMovingMethods = false;
 
+// If true, the quick compiler embeds class pointers in the compiled
+// code, if possible.
+static constexpr bool kEmbedClassInCode = true;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
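
The cast added to kWordHighBitMask matters on 64-bit builds: the literal 1 is a 32-bit int, so shifting it by kBitsPerWord - 1 = 63 is undefined behaviour and could never produce a 64-bit mask. A quick illustration, assuming an LP64 target where size_t is 64 bits:

    #include <cstddef>
    #include <cstdio>

    int main() {
      constexpr std::size_t kBitsPerWord = sizeof(std::size_t) * 8;  // 64 on LP64.
      // Wrong: `1` is int, so the shift count reaches the width of int (UB).
      //   std::size_t bad = 1 << (kBitsPerWord - 1);
      // Right: widen the operand to size_t before shifting.
      constexpr std::size_t mask = static_cast<std::size_t>(1) << (kBitsPerWord - 1);
      std::printf("high-bit mask = 0x%zx\n", mask);  // 0x8000000000000000 on LP64.
      return 0;
    }
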
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 9f899e8..24d403d 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -167,14 +167,8 @@
   hprof_basic_long = 11,
 };
 
-typedef uint32_t HprofId;
-typedef HprofId HprofStringId;
-typedef HprofId HprofObjectId;
-typedef HprofId HprofClassObjectId;
-typedef std::set<mirror::Class*> ClassSet;
-typedef std::set<mirror::Class*>::iterator ClassSetIterator;
-typedef SafeMap<std::string, size_t> StringMap;
-typedef SafeMap<std::string, size_t>::iterator StringMapIterator;
+typedef uint32_t HprofStringId;
+typedef uint32_t HprofClassObjectId;
 
 // Represents a top-level hprof record, whose serialized format is:
 // U1  TAG: denoting the type of the record
@@ -183,11 +177,8 @@
 // U1* BODY: as many bytes as specified in the above uint32_t field
 class HprofRecord {
  public:
-  HprofRecord() {
-    dirty_ = false;
-    alloc_length_ = 128;
+  HprofRecord() : alloc_length_(128), fp_(nullptr), tag_(0), time_(0), length_(0), dirty_(false) {
     body_ = reinterpret_cast<unsigned char*>(malloc(alloc_length_));
-    fp_ = NULL;
   }
 
   ~HprofRecord() {
@@ -233,7 +224,7 @@
 
   int AddU1(uint8_t value) {
     int err = GuaranteeRecordAppend(1);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -253,13 +244,30 @@
     return AddU8List(&value, 1);
   }
 
-  int AddId(HprofObjectId value) {
-    return AddU4((uint32_t) value);
+  int AddObjectId(const mirror::Object* value) {
+    return AddU4(PointerToLowMemUInt32(value));
+  }
+
+  // The ID for the synthetic object generated to account for class static overhead.
+  int AddClassStaticsId(const mirror::Class* value) {
+    return AddU4(1 | PointerToLowMemUInt32(value));
+  }
+
+  int AddJniGlobalRefId(jobject value) {
+    return AddU4(PointerToLowMemUInt32(value));
+  }
+
+  int AddClassId(HprofClassObjectId value) {
+    return AddU4(value);
+  }
+
+  int AddStringId(HprofStringId value) {
+    return AddU4(value);
   }
 
   int AddU1List(const uint8_t* values, size_t numValues) {
     int err = GuaranteeRecordAppend(numValues);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -270,7 +278,7 @@
 
   int AddU2List(const uint16_t* values, size_t numValues) {
     int err = GuaranteeRecordAppend(numValues * 2);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -285,7 +293,7 @@
 
   int AddU4List(const uint32_t* values, size_t numValues) {
     int err = GuaranteeRecordAppend(numValues * 4);
-    if (err != 0) {
+    if (UNLIKELY(err != 0)) {
       return err;
     }
 
@@ -317,8 +325,16 @@
     return 0;
   }
 
-  int AddIdList(const HprofObjectId* values, size_t numValues) {
-    return AddU4List((const uint32_t*) values, numValues);
+  int AddIdList(mirror::ObjectArray<mirror::Object>* values)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    int32_t length = values->GetLength();
+    for (int32_t i = 0; i < length; ++i) {
+      int err = AddObjectId(values->GetWithoutChecks(i));
+      if (UNLIKELY(err != 0)) {
+        return err;
+      }
+    }
+    return 0;
   }
 
   int AddUtf8String(const char* str) {
@@ -510,12 +526,11 @@
     HprofRecord* rec = &current_record_;
     uint32_t nextSerialNumber = 1;
 
-    for (ClassSetIterator it = classes_.begin(); it != classes_.end(); ++it) {
-      const mirror::Class* c = *it;
-      CHECK(c != NULL);
+    for (mirror::Class* c : classes_) {
+      CHECK(c != nullptr);
 
       int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_LOAD_CLASS, HPROF_TIME);
-      if (err != 0) {
+      if (UNLIKELY(err != 0)) {
         return err;
       }
 
@@ -525,9 +540,9 @@
       // U4: stack trace serial number
       // ID: class name string ID
       rec->AddU4(nextSerialNumber++);
-      rec->AddId((HprofClassObjectId) c);
+      rec->AddObjectId(c);
       rec->AddU4(HPROF_NULL_STACK_TRACE);
-      rec->AddId(LookupClassNameId(c));
+      rec->AddStringId(LookupClassNameId(c));
     }
 
     return 0;
@@ -536,9 +551,9 @@
   int WriteStringTable() {
     HprofRecord* rec = &current_record_;
 
-    for (StringMapIterator it = strings_.begin(); it != strings_.end(); ++it) {
-      const std::string& string = (*it).first;
-      size_t id = (*it).second;
+    for (std::pair<std::string, HprofStringId> p : strings_) {
+      const std::string& string = p.first;
+      size_t id = p.second;
 
       int err = current_record_.StartNewRecord(header_fp_, HPROF_TAG_STRING, HPROF_TIME);
       if (err != 0) {
@@ -573,24 +588,26 @@
 
   int MarkRootObject(const mirror::Object* obj, jobject jniObj);
 
-  HprofClassObjectId LookupClassId(mirror::Class* c)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (c == NULL) {
-      // c is the superclass of java.lang.Object or a primitive
-      return (HprofClassObjectId)0;
+  HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (c == nullptr) {
+      // c is the superclass of java.lang.Object or a primitive.
+      return 0;
     }
 
-    std::pair<ClassSetIterator, bool> result = classes_.insert(c);
-    const mirror::Class* present = *result.first;
+    {
+      auto result = classes_.insert(c);
+      const mirror::Class* present = *result.first;
+      CHECK_EQ(present, c);
+    }
 
     // Make sure that we've assigned a string ID for this class' name
     LookupClassNameId(c);
 
-    CHECK_EQ(present, c);
-    return (HprofStringId) present;
+    HprofClassObjectId result = PointerToLowMemUInt32(c);
+    return result;
   }
 
-  HprofStringId LookupStringId(mirror::String* string) {
+  HprofStringId LookupStringId(mirror::String* string) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return LookupStringId(string->ToModifiedUtf8());
   }
 
@@ -599,7 +616,7 @@
   }
 
   HprofStringId LookupStringId(const std::string& string) {
-    StringMapIterator it = strings_.find(string);
+    auto it = strings_.find(string);
     if (it != strings_.end()) {
       return it->second;
     }
@@ -608,8 +625,7 @@
     return id;
   }
 
-  HprofStringId LookupClassNameId(const mirror::Class* c)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  HprofStringId LookupClassNameId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return LookupStringId(PrettyDescriptor(c));
   }
 
@@ -675,9 +691,9 @@
   char* body_data_ptr_;
   size_t body_data_size_;
 
-  ClassSet classes_;
-  size_t next_string_id_;
-  StringMap strings_;
+  std::set<mirror::Class*> classes_;
+  HprofStringId next_string_id_;
+  SafeMap<std::string, HprofStringId> strings_;
 
   DISALLOW_COPY_AND_ASSIGN(Hprof);
 };
@@ -685,11 +701,8 @@
 #define OBJECTS_PER_SEGMENT     ((size_t)128)
 #define BYTES_PER_SEGMENT       ((size_t)4096)
 
-// The static field-name for the synthetic object generated to account
-// for class static overhead.
+// The static field-name for the synthetic object generated to account for class static overhead.
 #define STATIC_OVERHEAD_NAME    "$staticOverhead"
-// The ID for the synthetic object generated to account for class static overhead.
-#define CLASS_STATICS_ID(c) ((HprofObjectId)(((uint32_t)(c)) | 1))
 
 static HprofBasicType SignatureToBasicTypeAndSize(const char* sig, size_t* sizeOut) {
   char c = sig[0];
@@ -765,15 +778,15 @@
   case HPROF_ROOT_DEBUGGER:
   case HPROF_ROOT_VM_INTERNAL:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     break;
 
   // ID: object ID
   // ID: JNI global ref ID
   case HPROF_ROOT_JNI_GLOBAL:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
-    rec->AddId((HprofId)jniObj);
+    rec->AddObjectId(obj);
+    rec->AddJniGlobalRefId(jniObj);
     break;
 
   // ID: object ID
@@ -783,7 +796,7 @@
   case HPROF_ROOT_JNI_MONITOR:
   case HPROF_ROOT_JAVA_FRAME:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     rec->AddU4(gc_thread_serial_number_);
     rec->AddU4((uint32_t)-1);
     break;
@@ -793,7 +806,7 @@
   case HPROF_ROOT_NATIVE_STACK:
   case HPROF_ROOT_THREAD_BLOCK:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     rec->AddU4(gc_thread_serial_number_);
     break;
 
@@ -802,7 +815,7 @@
   // U4: stack trace serial number
   case HPROF_ROOT_THREAD_OBJECT:
     rec->AddU1(heapTag);
-    rec->AddId((HprofObjectId)obj);
+    rec->AddObjectId(obj);
     rec->AddU4(gc_thread_serial_number_);
     rec->AddU4((uint32_t)-1);    // xxx
     break;
@@ -859,7 +872,7 @@
       nameId = LookupStringId("<ILLEGAL>");
       break;
     }
-    rec->AddId(nameId);
+    rec->AddStringId(nameId);
     current_heap_ = desiredHeap;
   }
 
@@ -875,11 +888,11 @@
       // obj is a ClassObject.
       size_t sFieldCount = thisClass->NumStaticFields();
       if (sFieldCount != 0) {
-        int byteLength = sFieldCount*sizeof(JValue);  // TODO bogus; fields are packed
+        int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
         // Create a byte array to reflect the allocation of the
         // StaticField array at the end of this class.
         rec->AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
-        rec->AddId(CLASS_STATICS_ID(obj));
+        rec->AddClassStaticsId(thisClass);
         rec->AddU4(StackTraceSerialNumber(obj));
         rec->AddU4(byteLength);
         rec->AddU1(hprof_basic_byte);
@@ -889,14 +902,14 @@
       }
 
       rec->AddU1(HPROF_CLASS_DUMP);
-      rec->AddId(LookupClassId(thisClass));
+      rec->AddClassId(LookupClassId(thisClass));
       rec->AddU4(StackTraceSerialNumber(thisClass));
-      rec->AddId(LookupClassId(thisClass->GetSuperClass()));
-      rec->AddId((HprofObjectId)thisClass->GetClassLoader());
-      rec->AddId((HprofObjectId)0);    // no signer
-      rec->AddId((HprofObjectId)0);    // no prot domain
-      rec->AddId((HprofId)0);           // reserved
-      rec->AddId((HprofId)0);           // reserved
+      rec->AddClassId(LookupClassId(thisClass->GetSuperClass()));
+      rec->AddObjectId(thisClass->GetClassLoader());
+      rec->AddObjectId(nullptr);    // no signer
+      rec->AddObjectId(nullptr);    // no prot domain
+      rec->AddObjectId(nullptr);    // reserved
+      rec->AddObjectId(nullptr);    // reserved
       if (thisClass->IsClassClass()) {
         // ClassObjects have their static fields appended, so aren't all the same size.
         // But they're at least this size.
@@ -916,9 +929,9 @@
         rec->AddU2((uint16_t)0);
       } else {
         rec->AddU2((uint16_t)(sFieldCount+1));
-        rec->AddId(LookupStringId(STATIC_OVERHEAD_NAME));
+        rec->AddStringId(LookupStringId(STATIC_OVERHEAD_NAME));
         rec->AddU1(hprof_basic_object);
-        rec->AddId(CLASS_STATICS_ID(obj));
+        rec->AddClassStaticsId(thisClass);
 
         for (size_t i = 0; i < sFieldCount; ++i) {
           mirror::ArtField* f = thisClass->GetStaticField(i);
@@ -926,7 +939,7 @@
 
           size_t size;
           HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), &size);
-          rec->AddId(LookupStringId(fh.GetName()));
+          rec->AddStringId(LookupStringId(fh.GetName()));
           rec->AddU1(t);
           if (size == 1) {
             rec->AddU1(static_cast<uint8_t>(f->Get32(thisClass)));
@@ -949,24 +962,24 @@
         mirror::ArtField* f = thisClass->GetInstanceField(i);
         fh.ChangeField(f);
         HprofBasicType t = SignatureToBasicTypeAndSize(fh.GetTypeDescriptor(), NULL);
-        rec->AddId(LookupStringId(fh.GetName()));
+        rec->AddStringId(LookupStringId(fh.GetName()));
         rec->AddU1(t);
       }
     } else if (c->IsArrayClass()) {
-      const mirror::Array* aobj = obj->AsArray();
+      mirror::Array* aobj = obj->AsArray();
       uint32_t length = aobj->GetLength();
 
       if (obj->IsObjectArray()) {
         // obj is an object array.
         rec->AddU1(HPROF_OBJECT_ARRAY_DUMP);
 
-        rec->AddId((HprofObjectId)obj);
+        rec->AddObjectId(obj);
         rec->AddU4(StackTraceSerialNumber(obj));
         rec->AddU4(length);
-        rec->AddId(LookupClassId(c));
+        rec->AddClassId(LookupClassId(c));
 
         // Dump the elements, which are always objects or NULL.
-        rec->AddIdList((const HprofObjectId*)aobj->GetRawData(sizeof(mirror::Object*)), length);
+        rec->AddIdList(aobj->AsObjectArray<mirror::Object>());
       } else {
         size_t size;
         HprofBasicType t = PrimitiveToBasicTypeAndSize(c->GetComponentType()->GetPrimitiveType(), &size);
@@ -974,28 +987,28 @@
         // obj is a primitive array.
         rec->AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
 
-        rec->AddId((HprofObjectId)obj);
+        rec->AddObjectId(obj);
         rec->AddU4(StackTraceSerialNumber(obj));
         rec->AddU4(length);
         rec->AddU1(t);
 
         // Dump the raw, packed element values.
         if (size == 1) {
-          rec->AddU1List((const uint8_t*)aobj->GetRawData(sizeof(uint8_t)), length);
+          rec->AddU1List((const uint8_t*)aobj->GetRawData(sizeof(uint8_t), 0), length);
         } else if (size == 2) {
-          rec->AddU2List((const uint16_t*)aobj->GetRawData(sizeof(uint16_t)), length);
+          rec->AddU2List((const uint16_t*)aobj->GetRawData(sizeof(uint16_t), 0), length);
         } else if (size == 4) {
-          rec->AddU4List((const uint32_t*)aobj->GetRawData(sizeof(uint32_t)), length);
+          rec->AddU4List((const uint32_t*)aobj->GetRawData(sizeof(uint32_t), 0), length);
         } else if (size == 8) {
-          rec->AddU8List((const uint64_t*)aobj->GetRawData(sizeof(uint64_t)), length);
+          rec->AddU8List((const uint64_t*)aobj->GetRawData(sizeof(uint64_t), 0), length);
         }
       }
     } else {
       // obj is an instance object.
       rec->AddU1(HPROF_INSTANCE_DUMP);
-      rec->AddId((HprofObjectId)obj);
+      rec->AddObjectId(obj);
       rec->AddU4(StackTraceSerialNumber(obj));
-      rec->AddId(LookupClassId(c));
+      rec->AddClassId(LookupClassId(c));
 
       // Reserve some space for the length of the instance data, which we won't
       // know until we're done writing it.
@@ -1004,7 +1017,7 @@
 
       // Write the instance data;  fields for this class, followed by super class fields,
       // and so on. Don't write the klass or monitor fields of Object.class.
-      const mirror::Class* sclass = c;
+      mirror::Class* sclass = c;
       FieldHelper fh;
       while (!sclass->IsObjectClass()) {
         int ifieldCount = sclass->NumInstanceFields();
@@ -1019,10 +1032,9 @@
             rec->AddU2(f->Get32(obj));
           } else if (size == 4) {
             rec->AddU4(f->Get32(obj));
-          } else if (size == 8) {
-            rec->AddU8(f->Get64(obj));
           } else {
-            CHECK(false);
+            CHECK_EQ(size, 8U);
+            rec->AddU8(f->Get64(obj));
           }
         }
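
The hprof hunks above replace raw casts to HprofId with typed helpers (AddObjectId, AddClassId, AddStringId, AddClassStaticsId) and derive 32-bit record IDs from object pointers via PointerToLowMemUInt32. A minimal standalone sketch of that pointer-to-ID step, assuming the managed heap is mapped below 4 GiB; the helper name below is hypothetical, not the ART function:

#include <cassert>
#include <cstdint>

using HprofObjectId = uint32_t;  // hprof records use 32-bit IDs

// Hypothetical stand-in for PointerToLowMemUInt32: only valid when the object
// actually lives in the low 4 GiB of the address space.
static HprofObjectId PointerToHprofId(const void* obj) {
  uintptr_t raw = reinterpret_cast<uintptr_t>(obj);
  assert(raw == static_cast<uint32_t>(raw));  // pointer must fit in 32 bits
  return static_cast<uint32_t>(raw);
}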
 
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 51b238c..21e942e 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -326,7 +326,7 @@
    * Extract the table index from an indirect reference.
    */
   static uint32_t ExtractIndex(IndirectRef iref) {
-    uint32_t uref = (uint32_t) iref;
+    uintptr_t uref = reinterpret_cast<uintptr_t>(iref);
     return (uref >> 2) & 0xffff;
   }
 
@@ -337,8 +337,8 @@
   IndirectRef ToIndirectRef(const mirror::Object* /*o*/, uint32_t tableIndex) const {
     DCHECK_LT(tableIndex, 65536U);
     uint32_t serialChunk = slot_data_[tableIndex].serial;
-    uint32_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
-    return (IndirectRef) uref;
+    uintptr_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
+    return reinterpret_cast<IndirectRef>(uref);
   }
 
   /*
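
The indirect_reference_table.h hunks switch the reference-encoding math from uint32_t to uintptr_t. As a standalone illustration (function names made up), the layout implied by ToIndirectRef/ExtractIndex above packs a serial number, a 16-bit table index and a 2-bit kind into one word:

#include <cstdint>

// Pack as in ToIndirectRef: serial in the high bits, table index shifted left
// by two, reference kind in the low two bits.
static uintptr_t PackIndirectRef(uint32_t serial, uint32_t table_index, uint32_t kind) {
  return (static_cast<uintptr_t>(serial) << 20) |
         (static_cast<uintptr_t>(table_index) << 2) | kind;
}

// Unpack as in ExtractIndex: drop the kind bits, keep 16 bits of index.
static uint32_t ExtractTableIndex(uintptr_t uref) {
  return static_cast<uint32_t>((uref >> 2) & 0xffff);
}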
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index aee7447..ac83601 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -29,6 +29,7 @@
   kArm,
   kThumb2,
   kX86,
+  kX86_64,
   kMips
 };
 
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 0b11543..59ffdc1 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -18,7 +18,7 @@
 
 #include <sys/uio.h>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
@@ -68,10 +68,21 @@
   return true;
 }
 
-static void UpdateEntrypoints(mirror::ArtMethod* method, const void* code) {
-  method->SetEntryPointFromCompiledCode(code);
+static void UpdateEntrypoints(mirror::ArtMethod* method, const void* quick_code,
+                              const void* portable_code, bool have_portable_code)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  method->SetEntryPointFromPortableCompiledCode(portable_code);
+  method->SetEntryPointFromQuickCompiledCode(quick_code);
+  bool portable_enabled = method->IsPortableCompiled();
+  if (have_portable_code && !portable_enabled) {
+    method->SetIsPortableCompiled();
+  } else if (portable_enabled) {
+    method->ClearIsPortableCompiled();
+  }
   if (!method->IsResolutionMethod()) {
-    if (code == GetCompiledCodeToInterpreterBridge()) {
+    if (quick_code == GetQuickToInterpreterBridge()) {
+      DCHECK(portable_code == GetPortableToInterpreterBridge());
+      DCHECK(!method->IsNative()) << PrettyMethod(method);
       method->SetEntryPointFromInterpreter(art::interpreter::artInterpreterToInterpreterBridge);
     } else {
       method->SetEntryPointFromInterpreter(art::artInterpreterToCompiledCodeBridge);
@@ -84,37 +95,47 @@
     // Do not change stubs for these methods.
     return;
   }
-  const void* new_code;
+  const void* new_portable_code;
+  const void* new_quick_code;
   bool uninstall = !entry_exit_stubs_installed_ && !interpreter_stubs_installed_;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   bool is_class_initialized = method->GetDeclaringClass()->IsInitialized();
+  bool have_portable_code = false;
   if (uninstall) {
     if ((forced_interpret_only_ || IsDeoptimized(method)) && !method->IsNative()) {
-      new_code = GetCompiledCodeToInterpreterBridge();
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_quick_code = GetQuickToInterpreterBridge();
     } else if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
-      new_code = class_linker->GetOatCodeFor(method);
+      new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+      new_quick_code = class_linker->GetQuickOatCodeFor(method);
     } else {
-      new_code = GetResolutionTrampoline(class_linker);
+      new_portable_code = GetPortableResolutionTrampoline(class_linker);
+      new_quick_code = GetQuickResolutionTrampoline(class_linker);
     }
   } else {  // !uninstall
     if ((interpreter_stubs_installed_ || IsDeoptimized(method)) && !method->IsNative()) {
-      new_code = GetCompiledCodeToInterpreterBridge();
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_quick_code = GetQuickToInterpreterBridge();
     } else {
       // Do not overwrite resolution trampoline. When the trampoline initializes the method's
       // class, all its static methods' code will be set to the instrumentation entry point.
       // For more details, see ClassLinker::FixupStaticTrampolines.
       if (is_class_initialized || !method->IsStatic() || method->IsConstructor()) {
         // Do not overwrite interpreter to prevent from posting method entry/exit events twice.
-        new_code = class_linker->GetOatCodeFor(method);
-        if (entry_exit_stubs_installed_ && new_code != GetCompiledCodeToInterpreterBridge()) {
-          new_code = GetQuickInstrumentationEntryPoint();
+        new_portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+        new_quick_code = class_linker->GetQuickOatCodeFor(method);
+        if (entry_exit_stubs_installed_ && new_quick_code != GetQuickToInterpreterBridge()) {
+          DCHECK(new_portable_code != GetPortableToInterpreterBridge());
+          new_portable_code = GetPortableToInterpreterBridge();
+          new_quick_code = GetQuickInstrumentationEntryPoint();
         }
       } else {
-        new_code = GetResolutionTrampoline(class_linker);
+        new_portable_code = GetPortableResolutionTrampoline(class_linker);
+        new_quick_code = GetQuickResolutionTrampoline(class_linker);
       }
     }
   }
-  UpdateEntrypoints(method, new_code);
+  UpdateEntrypoints(method, new_quick_code, new_portable_code, have_portable_code);
 }
 
 // Places the instrumentation exit pc as the return PC for every quick frame. This also allows
@@ -470,23 +491,38 @@
   }
 }
 
-void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const {
-  const void* new_code;
+void Instrumentation::UpdateMethodsCode(mirror::ArtMethod* method, const void* quick_code,
+                                        const void* portable_code, bool have_portable_code) const {
+  const void* new_portable_code;
+  const void* new_quick_code;
+  bool new_have_portable_code;
   if (LIKELY(!instrumentation_stubs_installed_)) {
-    new_code = code;
+    new_portable_code = portable_code;
+    new_quick_code = quick_code;
+    new_have_portable_code = have_portable_code;
   } else {
     if ((interpreter_stubs_installed_ || IsDeoptimized(method)) && !method->IsNative()) {
-      new_code = GetCompiledCodeToInterpreterBridge();
-    } else if (code == GetResolutionTrampoline(Runtime::Current()->GetClassLinker()) ||
-               code == GetCompiledCodeToInterpreterBridge()) {
-      new_code = code;
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_quick_code = GetQuickToInterpreterBridge();
+      new_have_portable_code = false;
+    } else if (quick_code == GetQuickResolutionTrampoline(Runtime::Current()->GetClassLinker()) ||
+               quick_code == GetQuickToInterpreterBridge()) {
+      DCHECK((portable_code == GetPortableResolutionTrampoline(Runtime::Current()->GetClassLinker())) ||
+             (portable_code == GetPortableToInterpreterBridge()));
+      new_portable_code = portable_code;
+      new_quick_code = quick_code;
+      new_have_portable_code = have_portable_code;
     } else if (entry_exit_stubs_installed_) {
-      new_code = GetQuickInstrumentationEntryPoint();
+      new_quick_code = GetQuickInstrumentationEntryPoint();
+      new_portable_code = GetPortableToInterpreterBridge();
+      new_have_portable_code = false;
     } else {
-      new_code = code;
+      new_portable_code = portable_code;
+      new_quick_code = quick_code;
+      new_have_portable_code = have_portable_code;
     }
   }
-  UpdateEntrypoints(method, new_code);
+  UpdateEntrypoints(method, new_quick_code, new_portable_code, new_have_portable_code);
 }
 
 void Instrumentation::Deoptimize(mirror::ArtMethod* method) {
@@ -499,7 +535,8 @@
   CHECK(!already_deoptimized) << "Method " << PrettyMethod(method) << " is already deoptimized";
 
   if (!interpreter_stubs_installed_) {
-    UpdateEntrypoints(method, GetCompiledCodeToInterpreterBridge());
+    UpdateEntrypoints(method, GetQuickToInterpreterBridge(), GetPortableToInterpreterBridge(),
+                      false);
 
     // Install instrumentation exit stub and instrumentation frames. We may already have installed
     // these previously so it will only cover the newly created frames.
@@ -522,10 +559,15 @@
   if (!interpreter_stubs_installed_) {
     // Restore its code or resolution trampoline.
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    if (method->IsStatic() && !method->IsConstructor() && !method->GetDeclaringClass()->IsInitialized()) {
-      UpdateEntrypoints(method, GetResolutionTrampoline(class_linker));
+    if (method->IsStatic() && !method->IsConstructor() &&
+        !method->GetDeclaringClass()->IsInitialized()) {
+      UpdateEntrypoints(method, GetQuickResolutionTrampoline(class_linker),
+                        GetPortableResolutionTrampoline(class_linker), false);
     } else {
-      UpdateEntrypoints(method, class_linker->GetOatCodeFor(method));
+      bool have_portable_code = false;
+      const void* quick_code = class_linker->GetQuickOatCodeFor(method);
+      const void* portable_code = class_linker->GetPortableOatCodeFor(method, &have_portable_code);
+      UpdateEntrypoints(method, quick_code, portable_code, have_portable_code);
     }
 
     // If there is no deoptimized method left, we can restore the stack of each thread.
@@ -582,21 +624,21 @@
   ConfigureStubs(false, false);
 }
 
-const void* Instrumentation::GetQuickCodeFor(const mirror::ArtMethod* method) const {
+const void* Instrumentation::GetQuickCodeFor(mirror::ArtMethod* method) const {
   Runtime* runtime = Runtime::Current();
   if (LIKELY(!instrumentation_stubs_installed_)) {
-    const void* code = method->GetEntryPointFromCompiledCode();
+    const void* code = method->GetEntryPointFromQuickCompiledCode();
     DCHECK(code != NULL);
     if (LIKELY(code != GetQuickResolutionTrampoline(runtime->GetClassLinker()) &&
                code != GetQuickToInterpreterBridge())) {
       return code;
     }
   }
-  return runtime->GetClassLinker()->GetOatCodeFor(method);
+  return runtime->GetClassLinker()->GetQuickOatCodeFor(method);
 }
 
 void Instrumentation::MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
-                                           const mirror::ArtMethod* method,
+                                           mirror::ArtMethod* method,
                                            uint32_t dex_pc) const {
   auto it = method_entry_listeners_.begin();
   bool is_end = (it == method_entry_listeners_.end());
@@ -610,7 +652,7 @@
 }
 
 void Instrumentation::MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
-                                          const mirror::ArtMethod* method,
+                                          mirror::ArtMethod* method,
                                           uint32_t dex_pc, const JValue& return_value) const {
   auto it = method_exit_listeners_.begin();
   bool is_end = (it == method_exit_listeners_.end());
@@ -624,7 +666,7 @@
 }
 
 void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
-                                        const mirror::ArtMethod* method,
+                                        mirror::ArtMethod* method,
                                         uint32_t dex_pc) const {
   if (have_method_unwind_listeners_) {
     for (InstrumentationListener* listener : method_unwind_listeners_) {
@@ -634,7 +676,7 @@
 }
 
 void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
-                                          const mirror::ArtMethod* method,
+                                          mirror::ArtMethod* method,
                                           uint32_t dex_pc) const {
   // TODO: STL copy-on-write collection? The copy below is due to the debug listener having an
   // action where it can remove itself as a listener and break the iterator. The copy only works
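
The instrumentation.cc rewrite threads a (quick, portable) entry-point pair plus a have_portable_code flag through every update path, and DCHECKs that a method bridged to the interpreter on the quick side is bridged on the portable side too. A standalone sketch of that pairing invariant; the types and names here are illustrative, not the ART API:

#include <cassert>

struct Entrypoints {
  const void* quick;
  const void* portable;
  bool have_portable_code;  // true only when real portable-compiled code exists
};

// Mirrors the DCHECK added in UpdateEntrypoints: quick bridge implies portable bridge,
// and a bridged method never claims to have portable-compiled code.
static void CheckInterpreterBridgePairing(const Entrypoints& ep,
                                          const void* quick_bridge,
                                          const void* portable_bridge) {
  if (ep.quick == quick_bridge) {
    assert(ep.portable == portable_bridge);
    assert(!ep.have_portable_code);
  }
}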
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 41b545d..f01add1 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_INSTRUMENTATION_H_
 #define ART_RUNTIME_INSTRUMENTATION_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/macros.h"
 #include "locks.h"
 
@@ -55,26 +55,26 @@
 
   // Call-back for when a method is entered.
   virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method,
+                             mirror::ArtMethod* method,
                              uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is exited.
   // TODO: it's likely passing the return value would be useful; however, we may need to get and
   //       parse the shorty to determine what kind of register holds the result.
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc,
+                            mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is popped due to an exception throw. A method will either cause a
   // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
   virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc)
+                            mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back for when the dex pc moves in a method.
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                          mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
   // Call-back when an exception is caught.
@@ -171,13 +171,14 @@
   void ResetQuickAllocEntryPoints();
 
   // Update the code of a method respecting any installed stubs.
-  void UpdateMethodsCode(mirror::ArtMethod* method, const void* code) const
+  void UpdateMethodsCode(mirror::ArtMethod* method, const void* quick_code,
+                         const void* portable_code, bool have_portable_code) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get the quick code for the given method. More efficient than asking the class linker as it
   // will short-cut to GetCode if instrumentation and static method resolution stubs aren't
   // installed.
-  const void* GetQuickCodeFor(const mirror::ArtMethod* method) const
+  const void* GetQuickCodeFor(mirror::ArtMethod* method) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ForceInterpretOnly() {
@@ -218,7 +219,7 @@
   // Inform listeners that a method has been entered. A dex PC is provided as we may install
   // listeners into executing code and get method enter events for methods already on the stack.
   void MethodEnterEvent(Thread* thread, mirror::Object* this_object,
-                        const mirror::ArtMethod* method, uint32_t dex_pc) const
+                        mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasMethodEntryListeners())) {
       MethodEnterEventImpl(thread, this_object, method, dex_pc);
@@ -227,7 +228,7 @@
 
   // Inform listeners that a method has been exited.
   void MethodExitEvent(Thread* thread, mirror::Object* this_object,
-                       const mirror::ArtMethod* method, uint32_t dex_pc,
+                       mirror::ArtMethod* method, uint32_t dex_pc,
                        const JValue& return_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasMethodExitListeners())) {
@@ -237,12 +238,12 @@
 
   // Inform listeners that a method has been exited due to an exception.
   void MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* method, uint32_t dex_pc) const
+                         mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Inform listeners that the dex pc has moved (only supported by the interpreter).
   void DexPcMovedEvent(Thread* thread, mirror::Object* this_object,
-                       const mirror::ArtMethod* method, uint32_t dex_pc) const
+                       mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasDexPcListeners())) {
       DexPcMovedEventImpl(thread, this_object, method, dex_pc);
@@ -289,14 +290,14 @@
   }
 
   void MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc) const
+                            mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
-                           const mirror::ArtMethod* method,
+                           mirror::ArtMethod* method,
                            uint32_t dex_pc, const JValue& return_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
-                           const mirror::ArtMethod* method, uint32_t dex_pc) const
+                           mirror::ArtMethod* method, uint32_t dex_pc) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Have we hijacked ArtMethod::code_ so that it calls instrumentation/interpreter code?
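
The instrumentation.h interface now passes a non-const mirror::ArtMethod* to every listener callback. A simplified standalone sketch of the listener shape; ArtMethod and MirrorObject are only forward-declared stand-ins here, and the real interface also carries MethodUnwind, DexPcMoved, ExceptionCaught and lock annotations:

#include <cstdint>

struct ArtMethod;     // stand-in for art::mirror::ArtMethod
struct MirrorObject;  // stand-in for art::mirror::Object

class MethodEventListener {
 public:
  virtual ~MethodEventListener() {}
  // Mutable method pointer: listeners may update per-method state.
  virtual void MethodEntered(MirrorObject* receiver, ArtMethod* method, uint32_t dex_pc) = 0;
  virtual void MethodExited(MirrorObject* receiver, ArtMethod* method, uint32_t dex_pc) = 0;
};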
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index f574a0f..922e642 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -57,7 +57,10 @@
   } else if (name == "int java.lang.String.fastIndexOf(int, int)") {
     result->SetI(receiver->AsString()->FastIndexOf(args[0], args[1]));
   } else if (name == "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") {
-    result->SetL(Array::CreateMultiArray(self, reinterpret_cast<Object*>(args[0])->AsClass(), reinterpret_cast<Object*>(args[1])->AsIntArray()));
+    SirtRef<mirror::Class> sirt_class(self, reinterpret_cast<Object*>(args[0])->AsClass());
+    SirtRef<mirror::IntArray> sirt_dimensions(self,
+                                              reinterpret_cast<Object*>(args[1])->AsIntArray());
+    result->SetL(Array::CreateMultiArray(self, sirt_class, sirt_dimensions));
   } else if (name == "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") {
     ScopedObjectAccessUnchecked soa(self);
     result->SetL(soa.Decode<Object*>(self->CreateInternalStackTrace(soa)));
@@ -356,7 +359,7 @@
     DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
     switch (shorty[shorty_pos + 1]) {
       case 'L': {
-        Object* o = reinterpret_cast<Object*>(args[arg_pos]);
+        Object* o = reinterpret_cast<StackReference<Object>*>(&args[arg_pos])->AsMirrorPtr();
         shadow_frame->SetVRegReference(cur_reg, o);
         break;
       }
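
The interpreter.cc hunk reads object arguments through StackReference<Object> instead of casting a raw uint32_t to a pointer. A standalone sketch of the underlying idea, a 32-bit vreg slot holding a compressed object pointer, assuming objects live in the low 4 GiB; StackRef32 and MirrorObject are illustrative names, not the ART types:

#include <cstdint>

struct MirrorObject;  // stand-in for art::mirror::Object

struct StackRef32 {
  uint32_t value;  // raw contents of one 32-bit vreg slot

  MirrorObject* AsMirrorPtr() const {
    return reinterpret_cast<MirrorObject*>(static_cast<uintptr_t>(value));
  }

  static StackRef32 FromMirrorPtr(MirrorObject* obj) {
    // Assumes the pointer fits in 32 bits (low-memory heap).
    return StackRef32{static_cast<uint32_t>(reinterpret_cast<uintptr_t>(obj))};
  }
};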
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index be358e3..0b959fb 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -25,15 +25,16 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Assign register 'src_reg' from shadow_frame to register 'dest_reg' into new_shadow_frame.
-static inline void AssignRegister(ShadowFrame& new_shadow_frame, const ShadowFrame& shadow_frame,
-                                  size_t dest_reg, size_t src_reg) {
+static inline void AssignRegister(ShadowFrame* new_shadow_frame, const ShadowFrame& shadow_frame,
+                                  size_t dest_reg, size_t src_reg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // If both register locations contain the same value, the register probably holds a reference.
   int32_t src_value = shadow_frame.GetVReg(src_reg);
   mirror::Object* o = shadow_frame.GetVRegReference<false>(src_reg);
-  if (src_value == reinterpret_cast<int32_t>(o)) {
-    new_shadow_frame.SetVRegReference(dest_reg, o);
+  if (src_value == reinterpret_cast<intptr_t>(o)) {
+    new_shadow_frame->SetVRegReference(dest_reg, o);
   } else {
-    new_shadow_frame.SetVReg(dest_reg, src_value);
+    new_shadow_frame->SetVReg(dest_reg, src_value);
   }
 }
 
@@ -84,7 +85,7 @@
       ++dest_reg;
       ++arg_offset;
     }
-    for (size_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
+    for (uint32_t shorty_pos = 0; dest_reg < num_regs; ++shorty_pos, ++dest_reg, ++arg_offset) {
       DCHECK_LT(shorty_pos + 1, mh.GetShortyLength());
       const size_t src_reg = (is_range) ? vregC + arg_offset : arg[arg_offset];
       switch (shorty[shorty_pos + 1]) {
@@ -131,18 +132,18 @@
       const uint16_t first_src_reg = inst->VRegC_3rc();
       for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
           ++dest_reg, ++src_reg) {
-        AssignRegister(*new_shadow_frame, shadow_frame, dest_reg, src_reg);
+        AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
       }
     } else {
       DCHECK_LE(num_ins, 5U);
       uint16_t regList = inst->Fetch16(2);
       uint16_t count = num_ins;
       if (count == 5) {
-        AssignRegister(*new_shadow_frame, shadow_frame, first_dest_reg + 4U, (inst_data >> 8) & 0x0f);
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U, (inst_data >> 8) & 0x0f);
         --count;
        }
       for (size_t arg_index = 0; arg_index < count; ++arg_index, regList >>= 4) {
-        AssignRegister(*new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, regList & 0x0f);
       }
     }
   }
@@ -200,26 +201,20 @@
     DCHECK(self->IsExceptionPending());
     return false;
   }
+  uint32_t arg[5];  // only used in filled-new-array.
+  uint32_t vregC;   // only used in filled-new-array-range.
   if (is_range) {
-    uint32_t vregC = inst->VRegC_3rc();
-    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
-    for (int32_t i = 0; i < length; ++i) {
-      if (is_primitive_int_component) {
-        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(vregC + i));
-      } else {
-        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(vregC + i));
-      }
-    }
+    vregC = inst->VRegC_3rc();
   } else {
-    uint32_t arg[5];
     inst->GetArgs(arg);
-    const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
-    for (int32_t i = 0; i < length; ++i) {
-      if (is_primitive_int_component) {
-        newArray->AsIntArray()->Set(i, shadow_frame.GetVReg(arg[i]));
-      } else {
-        newArray->AsObjectArray<Object>()->Set(i, shadow_frame.GetVRegReference(arg[i]));
-      }
+  }
+  const bool is_primitive_int_component = componentClass->IsPrimitiveInt();
+  for (int32_t i = 0; i < length; ++i) {
+    size_t src_reg = is_range ? vregC + i : arg[i];
+    if (is_primitive_int_component) {
+      newArray->AsIntArray()->SetWithoutChecks(i, shadow_frame.GetVReg(src_reg));
+    } else {
+      newArray->AsObjectArray<Object>()->SetWithoutChecks(i, shadow_frame.GetVRegReference(src_reg));
     }
   }
 
@@ -295,7 +290,7 @@
     CHECK(field.get() != NULL);
     ArtMethod* c = jlr_Field->FindDeclaredDirectMethod("<init>", "(Ljava/lang/reflect/ArtField;)V");
     uint32_t args[1];
-    args[0] = reinterpret_cast<uint32_t>(found);
+    args[0] = StackReference<mirror::Object>::FromMirrorPtr(found).AsVRegValue();
     EnterInterpreterFromInvoke(self, c, field.get(), args, NULL);
     result->SetL(field.get());
   } else if (name == "void java.lang.System.arraycopy(java.lang.Object, int, java.lang.Object, int, int)" ||
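
The filled-new-array rewrite above folds the range and 5-register forms into one loop: only the source-register computation differs per element, and the stores use unchecked setters since the array was just allocated with exactly that length. A minimal sketch of the register selection; the free function below is made up for illustration:

#include <cstddef>
#include <cstdint>

// For element i: the range form walks vregC, vregC+1, ...; the non-range form
// reads the packed argument registers decoded by Instruction::GetArgs().
static uint32_t FilledNewArraySrcReg(bool is_range, uint32_t vregC,
                                     const uint32_t arg[5], size_t i) {
  return is_range ? vregC + static_cast<uint32_t>(i) : arg[i];
}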
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 4481210..768ca33 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -218,7 +218,7 @@
       shadow_frame.SetVRegLong(vregA, static_cast<int64_t>(obj->GetField64(field_offset, is_volatile)));
       break;
     case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object*>(field_offset, is_volatile));
+      shadow_frame.SetVRegReference(vregA, obj->GetFieldObject<mirror::Object>(field_offset, is_volatile));
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
@@ -529,10 +529,10 @@
     oss << PrettyMethod(shadow_frame.GetMethod())
         << StringPrintf("\n0x%x: ", dex_pc)
         << inst->DumpString(&mh.GetDexFile()) << "\n";
-    for (size_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
+    for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
       uint32_t raw_value = shadow_frame.GetVReg(i);
       Object* ref_value = shadow_frame.GetVRegReference(i);
-      oss << StringPrintf(" vreg%d=0x%08X", i, raw_value);
+      oss << StringPrintf(" vreg%u=0x%08X", i, raw_value);
       if (ref_value != NULL) {
         if (ref_value->GetClass()->IsStringClass() &&
             ref_value->AsString()->GetCharArray() != NULL) {
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 942c275..e8504b7 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -568,7 +568,7 @@
         HANDLE_PENDING_EXCEPTION();
       } else {
         uint32_t size_in_bytes = payload->element_count * payload->element_width;
-        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+        memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
         ADVANCE(3);
       }
     }
@@ -932,8 +932,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       BooleanArray* array = a->AsBooleanArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -950,8 +950,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ByteArray* array = a->AsByteArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -968,8 +968,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       CharArray* array = a->AsCharArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -986,8 +986,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ShortArray* array = a->AsShortArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1004,8 +1004,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       IntArray* array = a->AsIntArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1022,8 +1022,8 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       LongArray* array = a->AsLongArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetData()[index]);
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1040,7 +1040,7 @@
     } else {
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ObjectArray<Object>* array = a->AsObjectArray<Object>();
-      if (LIKELY(array->IsValidIndex(index))) {
+      if (LIKELY(array->CheckIsValidIndex(index))) {
         shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
         ADVANCE(2);
       } else {
@@ -1059,8 +1059,8 @@
       uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       BooleanArray* array = a->AsBooleanArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1078,8 +1078,8 @@
       int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ByteArray* array = a->AsByteArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1097,8 +1097,8 @@
       uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       CharArray* array = a->AsCharArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1116,8 +1116,8 @@
       int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       ShortArray* array = a->AsShortArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1135,8 +1135,8 @@
       int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       IntArray* array = a->AsIntArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1154,8 +1154,8 @@
       int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       LongArray* array = a->AsLongArray();
-      if (LIKELY(array->IsValidIndex(index))) {
-        array->GetData()[index] = val;
+      if (LIKELY(array->CheckIsValidIndex(index))) {
+        array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
         HANDLE_PENDING_EXCEPTION();
@@ -1173,7 +1173,7 @@
       int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
       Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
       ObjectArray<Object>* array = a->AsObjectArray<Object>();
-      if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
+      if (LIKELY(array->CheckIsValidIndex(index) && array->CheckAssignable(val))) {
         array->SetWithoutChecks(index, val);
         ADVANCE(2);
       } else {
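
Every aget/aput handler above now splits the access into CheckIsValidIndex (which sets up the pending-exception path on failure) followed by GetWithoutChecks/SetWithoutChecks, instead of indexing GetData() directly. A standalone illustration of that check-then-unchecked-access idiom; this toy PrimArray is not the ART array type:

#include <cstdint>
#include <vector>

template <typename T>
struct PrimArray {
  std::vector<T> data;

  // Bounds check only; the real version also raises ArrayIndexOutOfBoundsException.
  bool CheckIsValidIndex(int32_t index) const {
    return index >= 0 && static_cast<size_t>(index) < data.size();
  }
  T GetWithoutChecks(int32_t index) const { return data[index]; }
  void SetWithoutChecks(int32_t index, T value) { data[index] = value; }
};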
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 75041ea..e5d15b1 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -483,7 +483,7 @@
           break;
         }
         uint32_t size_in_bytes = payload->element_count * payload->element_width;
-        memcpy(array->GetRawData(payload->element_width), payload->data, size_in_bytes);
+        memcpy(array->GetRawData(payload->element_width, 0), payload->data, size_in_bytes);
         inst = inst->Next_3xx();
         break;
       }
@@ -830,8 +830,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         BooleanArray* array = a->AsBooleanArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -848,8 +848,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ByteArray* array = a->AsByteArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -866,8 +866,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         CharArray* array = a->AsCharArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -884,8 +884,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ShortArray* array = a->AsShortArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -902,8 +902,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         IntArray* array = a->AsIntArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -920,8 +920,8 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         LongArray* array = a->AsLongArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetData()[index]);
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -938,7 +938,7 @@
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ObjectArray<Object>* array = a->AsObjectArray<Object>();
-        if (LIKELY(array->IsValidIndex(index))) {
+        if (LIKELY(array->CheckIsValidIndex(index))) {
           shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
         } else {
@@ -957,8 +957,8 @@
         uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         BooleanArray* array = a->AsBooleanArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -976,8 +976,8 @@
         int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ByteArray* array = a->AsByteArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -995,8 +995,8 @@
         uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         CharArray* array = a->AsCharArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1014,8 +1014,8 @@
         int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         ShortArray* array = a->AsShortArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1033,8 +1033,8 @@
         int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         IntArray* array = a->AsIntArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1052,8 +1052,8 @@
         int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         LongArray* array = a->AsLongArray();
-        if (LIKELY(array->IsValidIndex(index))) {
-          array->GetData()[index] = val;
+        if (LIKELY(array->CheckIsValidIndex(index))) {
+          array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
           HANDLE_PENDING_EXCEPTION();
@@ -1071,7 +1071,7 @@
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
         Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
         ObjectArray<Object>* array = a->AsObjectArray<Object>();
-        if (LIKELY(array->IsValidIndex(index) && array->CheckAssignable(val))) {
+        if (LIKELY(array->CheckIsValidIndex(index) && array->CheckAssignable(val))) {
           array->SetWithoutChecks(index, val);
           inst = inst->Next_2xx();
         } else {
diff --git a/runtime/invoke_arg_array_builder.h b/runtime/invoke_arg_array_builder.h
index f615e8e..6ecce40 100644
--- a/runtime/invoke_arg_array_builder.h
+++ b/runtime/invoke_arg_array_builder.h
@@ -78,6 +78,10 @@
     num_bytes_ += 4;
   }
 
+  void Append(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    Append(StackReference<mirror::Object>::FromMirrorPtr(obj).AsVRegValue());
+  }
+
   void AppendWide(uint64_t value) {
     // For ARM and MIPS portable, align wide values to 8 bytes (ArgArray starts at offset of 4).
 #if defined(ART_USE_PORTABLE_COMPILER) && (defined(__arm__) || defined(__mips__))
@@ -93,8 +97,8 @@
   void BuildArgArray(const ScopedObjectAccess& soa, mirror::Object* receiver, va_list ap)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
-    if (receiver != NULL) {
-      Append(reinterpret_cast<int32_t>(receiver));
+    if (receiver != nullptr) {
+      Append(receiver);
     }
     for (size_t i = 1; i < shorty_len_; ++i) {
       switch (shorty_[i]) {
@@ -112,7 +116,7 @@
           break;
         }
         case 'L':
-          Append(reinterpret_cast<int32_t>(soa.Decode<mirror::Object*>(va_arg(ap, jobject))));
+          Append(soa.Decode<mirror::Object*>(va_arg(ap, jobject)));
           break;
         case 'D': {
           JValue value;
@@ -131,8 +135,8 @@
   void BuildArgArray(const ScopedObjectAccess& soa, mirror::Object* receiver, jvalue* args)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Set receiver if non-null (method is not static)
-    if (receiver != NULL) {
-      Append(reinterpret_cast<int32_t>(receiver));
+    if (receiver != nullptr) {
+      Append(receiver);
     }
     for (size_t i = 1, args_offset = 0; i < shorty_len_; ++i, ++args_offset) {
       switch (shorty_[i]) {
@@ -153,7 +157,7 @@
           Append(args[args_offset].i);
           break;
         case 'L':
-          Append(reinterpret_cast<int32_t>(soa.Decode<mirror::Object*>(args[args_offset].l)));
+          Append(soa.Decode<mirror::Object*>(args[args_offset].l));
           break;
         case 'D':
         case 'J':
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index ebc844e..334dca4 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_JDWP_JDWP_H_
 #define ART_RUNTIME_JDWP_JDWP_H_
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/mutex.h"
 #include "jdwp/jdwp_bits.h"
 #include "jdwp/jdwp_constants.h"
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4aa7f13..e372c26 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -638,13 +638,14 @@
    * go to sleep indefinitely.
    */
   while (event_thread_id_ != 0) {
-    VLOG(jdwp) << StringPrintf("event in progress (%#llx), %#llx sleeping", event_thread_id_, threadId);
+    VLOG(jdwp) << StringPrintf("event in progress (%#" PRIx64 "), %#" PRIx64 " sleeping",
+                               event_thread_id_, threadId);
     waited = true;
     event_thread_cond_.Wait(self);
   }
 
   if (waited || threadId != 0) {
-    VLOG(jdwp) << StringPrintf("event token grabbed (%#llx)", threadId);
+    VLOG(jdwp) << StringPrintf("event token grabbed (%#" PRIx64 ")", threadId);
   }
   if (threadId != 0) {
     event_thread_id_ = threadId;
@@ -664,7 +665,7 @@
   MutexLock mu(self, event_thread_lock_);
 
   CHECK_NE(event_thread_id_, 0U);
-  VLOG(jdwp) << StringPrintf("cleared event token (%#llx)", event_thread_id_);
+  VLOG(jdwp) << StringPrintf("cleared event token (%#" PRIx64 ")", event_thread_id_);
 
   event_thread_id_ = 0;
 
@@ -820,7 +821,8 @@
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
                  << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
-                 << StringPrintf(" thread=%#llx dex_pc=%#llx)", basket.threadId, pLoc->dex_pc);
+                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
+                                 basket.threadId, pLoc->dex_pc);
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
@@ -885,7 +887,7 @@
 
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#llx", basket.threadId) << ")";
+                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ")";
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
@@ -968,8 +970,8 @@
     FindMatchingEvents(EK_EXCEPTION, &basket, match_list, &match_count);
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total)"
-                 << StringPrintf(" thread=%#llx", basket.threadId)
-                 << StringPrintf(" exceptId=%#llx", exceptionId)
+                 << StringPrintf(" thread=%#" PRIx64, basket.threadId)
+                 << StringPrintf(" exceptId=%#" PRIx64, exceptionId)
                  << " caught=" << basket.caught << ")"
                  << "  throw: " << *pThrowLoc;
       if (pCatchLoc->class_id == 0) {
@@ -1036,7 +1038,7 @@
     FindMatchingEvents(EK_CLASS_PREPARE, &basket, match_list, &match_count);
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#llx", basket.threadId) << ") " << signature;
+                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ") " << signature;
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
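
The jdwp logging hunks replace %llx with the PRIx64 format macros because JDWP object and thread IDs are uint64_t, and %llx is not portable across 32-bit and 64-bit targets. A minimal standalone example of the same formatting:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  std::uint64_t thread_id = 0x1234abcdULL;
  // "%#" PRIx64 expands to the correct conversion for uint64_t on any platform.
  std::printf("thread=%#" PRIx64 "\n", thread_id);
  return 0;
}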
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 523d892..a514e69 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "atomic.h"
+#include "base/hex_dump.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/stringprintf.h"
@@ -48,7 +49,7 @@
 std::string DescribeRefTypeId(const RefTypeId& ref_type_id) {
   std::string signature("unknown");
   Dbg::GetSignature(ref_type_id, &signature);
-  return StringPrintf("%#llx (%s)", ref_type_id, signature.c_str());
+  return StringPrintf("%#" PRIx64 " (%s)", ref_type_id, signature.c_str());
 }
 
 // Helper function: write a variable-width value into the output input buffer.
@@ -99,8 +100,9 @@
 
   int32_t arg_count = request.ReadSigned32("argument count");
 
-  VLOG(jdwp) << StringPrintf("    --> thread_id=%#llx object_id=%#llx", thread_id, object_id);
-  VLOG(jdwp) << StringPrintf("        class_id=%#llx method_id=%x %s.%s", class_id,
+  VLOG(jdwp) << StringPrintf("    --> thread_id=%#" PRIx64 " object_id=%#" PRIx64,
+                             thread_id, object_id);
+  VLOG(jdwp) << StringPrintf("        class_id=%#" PRIx64 " method_id=%x %s.%s", class_id,
                              method_id, Dbg::GetClassName(class_id).c_str(),
                              Dbg::GetMethodName(method_id).c_str());
   VLOG(jdwp) << StringPrintf("        %d args:", arg_count);
@@ -111,7 +113,8 @@
     argTypes[i] = request.ReadTag();
     size_t width = Dbg::GetTagWidth(argTypes[i]);
     argValues[i] = request.ReadValue(width);
-    VLOG(jdwp) << "          " << argTypes[i] << StringPrintf("(%zd): %#llx", width, argValues[i]);
+    VLOG(jdwp) << "          " << argTypes[i] << StringPrintf("(%zd): %#" PRIx64, width,
+                                                              argValues[i]);
   }
 
   uint32_t options = request.ReadUnsigned32("InvokeOptions bit flags");
@@ -143,7 +146,8 @@
     expandBufAdd1(pReply, JT_OBJECT);
     expandBufAddObjectId(pReply, exceptObjId);
 
-    VLOG(jdwp) << "  --> returned " << resultTag << StringPrintf(" %#llx (except=%#llx)", resultValue, exceptObjId);
+    VLOG(jdwp) << "  --> returned " << resultTag
+        << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", resultValue, exceptObjId);
 
     /* show detailed debug output */
     if (resultTag == JT_STRING && exceptObjId == 0) {
@@ -526,7 +530,7 @@
   if (status != ERR_NONE) {
     return status;
   }
-  VLOG(jdwp) << StringPrintf("    --> ObjectId %#llx", class_object_id);
+  VLOG(jdwp) << StringPrintf("    --> ObjectId %#" PRIx64, class_object_id);
   expandBufAddObjectId(pReply, class_object_id);
   return ERR_NONE;
 }
@@ -936,7 +940,7 @@
   if (error != ERR_NONE) {
     return error;
   }
-  VLOG(jdwp) << StringPrintf("  Name of thread %#llx is \"%s\"", thread_id, name.c_str());
+  VLOG(jdwp) << StringPrintf("  Name of thread %#" PRIx64 " is \"%s\"", thread_id, name.c_str());
   expandBufAddUtf8String(pReply, name);
 
   return ERR_NONE;
@@ -1335,7 +1339,7 @@
         ObjectId thread_id = request.ReadThreadId();
         uint32_t size = request.ReadUnsigned32("step size");
         uint32_t depth = request.ReadUnsigned32("step depth");
-        VLOG(jdwp) << StringPrintf("    Step: thread=%#llx", thread_id)
+        VLOG(jdwp) << StringPrintf("    Step: thread=%#" PRIx64, thread_id)
                      << " size=" << JdwpStepSize(size) << " depth=" << JdwpStepDepth(depth);
 
         mod.step.threadId = thread_id;
@@ -1640,7 +1644,7 @@
   std::string result;
   result += "REQUEST: ";
   result += GetCommandName(request);
-  result += StringPrintf(" (length=%d id=0x%06x)", request.GetLength(), request.GetId());
+  result += StringPrintf(" (length=%zu id=0x%06x)", request.GetLength(), request.GetId());
   return result;
 }
 
@@ -1702,7 +1706,7 @@
   }
   if (i == arraysize(gHandlers)) {
     LOG(ERROR) << "Command not implemented: " << DescribeCommand(request);
-    LOG(ERROR) << HexDump(request.data(), request.size());
+    LOG(ERROR) << HexDump(request.data(), request.size(), false, "");
     result = ERR_NOT_IMPLEMENTED;
   }
 
@@ -1726,7 +1730,7 @@
   size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen;
   VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")";
   if (false) {
-    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen);
+    VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, "");
   }
 
   VLOG(jdwp) << "----------";
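
The HexDump calls above now pass two extra arguments: whether to show real addresses and a per-line prefix. The following is an illustrative stand-in, not the base/hex_dump.h implementation, sketching how a dumper with those two options might be written:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>

std::string SimpleHexDump(const void* data, size_t count,
                          bool show_actual_addresses, const char* prefix) {
  const uint8_t* bytes = static_cast<const uint8_t*>(data);
  std::string out;
  char piece[32];
  for (size_t i = 0; i < count; i += 16) {
    // Line header: either the real address or a zero-based offset.
    if (show_actual_addresses) {
      std::snprintf(piece, sizeof(piece), "%s%p:", prefix, static_cast<const void*>(bytes + i));
    } else {
      std::snprintf(piece, sizeof(piece), "%s%08zx:", prefix, i);
    }
    out += piece;
    for (size_t j = i; j < i + 16 && j < count; ++j) {
      std::snprintf(piece, sizeof(piece), " %02x", bytes[j]);
      out += piece;
    }
    out += '\n';
  }
  return out;
}
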
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 127ebfa..928f53d 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -156,11 +156,11 @@
   errno = 0;
   ssize_t actual = netState->WriteBufferedPacket(iov);
   if (static_cast<size_t>(actual) != expected) {
-    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet %c%c%c%c to debugger (%d of %d)",
-                                static_cast<uint8_t>(type >> 24),
-                                static_cast<uint8_t>(type >> 16),
-                                static_cast<uint8_t>(type >> 8),
-                                static_cast<uint8_t>(type),
+    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet %c%c%c%c to debugger (%zd of %zu)",
+                                static_cast<char>(type >> 24),
+                                static_cast<char>(type >> 16),
+                                static_cast<char>(type >> 8),
+                                static_cast<char>(type),
                                 actual, expected);
   }
 }
@@ -175,7 +175,7 @@
   errno = 0;
   ssize_t actual = netState->WritePacket(pReq);
   if (static_cast<size_t>(actual) != expandBufGetLength(pReq)) {
-    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet to debugger (%d of %d)",
+    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet to debugger (%zd of %zu)",
                                 actual, expandBufGetLength(pReq));
   }
 }
@@ -607,7 +607,7 @@
 std::ostream& operator<<(std::ostream& os, const JdwpLocation& rhs) {
   os << "JdwpLocation["
      << Dbg::GetClassName(rhs.class_id) << "." << Dbg::GetMethodName(rhs.method_id)
-     << "@" << StringPrintf("%#llx", rhs.dex_pc) << " " << rhs.type_tag << "]";
+     << "@" << StringPrintf("%#" PRIx64, rhs.dex_pc) << " " << rhs.type_tag << "]";
   return os;
 }
 
diff --git a/runtime/jdwp/jdwp_request.cc b/runtime/jdwp/jdwp_request.cc
index a9dd1e1..7b15d6d 100644
--- a/runtime/jdwp/jdwp_request.cc
+++ b/runtime/jdwp/jdwp_request.cc
@@ -16,6 +16,8 @@
 
 #include "jdwp/jdwp.h"
 
+#include <inttypes.h>
+
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
@@ -98,7 +100,7 @@
 
 ObjectId Request::ReadObjectId(const char* specific_kind) {
   ObjectId id = Read8BE();
-  VLOG(jdwp) << StringPrintf("    %s id %#llx", specific_kind, id);
+  VLOG(jdwp) << StringPrintf("    %s id %#" PRIx64, specific_kind, id);
   return id;
 }
 
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 369eddd..40ba3e3 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -31,7 +31,8 @@
 }
 
 ObjectRegistry::ObjectRegistry()
-    : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), next_id_(1) {
+    : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), allow_new_objects_(true),
+      condition_("object registry condition", lock_), next_id_(1) {
 }
 
 JDWP::RefTypeId ObjectRegistry::AddRefType(mirror::Class* c) {
@@ -49,58 +50,59 @@
 
   ScopedObjectAccessUnchecked soa(Thread::Current());
   MutexLock mu(soa.Self(), lock_);
-  ObjectRegistryEntry dummy;
-  dummy.jni_reference_type = JNIWeakGlobalRefType;
-  dummy.jni_reference = NULL;
-  dummy.reference_count = 0;
-  dummy.id = 0;
-  std::pair<object_iterator, bool> result = object_to_entry_.insert(std::make_pair(o, dummy));
-  ObjectRegistryEntry& entry = result.first->second;
-  if (!result.second) {
-    // This object was already in our map.
-    entry.reference_count += 1;
-    return entry.id;
+  while (UNLIKELY(!allow_new_objects_)) {
+    condition_.WaitHoldingLocks(soa.Self());
   }
+  ObjectRegistryEntry* entry;
+  auto it = object_to_entry_.find(o);
+  if (it != object_to_entry_.end()) {
+    // This object was already in our map.
+    entry = it->second;
+    ++entry->reference_count;
+  } else {
+    entry = new ObjectRegistryEntry;
+    entry->jni_reference_type = JNIWeakGlobalRefType;
+    entry->jni_reference = nullptr;
+    entry->reference_count = 0;
+    entry->id = 0;
+    object_to_entry_.insert(std::make_pair(o, entry));
 
-  // This object isn't in the registry yet, so add it.
-  JNIEnv* env = soa.Env();
+    // This object isn't in the registry yet, so add it.
+    JNIEnv* env = soa.Env();
 
-  jobject local_reference = soa.AddLocalReference<jobject>(o);
+    jobject local_reference = soa.AddLocalReference<jobject>(o);
 
-  entry.jni_reference_type = JNIWeakGlobalRefType;
-  entry.jni_reference = env->NewWeakGlobalRef(local_reference);
-  entry.reference_count = 1;
-  entry.id = next_id_++;
+    entry->jni_reference_type = JNIWeakGlobalRefType;
+    entry->jni_reference = env->NewWeakGlobalRef(local_reference);
+    entry->reference_count = 1;
+    entry->id = next_id_++;
 
-  id_to_entry_.Put(entry.id, &entry);
+    id_to_entry_.Put(entry->id, entry);
 
-  env->DeleteLocalRef(local_reference);
-
-  return entry.id;
+    env->DeleteLocalRef(local_reference);
+  }
+  return entry->id;
 }
 
 bool ObjectRegistry::Contains(mirror::Object* o) {
-  Thread* self = Thread::Current();
-  MutexLock mu(self, lock_);
-  return (object_to_entry_.find(o) != object_to_entry_.end());
+  MutexLock mu(Thread::Current(), lock_);
+  return object_to_entry_.find(o) != object_to_entry_.end();
 }
 
 void ObjectRegistry::Clear() {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
   VLOG(jdwp) << "Object registry contained " << object_to_entry_.size() << " entries";
-
   // Delete all the JNI references.
   JNIEnv* env = self->GetJniEnv();
-  for (object_iterator it = object_to_entry_.begin(); it != object_to_entry_.end(); ++it) {
-    ObjectRegistryEntry& entry = (it->second);
+  for (const auto& pair : object_to_entry_) {
+    const ObjectRegistryEntry& entry = *pair.second;
     if (entry.jni_reference_type == JNIWeakGlobalRefType) {
       env->DeleteWeakGlobalRef(entry.jni_reference);
     } else {
       env->DeleteGlobalRef(entry.jni_reference);
     }
   }
-
   // Clear the maps.
   object_to_entry_.clear();
   id_to_entry_.clear();
@@ -109,11 +111,11 @@
 mirror::Object* ObjectRegistry::InternalGet(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   if (it == id_to_entry_.end()) {
     return kInvalidObject;
   }
-  ObjectRegistryEntry& entry = *(it->second);
+  ObjectRegistryEntry& entry = *it->second;
   return self->DecodeJObject(entry.jni_reference);
 }
 
@@ -123,26 +125,26 @@
   }
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end()) << id;
-  ObjectRegistryEntry& entry = *(it->second);
+  ObjectRegistryEntry& entry = *it->second;
   return entry.jni_reference;
 }
 
 void ObjectRegistry::DisableCollection(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end());
-  Promote(*(it->second));
+  Promote(*it->second);
 }
 
 void ObjectRegistry::EnableCollection(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end());
-  Demote(*(it->second));
+  Demote(*it->second);
 }
 
 void ObjectRegistry::Demote(ObjectRegistryEntry& entry) {
@@ -170,10 +172,9 @@
 bool ObjectRegistry::IsCollected(JDWP::ObjectId id) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   CHECK(it != id_to_entry_.end());
-
-  ObjectRegistryEntry& entry = *(it->second);
+  ObjectRegistryEntry& entry = *it->second;
   if (entry.jni_reference_type == JNIWeakGlobalRefType) {
     JNIEnv* env = self->GetJniEnv();
     return env->IsSameObject(entry.jni_reference, NULL);  // Has the jweak been collected?
@@ -185,24 +186,55 @@
 void ObjectRegistry::DisposeObject(JDWP::ObjectId id, uint32_t reference_count) {
   Thread* self = Thread::Current();
   MutexLock mu(self, lock_);
-  id_iterator it = id_to_entry_.find(id);
+  auto it = id_to_entry_.find(id);
   if (it == id_to_entry_.end()) {
     return;
   }
-
-  ObjectRegistryEntry& entry = *(it->second);
-  entry.reference_count -= reference_count;
-  if (entry.reference_count <= 0) {
+  ObjectRegistryEntry* entry = it->second;
+  entry->reference_count -= reference_count;
+  if (entry->reference_count <= 0) {
     JNIEnv* env = self->GetJniEnv();
-    mirror::Object* object = self->DecodeJObject(entry.jni_reference);
-    if (entry.jni_reference_type == JNIWeakGlobalRefType) {
-      env->DeleteWeakGlobalRef(entry.jni_reference);
+    mirror::Object* object = self->DecodeJObject(entry->jni_reference);
+    if (entry->jni_reference_type == JNIWeakGlobalRefType) {
+      env->DeleteWeakGlobalRef(entry->jni_reference);
     } else {
-      env->DeleteGlobalRef(entry.jni_reference);
+      env->DeleteGlobalRef(entry->jni_reference);
     }
     object_to_entry_.erase(object);
     id_to_entry_.erase(id);
+    delete entry;
   }
 }
 
+void ObjectRegistry::UpdateObjectPointers(RootVisitor visitor, void* arg) {
+  MutexLock mu(Thread::Current(), lock_);
+  if (object_to_entry_.empty()) {
+    return;
+  }
+  std::map<mirror::Object*, ObjectRegistryEntry*> new_object_to_entry;
+  for (auto& pair : object_to_entry_) {
+    mirror::Object* new_obj;
+    if (pair.first != nullptr) {
+      new_obj = visitor(pair.first, arg);
+      if (new_obj != nullptr) {
+        new_object_to_entry.insert(std::make_pair(new_obj, pair.second));
+      }
+    }
+  }
+  object_to_entry_ = new_object_to_entry;
+}
+
+void ObjectRegistry::AllowNewObjects() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, lock_);
+  allow_new_objects_ = true;
+  condition_.Broadcast(self);
+}
+
+void ObjectRegistry::DisallowNewObjects() {
+  Thread* self = Thread::Current();
+  MutexLock mu(self, lock_);
+  allow_new_objects_ = false;
+}
+
 }  // namespace art
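
The new allow/disallow logic makes object registration block while the moving GC is sweeping and updating the registry. A minimal sketch of the same gate using standard <mutex>/<condition_variable> instead of ART's Mutex/ConditionVariable (RegistrationGate and its members are hypothetical names):

#include <condition_variable>
#include <mutex>

class RegistrationGate {
 public:
  void Disallow() {  // called before the GC walks the table
    std::lock_guard<std::mutex> lg(lock_);
    allow_new_ = false;
  }
  void Allow() {  // called once pointers have been updated
    std::lock_guard<std::mutex> lg(lock_);
    allow_new_ = true;
    cond_.notify_all();
  }
  // Blocks until registrations are allowed again, then runs the registration body,
  // mirroring the while (!allow_new_objects_) WaitHoldingLocks(...) loop above.
  template <typename Fn>
  void RegisterWhenAllowed(Fn&& body) {
    std::unique_lock<std::mutex> lk(lock_);
    cond_.wait(lk, [this] { return allow_new_; });
    body();
  }

 private:
  std::mutex lock_;
  std::condition_variable cond_;
  bool allow_new_ = true;
};
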
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 7f162ca..0190575 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -26,6 +26,7 @@
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "root_visitor.h"
 #include "safe_map.h"
 
 namespace art {
@@ -83,6 +84,15 @@
   // Avoid using this and use standard Get when possible.
   jobject GetJObject(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Visit the registered objects; they are treated as system weaks.
+  void UpdateObjectPointers(RootVisitor visitor, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // We have allow / disallow functionality because we use the system weak sweeping logic to
+  // update moved objects inside the object_to_entry_ map.
+  void AllowNewObjects() LOCKS_EXCLUDED(lock_);
+  void DisallowNewObjects() LOCKS_EXCLUDED(lock_);
+
  private:
   JDWP::ObjectId InternalAdd(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::Object* InternalGet(JDWP::ObjectId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -90,11 +100,10 @@
   void Promote(ObjectRegistryEntry& entry) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, lock_);
 
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  bool allow_new_objects_ GUARDED_BY(lock_);
+  ConditionVariable condition_ GUARDED_BY(lock_);
 
-  typedef std::map<mirror::Object*, ObjectRegistryEntry>::iterator object_iterator;
-  std::map<mirror::Object*, ObjectRegistryEntry> object_to_entry_ GUARDED_BY(lock_);
-
-  typedef SafeMap<JDWP::ObjectId, ObjectRegistryEntry*>::iterator id_iterator;
+  std::map<mirror::Object*, ObjectRegistryEntry*> object_to_entry_ GUARDED_BY(lock_);
   SafeMap<JDWP::ObjectId, ObjectRegistryEntry*> id_to_entry_ GUARDED_BY(lock_);
 
   size_t next_id_ GUARDED_BY(lock_);
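
UpdateObjectPointers() has to rebuild object_to_entry_ because its keys are raw object pointers that a moving collector may have relocated. A stand-alone sketch of that rebuild with stand-in Obj/Entry types and a visitor that, as in the code above, returns the moved pointer or nullptr for a dead object:

#include <map>
#include <utility>

struct Obj {};    // stand-in for mirror::Object
struct Entry {};  // stand-in for ObjectRegistryEntry

// Matches the shape used above: returns the object's new location, or nullptr if collected.
using MoveVisitor = Obj* (*)(Obj* old_obj, void* arg);

void UpdatePointers(std::map<Obj*, Entry*>& table, MoveVisitor visitor, void* arg) {
  if (table.empty()) {
    return;
  }
  std::map<Obj*, Entry*> updated;
  for (auto& pair : table) {
    if (pair.first != nullptr) {
      Obj* moved = visitor(pair.first, arg);  // new address after the moving GC
      if (moved != nullptr) {                 // drop entries whose object was freed
        updated.emplace(moved, pair.second);
      }
    }
  }
  table = std::move(updated);
}
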
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 81cc94b..deea5f6 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -22,7 +22,7 @@
 #include <utility>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
@@ -523,6 +523,12 @@
     return dlsym(handle_, symbol_name.c_str());
   }
 
+  void VisitRoots(RootVisitor* visitor, void* arg) {
+    if (class_loader_ != nullptr) {
+      class_loader_ = visitor(class_loader_, arg);
+    }
+  }
+
  private:
   enum JNI_OnLoadState {
     kPending,
@@ -584,7 +590,7 @@
   }
 
   // See section 11.3 "Linking Native Methods" of the JNI spec.
-  void* FindNativeMethod(const ArtMethod* m, std::string& detail)
+  void* FindNativeMethod(ArtMethod* m, std::string& detail)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string jni_short_name(JniShortName(m));
     std::string jni_long_name(JniLongName(m));
@@ -613,6 +619,12 @@
     return NULL;
   }
 
+  void VisitRoots(RootVisitor* visitor, void* arg) {
+    for (auto& lib_pair : libraries_) {
+      lib_pair.second->VisitRoots(visitor, arg);
+    }
+  }
+
  private:
   SafeMap<std::string, SharedLibrary*> libraries_;
 };
@@ -2195,7 +2207,7 @@
     Array* array = soa.Decode<Array*>(java_array);
     gc::Heap* heap = Runtime::Current()->GetHeap();
     if (heap->IsMovableObject(array)) {
-      heap->IncrementDisableGC(soa.Self());
+      heap->IncrementDisableMovingGC(soa.Self());
       // Re-decode in case the object moved since IncrementDisableGC waits for GC to complete.
       array = soa.Decode<Array*>(java_array);
     }
@@ -2203,8 +2215,7 @@
     if (is_copy != nullptr) {
       *is_copy = JNI_FALSE;
     }
-    void* address = array->GetRawData(array->GetClass()->GetComponentSize());;
-    return address;
+    return array->GetRawData(array->GetClass()->GetComponentSize(), 0);
   }
 
   static void ReleasePrimitiveArrayCritical(JNIEnv* env, jarray array, void* elements, jint mode) {
@@ -2507,10 +2518,10 @@
 
   static jobject NewDirectByteBuffer(JNIEnv* env, void* address, jlong capacity) {
     if (capacity < 0) {
-      JniAbortF("NewDirectByteBuffer", "negative buffer capacity: %lld", capacity);
+      JniAbortF("NewDirectByteBuffer", "negative buffer capacity: %" PRId64, capacity);
     }
     if (address == NULL && capacity != 0) {
-      JniAbortF("NewDirectByteBuffer", "non-zero capacity for NULL pointer: %lld", capacity);
+      JniAbortF("NewDirectByteBuffer", "non-zero capacity for NULL pointer: %" PRId64, capacity);
     }
 
     // At the moment, the Java side is limited to 32 bits.
@@ -2633,15 +2644,12 @@
     ScopedObjectAccess soa(env);
     Array* array = soa.Decode<Array*>(java_array);
     size_t component_size = array->GetClass()->GetComponentSize();
-    void* array_data = array->GetRawData(component_size);
+    void* array_data = array->GetRawData(component_size, 0);
     gc::Heap* heap = Runtime::Current()->GetHeap();
     bool is_copy = array_data != reinterpret_cast<void*>(elements);
     size_t bytes = array->GetLength() * component_size;
     VLOG(heap) << "Release primitive array " << env << " array_data " << array_data
                << " elements " << reinterpret_cast<void*>(elements);
-    if (!is_copy && heap->IsMovableObject(array)) {
-      heap->DecrementDisableGC(soa.Self());
-    }
     // Don't need to copy if we had a direct pointer.
     if (mode != JNI_ABORT && is_copy) {
       memcpy(array_data, elements, bytes);
@@ -2649,10 +2657,12 @@
     if (mode != JNI_COMMIT) {
       if (is_copy) {
         delete[] reinterpret_cast<uint64_t*>(elements);
+      } else if (heap->IsMovableObject(array)) {
+        // A non-copy of a movable object means that we must have disabled the moving GC.
+        heap->DecrementDisableMovingGC(soa.Self());
       }
+      UnpinPrimitiveArray(soa, array);
     }
-    // TODO: Do we always unpin primitive array?
-    UnpinPrimitiveArray(soa, array);
   }
 
   template <typename JavaArrayT, typename JavaT, typename ArrayT>
@@ -2934,10 +2944,6 @@
   if (vm->check_jni) {
     SetCheckJniEnabled(true);
   }
-  // The JniEnv local reference values must be at a consistent offset or else cross-compilation
-  // errors will ensue.
-  CHECK_EQ(JNIEnvExt::LocalRefCookieOffset().Int32Value(), 12);
-  CHECK_EQ(JNIEnvExt::SegmentStateOffset().Int32Value(), 16);
 }
 
 JNIEnvExt::~JNIEnvExt() {
@@ -3207,7 +3213,8 @@
   }
 }
 
-bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_loader,
+bool JavaVMExt::LoadNativeLibrary(const std::string& path,
+                                  const SirtRef<ClassLoader>& class_loader,
                                   std::string* detail) {
   detail->clear();
 
@@ -3223,18 +3230,18 @@
     library = libraries->Get(path);
   }
   if (library != NULL) {
-    if (library->GetClassLoader() != class_loader) {
+    if (library->GetClassLoader() != class_loader.get()) {
       // The library will be associated with class_loader. The JNI
       // spec says we can't load the same library into more than one
       // class loader.
       StringAppendF(detail, "Shared library \"%s\" already opened by "
           "ClassLoader %p; can't open in ClassLoader %p",
-          path.c_str(), library->GetClassLoader(), class_loader);
+          path.c_str(), library->GetClassLoader(), class_loader.get());
       LOG(WARNING) << detail;
       return false;
     }
     VLOG(jni) << "[Shared library \"" << path << "\" already loaded in "
-              << "ClassLoader " << class_loader << "]";
+              << "ClassLoader " << class_loader.get() << "]";
     if (!library->CheckOnLoadResult()) {
       StringAppendF(detail, "JNI_OnLoad failed on a previous attempt "
           "to load \"%s\"", path.c_str());
@@ -3275,18 +3282,19 @@
     MutexLock mu(self, libraries_lock);
     library = libraries->Get(path);
     if (library == NULL) {  // We won race to get libraries_lock
-      library = new SharedLibrary(path, handle, class_loader);
+      library = new SharedLibrary(path, handle, class_loader.get());
       libraries->Put(path, library);
       created_library = true;
     }
   }
   if (!created_library) {
     LOG(INFO) << "WOW: we lost a race to add shared library: "
-        << "\"" << path << "\" ClassLoader=" << class_loader;
+        << "\"" << path << "\" ClassLoader=" << class_loader.get();
     return library->CheckOnLoadResult();
   }
 
-  VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]";
+  VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader.get()
+      << "]";
 
   bool was_successful = false;
   void* sym = dlsym(handle, "JNI_OnLoad");
@@ -3301,7 +3309,7 @@
     typedef int (*JNI_OnLoadFn)(JavaVM*, void*);
     JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);
     SirtRef<ClassLoader> old_class_loader(self, self->GetClassLoaderOverride());
-    self->SetClassLoaderOverride(class_loader);
+    self->SetClassLoaderOverride(class_loader.get());
 
     int version = 0;
     {
@@ -3387,6 +3395,11 @@
     MutexLock mu(self, pins_lock);
     pin_table.VisitRoots(visitor, arg);
   }
+  {
+    MutexLock mu(self, libraries_lock);
+    // Libraries contains shared libraries which hold a pointer to a class loader.
+    libraries->VisitRoots(visitor, arg);
+  }
   // The weak_globals table is visited by the GC itself (because it mutates the table).
 }
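
The primitive-array-critical changes pair the IncrementDisableMovingGC in Get with a DecrementDisableMovingGC that now happens only on the final, non-copy release. A condensed sketch of that pairing with hypothetical Heap/Array types (copy and write-back handling omitted):

#include <cstddef>

struct Heap {
  bool IsMovableObject(const void*) const { return true; }
  void IncrementDisableMovingGC() {}
  void DecrementDisableMovingGC() {}
};

struct Array {
  void* data = nullptr;
  size_t bytes = 0;
};

// Disable the moving GC only when the array can actually move, then hand out raw data.
void* GetCritical(Heap& heap, Array& a) {
  if (heap.IsMovableObject(&a)) {
    heap.IncrementDisableMovingGC();
  }
  return a.data;
}

// Re-enable the moving GC only on the final, non-copy release (mode != JNI_COMMIT),
// matching the Increment in GetCritical.
void ReleaseCritical(Heap& heap, Array& a, int mode, bool is_copy) {
  const int kJniCommit = 1;  // stand-in for JNI_COMMIT
  if (mode != kJniCommit && !is_copy && heap.IsMovableObject(&a)) {
    heap.DecrementDisableMovingGC();
  }
}
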
 
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 96f7ae0..cd3c5cb 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -25,6 +25,7 @@
 #include "reference_table.h"
 #include "root_visitor.h"
 #include "runtime.h"
+#include "sirt_ref.h"
 
 #include <iosfwd>
 #include <string>
@@ -72,7 +73,7 @@
    * Returns 'true' on success. On failure, sets 'detail' to a
    * human-readable description of the error.
    */
-  bool LoadNativeLibrary(const std::string& path, mirror::ClassLoader* class_loader,
+  bool LoadNativeLibrary(const std::string& path, const SirtRef<mirror::ClassLoader>& class_loader,
                          std::string* detail)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 9b278f8..fed734e 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -131,7 +131,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
     }
 
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
@@ -148,11 +148,11 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
+    arg_array.Append(0U);
     result.SetB(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'B');
     EXPECT_EQ(0, result.GetB());
@@ -184,11 +184,11 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -221,7 +221,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -264,12 +264,12 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -310,13 +310,13 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -361,14 +361,14 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -417,15 +417,15 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
-    arg_array.Append(0);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
+    arg_array.Append(0U);
     result.SetI(-1.0);
     method->Invoke(Thread::Current(), arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'I');
     EXPECT_EQ(0, result.GetI());
@@ -480,7 +480,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -547,7 +547,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -603,7 +603,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -668,7 +668,7 @@
     JValue result;
 
     if (!is_static) {
-      arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+      arg_array.Append(receiver);
       args++;
     }
 
@@ -1492,8 +1492,8 @@
   } while (false)
 
 
-#if !defined(ART_USE_PORTABLE_COMPILER)
 TEST_F(JniInternalTest, GetPrimitiveField_SetPrimitiveField) {
+  TEST_DISABLED_FOR_PORTABLE();
   Thread::Current()->TransitionFromSuspendedToRunnable();
   LoadDex("AllFields");
   bool started = runtime_->Start();
@@ -1524,6 +1524,7 @@
 }
 
 TEST_F(JniInternalTest, GetObjectField_SetObjectField) {
+  TEST_DISABLED_FOR_PORTABLE();
   Thread::Current()->TransitionFromSuspendedToRunnable();
   LoadDex("AllFields");
   runtime_->Start();
@@ -1553,7 +1554,6 @@
   env_->SetObjectField(o, i_fid, s2);
   ASSERT_TRUE(env_->IsSameObject(s2, env_->GetObjectField(o, i_fid)));
 }
-#endif
 
 TEST_F(JniInternalTest, NewLocalRef_NULL) {
   EXPECT_TRUE(env_->NewLocalRef(NULL) == NULL);
@@ -1756,7 +1756,7 @@
   ASSERT_TRUE(method != NULL);
 
   ArgArray arg_array(NULL, 0);
-  arg_array.Append(0);
+  arg_array.Append(0U);
   JValue result;
 
   // Start runtime.
diff --git a/runtime/lock_word-inl.h b/runtime/lock_word-inl.h
index aea10c2..414b3bb 100644
--- a/runtime/lock_word-inl.h
+++ b/runtime/lock_word-inl.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_LOCK_WORD_INL_H_
 
 #include "lock_word.h"
+#include "monitor_pool.h"
 
 namespace art {
 
@@ -33,7 +34,8 @@
 
 inline Monitor* LockWord::FatLockMonitor() const {
   DCHECK_EQ(GetState(), kFatLocked);
-  return reinterpret_cast<Monitor*>(value_ << kStateSize);
+  MonitorId mon_id = static_cast<MonitorId>(value_ & ~(kStateMask << kStateShift));
+  return MonitorPool::MonitorFromMonitorId(mon_id);
 }
 
 inline size_t LockWord::ForwardingAddress() const {
@@ -46,7 +48,7 @@
 }
 
 inline LockWord::LockWord(Monitor* mon)
-    : value_((reinterpret_cast<uint32_t>(mon) >> kStateSize) | (kStateFat << kStateShift)) {
+    : value_(mon->GetMonitorId() | (kStateFat << kStateShift)) {
   DCHECK_EQ(FatLockMonitor(), mon);
 }
 
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index d24a3bb..ab86eaa 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -42,7 +42,7 @@
  *
  *  |33|222222222211111111110000000000|
  *  |10|987654321098765432109876543210|
- *  |01| Monitor* >> kStateSize       |
+ *  |01| MonitorId                    |
  *
  * When the lock word is in hash state and its bits are formatted as follows:
  *
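
With this change a fat lock word stores a MonitorId in its low 30 bits rather than a shifted Monitor*, which keeps the word at 32 bits even where pointers are 64-bit. A self-contained illustration of the encode/decode, restating the constants implied by the bit diagram above (state in bits 31..30, fat state 01) purely for the example:

#include <cassert>
#include <cstdint>

constexpr uint32_t kStateShift = 30;
constexpr uint32_t kStateMask  = 0x3;
constexpr uint32_t kStateFat   = 0x1;  // bits 31..30 == 01 for a fat lock

uint32_t EncodeFatLock(uint32_t monitor_id) {
  assert(monitor_id < (1u << kStateShift));  // id must fit in 30 bits
  return monitor_id | (kStateFat << kStateShift);
}

uint32_t DecodeMonitorId(uint32_t lock_word) {
  assert(((lock_word >> kStateShift) & kStateMask) == kStateFat);
  return lock_word & ~(kStateMask << kStateShift);  // strip the two state bits
}

int main() {
  uint32_t word = EncodeFatLock(12345);
  assert(DecodeMonitorId(word) == 12345);
  return 0;
}
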
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 97b34ef..393ea68 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -16,8 +16,10 @@
 
 #include "mem_map.h"
 
+#include <inttypes.h>
 #include <backtrace/BacktraceMap.h>
 
+#include "UniquePtr.h"
 #include "base/stringprintf.h"
 #include "ScopedFd.h"
 #include "utils.h"
@@ -54,20 +56,21 @@
   uintptr_t base = reinterpret_cast<uintptr_t>(addr);
   uintptr_t limit = base + byte_count;
 
-  BacktraceMap map(getpid());
-  if (!map.Build()) {
+  UniquePtr<BacktraceMap> map(BacktraceMap::Create(getpid()));
+  if (!map->Build()) {
     PLOG(WARNING) << "Failed to build process map";
     return;
   }
-  for (BacktraceMap::const_iterator it = map.begin(); it != map.end(); ++it) {
+  for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     CHECK(!(base >= it->start && base < it->end)     // start of new within old
         && !(limit > it->start && limit < it->end)   // end of new within old
         && !(base <= it->start && limit > it->end))  // start/end of new includes all of old
-        << StringPrintf("Requested region 0x%08x-0x%08x overlaps with existing map 0x%08x-0x%08x (%s)\n",
+        << StringPrintf("Requested region 0x%08" PRIxPTR "-0x%08" PRIxPTR " overlaps with "
+                        "existing map 0x%08" PRIxPTR "-0x%08" PRIxPTR " (%s)\n",
                         base, limit,
-                        static_cast<uint32_t>(it->start), static_cast<uint32_t>(it->end),
+                        static_cast<uintptr_t>(it->start), static_cast<uintptr_t>(it->end),
                         it->name.c_str())
-        << std::make_pair(it, map.end());
+        << std::make_pair(it, map->end());
   }
 }
 
@@ -76,7 +79,7 @@
 #endif
 
 MemMap* MemMap::MapAnonymous(const char* name, byte* addr, size_t byte_count, int prot,
-                             std::string* error_msg) {
+                             bool low_4gb, std::string* error_msg) {
   if (byte_count == 0) {
     return new MemMap(name, NULL, 0, NULL, 0, prot);
   }
@@ -98,7 +101,11 @@
   ScopedFd fd(-1);
   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
 #endif
-
+#ifdef __LP64__
+  if (low_4gb) {
+    flags |= MAP_32BIT;
+  }
+#endif
   byte* actual = reinterpret_cast<byte*>(mmap(addr, page_aligned_byte_count, prot, flags, fd.get(), 0));
   if (actual == MAP_FAILED) {
     std::string maps;
@@ -117,7 +124,7 @@
   CHECK_NE(0, prot);
   CHECK_NE(0, flags & (MAP_SHARED | MAP_PRIVATE));
   if (byte_count == 0) {
-    return new MemMap("file", NULL, 0, NULL, 0, prot);
+    return new MemMap(filename, NULL, 0, NULL, 0, prot);
   }
   // Adjust 'offset' to be page-aligned as required by mmap.
   int page_offset = start % kPageSize;
@@ -144,13 +151,13 @@
     std::string strerr(strerror(errno));
     std::string maps;
     ReadFileToString("/proc/self/maps", &maps);
-    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %lld) of file '%s' failed: %s\n%s",
+    *error_msg = StringPrintf("mmap(%p, %zd, %x, %x, %d, %" PRId64 ") of file '%s' failed: %s\n%s",
                               page_aligned_addr, page_aligned_byte_count, prot, flags, fd,
                               static_cast<int64_t>(page_aligned_offset), filename, strerr.c_str(),
                               maps.c_str());
     return NULL;
   }
-  return new MemMap("file", actual + page_offset, byte_count, actual, page_aligned_byte_count,
+  return new MemMap(filename, actual + page_offset, byte_count, actual, page_aligned_byte_count,
                     prot);
 }
 
@@ -264,4 +271,11 @@
   return false;
 }
 
+std::ostream& operator<<(std::ostream& os, const MemMap& mem_map) {
+  os << StringPrintf("[MemMap: %s prot=%x %p-%p]",
+                     mem_map.GetName().c_str(), mem_map.GetProtect(),
+                     mem_map.BaseBegin(), mem_map.BaseEnd());
+  return os;
+}
+
 }  // namespace art
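
MapAnonymous() gains a low_4gb flag that, on 64-bit builds, adds MAP_32BIT so the mapping lands below 4 GiB. A stand-alone sketch of just that flag handling (MAP_32BIT is Linux/x86-64 specific, so it is guarded here; this is not the full MemMap logic):

#include <cstddef>
#include <sys/mman.h>

void* MapAnonymousLow(size_t byte_count, bool low_4gb) {
  int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__LP64__) && defined(MAP_32BIT)
  if (low_4gb) {
    flags |= MAP_32BIT;  // ask the kernel for an address in the low 4 GiB
  }
#else
  (void)low_4gb;  // no effect on 32-bit builds or where MAP_32BIT is unavailable
#endif
  void* addr = mmap(nullptr, byte_count, PROT_READ | PROT_WRITE, flags, -1, 0);
  return (addr == MAP_FAILED) ? nullptr : addr;
}
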
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 2c65833..e39c10e 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -39,7 +39,7 @@
   //
   // On success, returns returns a MemMap instance.  On failure, returns a NULL;
   static MemMap* MapAnonymous(const char* ashmem_name, byte* addr, size_t byte_count, int prot,
-                              std::string* error_msg);
+                              bool low_4gb, std::string* error_msg);
 
   // Map part of a file, taking care of non-page aligned offsets.  The
   // "start" offset is absolute, not relative.
@@ -62,6 +62,10 @@
   // Releases the memory mapping
   ~MemMap();
 
+  const std::string& GetName() const {
+    return name_;
+  }
+
   bool Protect(int prot);
 
   int GetProtect() const {
@@ -80,6 +84,18 @@
     return Begin() + Size();
   }
 
+  void* BaseBegin() const {
+    return base_begin_;
+  }
+
+  size_t BaseSize() const {
+    return base_size_;
+  }
+
+  void* BaseEnd() const {
+    return reinterpret_cast<byte*>(BaseBegin()) + BaseSize();
+  }
+
   bool HasAddress(const void* addr) const {
     return Begin() <= addr && addr < End();
   }
@@ -102,6 +118,7 @@
 
   friend class MemMapTest;  // To allow access to base_begin_ and base_size_.
 };
+std::ostream& operator<<(std::ostream& os, const MemMap& mem_map);
 
 }  // namespace art
 
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index cf2c9d0..6cb59b4 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -23,76 +23,111 @@
 
 class MemMapTest : public testing::Test {
  public:
-  byte* BaseBegin(MemMap* mem_map) {
+  static byte* BaseBegin(MemMap* mem_map) {
     return reinterpret_cast<byte*>(mem_map->base_begin_);
   }
-  size_t BaseSize(MemMap* mem_map) {
+  static size_t BaseSize(MemMap* mem_map) {
     return mem_map->base_size_;
   }
+
+  static void RemapAtEndTest(bool low_4gb) {
+    std::string error_msg;
+    // Cast the page size to size_t.
+    const size_t page_size = static_cast<size_t>(kPageSize);
+    // Map a two-page memory region.
+    MemMap* m0 = MemMap::MapAnonymous("MemMapTest_RemapAtEndTest_map0",
+                                      nullptr,
+                                      2 * page_size,
+                                      PROT_READ | PROT_WRITE,
+                                      low_4gb,
+                                      &error_msg);
+    // Check its state and write to it.
+    byte* base0 = m0->Begin();
+    ASSERT_TRUE(base0 != nullptr) << error_msg;
+    size_t size0 = m0->Size();
+    EXPECT_EQ(m0->Size(), 2 * page_size);
+    EXPECT_EQ(BaseBegin(m0), base0);
+    EXPECT_EQ(BaseSize(m0), size0);
+    memset(base0, 42, 2 * page_size);
+    // Remap the latter half into a second MemMap.
+    MemMap* m1 = m0->RemapAtEnd(base0 + page_size,
+                                "MemMapTest_RemapAtEndTest_map1",
+                                PROT_READ | PROT_WRITE,
+                                &error_msg);
+    // Check the states of the two maps.
+    EXPECT_EQ(m0->Begin(), base0) << error_msg;
+    EXPECT_EQ(m0->Size(), page_size);
+    EXPECT_EQ(BaseBegin(m0), base0);
+    EXPECT_EQ(BaseSize(m0), page_size);
+    byte* base1 = m1->Begin();
+    size_t size1 = m1->Size();
+    EXPECT_EQ(base1, base0 + page_size);
+    EXPECT_EQ(size1, page_size);
+    EXPECT_EQ(BaseBegin(m1), base1);
+    EXPECT_EQ(BaseSize(m1), size1);
+    // Write to the second region.
+    memset(base1, 43, page_size);
+    // Check the contents of the two regions.
+    for (size_t i = 0; i < page_size; ++i) {
+      EXPECT_EQ(base0[i], 42);
+    }
+    for (size_t i = 0; i < page_size; ++i) {
+      EXPECT_EQ(base1[i], 43);
+    }
+    // Unmap the first region.
+    delete m0;
+    // Make sure the second region is still accessible after the first
+    // region is unmapped.
+    for (size_t i = 0; i < page_size; ++i) {
+      EXPECT_EQ(base1[i], 43);
+    }
+    delete m1;
+  }
 };
 
 TEST_F(MemMapTest, MapAnonymousEmpty) {
   std::string error_msg;
   UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
-                                             NULL,
+                                             nullptr,
                                              0,
                                              PROT_READ,
+                                             false,
                                              &error_msg));
-  ASSERT_TRUE(map.get() != NULL) << error_msg;
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  map.reset(MemMap::MapAnonymous("MapAnonymousEmpty",
+                                 nullptr,
+                                 kPageSize,
+                                 PROT_READ | PROT_WRITE,
+                                 false,
+                                 &error_msg));
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
   ASSERT_TRUE(error_msg.empty());
 }
 
-TEST_F(MemMapTest, RemapAtEnd) {
+#ifdef __LP64__
+TEST_F(MemMapTest, MapAnonymousEmpty32bit) {
   std::string error_msg;
-  // Cast the page size to size_t.
-  const size_t page_size = static_cast<size_t>(kPageSize);
-  // Map a two-page memory region.
-  MemMap* m0 = MemMap::MapAnonymous("MemMapTest_RemapAtEndTest_map0",
-                                    NULL,
-                                    2 * page_size,
-                                    PROT_READ | PROT_WRITE,
-                                    &error_msg);
-  // Check its state and write to it.
-  byte* base0 = m0->Begin();
-  ASSERT_TRUE(base0 != NULL) << error_msg;
-  size_t size0 = m0->Size();
-  EXPECT_EQ(m0->Size(), 2 * page_size);
-  EXPECT_EQ(BaseBegin(m0), base0);
-  EXPECT_EQ(BaseSize(m0), size0);
-  memset(base0, 42, 2 * page_size);
-  // Remap the latter half into a second MemMap.
-  MemMap* m1 = m0->RemapAtEnd(base0 + page_size,
-                              "MemMapTest_RemapAtEndTest_map1",
-                              PROT_READ | PROT_WRITE,
-                              &error_msg);
-  // Check the states of the two maps.
-  EXPECT_EQ(m0->Begin(), base0) << error_msg;
-  EXPECT_EQ(m0->Size(), page_size);
-  EXPECT_EQ(BaseBegin(m0), base0);
-  EXPECT_EQ(BaseSize(m0), page_size);
-  byte* base1 = m1->Begin();
-  size_t size1 = m1->Size();
-  EXPECT_EQ(base1, base0 + page_size);
-  EXPECT_EQ(size1, page_size);
-  EXPECT_EQ(BaseBegin(m1), base1);
-  EXPECT_EQ(BaseSize(m1), size1);
-  // Write to the second region.
-  memset(base1, 43, page_size);
-  // Check the contents of the two regions.
-  for (size_t i = 0; i < page_size; ++i) {
-    EXPECT_EQ(base0[i], 42);
-  }
-  for (size_t i = 0; i < page_size; ++i) {
-    EXPECT_EQ(base1[i], 43);
-  }
-  // Unmap the first region.
-  delete m0;
-  // Make sure the second region is still accessible after the first
-  // region is unmapped.
-  for (size_t i = 0; i < page_size; ++i) {
-    EXPECT_EQ(base1[i], 43);
-  }
-  delete m1;
+  UniquePtr<MemMap> map(MemMap::MapAnonymous("MapAnonymousEmpty",
+                                             nullptr,
+                                             kPageSize,
+                                             PROT_READ | PROT_WRITE,
+                                             true,
+                                             &error_msg));
+  ASSERT_TRUE(map.get() != nullptr) << error_msg;
+  ASSERT_TRUE(error_msg.empty());
+  ASSERT_LT(reinterpret_cast<uintptr_t>(BaseBegin(map.get())), 1ULL << 32);
 }
+#endif
+
+TEST_F(MemMapTest, RemapAtEnd) {
+  RemapAtEndTest(false);
+}
+
+#ifdef __LP64__
+TEST_F(MemMapTest, RemapAtEnd32bit) {
+  RemapAtEndTest(true);
+}
+#endif
 
 }  // namespace art
diff --git a/runtime/method_reference.h b/runtime/method_reference.h
index 1ff4ea0..8e46d7e 100644
--- a/runtime/method_reference.h
+++ b/runtime/method_reference.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_METHOD_REFERENCE_H_
 #define ART_RUNTIME_METHOD_REFERENCE_H_
 
+#include <stdint.h>
+
 namespace art {
 
 class DexFile;
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index bd81bd5..b2725e5 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -27,7 +27,7 @@
 namespace art {
 namespace mirror {
 
-inline size_t Array::SizeOf() const {
+inline size_t Array::SizeOf() {
   // This is safe from overflow because the array was already allocated, so we know it's sane.
   size_t component_size = GetClass()->GetComponentSize();
   int32_t component_count = GetLength();
@@ -64,9 +64,10 @@
   explicit SetLengthVisitor(int32_t length) : length_(length) {
   }
 
-  void operator()(mirror::Object* obj) const {
-    mirror::Array* array = obj->AsArray();
-    DCHECK(array->IsArrayInstance());
+  void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Avoid AsArray as object is not yet in live bitmap or allocation stack.
+    Array* array = down_cast<Array*>(obj);
+    // DCHECK(array->IsArrayInstance());
     array->SetLength(length_);
   }
 
@@ -116,6 +117,114 @@
   }
 }
 
+// Similar to memmove, except elements are aligned appropriately for T and count is in T-sized
+// units; copies are guaranteed not to tear when T is smaller than 64 bits.
+template<typename T>
+static inline void ArrayBackwardCopy(T* d, const T* s, int32_t count) {
+  d += count;
+  s += count;
+  for (int32_t i = 0; i < count; ++i) {
+    d--;
+    s--;
+    *d = *s;
+  }
+}
+
+template<class T>
+void PrimitiveArray<T>::Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+                                int32_t count) {
+  if (UNLIKELY(count == 0)) {
+    return;
+  }
+  DCHECK_GE(dst_pos, 0);
+  DCHECK_GE(src_pos, 0);
+  DCHECK_GT(count, 0);
+  DCHECK(src != nullptr);
+  DCHECK_LT(dst_pos, GetLength());
+  DCHECK_LE(dst_pos, GetLength() - count);
+  DCHECK_LT(src_pos, src->GetLength());
+  DCHECK_LE(src_pos, src->GetLength() - count);
+
+  // Note for non-byte copies we can't rely on standard libc functions like memcpy(3) and memmove(3)
+  // in our implementation, because they may copy byte-by-byte.
+  if (LIKELY(src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count)) {
+    // Forward copy ok.
+    Memcpy(dst_pos, src, src_pos, count);
+  } else {
+    // Backward copy necessary.
+    void* dst_raw = GetRawData(sizeof(T), dst_pos);
+    const void* src_raw = src->GetRawData(sizeof(T), src_pos);
+    if (sizeof(T) == sizeof(uint8_t)) {
+      // TUNING: use memmove here?
+      uint8_t* d = reinterpret_cast<uint8_t*>(dst_raw);
+      const uint8_t* s = reinterpret_cast<const uint8_t*>(src_raw);
+      ArrayBackwardCopy<uint8_t>(d, s, count);
+    } else if (sizeof(T) == sizeof(uint16_t)) {
+      uint16_t* d = reinterpret_cast<uint16_t*>(dst_raw);
+      const uint16_t* s = reinterpret_cast<const uint16_t*>(src_raw);
+      ArrayBackwardCopy<uint16_t>(d, s, count);
+    } else if (sizeof(T) == sizeof(uint32_t)) {
+      uint32_t* d = reinterpret_cast<uint32_t*>(dst_raw);
+      const uint32_t* s = reinterpret_cast<const uint32_t*>(src_raw);
+      ArrayBackwardCopy<uint32_t>(d, s, count);
+    } else {
+      DCHECK_EQ(sizeof(T), sizeof(uint64_t));
+      uint64_t* d = reinterpret_cast<uint64_t*>(dst_raw);
+      const uint64_t* s = reinterpret_cast<const uint64_t*>(src_raw);
+      ArrayBackwardCopy<uint64_t>(d, s, count);
+    }
+  }
+}
+
+// Similar to memcpy, except elements are aligned appropriately for T and count is in T-sized
+// units; copies are guaranteed not to tear when T is smaller than 64 bits.
+template<typename T>
+static inline void ArrayForwardCopy(T* d, const T* s, int32_t count) {
+  for (int32_t i = 0; i < count; ++i) {
+    *d = *s;
+    d++;
+    s++;
+  }
+}
+
+
+template<class T>
+void PrimitiveArray<T>::Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+                               int32_t count) {
+  if (UNLIKELY(count == 0)) {
+    return;
+  }
+  DCHECK_GE(dst_pos, 0);
+  DCHECK_GE(src_pos, 0);
+  DCHECK_GT(count, 0);
+  DCHECK(src != nullptr);
+  DCHECK_LT(dst_pos, GetLength());
+  DCHECK_LE(dst_pos, GetLength() - count);
+  DCHECK_LT(src_pos, src->GetLength());
+  DCHECK_LE(src_pos, src->GetLength() - count);
+
+  // Note for non-byte copies we can't rely on standard libc functions like memcpy(3) and memmove(3)
+  // in our implementation, because they may copy byte-by-byte.
+  void* dst_raw = GetRawData(sizeof(T), dst_pos);
+  const void* src_raw = src->GetRawData(sizeof(T), src_pos);
+  if (sizeof(T) == sizeof(uint8_t)) {
+    memcpy(dst_raw, src_raw, count);
+  } else if (sizeof(T) == sizeof(uint16_t)) {
+    uint16_t* d = reinterpret_cast<uint16_t*>(dst_raw);
+    const uint16_t* s = reinterpret_cast<const uint16_t*>(src_raw);
+    ArrayForwardCopy<uint16_t>(d, s, count);
+  } else if (sizeof(T) == sizeof(uint32_t)) {
+    uint32_t* d = reinterpret_cast<uint32_t*>(dst_raw);
+    const uint32_t* s = reinterpret_cast<const uint32_t*>(src_raw);
+    ArrayForwardCopy<uint32_t>(d, s, count);
+  } else {
+    DCHECK_EQ(sizeof(T), sizeof(uint64_t));
+    uint64_t* d = reinterpret_cast<uint64_t*>(dst_raw);
+    const uint64_t* s = reinterpret_cast<const uint64_t*>(src_raw);
+    ArrayForwardCopy<uint64_t>(d, s, count);
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
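
The Memmove() added above picks a backward, element-by-element copy when source and destination overlap and the destination starts later, since a forward copy would overwrite elements before reading them. A small stand-alone demonstration of that rule (not ART code):

#include <cassert>
#include <cstdint>

template <typename T>
void ForwardCopy(T* d, const T* s, int32_t count) {
  for (int32_t i = 0; i < count; ++i) {
    d[i] = s[i];
  }
}

template <typename T>
void BackwardCopy(T* d, const T* s, int32_t count) {
  for (int32_t i = count - 1; i >= 0; --i) {
    d[i] = s[i];
  }
}

int main() {
  int32_t a[6] = {0, 1, 2, 3, 4, 5};
  // Shift elements 0..3 up by two: destination follows the source, so copy backwards.
  BackwardCopy(&a[2], &a[0], 4);
  assert(a[2] == 0 && a[3] == 1 && a[4] == 2 && a[5] == 3);

  int32_t b[6] = {0, 1, 2, 3, 4, 5};
  // Shift elements 2..5 down by two: destination precedes the source, forward copy is safe.
  ForwardCopy(&b[0], &b[2], 4);
  assert(b[0] == 2 && b[1] == 3 && b[2] == 4 && b[3] == 5);
  return 0;
}
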
 
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index 00b88db..c23234e 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -40,23 +40,25 @@
 // piece and work our way in.
 // Recursively create an array with multiple dimensions.  Elements may be
 // Objects or primitive types.
-static Array* RecursiveCreateMultiArray(Thread* self, Class* array_class, int current_dimension,
-                                        SirtRef<mirror::IntArray>& dimensions)
+static Array* RecursiveCreateMultiArray(Thread* self,
+                                        const SirtRef<Class>& array_class, int current_dimension,
+                                        const SirtRef<mirror::IntArray>& dimensions)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   int32_t array_length = dimensions->Get(current_dimension);
-  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class, array_length));
-  if (UNLIKELY(new_array.get() == NULL)) {
+  SirtRef<Array> new_array(self, Array::Alloc<true>(self, array_class.get(), array_length));
+  if (UNLIKELY(new_array.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   if (current_dimension + 1 < dimensions->GetLength()) {
     // Create a new sub-array in every element of the array.
     for (int32_t i = 0; i < array_length; i++) {
-      Array* sub_array = RecursiveCreateMultiArray(self, array_class->GetComponentType(),
+      SirtRef<mirror::Class> sirt_component_type(self, array_class->GetComponentType());
+      Array* sub_array = RecursiveCreateMultiArray(self, sirt_component_type,
                                                    current_dimension + 1, dimensions);
-      if (UNLIKELY(sub_array == NULL)) {
+      if (UNLIKELY(sub_array == nullptr)) {
         CHECK(self->IsExceptionPending());
-        return NULL;
+        return nullptr;
       }
       new_array->AsObjectArray<Array>()->Set(i, sub_array);
     }
@@ -64,7 +66,8 @@
   return new_array.get();
 }
 
-Array* Array::CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions) {
+Array* Array::CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
+                               const SirtRef<IntArray>& dimensions) {
   // Verify dimensions.
   //
   // The caller is responsible for verifying that "dimArray" is non-null
@@ -77,37 +80,36 @@
     int dimension = dimensions->Get(i);
     if (UNLIKELY(dimension < 0)) {
       ThrowNegativeArraySizeException(StringPrintf("Dimension %d: %d", i, dimension).c_str());
-      return NULL;
+      return nullptr;
     }
   }
 
   // Generate the full name of the array class.
   std::string descriptor(num_dimensions, '[');
-  descriptor += ClassHelper(element_class).GetDescriptor();
+  descriptor += ClassHelper(element_class.get()).GetDescriptor();
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   SirtRef<mirror::ClassLoader> class_loader(self, element_class->GetClassLoader());
-  Class* array_class = class_linker->FindClass(descriptor.c_str(), class_loader);
-  if (UNLIKELY(array_class == NULL)) {
+  SirtRef<mirror::Class> array_class(self,
+                                     class_linker->FindClass(descriptor.c_str(), class_loader));
+  if (UNLIKELY(array_class.get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   // create the array
-  SirtRef<mirror::IntArray> sirt_dimensions(self, dimensions);
-  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, sirt_dimensions);
-  if (UNLIKELY(new_array == NULL)) {
+  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
+  if (UNLIKELY(new_array == nullptr)) {
     CHECK(self->IsExceptionPending());
-    return NULL;
   }
   return new_array;
 }
 
-void Array::ThrowArrayIndexOutOfBoundsException(int32_t index) const {
+void Array::ThrowArrayIndexOutOfBoundsException(int32_t index) {
   art::ThrowArrayIndexOutOfBoundsException(index, GetLength());
 }
 
-void Array::ThrowArrayStoreException(Object* object) const {
+void Array::ThrowArrayStoreException(Object* object) {
   art::ThrowArrayStoreException(object->GetClass(), this->GetClass());
 }
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 5265946..04f03c3 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -19,6 +19,7 @@
 
 #include "object.h"
 #include "gc/heap.h"
+#include "thread.h"
 
 namespace art {
 namespace mirror {
@@ -46,18 +47,19 @@
                       size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
+  static Array* CreateMultiArray(Thread* self, const SirtRef<Class>& element_class,
+                                 const SirtRef<IntArray>& dimensions)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t SizeOf() const;
+  size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetLength() const {
+  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Array, length_), false);
   }
 
-  void SetLength(int32_t length) {
+  void SetLength(int32_t length) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK_GE(length, 0);
-    SetField32(OFFSET_OF_OBJECT_MEMBER(Array, length_), length, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(Array, length_), length, false, false);
   }
 
   static MemberOffset LengthOffset() {
@@ -73,18 +75,22 @@
     }
   }
 
-  void* GetRawData(size_t component_size) {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value();
+  void* GetRawData(size_t component_size, int32_t index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value() +
+        (index * component_size);
     return reinterpret_cast<void*>(data);
   }
 
-  const void* GetRawData(size_t component_size) const {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value();
-    return reinterpret_cast<const void*>(data);
+  const void* GetRawData(size_t component_size, int32_t index) const {
+    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(component_size).Int32Value() +
+        (index * component_size);
+    return reinterpret_cast<void*>(data);
   }
 
-  bool IsValidIndex(int32_t index) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // Returns true if the index is valid. If not, throws an ArrayIndexOutOfBoundsException and
+  // returns false.
+  bool CheckIsValidIndex(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(static_cast<uint32_t>(index) >= static_cast<uint32_t>(GetLength()))) {
       ThrowArrayIndexOutOfBoundsException(index);
       return false;
@@ -93,12 +99,12 @@
   }
 
  protected:
-  void ThrowArrayIndexOutOfBoundsException(int32_t index) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void ThrowArrayStoreException(Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ThrowArrayStoreException(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  void ThrowArrayIndexOutOfBoundsException(int32_t index)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // The number of array elements.
   int32_t length_;
   // Marker for the data (used by generated code)
@@ -115,29 +121,56 @@
   static PrimitiveArray<T>* Alloc(Thread* self, size_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const T* GetData() const {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(sizeof(T)).Int32Value();
-    return reinterpret_cast<T*>(data);
+  const T* GetData() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reinterpret_cast<const T*>(GetRawData(sizeof(T), 0));
   }
 
-  T* GetData() {
-    intptr_t data = reinterpret_cast<intptr_t>(this) + DataOffset(sizeof(T)).Int32Value();
-    return reinterpret_cast<T*>(data);
+  T* GetData() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return reinterpret_cast<T*>(GetRawData(sizeof(T), 0));
   }
 
-  T Get(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (!IsValidIndex(i)) {
+  T Get(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (UNLIKELY(!CheckIsValidIndex(i))) {
+      DCHECK(Thread::Current()->IsExceptionPending());
       return T(0);
     }
+    return GetWithoutChecks(i);
+  }
+
+  T GetWithoutChecks(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(CheckIsValidIndex(i));
     return GetData()[i];
   }
 
   void Set(int32_t i, T value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (IsValidIndex(i)) {
-      GetData()[i] = value;
+    if (LIKELY(CheckIsValidIndex(i))) {
+      SetWithoutChecks(i, value);
+    } else {
+      DCHECK(Thread::Current()->IsExceptionPending());
     }
   }
 
+  void SetWithoutChecks(int32_t i, T value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(CheckIsValidIndex(i));
+    GetData()[i] = value;
+  }
+
+  /*
+   * Works like memmove(), except we guarantee not to allow tearing of array values (i.e. using
+   * smaller-than-element-size copies). Arguments are assumed to be within the bounds of the array
+   * and the arrays to be non-null.
+   */
+  void Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos, int32_t count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  /*
+   * Works like memcpy(), except we guarantee not to allow tearing of array values (i.e. using
+   * smaller-than-element-size copies). Arguments are assumed to be within the bounds of the array
+   * and the arrays to be non-null.
+   */
+  void Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos, int32_t count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static void SetArrayClass(Class* array_class) {
     CHECK(array_class_ == NULL);
     CHECK(array_class != NULL);
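
The array.h changes above split every accessor into a checked entry point (Get/Set, which call CheckIsValidIndex and bail out leaving an exception pending) and an unchecked fast path (GetWithoutChecks/SetWithoutChecks, guarded only by a DCHECK). A minimal standalone sketch of that pattern, with a plain bool standing in for the pending ArrayIndexOutOfBoundsException and none of ART's Thread or lock-annotation machinery:

// Standalone sketch (not ART code): checked vs. unchecked array accessors.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

template <typename T>
class SketchArray {
 public:
  explicit SketchArray(int32_t length) : data_(length) {}

  int32_t GetLength() const { return static_cast<int32_t>(data_.size()); }

  // Returns true if the index is valid; otherwise records the "exception" and returns false.
  bool CheckIsValidIndex(int32_t index) {
    if (static_cast<uint32_t>(index) >= static_cast<uint32_t>(GetLength())) {
      exception_pending_ = true;  // stands in for ThrowArrayIndexOutOfBoundsException(index)
      return false;
    }
    return true;
  }

  T Get(int32_t i) {                      // checked slow path
    if (!CheckIsValidIndex(i)) return T(0);
    return GetWithoutChecks(i);
  }
  T GetWithoutChecks(int32_t i) {         // unchecked fast path, caller guarantees bounds
    assert(CheckIsValidIndex(i));
    return data_[i];
  }

  void Set(int32_t i, T value) {
    if (CheckIsValidIndex(i)) SetWithoutChecks(i, value);
  }
  void SetWithoutChecks(int32_t i, T value) {
    assert(CheckIsValidIndex(i));
    data_[i] = value;
  }

  bool exception_pending() const { return exception_pending_; }

 private:
  std::vector<T> data_;
  bool exception_pending_ = false;
};

int main() {
  SketchArray<int32_t> a(4);
  a.Set(2, 7);
  std::cout << a.Get(2) << "\n";                                          // 7
  std::cout << a.Get(9) << " pending=" << a.exception_pending() << "\n";  // 0 pending=1
}

Callers that have already established the bounds (such as the Memmove/Memcpy helpers declared above, whose arguments are assumed in range) can use the *WithoutChecks path; everything else pays for the check once.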
diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h
index d8c278c..530226b 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/mirror/art_field-inl.h
@@ -29,8 +29,8 @@
 namespace art {
 namespace mirror {
 
-inline Class* ArtField::GetDeclaringClass() const {
-  Class* result = GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), false);
+inline Class* ArtField::GetDeclaringClass() {
+  Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), false);
   DCHECK(result != NULL);
   DCHECK(result->IsLoaded() || result->IsErroneous());
   return result;
@@ -40,106 +40,106 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), new_declaring_class, false);
 }
 
-inline uint32_t ArtField::GetAccessFlags() const {
+inline uint32_t ArtField::GetAccessFlags() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, access_flags_), false);
 }
 
-inline MemberOffset ArtField::GetOffset() const {
+inline MemberOffset ArtField::GetOffset() {
   DCHECK(GetDeclaringClass()->IsResolved() || GetDeclaringClass()->IsErroneous());
   return MemberOffset(GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), false));
 }
 
-inline MemberOffset ArtField::GetOffsetDuringLinking() const {
+inline MemberOffset ArtField::GetOffsetDuringLinking() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
   return MemberOffset(GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), false));
 }
 
-inline uint32_t ArtField::Get32(const Object* object) const {
+inline uint32_t ArtField::Get32(Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   return object->GetField32(GetOffset(), IsVolatile());
 }
 
-inline void ArtField::Set32(Object* object, uint32_t new_value) const {
+inline void ArtField::Set32(Object* object, uint32_t new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   object->SetField32(GetOffset(), new_value, IsVolatile());
 }
 
-inline uint64_t ArtField::Get64(const Object* object) const {
+inline uint64_t ArtField::Get64(Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   return object->GetField64(GetOffset(), IsVolatile());
 }
 
-inline void ArtField::Set64(Object* object, uint64_t new_value) const {
+inline void ArtField::Set64(Object* object, uint64_t new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   object->SetField64(GetOffset(), new_value, IsVolatile());
 }
 
-inline Object* ArtField::GetObj(const Object* object) const {
+inline Object* ArtField::GetObj(Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
-  return object->GetFieldObject<Object*>(GetOffset(), IsVolatile());
+  return object->GetFieldObject<Object>(GetOffset(), IsVolatile());
 }
 
-inline void ArtField::SetObj(Object* object, const Object* new_value) const {
+inline void ArtField::SetObj(Object* object, Object* new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   object->SetFieldObject(GetOffset(), new_value, IsVolatile());
 }
 
-inline bool ArtField::GetBoolean(const Object* object) const {
+inline bool ArtField::GetBoolean(Object* object) {
   DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetBoolean(Object* object, bool z) const {
+inline void ArtField::SetBoolean(Object* object, bool z) {
   DCHECK_EQ(Primitive::kPrimBoolean, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   Set32(object, z);
 }
 
-inline int8_t ArtField::GetByte(const Object* object) const {
+inline int8_t ArtField::GetByte(Object* object) {
   DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetByte(Object* object, int8_t b) const {
+inline void ArtField::SetByte(Object* object, int8_t b) {
   DCHECK_EQ(Primitive::kPrimByte, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   Set32(object, b);
 }
 
-inline uint16_t ArtField::GetChar(const Object* object) const {
+inline uint16_t ArtField::GetChar(Object* object) {
   DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
       << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetChar(Object* object, uint16_t c) const {
+inline void ArtField::SetChar(Object* object, uint16_t c) {
   DCHECK_EQ(Primitive::kPrimChar, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   Set32(object, c);
 }
 
-inline int16_t ArtField::GetShort(const Object* object) const {
+inline int16_t ArtField::GetShort(Object* object) {
   DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   return Get32(object);
 }
 
-inline void ArtField::SetShort(Object* object, int16_t s) const {
+inline void ArtField::SetShort(Object* object, int16_t s) {
   DCHECK_EQ(Primitive::kPrimShort, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   Set32(object, s);
 }
 
-inline int32_t ArtField::GetInt(const Object* object) const {
+inline int32_t ArtField::GetInt(Object* object) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
@@ -147,7 +147,7 @@
   return Get32(object);
 }
 
-inline void ArtField::SetInt(Object* object, int32_t i) const {
+inline void ArtField::SetInt(Object* object, int32_t i) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
@@ -155,7 +155,7 @@
   Set32(object, i);
 }
 
-inline int64_t ArtField::GetLong(const Object* object) const {
+inline int64_t ArtField::GetLong(Object* object) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
@@ -163,7 +163,7 @@
   return Get64(object);
 }
 
-inline void ArtField::SetLong(Object* object, int64_t j) const {
+inline void ArtField::SetLong(Object* object, int64_t j) {
 #ifndef NDEBUG
   Primitive::Type type = FieldHelper(this).GetTypeAsPrimitiveType();
   CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
@@ -171,7 +171,7 @@
   Set64(object, j);
 }
 
-inline float ArtField::GetFloat(const Object* object) const {
+inline float ArtField::GetFloat(Object* object) {
   DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -179,7 +179,7 @@
   return bits.GetF();
 }
 
-inline void ArtField::SetFloat(Object* object, float f) const {
+inline void ArtField::SetFloat(Object* object, float f) {
   DCHECK_EQ(Primitive::kPrimFloat, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -187,7 +187,7 @@
   Set32(object, bits.GetI());
 }
 
-inline double ArtField::GetDouble(const Object* object) const {
+inline double ArtField::GetDouble(Object* object) {
   DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -195,7 +195,7 @@
   return bits.GetD();
 }
 
-inline void ArtField::SetDouble(Object* object, double d) const {
+inline void ArtField::SetDouble(Object* object, double d) {
   DCHECK_EQ(Primitive::kPrimDouble, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   JValue bits;
@@ -203,13 +203,13 @@
   Set64(object, bits.GetJ());
 }
 
-inline Object* ArtField::GetObject(const Object* object) const {
+inline Object* ArtField::GetObject(Object* object) {
   DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   return GetObj(object);
 }
 
-inline void ArtField::SetObject(Object* object, const Object* l) const {
+inline void ArtField::SetObject(Object* object, Object* l) {
   DCHECK_EQ(Primitive::kPrimNot, FieldHelper(this).GetTypeAsPrimitiveType())
        << PrettyField(this);
   SetObj(object, l);
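
The art_field-inl.h changes drop const from the accessors, annotate them with SHARED_LOCKS_REQUIRED(Locks::mutator_lock_), and pass the pointee type to GetFieldObject<Class> rather than the pointer type. The per-type getters and setters all follow one shape: assert the field's declared primitive type, then defer to a raw 32- or 64-bit access at the field's offset. A standalone sketch of that shape, with object headers, FieldHelper, and volatility handling omitted:

// Standalone sketch (not ART code): typed field accessors funneling through
// raw 32-bit reads/writes at a byte offset, as ArtField::GetBoolean/SetInt do.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>

enum class Prim { kBoolean, kInt };

struct SketchField {
  Prim type;
  std::size_t offset;  // byte offset of the field inside the object

  uint32_t Get32(const uint8_t* object) const {
    uint32_t v;
    std::memcpy(&v, object + offset, sizeof(v));
    return v;
  }
  void Set32(uint8_t* object, uint32_t v) const {
    std::memcpy(object + offset, &v, sizeof(v));
  }

  bool GetBoolean(const uint8_t* object) const {
    assert(type == Prim::kBoolean);  // mirrors DCHECK_EQ(kPrimBoolean, ...)
    return Get32(object) != 0;
  }
  int32_t GetInt(const uint8_t* object) const {
    assert(type == Prim::kInt);
    return static_cast<int32_t>(Get32(object));
  }
  void SetInt(uint8_t* object, int32_t i) const {
    assert(type == Prim::kInt);
    Set32(object, static_cast<uint32_t>(i));
  }
};

int main() {
  uint8_t object[16] = {};           // stand-in for an instance's field area
  SketchField count{Prim::kInt, 4};
  count.SetInt(object, 42);
  std::cout << count.GetInt(object) << "\n";  // 42
}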
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 62bcf06..b33fe4b 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -30,98 +30,74 @@
 // C++ mirror of java.lang.reflect.ArtField
 class MANAGED ArtField : public Object {
  public:
-  Class* GetDeclaringClass() const;
+  Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetAccessFlags() const;
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetAccessFlags(uint32_t new_access_flags) {
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, access_flags_), new_access_flags, false);
   }
 
-  bool IsPublic() const {
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
-  bool IsStatic() const {
+  bool IsStatic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccStatic) != 0;
   }
 
-  bool IsFinal() const {
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  uint32_t GetDexFieldIndex() const {
+  uint32_t GetDexFieldIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, field_dex_idx_), false);
   }
 
-  void SetDexFieldIndex(uint32_t new_idx) {
+  void SetDexFieldIndex(uint32_t new_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, field_dex_idx_), new_idx, false);
   }
 
-  // Offset to field within an Object
-  MemberOffset GetOffset() const;
+  // Offset to field within an Object.
+  MemberOffset GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset OffsetOffset() {
     return MemberOffset(OFFSETOF_MEMBER(ArtField, offset_));
   }
 
-  MemberOffset GetOffsetDuringLinking() const;
+  MemberOffset GetOffsetDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetOffset(MemberOffset num_bytes);
+  void SetOffset(MemberOffset num_bytes) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // field access, null object for static fields
-  bool GetBoolean(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetBoolean(Object* object, bool z) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int8_t GetByte(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetByte(Object* object, int8_t b) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint16_t GetChar(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetChar(Object* object, uint16_t c) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int16_t GetShort(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetShort(Object* object, int16_t s) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int32_t GetInt(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetInt(Object* object, int32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int64_t GetLong(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetLong(Object* object, int64_t j) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  float GetFloat(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetFloat(Object* object, float f) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  double GetDouble(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetDouble(Object* object, double d) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* GetObject(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetObject(Object* object, const Object* l) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool GetBoolean(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetBoolean(Object* object, bool z) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int8_t GetByte(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetByte(Object* object, int8_t b) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint16_t GetChar(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetChar(Object* object, uint16_t c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int16_t GetShort(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetShort(Object* object, int16_t s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int32_t GetInt(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetInt(Object* object, int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int64_t GetLong(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetLong(Object* object, int64_t j) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  float GetFloat(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFloat(Object* object, float f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  double GetDouble(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetDouble(Object* object, double d) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* GetObject(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetObject(Object* object, Object* l) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // raw field accesses
-  uint32_t Get32(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Set32(Object* object, uint32_t new_value) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint64_t Get64(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void Set64(Object* object, uint64_t new_value) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* GetObj(const Object* object) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void SetObj(Object* object, const Object* new_value) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Raw field accesses.
+  uint32_t Get32(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Set32(Object* object, uint32_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint64_t Get64(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void Set64(Object* object, uint64_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* GetObj(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetObj(Object* object, Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangReflectArtField() {
     DCHECK(java_lang_reflect_ArtField_ != NULL);
@@ -133,14 +109,14 @@
   static void VisitRoots(RootVisitor* visitor, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsVolatile() const {
+  bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccVolatile) != 0;
   }
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of
-  Class* declaring_class_;
+  HeapReference<Class> declaring_class_;
 
   uint32_t access_flags_;
 
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 088f616..8ef3be8 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -27,8 +27,9 @@
 namespace art {
 namespace mirror {
 
-inline Class* ArtMethod::GetDeclaringClass() const {
-  Class* result = GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_), false);
+inline Class* ArtMethod::GetDeclaringClass() {
+  Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_),
+                                        false);
   DCHECK(result != NULL) << this;
   DCHECK(result->IsIdxLoaded() || result->IsErroneous()) << this;
   return result;
@@ -38,44 +39,44 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(ArtMethod, declaring_class_), new_declaring_class, false);
 }
 
-inline uint32_t ArtMethod::GetAccessFlags() const {
+inline uint32_t ArtMethod::GetAccessFlags() {
   DCHECK(GetDeclaringClass()->IsIdxLoaded() || GetDeclaringClass()->IsErroneous());
   return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, access_flags_), false);
 }
 
-inline uint16_t ArtMethod::GetMethodIndex() const {
+inline uint16_t ArtMethod::GetMethodIndex() {
   DCHECK(GetDeclaringClass()->IsResolved() || GetDeclaringClass()->IsErroneous());
   return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_), false);
 }
 
-inline uint32_t ArtMethod::GetDexMethodIndex() const {
+inline uint32_t ArtMethod::GetDexMethodIndex() {
 #ifdef ART_SEA_IR_MODE
   // TODO: Re-add this check for (PORTABLE + SMALL + ) SEA IR when PORTABLE IS fixed!
   // DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
 #else
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
 #endif
-  return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_dex_index_), false);
+  return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_), false);
 }
 
-inline ObjectArray<String>* ArtMethod::GetDexCacheStrings() const {
-  return GetFieldObject<ObjectArray<String>*>(
+inline ObjectArray<String>* ArtMethod::GetDexCacheStrings() {
+  return GetFieldObject<ObjectArray<String> >(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_strings_), false);
 }
 
-inline ObjectArray<ArtMethod>* ArtMethod::GetDexCacheResolvedMethods() const {
-  return GetFieldObject<ObjectArray<ArtMethod>*>(
+inline ObjectArray<ArtMethod>* ArtMethod::GetDexCacheResolvedMethods() {
+  return GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_), false);
 }
 
-inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() const {
-  return GetFieldObject<ObjectArray<Class>*>(
+inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() {
+  return GetFieldObject<ObjectArray<Class> >(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_), false);
 }
 
-inline uint32_t ArtMethod::GetCodeSize() const {
+inline uint32_t ArtMethod::GetCodeSize() {
   DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
-  uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromCompiledCode());
+  uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromQuickCompiledCode());
   if (code == 0) {
     return 0;
   }
@@ -106,7 +107,7 @@
   }
 }
 
-inline void ArtMethod::AssertPcIsWithinCode(uintptr_t pc) const {
+inline void ArtMethod::AssertPcIsWithinQuickCode(uintptr_t pc) {
   if (!kIsDebugBuild) {
     return;
   }
@@ -116,34 +117,44 @@
   if (pc == GetQuickInstrumentationExitPc()) {
     return;
   }
-  const void* code = GetEntryPointFromCompiledCode();
-  if (code == GetCompiledCodeToInterpreterBridge() || code == GetQuickInstrumentationEntryPoint()) {
+  const void* code = GetEntryPointFromQuickCompiledCode();
+  if (code == GetQuickToInterpreterBridge() || code == GetQuickInstrumentationEntryPoint()) {
     return;
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  if (code == GetResolutionTrampoline(class_linker)) {
+  if (code == GetQuickResolutionTrampoline(class_linker)) {
     return;
   }
-  DCHECK(IsWithinCode(pc))
+  DCHECK(IsWithinQuickCode(pc))
       << PrettyMethod(this)
       << " pc=" << std::hex << pc
       << " code=" << code
       << " size=" << GetCodeSize();
 }
 
-inline uint32_t ArtMethod::GetOatCodeOffset() const {
+inline uint32_t ArtMethod::GetQuickOatCodeOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetEntryPointFromCompiledCode());
+  return PointerToLowMemUInt32(GetEntryPointFromQuickCompiledCode());
 }
 
-inline void ArtMethod::SetOatCodeOffset(uint32_t code_offset) {
+inline uint32_t ArtMethod::GetPortableOatCodeOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  SetEntryPointFromCompiledCode(reinterpret_cast<void*>(code_offset));
+  return PointerToLowMemUInt32(GetEntryPointFromPortableCompiledCode());
 }
 
-inline uint32_t ArtMethod::GetOatMappingTableOffset() const {
+inline void ArtMethod::SetQuickOatCodeOffset(uint32_t code_offset) {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetMappingTable());
+  SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(code_offset));
+}
+
+inline void ArtMethod::SetPortableOatCodeOffset(uint32_t code_offset) {
+  DCHECK(!Runtime::Current()->IsStarted());
+  SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(code_offset));
+}
+
+inline uint32_t ArtMethod::GetOatMappingTableOffset() {
+  DCHECK(!Runtime::Current()->IsStarted());
+  return PointerToLowMemUInt32(GetMappingTable());
 }
 
 inline void ArtMethod::SetOatMappingTableOffset(uint32_t mapping_table_offset) {
@@ -151,9 +162,9 @@
   SetMappingTable(reinterpret_cast<const uint8_t*>(mapping_table_offset));
 }
 
-inline uint32_t ArtMethod::GetOatVmapTableOffset() const {
+inline uint32_t ArtMethod::GetOatVmapTableOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetVmapTable());
+  return PointerToLowMemUInt32(GetVmapTable());
 }
 
 inline void ArtMethod::SetOatVmapTableOffset(uint32_t vmap_table_offset) {
@@ -166,16 +177,16 @@
   SetNativeGcMap(reinterpret_cast<uint8_t*>(gc_map_offset));
 }
 
-inline uint32_t ArtMethod::GetOatNativeGcMapOffset() const {
+inline uint32_t ArtMethod::GetOatNativeGcMapOffset() {
   DCHECK(!Runtime::Current()->IsStarted());
-  return reinterpret_cast<uint32_t>(GetNativeGcMap());
+  return PointerToLowMemUInt32(GetNativeGcMap());
 }
 
-inline bool ArtMethod::IsRuntimeMethod() const {
+inline bool ArtMethod::IsRuntimeMethod() {
   return GetDexMethodIndex() == DexFile::kDexNoIndex;
 }
 
-inline bool ArtMethod::IsCalleeSaveMethod() const {
+inline bool ArtMethod::IsCalleeSaveMethod() {
   if (!IsRuntimeMethod()) {
     return false;
   }
@@ -190,14 +201,14 @@
   return result;
 }
 
-inline bool ArtMethod::IsResolutionMethod() const {
+inline bool ArtMethod::IsResolutionMethod() {
   bool result = this == Runtime::Current()->GetResolutionMethod();
   // Check that if we do think it is phony it looks like the resolution method.
   DCHECK(!result || IsRuntimeMethod());
   return result;
 }
 
-inline bool ArtMethod::IsImtConflictMethod() const {
+inline bool ArtMethod::IsImtConflictMethod() {
   bool result = this == Runtime::Current()->GetImtConflictMethod();
   // Check that if we do think it is phony it looks like the imt conflict method.
   DCHECK(!result || IsRuntimeMethod());
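
Several of the oat-offset getters above switch from reinterpret_cast<uint32_t>(ptr) to PointerToLowMemUInt32(ptr). A sketch of what such a helper presumably does: narrow through uintptr_t with an explicit range check, since a direct reinterpret_cast from a pointer to uint32_t does not compile on 64-bit hosts (the exact ART implementation is not shown here):

// Standalone sketch (assumed shape, not the exact ART helper): checked narrowing
// of a pointer to 32 bits for use as an oat file offset.
#include <cassert>
#include <cstdint>
#include <iostream>

inline uint32_t SketchPointerToLowMemUInt32(const void* p) {
  uintptr_t bits = reinterpret_cast<uintptr_t>(p);
  assert(bits <= UINT32_MAX);  // caller promises the pointee lives in the low 4 GiB
  return static_cast<uint32_t>(bits);
}

int main() {
  // Demonstrate with a synthetic low address; a real host pointer on a 64-bit
  // machine may be mapped above 4 GiB and would trip the assertion.
  const void* fake = reinterpret_cast<const void*>(static_cast<uintptr_t>(0x70001000u));
  std::cout << std::hex << SketchPointerToLowMemUInt32(fake) << "\n";  // 70001000
}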
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index f4a076c..575ea03 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -47,7 +47,7 @@
   }
 }
 
-InvokeType ArtMethod::GetInvokeType() const {
+InvokeType ArtMethod::GetInvokeType() {
   // TODO: kSuper?
   if (GetDeclaringClass()->IsInterface()) {
     return kInterface;
@@ -100,11 +100,11 @@
   return num_registers;
 }
 
-bool ArtMethod::IsProxyMethod() const {
+bool ArtMethod::IsProxyMethod() {
   return GetDeclaringClass()->IsProxyClass();
 }
 
-ArtMethod* ArtMethod::FindOverriddenMethod() const {
+ArtMethod* ArtMethod::FindOverriddenMethod() {
   if (IsStatic()) {
     return NULL;
   }
@@ -147,13 +147,16 @@
   return result;
 }
 
-uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) const {
+uintptr_t ArtMethod::NativePcOffset(const uintptr_t pc) {
   const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(this);
   return pc - reinterpret_cast<uintptr_t>(code);
 }
 
-uint32_t ArtMethod::ToDexPc(const uintptr_t pc) const {
-#if !defined(ART_USE_PORTABLE_COMPILER)
+uint32_t ArtMethod::ToDexPc(const uintptr_t pc) {
+  if (IsPortableCompiled()) {
+    // Portable doesn't use the machine pc; we just use the dex pc instead.
+    return static_cast<uint32_t>(pc);
+  }
   MappingTable table(GetMappingTable());
   if (table.TotalSize() == 0) {
     DCHECK(IsNative() || IsCalleeSaveMethod() || IsProxyMethod()) << PrettyMethod(this);
@@ -176,16 +179,12 @@
     }
   }
   LOG(FATAL) << "Failed to find Dex offset for PC offset " << reinterpret_cast<void*>(sought_offset)
-             << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
-             << ") in " << PrettyMethod(this);
+                     << "(PC " << reinterpret_cast<void*>(pc) << ", code=" << code
+                     << ") in " << PrettyMethod(this);
   return DexFile::kDexNoIndex;
-#else
-  // Compiler LLVM doesn't use the machine pc, we just use dex pc instead.
-  return static_cast<uint32_t>(pc);
-#endif
 }
 
-uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) const {
+uintptr_t ArtMethod::ToNativePc(const uint32_t dex_pc) {
   MappingTable table(GetMappingTable());
   if (table.TotalSize() == 0) {
     DCHECK_EQ(dex_pc, 0U);
@@ -213,7 +212,7 @@
 }
 
 uint32_t ArtMethod::FindCatchBlock(Class* exception_type, uint32_t dex_pc,
-                                   bool* has_no_move_exception) const {
+                                   bool* has_no_move_exception) {
   MethodHelper mh(this);
   const DexFile::CodeItem* code_item = mh.GetCodeItem();
   // Default to handler not found.
@@ -265,16 +264,21 @@
     }
   } else {
     const bool kLogInvocationStartAndReturn = false;
-    if (GetEntryPointFromCompiledCode() != NULL) {
+    bool have_quick_code = GetEntryPointFromQuickCompiledCode() != nullptr;
+    bool have_portable_code = GetEntryPointFromPortableCompiledCode() != nullptr;
+    if (LIKELY(have_quick_code || have_portable_code)) {
       if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Invoking '%s' code=%p", PrettyMethod(this).c_str(), GetEntryPointFromCompiledCode());
+        LOG(INFO) << StringPrintf("Invoking '%s' %s code=%p", PrettyMethod(this).c_str(),
+                                  have_quick_code ? "quick" : "portable",
+                                  have_quick_code ? GetEntryPointFromQuickCompiledCode()
+                                                  : GetEntryPointFromPortableCompiledCode());
       }
-#ifdef ART_USE_PORTABLE_COMPILER
-      (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
-#else
-      (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
-#endif
-      if (UNLIKELY(reinterpret_cast<int32_t>(self->GetException(NULL)) == -1)) {
+      if (!IsPortableCompiled()) {
+        (*art_quick_invoke_stub)(this, args, args_size, self, result, result_type);
+      } else {
+        (*art_portable_invoke_stub)(this, args, args_size, self, result, result_type);
+      }
+      if (UNLIKELY(reinterpret_cast<intptr_t>(self->GetException(NULL)) == -1)) {
         // Unusual case where we were running LLVM generated code and an
         // exception was thrown to force the activations to be removed from the
         // stack. Continue execution in the interpreter.
@@ -285,11 +289,13 @@
         interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result);
       }
       if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Returned '%s' code=%p", PrettyMethod(this).c_str(), GetEntryPointFromCompiledCode());
+        LOG(INFO) << StringPrintf("Returned '%s' %s code=%p", PrettyMethod(this).c_str(),
+                                  have_quick_code ? "quick" : "portable",
+                                  have_quick_code ? GetEntryPointFromQuickCompiledCode()
+                                                  : GetEntryPointFromPortableCompiledCode());
       }
     } else {
-      LOG(INFO) << "Not invoking '" << PrettyMethod(this)
-          << "' code=" << reinterpret_cast<const void*>(GetEntryPointFromCompiledCode());
+      LOG(INFO) << "Not invoking '" << PrettyMethod(this) << "' code=null";
       if (result != NULL) {
         result->SetJ(0);
       }
@@ -300,9 +306,10 @@
   self->PopManagedStackFragment(fragment);
 }
 
-bool ArtMethod::IsRegistered() const {
-  void* native_method = GetFieldPtr<void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, native_method_), false);
-  CHECK(native_method != NULL);
+bool ArtMethod::IsRegistered() {
+  void* native_method =
+      GetFieldPtr<void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_), false);
+  CHECK(native_method != nullptr);
   void* jni_stub = GetJniDlsymLookupStub();
   return native_method != jni_stub;
 }
@@ -323,7 +330,7 @@
     // around JNI bugs, that include not giving Object** SIRT references to native methods. Direct
     // the native method to runtime support and store the target somewhere runtime support will
     // find it.
-#if defined(__i386__)
+#if defined(__i386__) || defined(__x86_64__)
     UNIMPLEMENTED(FATAL);
 #else
     SetNativeMethod(reinterpret_cast<void*>(art_work_around_app_jni_bugs));
@@ -340,7 +347,7 @@
 }
 
 void ArtMethod::SetNativeMethod(const void* native_method) {
-  SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, native_method_),
+  SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_),
       native_method, false);
 }
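
ArtMethod::Invoke above stops choosing the invoke stub with an #ifdef ART_USE_PORTABLE_COMPILER and instead asks the method itself via IsPortableCompiled(), so quick- and portable-compiled methods can coexist in one runtime. A standalone sketch of that per-method dispatch; the flag bit below is illustrative, not the real kAccPortableCompiled value:

// Standalone sketch (not ART code): per-method stub selection instead of a
// build-time #ifdef.
#include <cstdint>
#include <iostream>

constexpr uint32_t kSketchPortableCompiled = 1u << 29;  // illustrative bit only

using InvokeStub = void (*)(const char* method_name);

void QuickInvokeStub(const char* m)    { std::cout << "quick invoke of "    << m << "\n"; }
void PortableInvokeStub(const char* m) { std::cout << "portable invoke of " << m << "\n"; }

struct SketchMethod {
  const char* name;
  uint32_t access_flags;

  bool IsPortableCompiled() const { return (access_flags & kSketchPortableCompiled) != 0; }

  void Invoke() const {
    // Pick the stub per method at runtime instead of per build.
    InvokeStub stub = IsPortableCompiled() ? PortableInvokeStub : QuickInvokeStub;
    stub(name);
  }
};

int main() {
  SketchMethod quick{"Foo.bar()", 0};
  SketchMethod portable{"Foo.baz()", kSketchPortableCompiled};
  quick.Invoke();     // quick invoke of Foo.bar()
  portable.Invoke();  // portable invoke of Foo.baz()
}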
 
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 95ca4c9..bfa7cbe 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -45,7 +45,7 @@
 // C++ mirror of java.lang.reflect.Method and java.lang.reflect.Constructor
 class MANAGED ArtMethod : public Object {
  public:
-  Class* GetDeclaringClass() const;
+  Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -53,41 +53,37 @@
     return MemberOffset(OFFSETOF_MEMBER(ArtMethod, declaring_class_));
   }
 
-  static MemberOffset EntryPointFromCompiledCodeOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_compiled_code_));
-  }
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetAccessFlags() const;
-
-  void SetAccessFlags(uint32_t new_access_flags) {
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, access_flags_), new_access_flags, false);
   }
 
   // Approximate what kind of method call would be used for this method.
-  InvokeType GetInvokeType() const;
+  InvokeType GetInvokeType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the method is declared public.
-  bool IsPublic() const {
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the method is declared private.
-  bool IsPrivate() const {
+  bool IsPrivate() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPrivate) != 0;
   }
 
   // Returns true if the method is declared static.
-  bool IsStatic() const {
+  bool IsStatic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccStatic) != 0;
   }
 
   // Returns true if the method is a constructor.
-  bool IsConstructor() const {
+  bool IsConstructor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccConstructor) != 0;
   }
 
   // Returns true if the method is static, private, or a constructor.
-  bool IsDirect() const {
+  bool IsDirect() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return IsDirect(GetAccessFlags());
   }
 
@@ -96,55 +92,70 @@
   }
 
   // Returns true if the method is declared synchronized.
-  bool IsSynchronized() const {
+  bool IsSynchronized() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t synchonized = kAccSynchronized | kAccDeclaredSynchronized;
     return (GetAccessFlags() & synchonized) != 0;
   }
 
-  bool IsFinal() const {
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsMiranda() const {
+  bool IsMiranda() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccMiranda) != 0;
   }
 
-  bool IsNative() const {
+  bool IsNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccNative) != 0;
   }
 
-  bool IsFastNative() const {
+  bool IsFastNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t mask = kAccFastNative | kAccNative;
     return (GetAccessFlags() & mask) == mask;
   }
 
-  bool IsAbstract() const {
+  bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
-  bool IsSynthetic() const {
+  bool IsSynthetic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
-  bool IsProxyMethod() const;
+  bool IsProxyMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsPreverified() const {
+  bool IsPreverified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPreverified) != 0;
   }
 
-  void SetPreverified() {
+  void SetPreverified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(!IsPreverified());
     SetAccessFlags(GetAccessFlags() | kAccPreverified);
   }
 
+  bool IsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccPortableCompiled) != 0;
+  }
+
+  void SetIsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(!IsPortableCompiled());
+    SetAccessFlags(GetAccessFlags() | kAccPortableCompiled);
+  }
+
+  void ClearIsPortableCompiled() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(IsPortableCompiled());
+    SetAccessFlags(GetAccessFlags() & ~kAccPortableCompiled);
+  }
+
   bool CheckIncompatibleClassChange(InvokeType type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint16_t GetMethodIndex() const;
+  uint16_t GetMethodIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t GetVtableIndex() const {
+  size_t GetVtableIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetMethodIndex();
   }
 
-  void SetMethodIndex(uint16_t new_method_index) {
+  void SetMethodIndex(uint16_t new_method_index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_), new_method_index, false);
   }
 
@@ -152,24 +163,24 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
   }
 
-  uint32_t GetCodeItemOffset() const {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, code_item_offset_), false);
+  uint32_t GetCodeItemOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_code_item_offset_), false);
   }
 
   void SetCodeItemOffset(uint32_t new_code_off) {
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, code_item_offset_), new_code_off, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_code_item_offset_), new_code_off, false);
   }
 
   // Number of 32bit registers that would be required to hold all the arguments
   static size_t NumArgRegisters(const StringPiece& shorty);
 
-  uint32_t GetDexMethodIndex() const;
+  uint32_t GetDexMethodIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDexMethodIndex(uint32_t new_idx) {
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_dex_index_), new_idx, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_), new_idx, false);
   }
 
-  ObjectArray<String>* GetDexCacheStrings() const;
+  ObjectArray<String>* GetDexCacheStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheStrings(ObjectArray<String>* new_dex_cache_strings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -185,41 +196,62 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
   }
 
-  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() const;
+  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<Class>* GetDexCacheResolvedTypes() const;
+  ObjectArray<Class>* GetDexCacheResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_types)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find the method that this method overrides
-  ArtMethod* FindOverriddenMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, char result_type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  EntryPointFromInterpreter* GetEntryPointFromInterpreter() const {
-    return GetFieldPtr<EntryPointFromInterpreter*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_), false);
+  EntryPointFromInterpreter* GetEntryPointFromInterpreter() {
+    return GetFieldPtr<EntryPointFromInterpreter*>(
+               OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_), false);
   }
 
   void SetEntryPointFromInterpreter(EntryPointFromInterpreter* entry_point_from_interpreter) {
-    SetFieldPtr<EntryPointFromInterpreter*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_), entry_point_from_interpreter, false);
+    SetFieldPtr<EntryPointFromInterpreter*>(
+        OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_),
+        entry_point_from_interpreter, false);
   }
 
-  const void* GetEntryPointFromCompiledCode() const {
-    return GetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_compiled_code_), false);
+  static MemberOffset EntryPointFromPortableCompiledCodeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_portable_compiled_code_));
   }
 
-  void SetEntryPointFromCompiledCode(const void* entry_point_from_compiled_code) {
-    SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_compiled_code_), entry_point_from_compiled_code, false);
+  const void* GetEntryPointFromPortableCompiledCode() {
+    return GetFieldPtr<const void*>(EntryPointFromPortableCompiledCodeOffset(), false);
   }
 
-  uint32_t GetCodeSize() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetEntryPointFromPortableCompiledCode(const void* entry_point_from_portable_compiled_code) {
+    SetFieldPtr<const void*>(EntryPointFromPortableCompiledCodeOffset(),
+        entry_point_from_portable_compiled_code, false);
+  }
 
-  bool IsWithinCode(uintptr_t pc) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromCompiledCode());
+  static MemberOffset EntryPointFromQuickCompiledCodeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_quick_compiled_code_));
+  }
+
+  const void* GetEntryPointFromQuickCompiledCode() {
+    return GetFieldPtr<const void*>(EntryPointFromQuickCompiledCodeOffset(), false);
+  }
+
+  void SetEntryPointFromQuickCompiledCode(const void* entry_point_from_quick_compiled_code) {
+    SetFieldPtr<const void*>(EntryPointFromQuickCompiledCodeOffset(),
+        entry_point_from_quick_compiled_code, false);
+  }
+
+
+  uint32_t GetCodeSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsWithinQuickCode(uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t code = reinterpret_cast<uintptr_t>(GetEntryPointFromQuickCompiledCode());
     if (code == 0) {
       return pc == 0;
     }
@@ -231,45 +263,44 @@
     return (code <= pc && pc <= code + GetCodeSize());
   }
 
-  void AssertPcIsWithinCode(uintptr_t pc) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void AssertPcIsWithinQuickCode(uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetOatCodeOffset() const;
-
-  void SetOatCodeOffset(uint32_t code_offset);
-
-  static MemberOffset GetEntryPointFromCompiledCodeOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_compiled_code_);
-  }
+  uint32_t GetQuickOatCodeOffset();
+  uint32_t GetPortableOatCodeOffset();
+  void SetQuickOatCodeOffset(uint32_t code_offset);
+  void SetPortableOatCodeOffset(uint32_t code_offset);
 
   // Callers should wrap the uint8_t* in a MappingTable instance for convenient access.
-  const uint8_t* GetMappingTable() const {
-    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, mapping_table_), false);
+  const uint8_t* GetMappingTable() {
+    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_mapping_table_),
+        false);
   }
 
   void SetMappingTable(const uint8_t* mapping_table) {
-    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, mapping_table_),
-                                 mapping_table, false);
+    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_mapping_table_),
+                                mapping_table, false);
   }
 
-  uint32_t GetOatMappingTableOffset() const;
+  uint32_t GetOatMappingTableOffset();
 
   void SetOatMappingTableOffset(uint32_t mapping_table_offset);
 
   // Callers should wrap the uint8_t* in a VmapTable instance for convenient access.
-  const uint8_t* GetVmapTable() const {
-    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, vmap_table_), false);
+  const uint8_t* GetVmapTable() {
+    return GetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_vmap_table_),
+        false);
   }
 
   void SetVmapTable(const uint8_t* vmap_table) {
-    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, vmap_table_), vmap_table, false);
+    SetFieldPtr<const uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_vmap_table_), vmap_table,
+        false);
   }
 
-  uint32_t GetOatVmapTableOffset() const;
+  uint32_t GetOatVmapTableOffset();
 
   void SetOatVmapTableOffset(uint32_t vmap_table_offset);
 
-  const uint8_t* GetNativeGcMap() const {
+  const uint8_t* GetNativeGcMap() {
     return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_), false);
   }
   void SetNativeGcMap(const uint8_t* data) {
@@ -278,31 +309,30 @@
 
   // When building the oat need a convenient place to stuff the offset of the native GC map.
   void SetOatNativeGcMapOffset(uint32_t gc_map_offset);
-  uint32_t GetOatNativeGcMapOffset() const;
+  uint32_t GetOatNativeGcMapOffset();
 
-  size_t GetFrameSizeInBytes() const {
+  size_t GetFrameSizeInBytes() {
     DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
-    size_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, frame_size_in_bytes_), false);
+    size_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_), false);
     DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
     return result;
   }
 
   void SetFrameSizeInBytes(size_t new_frame_size_in_bytes) {
-    DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, frame_size_in_bytes_),
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_frame_size_in_bytes_),
                new_frame_size_in_bytes, false);
   }
 
-  size_t GetReturnPcOffsetInBytes() const {
+  size_t GetReturnPcOffsetInBytes() {
     return GetFrameSizeInBytes() - kPointerSize;
   }
 
-  size_t GetSirtOffsetInBytes() const {
+  size_t GetSirtOffsetInBytes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     CHECK(IsNative());
     return kPointerSize;
   }
 
-  bool IsRegistered() const;
+  bool IsRegistered();
 
   void RegisterNative(Thread* self, const void* native_method, bool is_fast)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -310,10 +340,10 @@
   void UnregisterNative(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static MemberOffset NativeMethodOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, native_method_);
+    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_);
   }
 
-  const void* GetNativeMethod() const {
+  const void* GetNativeMethod() {
     return reinterpret_cast<const void*>(GetField32(NativeMethodOffset(), false));
   }
 
@@ -323,47 +353,47 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
   }
 
-  uint32_t GetCoreSpillMask() const {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, core_spill_mask_), false);
+  uint32_t GetCoreSpillMask() {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_core_spill_mask_), false);
   }
 
   void SetCoreSpillMask(uint32_t core_spill_mask) {
     // Computed during compilation
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, core_spill_mask_), core_spill_mask, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_core_spill_mask_), core_spill_mask, false);
   }
 
-  uint32_t GetFpSpillMask() const {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, fp_spill_mask_), false);
+  uint32_t GetFpSpillMask() {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_fp_spill_mask_), false);
   }
 
   void SetFpSpillMask(uint32_t fp_spill_mask) {
     // Computed during compilation
-    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, fp_spill_mask_), fp_spill_mask, false);
+    SetField32(OFFSET_OF_OBJECT_MEMBER(ArtMethod, quick_fp_spill_mask_), fp_spill_mask, false);
   }
 
   // Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
   // conventions for a method of managed code. Returns false for Proxy methods.
-  bool IsRuntimeMethod() const;
+  bool IsRuntimeMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Is this a hand crafted method used for something like describing callee saves?
-  bool IsCalleeSaveMethod() const;
+  bool IsCalleeSaveMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsResolutionMethod() const;
+  bool IsResolutionMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsImtConflictMethod() const;
+  bool IsImtConflictMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uintptr_t NativePcOffset(const uintptr_t pc) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t NativePcOffset(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a native PC to a dex PC.
-  uint32_t ToDexPc(const uintptr_t pc) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t ToDexPc(const uintptr_t pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Converts a dex PC to a native PC.
-  uintptr_t ToNativePc(const uint32_t dex_pc) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uintptr_t ToNativePc(const uint32_t dex_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
-  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc, bool* has_no_move_exception) const
+  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc, bool* has_no_move_exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void SetClass(Class* java_lang_reflect_ArtMethod);
@@ -379,65 +409,83 @@
 
  protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  // The class we are a part of
-  Class* declaring_class_;
+  // The class we are a part of.
+  HeapReference<Class> declaring_class_;
 
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<ArtMethod>* dex_cache_resolved_methods_;
+  // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
+  HeapReference<ObjectArray<ArtMethod> > dex_cache_resolved_methods_;
 
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<Class>* dex_cache_resolved_types_;
+  // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
+  HeapReference<ObjectArray<Class> > dex_cache_resolved_types_;
 
-  // short cuts to declaring_class_->dex_cache_ member for fast compiled code access
-  ObjectArray<String>* dex_cache_strings_;
+  // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
+  HeapReference<ObjectArray<String> > dex_cache_strings_;
 
-  // Access flags; low 16 bits are defined by spec.
-  uint32_t access_flags_;
+  // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
+  // compiled code.
+  uint64_t entry_point_from_interpreter_;
 
-  // Offset to the CodeItem.
-  uint32_t code_item_offset_;
+  // Pointer to JNI function registered to this method, or a function to resolve the JNI function.
+  uint64_t entry_point_from_jni_;
 
-  // Architecture-dependent register spill mask
-  uint32_t core_spill_mask_;
+  // Method dispatch from portable compiled code invokes this pointer which may cause bridging into
+  // quick compiled code or the interpreter.
+  uint64_t entry_point_from_portable_compiled_code_;
 
-  // Compiled code associated with this method for callers from managed code.
-  // May be compiled managed code or a bridge for invoking a native method.
-  // TODO: Break apart this into portable and quick.
-  const void* entry_point_from_compiled_code_;
+  // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
+  // portable compiled code or the interpreter.
+  uint64_t entry_point_from_quick_compiled_code_;
 
-  // Called by the interpreter to execute this method.
-  EntryPointFromInterpreter* entry_point_from_interpreter_;
+  // Pointer to a data structure created by the compiler and used by the garbage collector to
+  // determine which registers hold live references to objects within the heap. Keyed by native PC
+  // offsets for the quick compiler and dex PCs for the portable compiler.
+  uint64_t gc_map_;
 
-  // Architecture-dependent register spill mask
-  uint32_t fp_spill_mask_;
+  // --- Quick compiler meta-data. ---
+  // TODO: merge and place in native heap, such as done with the code size.
 
-  // Total size in bytes of the frame
-  size_t frame_size_in_bytes_;
-
-  // Garbage collection map of native PC offsets (quick) or dex PCs (portable) to reference bitmaps.
-  const uint8_t* gc_map_;
-
-  // Mapping from native pc to dex pc
-  const uint32_t* mapping_table_;
-
-  // Index into method_ids of the dex file associated with this method
-  uint32_t method_dex_index_;
-
-  // For concrete virtual methods, this is the offset of the method in Class::vtable_.
-  //
-  // For abstract methods in an interface class, this is the offset of the method in
-  // "iftable_->Get(n)->GetMethodArray()".
-  //
-  // For static and direct methods this is the index in the direct methods table.
-  uint32_t method_index_;
-
-  // The target native method registered with this method
-  const void* native_method_;
+  // Pointer to a data structure created by the quick compiler to map between dex PCs and native
+  // PCs, and vice-versa.
+  uint64_t quick_mapping_table_;
 
   // When a register is promoted into a register, the spill mask holds which registers hold dex
   // registers. The first promoted register's corresponding dex register is vmap_table_[1], the Nth
   // is vmap_table_[N]. vmap_table_[0] holds the length of the table.
-  const uint16_t* vmap_table_;
+  uint64_t quick_vmap_table_;
+
+  // --- End of quick compiler meta-data. ---
+
+  // Access flags; low 16 bits are defined by spec.
+  uint32_t access_flags_;
+
+  /* Dex file fields. The defining dex file is available via declaring_class_->dex_cache_ */
+
+  // Offset to the CodeItem.
+  uint32_t dex_code_item_offset_;
+
+  // Index into method_ids of the dex file associated with this method.
+  uint32_t dex_method_index_;
+
+  /* End of dex file fields. */
+
+  // Entry within a dispatch table for this method. For static/direct methods the index is into
+  // the declaringClass.directMethods array, for virtual methods it is into the vtable, and for
+  // interface methods it is into the ifTable.
+  uint32_t method_index_;
+
+  // --- Quick compiler meta-data. ---
+  // TODO: merge and place in native heap, such as done with the code size.
+
+  // Bit map of spilled machine registers.
+  uint32_t quick_core_spill_mask_;
+
+  // Bit map of spilled floating point machine registers.
+  uint32_t quick_fp_spill_mask_;
+
+  // Fixed frame size for this method when executed.
+  uint32_t quick_frame_size_in_bytes_;
+
+  // --- End of quick compiler meta-data. ---
 
   static Class* java_lang_reflect_ArtMethod_;
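
The reordered ArtMethod layout stores entry points and table pointers as uint64_t rather than raw pointers, presumably so the C++ mirror layout (the one checked by ValidateFieldOrderOfJavaCppUnionClasses) comes out byte-for-byte identical on 32- and 64-bit hosts. A small sketch of that layout property:

// Standalone sketch (not ART code): fixed-width fields keep struct layout the
// same whatever the host pointer width; raw pointers would not.
#include <cstdint>
#include <iostream>

struct PointerWidthFields {   // size depends on the host: 8 bytes on ILP32, 16 on LP64
  const void* entry_point_from_jni;
  const void* entry_point_from_quick_compiled_code;
};

struct FixedWidthFields {     // always 16 bytes
  uint64_t entry_point_from_jni;
  uint64_t entry_point_from_quick_compiled_code;
};

static_assert(sizeof(FixedWidthFields) == 16, "layout independent of pointer width");

int main() {
  std::cout << sizeof(PointerWidthFields) << " vs " << sizeof(FixedWidthFields) << "\n";
}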
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index e0fab8c..a5f743b 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -22,6 +22,7 @@
 #include "art_field.h"
 #include "art_method.h"
 #include "class_loader.h"
+#include "common_throws.h"
 #include "dex_cache.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
@@ -32,63 +33,61 @@
 namespace art {
 namespace mirror {
 
-inline size_t Class::GetObjectSize() const {
+inline uint32_t Class::GetObjectSize() {
   DCHECK(!IsVariableSize()) << " class=" << PrettyTypeOf(this);
-  DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
   return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), false);
 }
 
-inline Class* Class::GetSuperClass() const {
+inline Class* Class::GetSuperClass() {
   // Can only get super class for loaded classes (hack for when runtime is
   // initializing)
   DCHECK(IsLoaded() || !Runtime::Current()->IsStarted()) << IsLoaded();
-  return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), false);
+  return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), false);
 }
 
-inline ClassLoader* Class::GetClassLoader() const {
-  return GetFieldObject<ClassLoader*>(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), false);
+inline ClassLoader* Class::GetClassLoader() {
+  return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), false);
 }
 
-inline DexCache* Class::GetDexCache() const {
-  return GetFieldObject<DexCache*>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), false);
+inline DexCache* Class::GetDexCache() {
+  return GetFieldObject<DexCache>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), false);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetDirectMethods() const {
+inline ObjectArray<ArtMethod>* Class::GetDirectMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(
+  return GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false);
 }
 
 inline void Class::SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod>*>(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false));
   DCHECK_NE(0, new_direct_methods->GetLength());
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_),
                  new_direct_methods, false);
 }
 
-inline ArtMethod* Class::GetDirectMethod(int32_t i) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::GetDirectMethod(int32_t i) {
   return GetDirectMethods()->Get(i);
 }
 
 inline void Class::SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* direct_methods =
-      GetFieldObject<ObjectArray<ArtMethod>*>(
+      GetFieldObject<ObjectArray<ArtMethod> >(
           OFFSET_OF_OBJECT_MEMBER(Class, direct_methods_), false);
   direct_methods->Set(i, f);
 }
 
 // Returns the number of static, private, and constructor methods.
-inline size_t Class::NumDirectMethods() const {
+inline uint32_t Class::NumDirectMethods() {
   return (GetDirectMethods() != NULL) ? GetDirectMethods()->GetLength() : 0;
 }
 
-inline ObjectArray<ArtMethod>* Class::GetVirtualMethods() const {
+inline ObjectArray<ArtMethod>* Class::GetVirtualMethods() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(
+  return GetFieldObject<ObjectArray<ArtMethod> >(
       OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_), false);
 }
 
@@ -100,18 +99,16 @@
                  new_virtual_methods, false);
 }
 
-inline size_t Class::NumVirtualMethods() const {
+inline uint32_t Class::NumVirtualMethods() {
   return (GetVirtualMethods() != NULL) ? GetVirtualMethods()->GetLength() : 0;
 }
 
-inline ArtMethod* Class::GetVirtualMethod(uint32_t i) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::GetVirtualMethod(uint32_t i) {
   DCHECK(IsResolved() || IsErroneous());
   return GetVirtualMethods()->Get(i);
 }
 
-inline ArtMethod* Class::GetVirtualMethodDuringLinking(uint32_t i) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::GetVirtualMethodDuringLinking(uint32_t i) {
   DCHECK(IsLoaded() || IsErroneous());
   return GetVirtualMethods()->Get(i);
 }
@@ -119,35 +116,34 @@
 inline void Class::SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ObjectArray<ArtMethod>* virtual_methods =
-      GetFieldObject<ObjectArray<ArtMethod>*>(
+      GetFieldObject<ObjectArray<ArtMethod> >(
           OFFSET_OF_OBJECT_MEMBER(Class, virtual_methods_), false);
   virtual_methods->Set(i, f);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetVTable() const {
+inline ObjectArray<ArtMethod>* Class::GetVTable() {
   DCHECK(IsResolved() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
+  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetVTableDuringLinking() const {
+inline ObjectArray<ArtMethod>* Class::GetVTableDuringLinking() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtMethod>*>(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
+  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), false);
 }
 
-inline void Class::SetVTable(ObjectArray<ArtMethod>* new_vtable)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline void Class::SetVTable(ObjectArray<ArtMethod>* new_vtable) {
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, vtable_), new_vtable, false);
 }
 
-inline ObjectArray<ArtMethod>* Class::GetImTable() const {
-  return GetFieldObject<ObjectArray<ArtMethod>*>(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), false);
+inline ObjectArray<ArtMethod>* Class::GetImTable() {
+  return GetFieldObject<ObjectArray<ArtMethod> >(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), false);
 }
 
 inline void Class::SetImTable(ObjectArray<ArtMethod>* new_imtable) {
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, imtable_), new_imtable, false);
 }
 
-inline bool Class::Implements(const Class* klass) const {
+inline bool Class::Implements(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(klass->IsInterface()) << PrettyClass(this);
   // All interfaces implemented directly and by our superclass, and
@@ -182,13 +178,13 @@
 // Don't forget about primitive types.
 //   Object[]         = int[] --> false
 //
-inline bool Class::IsArrayAssignableFromArray(const Class* src) const {
+inline bool Class::IsArrayAssignableFromArray(Class* src) {
   DCHECK(IsArrayClass())  << PrettyClass(this);
   DCHECK(src->IsArrayClass()) << PrettyClass(src);
   return GetComponentType()->IsAssignableFrom(src->GetComponentType());
 }
 
-inline bool Class::IsAssignableFromArray(const Class* src) const {
+inline bool Class::IsAssignableFromArray(Class* src) {
   DCHECK(!IsInterface()) << PrettyClass(this);  // handled first in IsAssignableFrom
   DCHECK(src->IsArrayClass()) << PrettyClass(src);
   if (!IsArrayClass()) {
@@ -202,10 +198,96 @@
   return IsArrayAssignableFromArray(src);
 }
 
-inline bool Class::IsSubClass(const Class* klass) const {
+template <bool throw_on_failure, bool use_referrers_cache>
+inline bool Class::ResolvedFieldAccessTest(Class* access_to, ArtField* field,
+                                           uint32_t field_idx, DexCache* dex_cache) {
+  DCHECK_EQ(use_referrers_cache, dex_cache == nullptr);
+  if (UNLIKELY(!this->CanAccess(access_to))) {
+    // The referrer class can't access the field's declaring class but may still be able
+    // to access the field if the FieldId specifies an accessible subclass of the declaring
+    // class rather than the declaring class itself.
+    DexCache* referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
+    uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetFieldId(field_idx).class_idx_;
+    // The referenced class has already been resolved with the field; get it from the dex cache.
+    Class* dex_access_to = referrer_dex_cache->GetResolvedType(class_idx);
+    DCHECK(dex_access_to != nullptr);
+    if (UNLIKELY(!this->CanAccess(dex_access_to))) {
+      if (throw_on_failure) {
+        ThrowIllegalAccessErrorClass(this, dex_access_to);
+      }
+      return false;
+    }
+    DCHECK_EQ(this->CanAccessMember(access_to, field->GetAccessFlags()),
+              this->CanAccessMember(dex_access_to, field->GetAccessFlags()));
+  }
+  if (LIKELY(this->CanAccessMember(access_to, field->GetAccessFlags()))) {
+    return true;
+  }
+  if (throw_on_failure) {
+    ThrowIllegalAccessErrorField(this, field);
+  }
+  return false;
+}
+
+template <bool throw_on_failure, bool use_referrers_cache, InvokeType throw_invoke_type>
+inline bool Class::ResolvedMethodAccessTest(Class* access_to, ArtMethod* method,
+                                            uint32_t method_idx, DexCache* dex_cache) {
+  COMPILE_ASSERT(throw_on_failure || throw_invoke_type == kStatic, non_default_throw_invoke_type);
+  DCHECK_EQ(use_referrers_cache, dex_cache == nullptr);
+  if (UNLIKELY(!this->CanAccess(access_to))) {
+    // The referrer class can't access the method's declaring class but may still be able
+    // to access the method if the MethodId specifies an accessible subclass of the declaring
+    // class rather than the declaring class itself.
+    DexCache* referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
+    uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetMethodId(method_idx).class_idx_;
+    // The referenced class has already been resolved with the method; get it from the dex cache.
+    Class* dex_access_to = referrer_dex_cache->GetResolvedType(class_idx);
+    DCHECK(dex_access_to != nullptr);
+    if (UNLIKELY(!this->CanAccess(dex_access_to))) {
+      if (throw_on_failure) {
+        ThrowIllegalAccessErrorClassForMethodDispatch(this, dex_access_to,
+                                                      method, throw_invoke_type);
+      }
+      return false;
+    }
+    DCHECK_EQ(this->CanAccessMember(access_to, method->GetAccessFlags()),
+              this->CanAccessMember(dex_access_to, method->GetAccessFlags()));
+  }
+  if (LIKELY(this->CanAccessMember(access_to, method->GetAccessFlags()))) {
+    return true;
+  }
+  if (throw_on_failure) {
+    ThrowIllegalAccessErrorMethod(this, method);
+  }
+  return false;
+}
+
+inline bool Class::CanAccessResolvedField(Class* access_to, ArtField* field,
+                                          DexCache* dex_cache, uint32_t field_idx) {
+  return ResolvedFieldAccessTest<false, false>(access_to, field, field_idx, dex_cache);
+}
+
+inline bool Class::CheckResolvedFieldAccess(Class* access_to, ArtField* field,
+                                            uint32_t field_idx) {
+  return ResolvedFieldAccessTest<true, true>(access_to, field, field_idx, nullptr);
+}
+
+inline bool Class::CanAccessResolvedMethod(Class* access_to, ArtMethod* method,
+                                           DexCache* dex_cache, uint32_t method_idx) {
+  return ResolvedMethodAccessTest<false, false, kStatic>(access_to, method, method_idx, dex_cache);
+}
+
+template <InvokeType throw_invoke_type>
+inline bool Class::CheckResolvedMethodAccess(Class* access_to, ArtMethod* method,
+                                             uint32_t method_idx) {
+  return ResolvedMethodAccessTest<true, true, throw_invoke_type>(access_to, method, method_idx,
+                                                                 nullptr);
+}
+
+inline bool Class::IsSubClass(Class* klass) {
   DCHECK(!IsInterface()) << PrettyClass(this);
   DCHECK(!IsArrayClass()) << PrettyClass(this);
-  const Class* current = this;
+  Class* current = this;
   do {
     if (current == klass) {
       return true;
@@ -215,7 +297,7 @@
   return false;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method) const {
+inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method) {
   Class* declaring_class = method->GetDeclaringClass();
   DCHECK(declaring_class != NULL) << PrettyClass(this);
   DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
@@ -230,21 +312,19 @@
   return NULL;
 }
 
-inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::FindVirtualMethodForVirtual(ArtMethod* method) {
   DCHECK(!method->GetDeclaringClass()->IsInterface() || method->IsMiranda());
   // The argument method may be from a super class.
   // Use the index to a potentially overridden one for this instance's class.
   return GetVTable()->Get(method->GetMethodIndex());
 }
 
-inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtMethod* Class::FindVirtualMethodForSuper(ArtMethod* method) {
   DCHECK(!method->GetDeclaringClass()->IsInterface());
   return GetSuperClass()->GetVTable()->Get(method->GetMethodIndex());
 }
 
-inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method) const {
+inline ArtMethod* Class::FindVirtualMethodForVirtualOrInterface(ArtMethod* method) {
   if (method->IsDirect()) {
     return method;
   }
@@ -254,11 +334,11 @@
   return FindVirtualMethodForVirtual(method);
 }
 
-inline IfTable* Class::GetIfTable() const {
-  return GetFieldObject<IfTable*>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), false);
+inline IfTable* Class::GetIfTable() {
+  return GetFieldObject<IfTable>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), false);
 }
 
-inline int32_t Class::GetIfTableCount() const {
+inline int32_t Class::GetIfTableCount() {
   IfTable* iftable = GetIfTable();
   if (iftable == NULL) {
     return 0;
@@ -270,59 +350,58 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), new_iftable, false);
 }
 
-inline ObjectArray<ArtField>* Class::GetIFields() const {
+inline ObjectArray<ArtField>* Class::GetIFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
+  return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
 }
 
 inline void Class::SetIFields(ObjectArray<ArtField>* new_ifields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>*>(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false));
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields, false);
 }
 
-inline ObjectArray<ArtField>* Class::GetSFields() const {
+inline ObjectArray<ArtField>* Class::GetSFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField>*>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
+  return GetFieldObject<ObjectArray<ArtField> >(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
 }
 
 inline void Class::SetSFields(ObjectArray<ArtField>* new_sfields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>*>(
+  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false));
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields, false);
 }
 
-inline size_t Class::NumStaticFields() const {
+inline uint32_t Class::NumStaticFields() {
   return (GetSFields() != NULL) ? GetSFields()->GetLength() : 0;
 }
 
-inline ArtField* Class::GetStaticField(uint32_t i) const  // TODO: uint16_t
+inline ArtField* Class::GetStaticField(uint32_t i)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return GetSFields()->Get(i);
 }
 
 inline void Class::SetStaticField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* sfields= GetFieldObject<ObjectArray<ArtField>*>(
+  ObjectArray<ArtField>* sfields = GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, sfields_), false);
   sfields->Set(i, f);
 }
 
-inline size_t Class::NumInstanceFields() const {
+inline uint32_t Class::NumInstanceFields() {
   return (GetIFields() != NULL) ? GetIFields()->GetLength() : 0;
 }
 
-inline ArtField* Class::GetInstanceField(uint32_t i) const  // TODO: uint16_t
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline ArtField* Class::GetInstanceField(uint32_t i) {  // TODO: uint16_t
   DCHECK_NE(NumInstanceFields(), 0U);
   return GetIFields()->Get(i);
 }
 
 inline void Class::SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* ifields= GetFieldObject<ObjectArray<ArtField>*>(
+  ObjectArray<ArtField>* ifields = GetFieldObject<ObjectArray<ArtField> >(
       OFFSET_OF_OBJECT_MEMBER(Class, ifields_), false);
   ifields->Set(i, f);
 }
@@ -332,7 +411,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), klass, false);
 }
 
-inline uint32_t Class::GetAccessFlags() const {
+inline uint32_t Class::GetAccessFlags() {
   // Check that the class is loaded or that this is java.lang.String, which has a
   // circularity issue during loading of the names of its members.
   DCHECK(IsLoaded() || IsErroneous() ||
@@ -342,8 +421,8 @@
   return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_), false);
 }
 
-inline String* Class::GetName() const {
-  return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(Class, name_), false);
+inline String* Class::GetName() {
+  return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Class, name_), false);
 }
 inline void Class::SetName(String* name) {
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, name_), name, false);
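
The ResolvedFieldAccessTest/ResolvedMethodAccessTest templates introduced above share a two-step check: if the referrer cannot access the declaring class, retry against the class named by the FieldId/MethodId (which may be an accessible subclass), and only then check the member's access flags; the throw_on_failure parameter decides whether a failure throws or simply returns false. Below is a self-contained model of the non-throwing field path, using hypothetical MiniClass/MiniField types rather than the real mirror classes.

#include <iostream>

struct MiniClass {
  bool is_public;
  bool same_package_as_referrer;
};

struct MiniField {
  bool is_public;
  const MiniClass* declaring_class;
  const MiniClass* field_id_class;  // Class the FieldId names; may be an accessible subclass.
};

// Counterpart of CanAccess: public classes, or classes in the referrer's package.
bool CanAccessClass(const MiniClass& c) {
  return c.is_public || c.same_package_as_referrer;
}

// Simplified CanAccessMember: public members, or members of a same-package class.
bool CanAccessMember(const MiniClass& owner, bool member_is_public) {
  return member_is_public || owner.same_package_as_referrer;
}

// Rough counterpart of ResolvedFieldAccessTest<false, false>: report failure
// instead of throwing IllegalAccessError.
bool CanAccessResolvedField(const MiniField& f) {
  if (!CanAccessClass(*f.declaring_class)) {
    // Fall back to the class the FieldId names, which may be accessible even
    // when the declaring class is not.
    if (!CanAccessClass(*f.field_id_class)) {
      return false;
    }
  }
  return CanAccessMember(*f.declaring_class, f.is_public);
}

int main() {
  const MiniClass hidden{false, false};   // package-private class in another package
  const MiniClass visible{true, false};   // public subclass named by the FieldId
  const MiniField field{true, &hidden, &visible};
  std::cout << std::boolalpha << CanAccessResolvedField(field) << std::endl;  // true
  return 0;
}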
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index bd965fa..8051c9b 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -125,7 +125,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache, false);
 }
 
-void Class::SetClassSize(size_t new_class_size) {
+void Class::SetClassSize(uint32_t new_class_size) {
   if (kIsDebugBuild && (new_class_size < GetClassSize())) {
     DumpClass(LOG(ERROR), kDumpClassFullDetail);
     CHECK_GE(new_class_size, GetClassSize()) << " class=" << PrettyTypeOf(this);
@@ -177,7 +177,7 @@
   return name;
 }
 
-void Class::DumpClass(std::ostream& os, int flags) const {
+void Class::DumpClass(std::ostream& os, int flags) {
   if ((flags & kDumpClassFullDetail) == 0) {
     os << PrettyClass(this);
     if ((flags & kDumpClassClassLoader) != 0) {
@@ -281,9 +281,9 @@
   }
 }
 
-bool Class::IsInSamePackage(const Class* that) const {
-  const Class* klass1 = this;
-  const Class* klass2 = that;
+bool Class::IsInSamePackage(Class* that) {
+  Class* klass1 = this;
+  Class* klass2 = that;
   if (klass1 == klass2) {
     return true;
   }
@@ -307,7 +307,7 @@
                          ClassHelper(klass2).GetDescriptor());
 }
 
-bool Class::IsClassClass() const {
+bool Class::IsClassClass() {
   Class* java_lang_Class = GetClass()->GetClass();
   return this == java_lang_Class;
 }
@@ -316,17 +316,17 @@
   return this == String::GetJavaLangString();
 }
 
-bool Class::IsThrowableClass() const {
+bool Class::IsThrowableClass() {
   return WellKnownClasses::ToClass(WellKnownClasses::java_lang_Throwable)->IsAssignableFrom(this);
 }
 
-bool Class::IsArtFieldClass() const {
+bool Class::IsArtFieldClass() {
   Class* java_lang_Class = GetClass();
   Class* java_lang_reflect_ArtField = java_lang_Class->GetInstanceField(0)->GetClass();
   return this == java_lang_reflect_ArtField;
 }
 
-bool Class::IsArtMethodClass() const {
+bool Class::IsArtMethodClass() {
   return this == ArtMethod::GetJavaLangReflectArtMethod();
 }
 
@@ -334,7 +334,7 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), new_class_loader, false);
 }
 
-ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) const {
+ArtMethod* Class::FindInterfaceMethod(const StringPiece& name, const Signature& signature) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(name, signature);
   if (method != NULL) {
@@ -352,7 +352,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
+ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   // Check the current class before checking the interfaces.
   ArtMethod* method = FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
   if (method != NULL) {
@@ -370,7 +370,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
@@ -382,7 +382,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) const {
+ArtMethod* Class::FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
@@ -394,7 +394,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
+ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   if (GetDexCache() == dex_cache) {
     for (size_t i = 0; i < NumDirectMethods(); ++i) {
       ArtMethod* method = GetDirectMethod(i);
@@ -406,8 +406,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name, const StringPiece& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -416,8 +416,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(const StringPiece& name, const Signature& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -426,8 +426,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx);
     if (method != NULL) {
       return method;
@@ -436,7 +436,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const {
+ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
@@ -449,7 +449,7 @@
 }
 
 ArtMethod* Class::FindDeclaredVirtualMethod(const StringPiece& name,
-                                            const Signature& signature) const {
+                                            const Signature& signature) {
   MethodHelper mh;
   for (size_t i = 0; i < NumVirtualMethods(); ++i) {
     ArtMethod* method = GetVirtualMethod(i);
@@ -461,7 +461,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
+ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
   if (GetDexCache() == dex_cache) {
     for (size_t i = 0; i < NumVirtualMethods(); ++i) {
       ArtMethod* method = GetVirtualMethod(i);
@@ -473,8 +473,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const StringPiece& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -483,8 +483,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name, const Signature& signature) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature);
     if (method != NULL) {
       return method;
@@ -493,8 +493,8 @@
   return NULL;
 }
 
-ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const {
-  for (const Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) {
+  for (Class* klass = this; klass != NULL; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx);
     if (method != NULL) {
       return method;
@@ -503,7 +503,7 @@
   return NULL;
 }
 
-ArtMethod* Class::FindClassInitializer() const {
+ArtMethod* Class::FindClassInitializer() {
   for (size_t i = 0; i < NumDirectMethods(); ++i) {
     ArtMethod* method = GetDirectMethod(i);
     if (method->IsConstructor() && method->IsStatic()) {
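
The FindDirectMethod/FindVirtualMethod overloads above all follow the same pattern, now iterating with a non-const Class*: search the methods declared on this class, then repeat on each superclass via GetSuperClass() until the chain ends. A small standalone sketch of that walk, with hypothetical types rather than the ART implementation:

#include <cstring>
#include <iostream>
#include <vector>

struct MiniMethod { const char* name; };

struct MiniClass {
  const MiniClass* super_class;
  std::vector<MiniMethod> declared;

  // Counterpart of FindDeclaredVirtualMethod: search only this class's own methods.
  const MiniMethod* FindDeclared(const char* name) const {
    for (const MiniMethod& m : declared) {
      if (std::strcmp(m.name, name) == 0) {
        return &m;
      }
    }
    return nullptr;
  }
};

// Counterpart of FindVirtualMethod: walk this class and then every superclass.
const MiniMethod* FindMethod(const MiniClass* klass, const char* name) {
  for (const MiniClass* k = klass; k != nullptr; k = k->super_class) {
    if (const MiniMethod* m = k->FindDeclared(name)) {
      return m;
    }
  }
  return nullptr;
}

int main() {
  const MiniClass object{nullptr, {{"hashCode"}}};
  const MiniClass string{&object, {{"charAt"}}};
  std::cout << (FindMethod(&string, "hashCode") != nullptr) << std::endl;  // prints 1
  return 0;
}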
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 9aa23d9..cbec476 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
 #include "gc/heap.h"
+#include "invoke_type.h"
 #include "modifiers.h"
 #include "object.h"
 #include "primitive.h"
@@ -119,7 +120,7 @@
     kStatusMax = 10,
   };
 
-  Status GetStatus() const {
+  Status GetStatus() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Status), sizeof(uint32_t));
     return static_cast<Status>(GetField32(OFFSET_OF_OBJECT_MEMBER(Class, status_), true));
   }
@@ -131,107 +132,107 @@
   }
 
   // Returns true if the class has failed to link.
-  bool IsErroneous() const {
+  bool IsErroneous() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() == kStatusError;
   }
 
   // Returns true if the class's dex type indexes have been loaded (status is at least kStatusIdx).
-  bool IsIdxLoaded() const {
+  bool IsIdxLoaded() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusIdx;
   }
 
   // Returns true if the class has been loaded.
-  bool IsLoaded() const {
+  bool IsLoaded() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusLoaded;
   }
 
   // Returns true if the class has been linked.
-  bool IsResolved() const {
+  bool IsResolved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusResolved;
   }
 
   // Returns true if the class was compile-time verified.
-  bool IsCompileTimeVerified() const {
+  bool IsCompileTimeVerified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusRetryVerificationAtRuntime;
   }
 
   // Returns true if the class has been verified.
-  bool IsVerified() const {
+  bool IsVerified() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusVerified;
   }
 
   // Returns true if the class is initializing.
-  bool IsInitializing() const {
+  bool IsInitializing() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() >= kStatusInitializing;
   }
 
   // Returns true if the class is initialized.
-  bool IsInitialized() const {
+  bool IsInitialized() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStatus() == kStatusInitialized;
   }
 
-  uint32_t GetAccessFlags() const;
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetAccessFlags(uint32_t new_access_flags) {
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_), new_access_flags, false);
   }
 
   // Returns true if the class is an interface.
-  bool IsInterface() const {
+  bool IsInterface() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccInterface) != 0;
   }
 
   // Returns true if the class is declared public.
-  bool IsPublic() const {
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccPublic) != 0;
   }
 
   // Returns true if the class is declared final.
-  bool IsFinal() const {
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccFinal) != 0;
   }
 
-  bool IsFinalizable() const {
+  bool IsFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsFinalizable) != 0;
   }
 
-  void SetFinalizable() {
+  void SetFinalizable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     uint32_t flags = GetField32(OFFSET_OF_OBJECT_MEMBER(Class, access_flags_), false);
     SetAccessFlags(flags | kAccClassIsFinalizable);
   }
 
   // Returns true if the class is abstract.
-  bool IsAbstract() const {
+  bool IsAbstract() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAbstract) != 0;
   }
 
   // Returns true if the class is an annotation.
-  bool IsAnnotation() const {
+  bool IsAnnotation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccAnnotation) != 0;
   }
 
   // Returns true if the class is synthetic.
-  bool IsSynthetic() const {
+  bool IsSynthetic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccSynthetic) != 0;
   }
 
-  bool IsReferenceClass() const {
+  bool IsReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsReference) != 0;
   }
 
-  bool IsWeakReferenceClass() const {
+  bool IsWeakReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsWeakReference) != 0;
   }
 
-  bool IsSoftReferenceClass() const {
+  bool IsSoftReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccReferenceFlagsMask) == kAccClassIsReference;
   }
 
-  bool IsFinalizerReferenceClass() const {
+  bool IsFinalizerReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsFinalizerReference) != 0;
   }
 
-  bool IsPhantomReferenceClass() const {
+  bool IsPhantomReferenceClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccClassIsPhantomReference) != 0;
   }
 
@@ -240,7 +241,7 @@
   // For array classes, where all the classes are final due to there being no sub-classes, an
   // Object[] may be assigned to by a String[] but a String[] may not be assigned to by other
   // types as the component is final.
-  bool CannotBeAssignedFromOtherTypes() const {
+  bool CannotBeAssignedFromOtherTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (!IsArrayClass()) {
       return IsFinal();
     } else {
@@ -253,12 +254,12 @@
     }
   }
 
-  String* GetName() const;  // Returns the cached name.
+  String* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Returns the cached name.
   void SetName(String* name) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);  // Sets the cached name.
   // Computes the name, then sets the cached value.
   String* ComputeName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsProxyClass() const {
+  bool IsProxyClass() {
     // Read access flags without using the getter, as whether something is a proxy can be
     // checked in any loaded state.
     // TODO: switch to a check if the super class is java.lang.reflect.Proxy?
@@ -266,91 +267,91 @@
     return (access_flags & kAccClassIsProxy) != 0;
   }
 
-  Primitive::Type GetPrimitiveType() const {
+  Primitive::Type GetPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
     return static_cast<Primitive::Type>(
         GetField32(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), false));
   }
 
-  void SetPrimitiveType(Primitive::Type new_type) {
+  void SetPrimitiveType(Primitive::Type new_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t));
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), new_type, false);
   }
 
   // Returns true if the class is a primitive type.
-  bool IsPrimitive() const {
+  bool IsPrimitive() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() != Primitive::kPrimNot;
   }
 
-  bool IsPrimitiveBoolean() const {
+  bool IsPrimitiveBoolean() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimBoolean;
   }
 
-  bool IsPrimitiveByte() const {
+  bool IsPrimitiveByte() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimByte;
   }
 
-  bool IsPrimitiveChar() const {
+  bool IsPrimitiveChar() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimChar;
   }
 
-  bool IsPrimitiveShort() const {
+  bool IsPrimitiveShort() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimShort;
   }
 
-  bool IsPrimitiveInt() const {
+  bool IsPrimitiveInt() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimInt;
   }
 
-  bool IsPrimitiveLong() const {
+  bool IsPrimitiveLong() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimLong;
   }
 
-  bool IsPrimitiveFloat() const {
+  bool IsPrimitiveFloat() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimFloat;
   }
 
-  bool IsPrimitiveDouble() const {
+  bool IsPrimitiveDouble() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimDouble;
   }
 
-  bool IsPrimitiveVoid() const {
+  bool IsPrimitiveVoid() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetPrimitiveType() == Primitive::kPrimVoid;
   }
 
-  bool IsPrimitiveArray() const {
+  bool IsPrimitiveArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return IsArrayClass() && GetComponentType()->IsPrimitive();
   }
 
   // Depth of class from java.lang.Object
-  size_t Depth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    size_t depth = 0;
+  uint32_t Depth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uint32_t depth = 0;
     for (Class* klass = this; klass->GetSuperClass() != NULL; klass = klass->GetSuperClass()) {
       depth++;
     }
     return depth;
   }
 
-  bool IsArrayClass() const {
+  bool IsArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetComponentType() != NULL;
   }
 
-  bool IsClassClass() const;
+  bool IsClassClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsStringClass() const;
 
-  bool IsThrowableClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArtFieldClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArtFieldClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArtMethodClass() const;
+  bool IsArtMethodClass();
 
   static MemberOffset ComponentTypeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Class, component_type_);
   }
 
-  Class* GetComponentType() const {
-    return GetFieldObject<Class*>(ComponentTypeOffset(), false);
+  Class* GetComponentType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<Class>(ComponentTypeOffset(), false);
   }
 
   void SetComponentType(Class* new_component_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -359,18 +360,18 @@
     SetFieldObject(ComponentTypeOffset(), new_component_type, false);
   }
 
-  size_t GetComponentSize() const {
+  size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return Primitive::ComponentSize(GetComponentType()->GetPrimitiveType());
   }
 
-  bool IsObjectClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return !IsPrimitive() && GetSuperClass() == NULL;
   }
-  bool IsInstantiable() const {
+  bool IsInstantiable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return (!IsPrimitive() && !IsInterface() && !IsAbstract()) || ((IsAbstract()) && IsArrayClass());
   }
 
-  bool IsObjectArrayClass() const {
+  bool IsObjectArrayClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetComponentType() != NULL && !GetComponentType()->IsPrimitive();
   }
 
@@ -384,48 +385,44 @@
   Object* AllocNonMovableObject(Thread* self)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsVariableSize() const {
+  bool IsVariableSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Classes and arrays vary in size, and so the object_size_ field cannot
     // be used to get their instance size
     return IsClassClass() || IsArrayClass();
   }
 
-  size_t SizeOf() const {
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
+  uint32_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), false);
   }
 
-  size_t GetClassSize() const {
-    DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
+  uint32_t GetClassSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), false);
   }
 
-  void SetClassSize(size_t new_class_size)
+  void SetClassSize(uint32_t new_class_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t GetObjectSize() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t GetObjectSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetObjectSize(size_t new_object_size) {
+  void SetObjectSize(uint32_t new_object_size) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!IsVariableSize());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return SetField32(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size, false);
   }
 
   // Returns true if this class is in the same package as that class.
-  bool IsInSamePackage(const Class* that) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsInSamePackage(Class* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static bool IsInSamePackage(const StringPiece& descriptor1, const StringPiece& descriptor2);
 
   // Returns true if this class can access that class.
-  bool CanAccess(Class* that) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool CanAccess(Class* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return that->IsPublic() || this->IsInSamePackage(that);
   }
 
   // Can this class access a member in the provided class with the provided member access flags?
   // Note that access to the class isn't checked in case the declaring class is protected and the
   // method has been exposed by a public sub-class
-  bool CanAccessMember(Class* access_to, uint32_t member_flags) const
+  bool CanAccessMember(Class* access_to, uint32_t member_flags)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Classes can access all of their own members
     if (this == access_to) {
@@ -449,15 +446,35 @@
     return this->IsInSamePackage(access_to);
   }
 
-  bool IsSubClass(const Class* klass) const
+  // Can this class access a resolved field?
+  // Note that access to the field's class is checked and this may require looking up the class
+  // referenced by the FieldId in the DexFile in case the declaring class is inaccessible.
+  bool CanAccessResolvedField(Class* access_to, ArtField* field,
+                              DexCache* dex_cache, uint32_t field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CheckResolvedFieldAccess(Class* access_to, ArtField* field,
+                                uint32_t field_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Can this class access a resolved method?
+  // Note that access to the method's class is checked and this may require looking up the class
+  // referenced by the MethodId in the DexFile in case the declaring class is inaccessible.
+  bool CanAccessResolvedMethod(Class* access_to, ArtMethod* resolved_method,
+                               DexCache* dex_cache, uint32_t method_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template <InvokeType throw_invoke_type>
+  bool CheckResolvedMethodAccess(Class* access_to, ArtMethod* resolved_method,
+                                 uint32_t method_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsSubClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Can src be assigned to this class? For example, String can be assigned to Object (by an
   // upcast); however, an Object cannot be assigned to a String, as a potentially exception-throwing
   // downcast would be necessary. Similarly for interfaces, a class that implements (or an interface
   // that extends) another can be assigned to its parent, but not vice-versa. All Classes may assign
   // to themselves. Classes for primitive types may not assign to each other.
-  inline bool IsAssignableFrom(const Class* src) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  inline bool IsAssignableFrom(Class* src) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(src != NULL);
     if (this == src) {
       // Can always assign to things of the same type.
@@ -474,18 +491,18 @@
     }
   }
 
-  Class* GetSuperClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Class* GetSuperClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSuperClass(Class *new_super_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // super class is assigned once, except during class linker initialization
-    Class* old_super_class = GetFieldObject<Class*>(
-        OFFSET_OF_OBJECT_MEMBER(Class, super_class_), false);
-    DCHECK(old_super_class == NULL || old_super_class == new_super_class);
-    DCHECK(new_super_class != NULL);
+    // Super class is assigned once, except during class linker initialization.
+    Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_),
+                                                   false);
+    DCHECK(old_super_class == nullptr || old_super_class == new_super_class);
+    DCHECK(new_super_class != nullptr);
     SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), new_super_class, false);
   }
 
-  bool HasSuperClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HasSuperClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetSuperClass() != NULL;
   }
 
@@ -493,7 +510,7 @@
     return MemberOffset(OFFSETOF_MEMBER(Class, super_class_));
   }
 
-  ClassLoader* GetClassLoader() const;
+  ClassLoader* GetClassLoader() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetClassLoader(ClassLoader* new_cl) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -507,46 +524,43 @@
     kDumpClassInitialized = (1 << 2),
   };
 
-  void DumpClass(std::ostream& os, int flags) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void DumpClass(std::ostream& os, int flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  DexCache* GetDexCache() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDexCache(DexCache* new_dex_cache) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetDirectMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ObjectArray<ArtMethod>* GetDirectMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDirectMethods(ObjectArray<ArtMethod>* new_direct_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetDirectMethod(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetDirectMethod(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetDirectMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of static, private, and constructor methods.
-  size_t NumDirectMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumDirectMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVirtualMethods() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ObjectArray<ArtMethod>* GetVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVirtualMethods(ObjectArray<ArtMethod>* new_virtual_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of non-inherited virtual methods.
-  size_t NumVirtualMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumVirtualMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetVirtualMethod(uint32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetVirtualMethod(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* GetVirtualMethodDuringLinking(uint32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetVirtualMethodDuringLinking(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVirtualMethod(uint32_t i, ArtMethod* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVTable() const;
+  ObjectArray<ArtMethod>* GetVTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetVTableDuringLinking() const;
+  ObjectArray<ArtMethod>* GetVTableDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetVTable(ObjectArray<ArtMethod>* new_vtable)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -555,7 +569,7 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, vtable_);
   }
 
-  ObjectArray<ArtMethod>* GetImTable() const;
+  ObjectArray<ArtMethod>* GetImTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetImTable(ObjectArray<ArtMethod>* new_imtable)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -566,105 +580,102 @@
 
   // Given a method implemented by this class but potentially from a super class, return the
   // specific implementation method for this class.
-  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForVirtual(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Given a method implemented by this class' super class, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForSuper(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Given a method implemented by this class, but potentially from a
   // super class or interface, return the specific implementation
   // method for this class.
-  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForInterface(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE;
 
-  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method) const
+  ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature) const
+  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature) const
+  ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx) const
+  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* FindClassInitializer() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* FindClassInitializer() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetIfTableCount() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  int32_t GetIfTableCount() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  IfTable* GetIfTable() const;
+  IfTable* GetIfTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetIfTable(IfTable* new_iftable) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get instance fields of the class (See also GetSFields).
-  ObjectArray<ArtField>* GetIFields() const;
+  ObjectArray<ArtField>* GetIFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetIFields(ObjectArray<ArtField>* new_ifields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t NumInstanceFields() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtField* GetInstanceField(uint32_t i) const  // TODO: uint16_t
+  ArtField* GetInstanceField(uint32_t i)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of instance fields containing reference types.
-  size_t NumReferenceInstanceFields() const {
+  uint32_t NumReferenceInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_instance_fields_), false);
   }
 
-  size_t NumReferenceInstanceFieldsDuringLinking() const {
+  uint32_t NumReferenceInstanceFieldsDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsLoaded() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_instance_fields_), false);
   }
 
-  void SetNumReferenceInstanceFields(size_t new_num) {
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
+  void SetNumReferenceInstanceFields(uint32_t new_num) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_instance_fields_), new_num, false);
   }
 
-  uint32_t GetReferenceInstanceOffsets() const {
+  uint32_t GetReferenceInstanceOffsets() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, reference_instance_offsets_), false);
   }
@@ -678,39 +689,39 @@
   }
 
   // Returns the number of static fields containing reference types.
-  size_t NumReferenceStaticFields() const {
+  uint32_t NumReferenceStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), false);
   }
 
-  size_t NumReferenceStaticFieldsDuringLinking() const {
+  uint32_t NumReferenceStaticFieldsDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsLoaded() || IsErroneous());
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), false);
   }
 
-  void SetNumReferenceStaticFields(size_t new_num) {
-    DCHECK_EQ(sizeof(size_t), sizeof(int32_t));
+  void SetNumReferenceStaticFields(uint32_t new_num) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), new_num, false);
   }
 
   // Gets the static fields of the class.
-  ObjectArray<ArtField>* GetSFields() const;
+  ObjectArray<ArtField>* GetSFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSFields(ObjectArray<ArtField>* new_sfields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t NumStaticFields() const;
+  uint32_t NumStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtField* GetStaticField(uint32_t i) const;  // TODO: uint16_t
+  // TODO: uint16_t
+  ArtField* GetStaticField(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetStaticField(uint32_t i, ArtField* f);  // TODO: uint16_t
+  // TODO: uint16_t
+  void SetStaticField(uint32_t i, ArtField* f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t GetReferenceStaticOffsets() const {
+  uint32_t GetReferenceStaticOffsets() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, reference_static_offsets_), false);
   }
 
-  void SetReferenceStaticOffsets(uint32_t new_reference_offsets);
+  void SetReferenceStaticOffsets(uint32_t new_reference_offsets)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find a static or instance field using the JLS resolution order
   ArtField* FindField(const StringPiece& name, const StringPiece& type)
@@ -746,33 +757,33 @@
   ArtField* FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  pid_t GetClinitThreadId() const {
+  pid_t GetClinitThreadId() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsIdxLoaded() || IsErroneous());
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), false);
   }
 
-  void SetClinitThreadId(pid_t new_clinit_thread_id) {
+  void SetClinitThreadId(pid_t new_clinit_thread_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_), new_clinit_thread_id, false);
   }
 
-  Class* GetVerifyErrorClass() const {
+  Class* GetVerifyErrorClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // DCHECK(IsErroneous());
-    return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), false);
+    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_class_), false);
   }
 
-  uint16_t GetDexClassDefIndex() const {
+  uint16_t GetDexClassDefIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), false);
   }
 
-  void SetDexClassDefIndex(uint16_t class_def_idx) {
+  void SetDexClassDefIndex(uint16_t class_def_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_), class_def_idx, false);
   }
 
-  uint16_t GetDexTypeIndex() const {
+  uint16_t GetDexTypeIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), false);
   }
 
-  void SetDexTypeIndex(uint16_t type_idx) {
+  void SetDexTypeIndex(uint16_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_type_idx_), type_idx, false);
   }
 
@@ -793,28 +804,34 @@
  private:
   void SetVerifyErrorClass(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Implements(const Class* klass) const
+  template <bool throw_on_failure, bool use_referrers_cache>
+  bool ResolvedFieldAccessTest(Class* access_to, ArtField* field,
+                               uint32_t field_idx, DexCache* dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsArrayAssignableFromArray(const Class* klass) const
+  template <bool throw_on_failure, bool use_referrers_cache, InvokeType throw_invoke_type>
+  bool ResolvedMethodAccessTest(Class* access_to, ArtMethod* resolved_method,
+                                uint32_t method_idx, DexCache* dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsAssignableFromArray(const Class* klass) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool Implements(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArrayAssignableFromArray(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsAssignableFromArray(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // defining class loader, or NULL for the "bootstrap" system loader
-  ClassLoader* class_loader_;
+  HeapReference<ClassLoader> class_loader_;
 
   // For array classes, the component class object for instanceof/checkcast
   // (for String[][][], this will be String[][]). NULL for non-array classes.
-  Class* component_type_;
+  HeapReference<Class> component_type_;
 
   // DexCache of resolved constant pool entries (will be NULL for classes generated by the
   // runtime such as arrays and primitive classes).
-  DexCache* dex_cache_;
+  HeapReference<DexCache> dex_cache_;
 
   // static, private, and <init> methods
-  ObjectArray<ArtMethod>* direct_methods_;
+  HeapReference<ObjectArray<ArtMethod> > direct_methods_;
 
   // instance fields
   //
@@ -826,7 +843,7 @@
   // All instance fields that refer to objects are guaranteed to be at
   // the beginning of the field list.  num_reference_instance_fields_
   // specifies the number of reference fields.
-  ObjectArray<ArtField>* ifields_;
+  HeapReference<ObjectArray<ArtField> > ifields_;
 
   // The interface table (iftable_) contains pairs of an interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
@@ -839,38 +856,38 @@
   //
   // For every interface a concrete class implements, we create an array of the concrete vtable_
   // methods for the methods in the interface.
-  IfTable* iftable_;
+  HeapReference<IfTable> iftable_;
 
   // Interface method table (imt), for quick "invoke-interface".
-  ObjectArray<ArtMethod>* imtable_;
+  HeapReference<ObjectArray<ArtMethod> > imtable_;
 
-  // descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName
-  String* name_;
+  // Descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName
+  HeapReference<String> name_;
 
   // Static fields
-  ObjectArray<ArtField>* sfields_;
+  HeapReference<ObjectArray<ArtField> > sfields_;
 
   // The superclass, or NULL if this is java.lang.Object, an interface or primitive type.
-  Class* super_class_;
+  HeapReference<Class> super_class_;
 
   // If class verify fails, we must return same error on subsequent tries.
-  Class* verify_error_class_;
+  HeapReference<Class> verify_error_class_;
 
   // Virtual methods defined in this class; invoked through vtable.
-  ObjectArray<ArtMethod>* virtual_methods_;
+  HeapReference<ObjectArray<ArtMethod> > virtual_methods_;
 
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
   // virtual_methods_ for miranda methods.
-  ObjectArray<ArtMethod>* vtable_;
+  HeapReference<ObjectArray<ArtMethod> > vtable_;
 
   // Access flags; low 16 bits are defined by VM spec.
   uint32_t access_flags_;
 
   // Total size of the Class instance; used when allocating storage on gc heap.
   // See also object_size_.
-  size_t class_size_;
+  uint32_t class_size_;
 
   // Tid used to check for recursive <clinit> invocation.
   pid_t clinit_thread_id_;
@@ -884,15 +901,15 @@
   int32_t dex_type_idx_;
 
   // Number of instance fields that are object refs.
-  size_t num_reference_instance_fields_;
+  uint32_t num_reference_instance_fields_;
 
   // Number of static fields that are object refs.
-  size_t num_reference_static_fields_;
+  uint32_t num_reference_static_fields_;
 
   // Total object size; used when allocating storage on gc heap.
   // (For interfaces and abstract classes this will be zero.)
   // See also class_size_.
-  size_t object_size_;
+  uint32_t object_size_;
 
   // Primitive type value, or Primitive::kPrimNot (0); set for generated primitive classes.
   Primitive::Type primitive_type_;
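
The fields above move from raw pointers and size_t to HeapReference<> and uint32_t so that the managed layout of Class is the same on 32-bit and 64-bit builds. A minimal, standalone C++ sketch of that property, using an illustrative Ref32 stand-in and toy field names rather than the real mirror::Class members:

#include <cstdint>

// Compressed heap reference stand-in: always 4 bytes, whatever the pointer width.
template <typename T>
struct Ref32 { uint32_t reference_; };

struct ToyClass;

// PACKED(4)-style layout, mirroring the MANAGED macro used by the mirror classes.
struct __attribute__((packed, aligned(4))) ToyMirrorClass {
  Ref32<ToyClass> super_class_;  // Was ToyClass*: 4 or 8 bytes depending on the ABI.
  uint32_t class_size_;          // Was size_t: 4 or 8 bytes depending on the ABI.
  uint32_t object_size_;
};

// The size no longer depends on the pointer width of the compiling toolchain.
static_assert(sizeof(ToyMirrorClass) == 12, "fields are fixed-width on every ABI");

int main() { return 0; }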
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 415cb67..69accf5 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -32,9 +32,9 @@
 class MANAGED ClassLoader : public Object {
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  Object* packages_;
-  ClassLoader* parent_;
-  Object* proxyCache_;
+  HeapReference<Object> packages_;
+  HeapReference<ClassLoader> parent_;
+  HeapReference<Object> proxyCache_;
 
   friend struct art::ClassLoaderOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(ClassLoader);
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index da26be5..f59c3a2 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -22,7 +22,7 @@
 namespace art {
 namespace mirror {
 
-inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx) const
+inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ArtMethod* method = GetResolvedMethods()->Get(method_idx);
   // Hide resolution trampoline methods from the caller
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index a5fe598..99529f0 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -52,8 +52,8 @@
 
   void Fixup(ArtMethod* trampoline) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  String* GetLocation() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), false);
+  String* GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), false);
   }
 
   static MemberOffset StringsOffset() {
@@ -68,24 +68,23 @@
     return OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_methods_);
   }
 
-  size_t NumStrings() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStrings()->GetLength();
   }
 
-  size_t NumResolvedTypes() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedTypes()->GetLength();
   }
 
-  size_t NumResolvedMethods() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedMethods()->GetLength();
   }
 
-  size_t NumResolvedFields() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t NumResolvedFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedFields()->GetLength();
   }
 
-  String* GetResolvedString(uint32_t string_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  String* GetResolvedString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetStrings()->Get(string_idx);
   }
 
@@ -94,8 +93,7 @@
     GetStrings()->Set(string_idx, resolved);
   }
 
-  Class* GetResolvedType(uint32_t type_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Class* GetResolvedType(uint32_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedTypes()->Get(type_idx);
   }
 
@@ -104,16 +102,14 @@
     GetResolvedTypes()->Set(type_idx, resolved);
   }
 
-  ArtMethod* GetResolvedMethod(uint32_t method_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetResolvedMethod(uint32_t method_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetResolvedMethod(uint32_t method_idx, ArtMethod* resolved)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     GetResolvedMethods()->Set(method_idx, resolved);
   }
 
-  ArtField* GetResolvedField(uint32_t field_idx) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ArtField* GetResolvedField(uint32_t field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetResolvedFields()->Get(field_idx);
   }
 
@@ -122,28 +118,24 @@
     GetResolvedFields()->Set(field_idx, resolved);
   }
 
-  ObjectArray<String>* GetStrings() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<String>* >(StringsOffset(), false);
+  ObjectArray<String>* GetStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject< ObjectArray<String> >(StringsOffset(), false);
   }
 
-  ObjectArray<Class>* GetResolvedTypes() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<Class>* >(
+  ObjectArray<Class>* GetResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<Class> >(
         OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_types_), false);
   }
 
-  ObjectArray<ArtMethod>* GetResolvedMethods() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<ArtMethod>* >(ResolvedMethodsOffset(), false);
+  ObjectArray<ArtMethod>* GetResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject< ObjectArray<ArtMethod> >(ResolvedMethodsOffset(), false);
   }
 
-  ObjectArray<ArtField>* GetResolvedFields() const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<ArtField>* >(ResolvedFieldsOffset(), false);
+  ObjectArray<ArtField>* GetResolvedFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<ArtField> >(ResolvedFieldsOffset(), false);
   }
 
-  const DexFile* GetDexFile() const {
+  const DexFile* GetDexFile() {
     return GetFieldPtr<const DexFile*>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), false);
   }
 
@@ -152,13 +144,13 @@
   }
 
  private:
-  Object* dex_;
-  String* location_;
-  ObjectArray<ArtField>* resolved_fields_;
-  ObjectArray<ArtMethod>* resolved_methods_;
-  ObjectArray<Class>* resolved_types_;
-  ObjectArray<String>* strings_;
-  uint32_t dex_file_;
+  HeapReference<Object> dex_;
+  HeapReference<String> location_;
+  HeapReference<ObjectArray<ArtField> > resolved_fields_;
+  HeapReference<ObjectArray<ArtMethod> > resolved_methods_;
+  HeapReference<ObjectArray<Class> > resolved_types_;
+  HeapReference<ObjectArray<String> > strings_;
+  uint64_t dex_file_;
 
   friend struct art::DexCacheOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(DexCache);
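
The accessors above treat a DexCache as a set of per-dex-file arrays keyed by constant-pool index, with a null slot meaning "not resolved yet". A minimal, standalone sketch of that pattern; ToyDexCache and ToyMethod are illustrative stand-ins, not ART types:

#include <cassert>
#include <cstdint>
#include <vector>

struct ToyMethod { const char* name; };

struct ToyDexCache {
  // One slot per method_idx in the dex file; nullptr until the class linker resolves it.
  std::vector<ToyMethod*> resolved_methods_;

  explicit ToyDexCache(size_t num_method_ids) : resolved_methods_(num_method_ids, nullptr) {}

  ToyMethod* GetResolvedMethod(uint32_t method_idx) const { return resolved_methods_[method_idx]; }
  void SetResolvedMethod(uint32_t method_idx, ToyMethod* m) { resolved_methods_[method_idx] = m; }
};

int main() {
  ToyDexCache cache(4);
  assert(cache.GetResolvedMethod(2) == nullptr);  // Unresolved until filled in.
  ToyMethod to_string{"toString"};
  cache.SetResolvedMethod(2, &to_string);
  assert(cache.GetResolvedMethod(2) == &to_string);
  return 0;
}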
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index 421893d..be83d03 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -24,7 +24,7 @@
 
 class MANAGED IfTable : public ObjectArray<Object> {
  public:
-  Class* GetInterface(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  Class* GetInterface(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Class* interface = Get((i * kMax) + kInterface)->AsClass();
     DCHECK(interface != NULL);
     return interface;
@@ -32,15 +32,14 @@
 
   void SetInterface(int32_t i, Class* interface) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<ArtMethod>* GetMethodArray(int32_t i) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ObjectArray<ArtMethod>* GetMethodArray(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ObjectArray<ArtMethod>* method_array =
         down_cast<ObjectArray<ArtMethod>*>(Get((i * kMax) + kMethodArray));
     DCHECK(method_array != NULL);
     return method_array;
   }
 
-  size_t GetMethodArrayCount(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  size_t GetMethodArrayCount(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     ObjectArray<ArtMethod>* method_array =
         down_cast<ObjectArray<ArtMethod>*>(Get((i * kMax) + kMethodArray));
     if (method_array == NULL) {
@@ -56,7 +55,7 @@
     Set((i * kMax) + kMethodArray, new_ma);
   }
 
-  size_t Count() const {
+  size_t Count() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetLength() / kMax;
   }
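
As the class.h comment earlier in this patch explains, the interface table is one flat array of {interface, method array} pairs, which is why these accessors index with i * kMax plus a slot constant. A minimal, standalone sketch of that layout; ToyIfTable is illustrative and std::string stands in for the real object references:

#include <cassert>
#include <string>
#include <vector>

struct ToyIfTable {
  enum { kInterface = 0, kMethodArray = 1, kMax = 2 };
  std::vector<std::string> slots;  // Flat storage, like the underlying ObjectArray<Object>.

  size_t Count() const { return slots.size() / kMax; }
  const std::string& GetInterface(size_t i) const { return slots[i * kMax + kInterface]; }
  const std::string& GetMethodArray(size_t i) const { return slots[i * kMax + kMethodArray]; }
};

int main() {
  ToyIfTable table{{"Ljava/lang/Comparable;", "methods[compareTo]",
                    "Ljava/io/Serializable;", "methods[]"}};
  assert(table.Count() == 2);
  assert(table.GetInterface(1) == "Ljava/io/Serializable;");
  return 0;
}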
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 9161bc5..b994354 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -32,19 +32,18 @@
 namespace art {
 namespace mirror {
 
-inline Class* Object::GetClass() const {
-  return GetFieldObject<Class*>(OFFSET_OF_OBJECT_MEMBER(Object, klass_), false);
+inline Class* Object::GetClass() {
+  return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Object, klass_), false);
 }
 
 inline void Object::SetClass(Class* new_klass) {
-  // new_klass may be NULL prior to class linker initialization
-  // We don't mark the card since the class is guaranteed to be referenced from another location.
-  // Proxy classes are held live by the class loader, and other classes are roots of the class
-  // linker.
-  SetFieldPtr(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
+  // new_klass may be NULL prior to class linker initialization.
+  // We don't mark the card as this occurs as part of object allocation. Not all objects have
+  // backing cards, such as large objects.
+  SetFieldObjectWithoutWriteBarrier(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
 }
 
-inline LockWord Object::GetLockWord() const {
+inline LockWord Object::GetLockWord() {
   return LockWord(GetField32(OFFSET_OF_OBJECT_MEMBER(Object, monitor_), true));
 }
 
@@ -85,19 +84,19 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline bool Object::VerifierInstanceOf(const Class* klass) const {
+inline bool Object::VerifierInstanceOf(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(GetClass() != NULL);
   return klass->IsInterface() || InstanceOf(klass);
 }
 
-inline bool Object::InstanceOf(const Class* klass) const {
+inline bool Object::InstanceOf(Class* klass) {
   DCHECK(klass != NULL);
   DCHECK(GetClass() != NULL);
   return klass->IsAssignableFrom(GetClass());
 }
 
-inline bool Object::IsClass() const {
+inline bool Object::IsClass() {
   Class* java_lang_Class = GetClass()->GetClass();
   return GetClass() == java_lang_Class;
 }
@@ -107,12 +106,7 @@
   return down_cast<Class*>(this);
 }
 
-inline const Class* Object::AsClass() const {
-  DCHECK(IsClass());
-  return down_cast<const Class*>(this);
-}
-
-inline bool Object::IsObjectArray() const {
+inline bool Object::IsObjectArray() {
   return IsArrayInstance() && !GetClass()->GetComponentType()->IsPrimitive();
 }
 
@@ -122,17 +116,11 @@
   return down_cast<ObjectArray<T>*>(this);
 }
 
-template<class T>
-inline const ObjectArray<T>* Object::AsObjectArray() const {
-  DCHECK(IsObjectArray());
-  return down_cast<const ObjectArray<T>*>(this);
-}
-
-inline bool Object::IsArrayInstance() const {
+inline bool Object::IsArrayInstance() {
   return GetClass()->IsArrayClass();
 }
 
-inline bool Object::IsArtField() const {
+inline bool Object::IsArtField() {
   return GetClass()->IsArtFieldClass();
 }
 
@@ -141,12 +129,7 @@
   return down_cast<ArtField*>(this);
 }
 
-inline const ArtField* Object::AsArtField() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(IsArtField());
-  return down_cast<const ArtField*>(this);
-}
-
-inline bool Object::IsArtMethod() const {
+inline bool Object::IsArtMethod() {
   return GetClass()->IsArtMethodClass();
 }
 
@@ -155,12 +138,7 @@
   return down_cast<ArtMethod*>(this);
 }
 
-inline const ArtMethod* Object::AsArtMethod() const {
-  DCHECK(IsArtMethod());
-  return down_cast<const ArtMethod*>(this);
-}
-
-inline bool Object::IsReferenceInstance() const {
+inline bool Object::IsReferenceInstance() {
   return GetClass()->IsReferenceClass();
 }
 
@@ -169,11 +147,6 @@
   return down_cast<Array*>(this);
 }
 
-inline const Array* Object::AsArray() const {
-  DCHECK(IsArrayInstance());
-  return down_cast<const Array*>(this);
-}
-
 inline BooleanArray* Object::AsBooleanArray() {
   DCHECK(GetClass()->IsArrayClass());
   DCHECK(GetClass()->GetComponentType()->IsPrimitiveBoolean());
@@ -186,6 +159,13 @@
   return down_cast<ByteArray*>(this);
 }
 
+inline ByteArray* Object::AsByteSizedArray() {
+  DCHECK(GetClass()->IsArrayClass());
+  DCHECK(GetClass()->GetComponentType()->IsPrimitiveByte() ||
+         GetClass()->GetComponentType()->IsPrimitiveBoolean());
+  return down_cast<ByteArray*>(this);
+}
+
 inline CharArray* Object::AsCharArray() {
   DCHECK(GetClass()->IsArrayClass());
   DCHECK(GetClass()->GetComponentType()->IsPrimitiveChar());
@@ -198,6 +178,13 @@
   return down_cast<ShortArray*>(this);
 }
 
+inline ShortArray* Object::AsShortSizedArray() {
+  DCHECK(GetClass()->IsArrayClass());
+  DCHECK(GetClass()->GetComponentType()->IsPrimitiveShort() ||
+         GetClass()->GetComponentType()->IsPrimitiveChar());
+  return down_cast<ShortArray*>(this);
+}
+
 inline IntArray* Object::AsIntArray() {
   DCHECK(GetClass()->IsArrayClass());
   DCHECK(GetClass()->GetComponentType()->IsPrimitiveInt() ||
@@ -222,23 +209,23 @@
   return down_cast<Throwable*>(this);
 }
 
-inline bool Object::IsWeakReferenceInstance() const {
+inline bool Object::IsWeakReferenceInstance() {
   return GetClass()->IsWeakReferenceClass();
 }
 
-inline bool Object::IsSoftReferenceInstance() const {
+inline bool Object::IsSoftReferenceInstance() {
   return GetClass()->IsSoftReferenceClass();
 }
 
-inline bool Object::IsFinalizerReferenceInstance() const {
+inline bool Object::IsFinalizerReferenceInstance() {
   return GetClass()->IsFinalizerReferenceClass();
 }
 
-inline bool Object::IsPhantomReferenceInstance() const {
+inline bool Object::IsPhantomReferenceInstance() {
   return GetClass()->IsPhantomReferenceClass();
 }
 
-inline size_t Object::SizeOf() const {
+inline size_t Object::SizeOf() {
   size_t result;
   if (IsArrayInstance()) {
     result = AsArray()->SizeOf();
@@ -253,64 +240,67 @@
   return result;
 }
 
-inline uint32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) const {
+inline int32_t Object::GetField32(MemberOffset field_offset, bool is_volatile) {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int32_t* word_addr = reinterpret_cast<const int32_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     int32_t result = *(reinterpret_cast<volatile int32_t*>(const_cast<int32_t*>(word_addr)));
-    QuasiAtomic::MembarLoadLoad();
+    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
     return result;
   } else {
     return *word_addr;
   }
 }
 
-inline void Object::SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile,
+inline void Object::SetField32(MemberOffset field_offset, int32_t new_value, bool is_volatile,
                                bool this_is_valid) {
   if (this_is_valid) {
     VerifyObject(this);
   }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  uint32_t* word_addr = reinterpret_cast<uint32_t*>(raw_addr);
+  int32_t* word_addr = reinterpret_cast<int32_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
     *word_addr = new_value;
-    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
   } else {
     *word_addr = new_value;
   }
 }
 
-inline bool Object::CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value) {
+inline bool Object::CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value) {
   VerifyObject(this);
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
-  volatile uint32_t* addr = reinterpret_cast<volatile uint32_t*>(raw_addr);
+  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
   return __sync_bool_compare_and_swap(addr, old_value, new_value);
 }
 
-inline uint64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) const {
+inline int64_t Object::GetField64(MemberOffset field_offset, bool is_volatile) {
   VerifyObject(this);
   const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
   const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
-    uint64_t result = QuasiAtomic::Read64(addr);
-    QuasiAtomic::MembarLoadLoad();
+    int64_t result = QuasiAtomic::Read64(addr);
+    QuasiAtomic::MembarLoadLoad();  // Ensure volatile loads don't re-order.
     return result;
   } else {
     return *addr;
   }
 }
 
-inline void Object::SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile) {
-  VerifyObject(this);
+inline void Object::SetField64(MemberOffset field_offset, int64_t new_value, bool is_volatile,
+                               bool this_is_valid) {
+  if (this_is_valid) {
+    VerifyObject(this);
+  }
   byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
   int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
   if (UNLIKELY(is_volatile)) {
     QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
     QuasiAtomic::Write64(addr, new_value);
     if (!QuasiAtomic::LongAtomicsUseMutexes()) {
-      QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+      QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any volatile loads.
     } else {
       // Fence from mutex is enough.
     }
@@ -319,12 +309,69 @@
   }
 }
 
-inline void Object::WriteBarrierField(const Object* dst, MemberOffset field_offset,
-                                      const Object* new_value) {
-  Runtime::Current()->GetHeap()->WriteBarrierField(dst, field_offset, new_value);
+inline bool Object::CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  volatile int64_t* addr = reinterpret_cast<volatile int64_t*>(raw_addr);
+  return QuasiAtomic::Cas64(old_value, new_value, addr);
 }
 
-inline void Object::VerifyObject(const Object* obj) {
+template<class T>
+inline T* Object::GetFieldObject(MemberOffset field_offset, bool is_volatile) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr);
+  HeapReference<T> objref = *objref_addr;
+
+  if (UNLIKELY(is_volatile)) {
+    QuasiAtomic::MembarLoadLoad();  // Ensure loads don't re-order.
+  }
+  T* result = objref.AsMirrorPtr();
+  VerifyObject(result);
+  return result;
+}
+
+inline void Object::SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value,
+                                                      bool is_volatile, bool this_is_valid) {
+  if (this_is_valid) {
+    VerifyObject(this);
+  }
+  VerifyObject(new_value);
+  HeapReference<Object> objref(HeapReference<Object>::FromMirrorPtr(new_value));
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr);
+  if (UNLIKELY(is_volatile)) {
+    QuasiAtomic::MembarStoreStore();  // Ensure this store occurs after others in the queue.
+    objref_addr->Assign(new_value);
+    QuasiAtomic::MembarStoreLoad();  // Ensure this store occurs before any loads.
+  } else {
+    objref_addr->Assign(new_value);
+  }
+}
+
+inline void Object::SetFieldObject(MemberOffset field_offset, Object* new_value, bool is_volatile,
+                                   bool this_is_valid) {
+  SetFieldObjectWithoutWriteBarrier(field_offset, new_value, is_volatile, this_is_valid);
+  if (new_value != nullptr) {
+    CheckFieldAssignment(field_offset, new_value);
+    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+  }
+}
+
+inline bool Object::CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value) {
+  VerifyObject(this);
+  byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+  volatile int32_t* addr = reinterpret_cast<volatile int32_t*>(raw_addr);
+  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  bool success = __sync_bool_compare_and_swap(addr, old_ref.reference_, new_ref.reference_);
+  if (success) {
+    Runtime::Current()->GetHeap()->WriteBarrierField(this, field_offset, new_value);
+  }
+  return success;
+}
+
+inline void Object::VerifyObject(Object* obj) {
   if (kIsDebugBuild) {
     Runtime::Current()->GetHeap()->VerifyObject(obj);
   }
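
The new SetFieldObject stores the reference and then calls the heap's write barrier so the holder's card is dirtied for the garbage collector; SetFieldObjectWithoutWriteBarrier is the variant that skips that step. A minimal, standalone sketch of the pattern with a toy card table; the names and sizes are illustrative, not ART's real CardTable:

#include <cassert>
#include <cstdint>
#include <cstring>

struct ToyCardTable {
  static const size_t kCardShift = 7;  // 128-byte cards, a granularity similar to ART's.
  uint8_t cards[1 << 10];

  ToyCardTable() { std::memset(cards, 0, sizeof(cards)); }
  void MarkCard(const void* addr) {
    cards[(reinterpret_cast<uintptr_t>(addr) >> kCardShift) % (1 << 10)] = 1;  // Nonzero = dirty.
  }
  bool IsDirty(const void* addr) const {
    return cards[(reinterpret_cast<uintptr_t>(addr) >> kCardShift) % (1 << 10)] != 0;
  }
};

struct ToyObject {
  ToyObject* field;

  void SetFieldObject(ToyCardTable* cards, ToyObject* new_value) {
    field = new_value;        // The "without write barrier" part: just store the reference.
    if (new_value != nullptr) {
      cards->MarkCard(this);  // Write barrier: dirty the card covering the holder.
    }
  }
};

int main() {
  ToyCardTable cards;
  ToyObject holder{nullptr}, referent{nullptr};
  holder.SetFieldObject(&cards, &referent);
  assert(holder.field == &referent);
  assert(cards.IsDirty(&holder));  // The GC will rescan this card.
  return 0;
}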
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index bdb3250..1251852 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -52,7 +52,7 @@
   Class* c = src->GetClass();
   if (c->IsArrayClass()) {
     if (!c->GetComponentType()->IsPrimitive()) {
-      const ObjectArray<Object>* array = dest->AsObjectArray<Object>();
+      ObjectArray<Object>* array = dest->AsObjectArray<Object>();
       heap->WriteBarrierArray(dest, 0, array->GetLength());
     }
   } else {
@@ -139,14 +139,15 @@
   return 0;
 }
 
-void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, const Object* new_value) {
-  const Class* c = GetClass();
+void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value) {
+  Class* c = GetClass();
   if (Runtime::Current()->GetClassLinker() == NULL ||
+      !Runtime::Current()->IsStarted() ||
       !Runtime::Current()->GetHeap()->IsObjectValidationEnabled() ||
       !c->IsResolved()) {
     return;
   }
-  for (const Class* cur = c; cur != NULL; cur = cur->GetSuperClass()) {
+  for (Class* cur = c; cur != NULL; cur = cur->GetSuperClass()) {
     ObjectArray<ArtField>* fields = cur->GetIFields();
     if (fields != NULL) {
       size_t num_ref_ifields = cur->NumReferenceInstanceFields();
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 058aee7..c42750f 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -21,6 +21,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "cutils/atomic-inline.h"
+#include "object_reference.h"
 #include "offsets.h"
 
 namespace art {
@@ -51,17 +52,13 @@
 class String;
 class Throwable;
 
-// Classes shared with the managed side of the world need to be packed so that they don't have
-// extra platform specific padding.
-#define MANAGED PACKED(4)
-
 // Fields within mirror objects aren't accessed directly so that the appropriate amount of
 // handshaking is done with GC (for example, read and write barriers). This macro is used to
 // compute an offset for the Set/Get methods defined in Object that can safely access fields.
 #define OFFSET_OF_OBJECT_MEMBER(type, field) \
     MemberOffset(OFFSETOF_MEMBER(type, field))
 
-const bool kCheckFieldAssignments = false;
+constexpr bool kCheckFieldAssignments = false;
 
 // C++ mirror of java.lang.Object
 class MANAGED Object {
@@ -70,19 +67,17 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, klass_);
   }
 
-  Class* GetClass() const;
+  Class* GetClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetClass(Class* new_klass);
+  void SetClass(Class* new_klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
   // invoke-interface to detect incompatible interface types.
-  bool VerifierInstanceOf(const Class* klass) const
-        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool VerifierInstanceOf(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool InstanceOf(const Class* klass) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool InstanceOf(Class* klass) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t SizeOf() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  size_t SizeOf() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Object* Clone(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -92,9 +87,9 @@
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
   }
 
-  LockWord GetLockWord() const;
+  LockWord GetLockWord();
   void SetLockWord(LockWord new_val);
-  bool CasLockWord(LockWord old_val, LockWord new_val);
+  bool CasLockWord(LockWord old_val, LockWord new_val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   uint32_t GetLockOwnerThreadId();
 
   void MonitorEnter(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -111,111 +106,113 @@
 
   void Wait(Thread* self, int64_t timeout, int32_t nanos) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsClass() const;
+  bool IsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Class* AsClass();
+  Class* AsClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const Class* AsClass() const;
-
-  bool IsObjectArray() const;
+  bool IsObjectArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<class T>
-  ObjectArray<T>* AsObjectArray();
+  ObjectArray<T>* AsObjectArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<class T>
-  const ObjectArray<T>* AsObjectArray() const;
+  bool IsArrayInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArrayInstance() const;
+  Array* AsArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Array* AsArray();
+  BooleanArray* AsBooleanArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ByteArray* AsByteArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ByteArray* AsByteSizedArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const Array* AsArray() const;
+  CharArray* AsCharArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ShortArray* AsShortArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ShortArray* AsShortSizedArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  BooleanArray* AsBooleanArray();
-  ByteArray* AsByteArray();
-  CharArray* AsCharArray();
-  ShortArray* AsShortArray();
-  IntArray* AsIntArray();
-  LongArray* AsLongArray();
+  IntArray* AsIntArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  LongArray* AsLongArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  String* AsString();
+  String* AsString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   Throwable* AsThrowable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsArtMethod() const;
+  bool IsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtMethod* AsArtMethod();
+  ArtMethod* AsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const ArtMethod* AsArtMethod() const;
-
-  bool IsArtField() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ArtField* AsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  const ArtField* AsArtField() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsReferenceInstance() const;
+  bool IsWeakReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsWeakReferenceInstance() const;
+  bool IsSoftReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsSoftReferenceInstance() const;
+  bool IsFinalizerReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsFinalizerReferenceInstance() const;
+  bool IsPhantomReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsPhantomReferenceInstance() const;
+  // Accessors for Java type fields.
+  template<class T> T* GetFieldObject(MemberOffset field_offset, bool is_volatile)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value,
+                                         bool is_volatile, bool this_is_valid = true)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetFieldObject(MemberOffset field_offset, Object* new_value, bool is_volatile,
+                      bool this_is_valid = true)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CasFieldObject(MemberOffset field_offset, Object* old_value, Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Accessors for Java type fields
-  template<class T>
-  T GetFieldObject(MemberOffset field_offset, bool is_volatile) const {
-    T result = reinterpret_cast<T>(GetField32(field_offset, is_volatile));
-    VerifyObject(result);
-    return result;
-  }
-
-  void SetFieldObject(MemberOffset field_offset, const Object* new_value, bool is_volatile,
-                      bool this_is_valid = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    VerifyObject(new_value);
-    SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile, this_is_valid);
-    if (new_value != NULL) {
-      CheckFieldAssignment(field_offset, new_value);
-      WriteBarrierField(this, field_offset, new_value);
-    }
-  }
-
-  Object** GetFieldObjectAddr(MemberOffset field_offset) ALWAYS_INLINE {
+  HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset) ALWAYS_INLINE {
     VerifyObject(this);
-    return reinterpret_cast<Object**>(reinterpret_cast<byte*>(this) + field_offset.Int32Value());
+    return reinterpret_cast<HeapReference<Object>*>(reinterpret_cast<byte*>(this) +
+        field_offset.Int32Value());
   }
 
-  uint32_t GetField32(MemberOffset field_offset, bool is_volatile) const;
+  int32_t GetField32(MemberOffset field_offset, bool is_volatile);
 
-  void SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile,
+  void SetField32(MemberOffset field_offset, int32_t new_value, bool is_volatile,
                   bool this_is_valid = true);
 
-  bool CasField32(MemberOffset field_offset, uint32_t old_value, uint32_t new_value);
+  bool CasField32(MemberOffset field_offset, int32_t old_value, int32_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint64_t GetField64(MemberOffset field_offset, bool is_volatile) const;
+  int64_t GetField64(MemberOffset field_offset, bool is_volatile);
 
-  void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile);
+  void SetField64(MemberOffset field_offset, int64_t new_value, bool is_volatile,
+                  bool this_is_valid = true);
+
+  bool CasField64(MemberOffset field_offset, int64_t old_value, int64_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<typename T>
-  void SetFieldPtr(MemberOffset field_offset, T new_value, bool is_volatile, bool this_is_valid = true) {
-    SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile, this_is_valid);
+  void SetFieldPtr(MemberOffset field_offset, T new_value, bool is_volatile,
+                   bool this_is_valid = true) {
+#ifndef __LP64__
+    SetField32(field_offset, reinterpret_cast<int32_t>(new_value), is_volatile, this_is_valid);
+#else
+    SetField64(field_offset, reinterpret_cast<int64_t>(new_value), is_volatile, this_is_valid);
+#endif
   }
 
  protected:
   // Accessors for non-Java type fields
   template<class T>
-  T GetFieldPtr(MemberOffset field_offset, bool is_volatile) const {
+  T GetFieldPtr(MemberOffset field_offset, bool is_volatile) {
+#ifndef __LP64__
     return reinterpret_cast<T>(GetField32(field_offset, is_volatile));
+#else
+    return reinterpret_cast<T>(GetField64(field_offset, is_volatile));
+#endif
   }
 
  private:
-  static void VerifyObject(const Object* obj) ALWAYS_INLINE;
+  static void VerifyObject(Object* obj) ALWAYS_INLINE;
   // Verify the type correctness of stores to fields.
-  void CheckFieldAssignmentImpl(MemberOffset field_offset, const Object* new_value)
+  void CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  void CheckFieldAssignment(MemberOffset field_offset, const Object* new_value)
+  void CheckFieldAssignment(MemberOffset field_offset, Object* new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (kCheckFieldAssignments) {
       CheckFieldAssignmentImpl(field_offset, new_value);
@@ -225,11 +222,9 @@
   // Generate an identity hash code.
   static int32_t GenerateIdentityHashCode();
 
-  // Write barrier called post update to a reference bearing field.
-  static void WriteBarrierField(const Object* dst, MemberOffset offset, const Object* new_value);
-
-  Class* klass_;
-
+  // The Class representing the type of the object.
+  HeapReference<Class> klass_;
+  // Monitor and hash code information.
   uint32_t monitor_;
 
   friend class art::ImageWriter;
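
The new SetFieldPtr/GetFieldPtr pick a 32-bit or 64-bit slot at compile time via the __LP64__ check above, which is also why DexCache::dex_file_ widens to uint64_t elsewhere in this patch. A minimal, standalone sketch of that idea, assuming an ILP32 or LP64 data model; RawSlot and the names are illustrative:

#include <cassert>
#include <cstdint>

struct RawSlot {
#ifndef __LP64__
  int32_t bits;  // 32-bit builds: a native pointer fits in 32 bits.
#else
  int64_t bits;  // 64-bit builds: use the full 64 bits.
#endif

  template <typename T>
  void SetPtr(T* ptr) { bits = reinterpret_cast<intptr_t>(ptr); }

  template <typename T>
  T* GetPtr() const { return reinterpret_cast<T*>(static_cast<intptr_t>(bits)); }
};

int main() {
  int value = 7;
  RawSlot slot;
  slot.SetPtr(&value);
  assert(*slot.GetPtr<int>() == 7);  // The pointer round-trips at native width.
  return 0;
}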
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index be49b42..c342479 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -25,6 +25,7 @@
 #include "runtime.h"
 #include "sirt_ref.h"
 #include "thread.h"
+#include <string>
 
 namespace art {
 namespace mirror {
@@ -32,8 +33,8 @@
 template<class T>
 inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
                                              int32_t length, gc::AllocatorType allocator_type) {
-  Array* array = Array::Alloc<true>(self, object_array_class, length, sizeof(Object*),
-                                    allocator_type);
+  Array* array = Array::Alloc<true>(self, object_array_class, length,
+                                    sizeof(HeapReference<Object>), allocator_type);
   if (UNLIKELY(array == nullptr)) {
     return nullptr;
   } else {
@@ -49,12 +50,12 @@
 }
 
 template<class T>
-inline T* ObjectArray<T>::Get(int32_t i) const {
-  if (UNLIKELY(!IsValidIndex(i))) {
+inline T* ObjectArray<T>::Get(int32_t i) {
+  if (UNLIKELY(!CheckIsValidIndex(i))) {
+    DCHECK(Thread::Current()->IsExceptionPending());
     return NULL;
   }
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  return GetFieldObject<T*>(data_offset, false);
+  return GetFieldObject<T>(OffsetOfElement(i), false);
 }
 
 template<class T>
@@ -71,9 +72,8 @@
 
 template<class T>
 inline void ObjectArray<T>::Set(int32_t i, T* object) {
-  if (LIKELY(IsValidIndex(i) && CheckAssignable(object))) {
-    MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-    SetFieldObject(data_offset, object, false);
+  if (LIKELY(CheckIsValidIndex(i) && CheckAssignable(object))) {
+    SetFieldObject(OffsetOfElement(i), object, false);
   } else {
     DCHECK(Thread::Current()->IsExceptionPending());
   }
@@ -81,69 +81,125 @@
 
 template<class T>
 inline void ObjectArray<T>::SetWithoutChecks(int32_t i, T* object) {
-  DCHECK(IsValidIndex(i));
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  SetFieldObject(data_offset, object, false);
+  DCHECK(CheckIsValidIndex(i));
+  DCHECK(CheckAssignable(object));
+  SetFieldObject(OffsetOfElement(i), object, false);
 }
 
 template<class T>
-inline void ObjectArray<T>::SetPtrWithoutChecks(int32_t i, T* object) {
-  DCHECK(IsValidIndex(i));
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  SetFieldPtr(data_offset, object, false);
+inline void ObjectArray<T>::SetWithoutChecksAndWriteBarrier(int32_t i, T* object) {
+  DCHECK(CheckIsValidIndex(i));
+  // TODO: enable this check. It fails when writing the image in ImageWriter::FixupObjectArray.
+  // DCHECK(CheckAssignable(object));
+  SetFieldObjectWithoutWriteBarrier(OffsetOfElement(i), object, false);
 }
 
 template<class T>
-inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) const {
-  DCHECK(IsValidIndex(i));
-  MemberOffset data_offset(DataOffset(sizeof(Object*)).Int32Value() + i * sizeof(Object*));
-  return GetFieldObject<T*>(data_offset, false);
+inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) {
+  DCHECK(CheckIsValidIndex(i));
+  return GetFieldObject<T>(OffsetOfElement(i), false);
 }
 
 template<class T>
-inline void ObjectArray<T>::Copy(const ObjectArray<T>* src, int src_pos,
-                                 ObjectArray<T>* dst, int dst_pos,
-                                 size_t length) {
-  if (src->IsValidIndex(src_pos) &&
-      src->IsValidIndex(src_pos+length-1) &&
-      dst->IsValidIndex(dst_pos) &&
-      dst->IsValidIndex(dst_pos+length-1)) {
-    MemberOffset src_offset(DataOffset(sizeof(Object*)).Int32Value() + src_pos * sizeof(Object*));
-    MemberOffset dst_offset(DataOffset(sizeof(Object*)).Int32Value() + dst_pos * sizeof(Object*));
-    Class* array_class = dst->GetClass();
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    if (array_class == src->GetClass()) {
-      // No need for array store checks if arrays are of the same type
-      for (size_t i = 0; i < length; i++) {
-        Object* object = src->GetFieldObject<Object*>(src_offset, false);
-        heap->VerifyObject(object);
-        // directly set field, we do a bulk write barrier at the end
-        dst->SetField32(dst_offset, reinterpret_cast<uint32_t>(object), false, true);
-        src_offset = MemberOffset(src_offset.Uint32Value() + sizeof(Object*));
-        dst_offset = MemberOffset(dst_offset.Uint32Value() + sizeof(Object*));
-      }
+inline void ObjectArray<T>::AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src,
+                                              int32_t src_pos, int32_t count) {
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      src->GetWithoutChecks(src_pos + i);
+    }
+  }
+  // Perform the move as an int-array memmove, then perform the write barrier.
+  CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
+  IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
+  IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
+  dstAsIntArray->Memmove(dst_pos, srcAsIntArray, src_pos, count);
+  Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      GetWithoutChecks(dst_pos + i);
+    }
+  }
+}
+
+template<class T>
+inline void ObjectArray<T>::AssignableMemcpy(int32_t dst_pos, ObjectArray<T>* src,
+                                             int32_t src_pos, int32_t count) {
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      src->GetWithoutChecks(src_pos + i);
+    }
+  }
+  // Perform the copy as an int-array memcpy, then perform the write barrier.
+  CHECK_EQ(sizeof(HeapReference<T>), sizeof(uint32_t));
+  IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
+  IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
+  dstAsIntArray->Memcpy(dst_pos, srcAsIntArray, src_pos, count);
+  Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
+  if (kIsDebugBuild) {
+    for (int i = 0; i < count; ++i) {
+      // The Get will perform the VerifyObject.
+      GetWithoutChecks(dst_pos + i);
+    }
+  }
+}
+
+template<class T>
+inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src,
+                                                     int32_t src_pos, int32_t count,
+                                                     bool throw_exception) {
+  DCHECK_NE(this, src)
+      << "This case should be handled with memmove that handles overlaps correctly";
+  // We want to avoid redundant IsAssignableFrom checks where possible, so we cache a class that
+  // we know is assignable to the destination array's component type.
+  Class* dst_class = GetClass()->GetComponentType();
+  Class* lastAssignableElementClass = dst_class;
+
+  Object* o = nullptr;
+  int i = 0;
+  for (; i < count; ++i) {
+    // The following get operations force the objects to be verified.
+    o = src->GetWithoutChecks(src_pos + i);
+    if (o == nullptr) {
+      // Null is always assignable.
+      SetWithoutChecks(dst_pos + i, nullptr);
     } else {
-      Class* element_class = array_class->GetComponentType();
-      CHECK(!element_class->IsPrimitive());
-      for (size_t i = 0; i < length; i++) {
-        Object* object = src->GetFieldObject<Object*>(src_offset, false);
-        if (object != NULL && !object->InstanceOf(element_class)) {
-          dst->ThrowArrayStoreException(object);
-          return;
-        }
-        heap->VerifyObject(object);
-        // directly set field, we do a bulk write barrier at the end
-        dst->SetField32(dst_offset, reinterpret_cast<uint32_t>(object), false, true);
-        src_offset = MemberOffset(src_offset.Uint32Value() + sizeof(Object*));
-        dst_offset = MemberOffset(dst_offset.Uint32Value() + sizeof(Object*));
+      // TODO: use the underlying class reference to avoid uncompression when not necessary.
+      Class* o_class = o->GetClass();
+      if (LIKELY(lastAssignableElementClass == o_class)) {
+        SetWithoutChecks(dst_pos + i, o);
+      } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
+        lastAssignableElementClass = o_class;
+        SetWithoutChecks(dst_pos + i, o);
+      } else {
+        // Can't put this element into the array; break to perform the write barrier and throw
+        // the exception.
+        break;
       }
     }
-    heap->WriteBarrierArray(dst, dst_pos, length);
+  }
+  Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
+  if (UNLIKELY(i != count)) {
+    std::string actualSrcType(PrettyTypeOf(o));
+    std::string dstType(PrettyTypeOf(this));
+    Thread* self = Thread::Current();
+    ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+    if (throw_exception) {
+      self->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayStoreException;",
+                               "source[%d] of type %s cannot be stored in destination array of type %s",
+                               src_pos + i, actualSrcType.c_str(), dstType.c_str());
+    } else {
+      LOG(FATAL) << StringPrintf("source[%d] of type %s cannot be stored in destination array of type %s",
+                                 src_pos + i, actualSrcType.c_str(), dstType.c_str());
+    }
   }
 }
 
 template<class T>
 inline ObjectArray<T>* ObjectArray<T>::CopyOf(Thread* self, int32_t new_length) {
+  DCHECK_GE(new_length, 0);
   // We may get copied by a compacting GC.
   SirtRef<ObjectArray<T> > sirt_this(self, this);
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -151,11 +207,17 @@
       heap->GetCurrentNonMovingAllocator();
   ObjectArray<T>* new_array = Alloc(self, GetClass(), new_length, allocator_type);
   if (LIKELY(new_array != nullptr)) {
-    Copy(sirt_this.get(), 0, new_array, 0, std::min(sirt_this->GetLength(), new_length));
+    new_array->AssignableMemcpy(0, sirt_this.get(), 0, std::min(sirt_this->GetLength(), new_length));
   }
   return new_array;
 }
 
+template<class T>
+inline MemberOffset ObjectArray<T>::OffsetOfElement(int32_t i) {
+  return MemberOffset(DataOffset(sizeof(HeapReference<Object>)).Int32Value() +
+                      (i * sizeof(HeapReference<Object>)));
+}
+
 }  // namespace mirror
 }  // namespace art
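
AssignableCheckingMemcpy above avoids repeated IsAssignableFrom calls by remembering the last element class that passed the check, so copying a homogeneous array pays for a single test. A minimal, standalone sketch of that caching strategy; ToyClass and ToyObject are illustrative, and where the real code throws ArrayStoreException this sketch merely stops copying:

#include <cassert>
#include <cstddef>
#include <vector>

struct ToyClass {
  const ToyClass* super;
  bool IsAssignableFrom(const ToyClass* src) const {
    for (const ToyClass* c = src; c != nullptr; c = c->super) {
      if (c == this) return true;
    }
    return false;
  }
};

struct ToyObject { const ToyClass* klass; };

// Returns the number of elements copied before the first non-assignable one.
size_t CheckedCopy(const ToyClass* dst_component, std::vector<ToyObject*>& dst,
                   const std::vector<ToyObject*>& src) {
  const ToyClass* last_ok = dst_component;  // Cache: a class known to be assignable.
  size_t i = 0;
  for (; i < src.size(); ++i) {
    ToyObject* o = src[i];
    if (o == nullptr) {
      dst[i] = nullptr;                     // Null is always assignable.
    } else if (o->klass == last_ok || dst_component->IsAssignableFrom(o->klass)) {
      last_ok = o->klass;                   // Remember the class that passed.
      dst[i] = o;
    } else {
      break;                                // The real code raises ArrayStoreException here.
    }
  }
  return i;
}

int main() {
  ToyClass object{nullptr}, string{&object}, integer{&object};
  ToyObject s1{&string}, s2{&string}, n{&integer};
  std::vector<ToyObject*> src = {&s1, &s2, &n};
  std::vector<ToyObject*> dst(3, nullptr);
  assert(CheckedCopy(&string, dst, src) == 2);  // The third element is not assignable.
  return 0;
}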
 
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 5da8845..347494e 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -33,7 +33,7 @@
   static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  T* Get(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  T* Get(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns true if the object can be stored into the array. If not, throws
   // an ArrayStoreException and returns false.
@@ -44,22 +44,30 @@
   // Set element without bound and element type checks, to be used in limited
   // circumstances, such as during boot image writing
   void SetWithoutChecks(int32_t i, T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetWithoutChecksAndWriteBarrier(int32_t i, T* object)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Set element without bound and element type checks, to be used in limited circumstances, such
-  // as during boot image writing. Does not do write barrier.
-  void SetPtrWithoutChecks(int32_t i, T* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  T* GetWithoutChecks(int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  T* GetWithoutChecks(int32_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Copy src into this array (dealing with overlaps as memmove does) without assignability checks.
+  void AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
+                         int32_t count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static void Copy(const ObjectArray<T>* src, int src_pos,
-                   ObjectArray<T>* dst, int dst_pos,
-                   size_t length)
+  // Copy src into this array assuming no overlap and without assignability checks.
+  void AssignableMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
+                        int32_t count) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Copy src into this array with assignability checks.
+  void AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
+                                int32_t count, bool throw_exception)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ObjectArray<T>* CopyOf(Thread* self, int32_t new_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
+  static MemberOffset OffsetOfElement(int32_t i);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(ObjectArray);
 };
 
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
new file mode 100644
index 0000000..b30890f
--- /dev/null
+++ b/runtime/mirror/object_reference.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
+#define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
+
+#include "locks.h"
+
+namespace art {
+namespace mirror {
+
+class Object;
+
+// Classes shared with the managed side of the world need to be packed so that they don't have
+// extra platform specific padding.
+#define MANAGED PACKED(4)
+
+// Value type representing a reference to a mirror::Object of type MirrorType.
+template<bool kPoisonReferences, class MirrorType>
+class MANAGED ObjectReference {
+ public:
+  MirrorType* AsMirrorPtr() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return UnCompress();
+  }
+
+  void Assign(MirrorType* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    reference_ = Compress(other);
+  }
+
+  void Clear() {
+    reference_ = 0;
+  }
+
+  uint32_t AsVRegValue() const {
+    return reference_;
+  }
+
+ protected:
+  ObjectReference<kPoisonReferences, MirrorType>(MirrorType* mirror_ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : reference_(Compress(mirror_ptr)) {
+  }
+
+  // Compress reference to its bit representation.
+  static uint32_t Compress(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr);
+    return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits);
+  }
+
+  // Uncompress an encoded reference from its bit representation.
+  MirrorType* UnCompress() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    uintptr_t as_bits = kPoisonReferences ? -reference_ : reference_;
+    return reinterpret_cast<MirrorType*>(as_bits);
+  }
+
+  friend class Object;
+
+  // The encoded reference to a mirror::Object.
+  uint32_t reference_;
+};
+
+// References between objects within the managed heap.
+template<class MirrorType>
+class MANAGED HeapReference : public ObjectReference<false, MirrorType> {
+ public:
+  static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return HeapReference<MirrorType>(mirror_ptr);
+  }
+ private:
+  HeapReference<MirrorType>(MirrorType* mirror_ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : ObjectReference<false, MirrorType>(mirror_ptr) {}
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_
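
The new object_reference.h stores every heap reference as 32 bits, with an optional poisoned form produced by negating the address so that a stray raw dereference faults; HeapReference is the unpoisoned specialization. A minimal, standalone sketch of the encoding; Ref32/Encode/Decode are illustrative names and the address is a pretend value below 4GB:

#include <cassert>
#include <cstdint>

template <bool kPoison>
struct Ref32 {
  uint32_t bits;

  static Ref32 Encode(uint32_t address) {
    return Ref32{kPoison ? static_cast<uint32_t>(-address) : address};
  }
  uint32_t Decode() const {
    return kPoison ? static_cast<uint32_t>(-bits) : bits;
  }
};

int main() {
  const uint32_t addr = 0x12345678;   // Pretend heap address in the low 4GB.
  Ref32<true> poisoned = Ref32<true>::Encode(addr);
  assert(poisoned.bits != addr);      // The stored word is not the address itself.
  assert(poisoned.Decode() == addr);  // But it round-trips exactly.
  Ref32<false> plain = Ref32<false>::Encode(addr);
  assert(plain.bits == addr);         // The unpoisoned form stores the address as-is.
  return 0;
}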
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 3637181..db9723b 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -77,7 +77,7 @@
   EXPECT_EQ(CLASS_COMPONENT_TYPE_OFFSET, Class::ComponentTypeOffset().Int32Value());
 
   EXPECT_EQ(ARRAY_LENGTH_OFFSET, Array::LengthOffset().Int32Value());
-  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(Object*)).Int32Value());
+  EXPECT_EQ(OBJECT_ARRAY_DATA_OFFSET, Array::DataOffset(sizeof(HeapReference<Object>)).Int32Value());
 
   EXPECT_EQ(STRING_VALUE_OFFSET, String::ValueOffset().Int32Value());
   EXPECT_EQ(STRING_COUNT_OFFSET, String::CountOffset().Int32Value());
@@ -85,7 +85,8 @@
   EXPECT_EQ(STRING_DATA_OFFSET, Array::DataOffset(sizeof(uint16_t)).Int32Value());
 
   EXPECT_EQ(METHOD_DEX_CACHE_METHODS_OFFSET, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
-  EXPECT_EQ(METHOD_CODE_OFFSET, ArtMethod::EntryPointFromCompiledCodeOffset().Int32Value());
+  EXPECT_EQ(METHOD_PORTABLE_CODE_OFFSET, ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value());
+  EXPECT_EQ(METHOD_QUICK_CODE_OFFSET, ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
@@ -235,12 +236,12 @@
   SirtRef<Class> c(soa.Self(), class_linker_->FindSystemClass("I"));
   SirtRef<IntArray> dims(soa.Self(), IntArray::Alloc(soa.Self(), 1));
   dims->Set(0, 1);
-  Array* multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+  Array* multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass("[I"));
   EXPECT_EQ(1, multi->GetLength());
 
   dims->Set(0, -1);
-  multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+  multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(soa.Self()->IsExceptionPending());
   EXPECT_EQ(PrettyDescriptor(soa.Self()->GetException(NULL)->GetClass()),
             "java.lang.NegativeArraySizeException");
@@ -251,7 +252,7 @@
     for (int j = 0; j < 20; ++j) {
       dims->Set(0, i);
       dims->Set(1, j);
-      multi = Array::CreateMultiArray(soa.Self(), c.get(), dims.get());
+      multi = Array::CreateMultiArray(soa.Self(), c, dims);
       EXPECT_TRUE(multi->GetClass() == class_linker_->FindSystemClass("[[I"));
       EXPECT_EQ(i, multi->GetLength());
       for (int k = 0; k < i; ++k) {
@@ -295,7 +296,7 @@
   uint32_t field_idx = dex_file->GetIndexForFieldId(*field_id);
 
   ArtField* field = FindFieldFromCode<StaticObjectRead, true>(field_idx, clinit, Thread::Current(),
-                                                              sizeof(Object*));
+                                                              sizeof(HeapReference<Object>));
   Object* s0 = field->GetObj(klass);
   EXPECT_TRUE(s0 != NULL);
 
diff --git a/runtime/mirror/proxy.h b/runtime/mirror/proxy.h
index 18a84dc..ff019c6 100644
--- a/runtime/mirror/proxy.h
+++ b/runtime/mirror/proxy.h
@@ -29,24 +29,28 @@
 // has the static fields used to implement reflection on proxy objects.
 class MANAGED SynthesizedProxyClass : public Class {
  public:
-  ObjectArray<Class>* GetInterfaces() {
-    return interfaces_;
+  ObjectArray<Class>* GetInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<Class> >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+                                                                       interfaces_),
+                                               false);
   }
 
-  ObjectArray<ObjectArray<Class> >* GetThrows() {
-    return throws_;
+  ObjectArray<ObjectArray<Class> >* GetThrows() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<ObjectArray<ObjectArray<Class> > >(OFFSET_OF_OBJECT_MEMBER(SynthesizedProxyClass,
+                                                                                     throws_),
+                                               false);
   }
 
  private:
-  ObjectArray<Class>* interfaces_;
-  ObjectArray<ObjectArray<Class> >* throws_;
+  HeapReference<ObjectArray<Class> > interfaces_;
+  HeapReference<ObjectArray<ObjectArray<Class> > > throws_;
   DISALLOW_IMPLICIT_CONSTRUCTORS(SynthesizedProxyClass);
 };
 
 // C++ mirror of java.lang.reflect.Proxy.
 class MANAGED Proxy : public Object {
  private:
-  Object* h_;
+  HeapReference<Object> h_;
 
   friend struct art::ProxyOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(Proxy);
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index d1be4dc..73d2673 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -29,24 +29,23 @@
 // C++ mirror of java.lang.StackTraceElement
 class MANAGED StackTraceElement : public Object {
  public:
-  const String* GetDeclaringClass() const {
-    return GetFieldObject<const String*>(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_), false);
+  String* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
+                                  false);
   }
 
-  const String* GetMethodName() const {
-    return GetFieldObject<const String*>(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_), false);
+  String* GetMethodName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
+                                  false);
   }
 
-  const String* GetFileName() const {
-    return GetFieldObject<const String*>(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_), false);
+  String* GetFileName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_),
+                                  false);
   }
 
-  int32_t GetLineNumber() const {
-    return GetField32(
-        OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_), false);
+  int32_t GetLineNumber() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_), false);
   }
 
   static StackTraceElement* Alloc(Thread* self,
@@ -63,9 +62,9 @@
 
  private:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  String* declaring_class_;
-  String* file_name_;
-  String* method_name_;
+  HeapReference<String> declaring_class_;
+  HeapReference<String> file_name_;
+  HeapReference<String> method_name_;
   int32_t line_number_;
 
   static Class* GetStackTraceElement() {
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 1f756a1..10ae066 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -29,23 +29,19 @@
 namespace art {
 namespace mirror {
 
-const CharArray* String::GetCharArray() const {
-  return GetFieldObject<const CharArray*>(ValueOffset(), false);
-}
-
 CharArray* String::GetCharArray() {
-  return GetFieldObject<CharArray*>(ValueOffset(), false);
+  return GetFieldObject<CharArray>(ValueOffset(), false);
 }
 
 void String::ComputeHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   SetHashCode(ComputeUtf16Hash(GetCharArray(), GetOffset(), GetLength()));
 }
 
-int32_t String::GetUtfLength() const {
+int32_t String::GetUtfLength() {
   return CountUtf8Bytes(GetCharArray()->GetData() + GetOffset(), GetLength());
 }
 
-int32_t String::FastIndexOf(int32_t ch, int32_t start) const {
+int32_t String::FastIndexOf(int32_t ch, int32_t start) {
   int32_t count = GetLength();
   if (start < 0) {
     start = 0;
@@ -97,13 +93,13 @@
   return result;
 }
 
-int32_t String::GetLength() const {
+int32_t String::GetLength() {
   int32_t result = GetField32(OFFSET_OF_OBJECT_MEMBER(String, count_), false);
   DCHECK(result >= 0 && result <= GetCharArray()->GetLength());
   return result;
 }
 
-uint16_t String::CharAt(int32_t index) const {
+uint16_t String::CharAt(int32_t index) {
   // TODO: do we need this? Equals is the only caller, and could
   // bounds check itself.
   DCHECK_GE(count_, 0);  // ensures the unsigned comparison is safe.
@@ -179,7 +175,7 @@
   return string;
 }
 
-bool String::Equals(const String* that) const {
+bool String::Equals(String* that) {
   if (this == that) {
     // Quick reference equality test
     return true;
@@ -201,7 +197,7 @@
   }
 }
 
-bool String::Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) const {
+bool String::Equals(const uint16_t* that_chars, int32_t that_offset, int32_t that_length) {
   if (this->GetLength() != that_length) {
     return false;
   } else {
@@ -214,7 +210,7 @@
   }
 }
 
-bool String::Equals(const char* modified_utf8) const {
+bool String::Equals(const char* modified_utf8) {
   for (int32_t i = 0; i < GetLength(); ++i) {
     uint16_t ch = GetUtf16FromUtf8(&modified_utf8);
     if (ch == '\0' || ch != CharAt(i)) {
@@ -224,7 +220,7 @@
   return *modified_utf8 == '\0';
 }
 
-bool String::Equals(const StringPiece& modified_utf8) const {
+bool String::Equals(const StringPiece& modified_utf8) {
   const char* p = modified_utf8.data();
   for (int32_t i = 0; i < GetLength(); ++i) {
     uint16_t ch = GetUtf16FromUtf8(&p);
@@ -236,7 +232,7 @@
 }
 
 // Create a modified UTF-8 encoded std::string from a java/lang/String object.
-std::string String::ToModifiedUtf8() const {
+std::string String::ToModifiedUtf8() {
   const uint16_t* chars = GetCharArray()->GetData() + GetOffset();
   size_t byte_count = GetUtfLength();
   std::string result(byte_count, static_cast<char>(0));
@@ -259,9 +255,9 @@
 }
 #endif
 
-int32_t String::CompareTo(String* rhs) const {
+int32_t String::CompareTo(String* rhs) {
   // Quick test for comparison of a string with itself.
-  const String* lhs = this;
+  String* lhs = this;
   if (lhs == rhs) {
     return 0;
   }
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 4bbcb9c..406c5a3 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -44,24 +44,23 @@
     return OFFSET_OF_OBJECT_MEMBER(String, offset_);
   }
 
-  const CharArray* GetCharArray() const;
-  CharArray* GetCharArray();
+  CharArray* GetCharArray() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetOffset() const {
+  int32_t GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     int32_t result = GetField32(OffsetOffset(), false);
     DCHECK_LE(0, result);
     return result;
   }
 
-  int32_t GetLength() const;
+  int32_t GetLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   int32_t GetHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void ComputeHashCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t GetUtfLength() const;
+  int32_t GetUtfLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint16_t CharAt(int32_t index) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint16_t CharAt(int32_t index) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   String* Intern() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -78,29 +77,28 @@
                                        const char* utf8_data_in)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Equals(const char* modified_utf8) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Equals(const char* modified_utf8) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // TODO: do we need this overload? give it a more intention-revealing name.
-  bool Equals(const StringPiece& modified_utf8) const
+  bool Equals(const StringPiece& modified_utf8)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool Equals(const String* that) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool Equals(String* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Compare UTF-16 code point values not in a locale-sensitive manner
   int Compare(int32_t utf16_length, const char* utf8_data_in);
 
   // TODO: do we need this overload? give it a more intention-revealing name.
   bool Equals(const uint16_t* that_chars, int32_t that_offset,
-              int32_t that_length) const
+              int32_t that_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Create a modified UTF-8 encoded std::string from a java/lang/String object.
-  std::string ToModifiedUtf8() const;
+  std::string ToModifiedUtf8() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t FastIndexOf(int32_t ch, int32_t start) const;
+  int32_t FastIndexOf(int32_t ch, int32_t start) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  int32_t CompareTo(String* other) const;
+  int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangString() {
     DCHECK(java_lang_String_ != NULL);
@@ -114,7 +112,7 @@
 
  private:
   void SetHashCode(int32_t new_hash_code) {
-    DCHECK_EQ(0u, GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
+    DCHECK_EQ(0, GetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), false));
     SetField32(OFFSET_OF_OBJECT_MEMBER(String, hash_code_), new_hash_code, false);
   }
 
@@ -123,7 +121,7 @@
     SetField32(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count, false);
   }
 
-  void SetOffset(int32_t new_offset) {
+  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LE(0, new_offset);
     DCHECK_GE(GetLength(), new_offset);
     SetField32(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset, false);
@@ -138,7 +136,7 @@
   void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  CharArray* array_;
+  HeapReference<CharArray> array_;
 
   int32_t count_;
 
@@ -155,8 +153,8 @@
 
 class MANAGED StringClass : public Class {
  private:
-  CharArray* ASCII_;
-  Object* CASE_INSENSITIVE_ORDER_;
+  HeapReference<CharArray> ASCII_;
+  HeapReference<Object> CASE_INSENSITIVE_ORDER_;
   uint32_t REPLACEMENT_CHAR_;
   int64_t serialVersionUID_;
   friend struct art::StringClassOffsets;  // for verifying offset information
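
Note how the String accessors above drop const and pick up SHARED_LOCKS_REQUIRED(Locks::mutator_lock_): reading a reference field is now treated as a heap access that must happen while the mutator lock is held, which Clang's -Wthread-safety analysis can then enforce at call sites. A small self-contained sketch of how such an annotation behaves follows; the macro expansions and the MutatorLock class are assumptions for illustration, not ART's base/macros.h definitions.

    // Compile with: clang++ -std=c++11 -Wthread-safety -c lock_annotation_sketch.cc
    // Minimal stand-ins for the annotation macros; ART's real ones live in base/macros.h.
    #include <cstdint>

    #if defined(__clang__)
    #define CAPABILITY(x) __attribute__((capability(x)))
    #define SHARED_LOCKS_REQUIRED_SKETCH(...) __attribute__((requires_shared_capability(__VA_ARGS__)))
    #define SHARED_LOCK_FUNCTION_SKETCH(...) __attribute__((acquire_shared_capability(__VA_ARGS__)))
    #define SHARED_UNLOCK_FUNCTION_SKETCH(...) __attribute__((release_shared_capability(__VA_ARGS__)))
    #else
    #define CAPABILITY(x)
    #define SHARED_LOCKS_REQUIRED_SKETCH(...)
    #define SHARED_LOCK_FUNCTION_SKETCH(...)
    #define SHARED_UNLOCK_FUNCTION_SKETCH(...)
    #endif

    class CAPABILITY("mutex") MutatorLock {
     public:
      void SharedLock() SHARED_LOCK_FUNCTION_SKETCH() {}
      void SharedUnlock() SHARED_UNLOCK_FUNCTION_SKETCH() {}
    };

    MutatorLock mutator_lock;

    class String {
     public:
      // Like GetLength() above: callers must hold the (shared) mutator lock.
      int32_t GetLength() SHARED_LOCKS_REQUIRED_SKETCH(mutator_lock) { return count_; }
     private:
      int32_t count_ = 0;
    };

    int32_t Ok(String* s) {
      mutator_lock.SharedLock();
      int32_t n = s->GetLength();   // fine: the lock is held here
      mutator_lock.SharedUnlock();
      return n;
    }

    // int32_t Bad(String* s) { return s->GetLength(); }  // -Wthread-safety would warn here
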
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index b55db72..2318b74 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -33,22 +33,22 @@
 Class* Throwable::java_lang_Throwable_ = NULL;
 
 void Throwable::SetCause(Throwable* cause) {
-  CHECK(cause != NULL);
+  CHECK(cause != nullptr);
   CHECK(cause != this);
-  Throwable* current_cause = GetFieldObject<Throwable*>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_),
-                                                        false);
+  Throwable* current_cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_),
+                                                       false);
   CHECK(current_cause == NULL || current_cause == this);
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_), cause, false);
 }
 
-bool Throwable::IsCheckedException() const {
+bool Throwable::IsCheckedException() {
   if (InstanceOf(WellKnownClasses::ToClass(WellKnownClasses::java_lang_Error))) {
     return false;
   }
   return !InstanceOf(WellKnownClasses::ToClass(WellKnownClasses::java_lang_RuntimeException));
 }
 
-std::string Throwable::Dump() const {
+std::string Throwable::Dump() {
   std::string result(PrettyTypeOf(this));
   result += ": ";
   String* msg = GetDetailMessage();
@@ -74,7 +74,7 @@
                              source_file, line_number);
     }
   }
-  Throwable* cause = GetFieldObject<Throwable*>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_), false);
+  Throwable* cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_), false);
   if (cause != NULL && cause != this) {  // Constructor makes cause == this by default.
     result += "Caused by: ";
     result += cause->Dump();
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 5a90599..bc9848a 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -33,16 +33,16 @@
   void SetDetailMessage(String* new_detail_message) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), new_detail_message, false);
   }
-  String* GetDetailMessage() const {
-    return GetFieldObject<String*>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), false);
+  String* GetDetailMessage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), false);
   }
-  std::string Dump() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  std::string Dump() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // This is a runtime version of initCause, you shouldn't use it if initCause may have been
   // overridden. Also it asserts rather than throwing exceptions. Currently this is only used
   // in cases like the verifier where the checks cannot fail and initCause isn't overridden.
   void SetCause(Throwable* cause) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsCheckedException() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsCheckedException() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Class* GetJavaLangThrowable() {
     DCHECK(java_lang_Throwable_ != NULL);
@@ -55,16 +55,16 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  Object* GetStackState() const {
-    return GetFieldObject<Object*>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), true);
+  Object* GetStackState() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, stack_state_), true);
   }
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  Throwable* cause_;
-  String* detail_message_;
-  Object* stack_state_;  // Note this is Java volatile:
-  Object* stack_trace_;
-  Object* suppressed_exceptions_;
+  HeapReference<Throwable> cause_;
+  HeapReference<String> detail_message_;
+  HeapReference<Object> stack_state_;  // Note this is Java volatile:
+  HeapReference<Object> stack_trace_;
+  HeapReference<Object> suppressed_exceptions_;
 
   static Class* java_lang_Throwable_;
 
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 4e365be..0addd51 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -47,6 +47,7 @@
 static const uint32_t kAccClassIsProxy = 0x00040000;  // class (dex only)
 static const uint32_t kAccPreverified = 0x00080000;  // method (dex only)
 static const uint32_t kAccFastNative = 0x0080000;  // method (dex only)
+static const uint32_t kAccPortableCompiled = 0x0100000;  // method (dex only)
 
 // Special runtime-only flags.
 // Note: if only kAccClassIsReference is set, we have a soft reference.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 4186693..72220e0 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -79,7 +79,7 @@
   is_sensitive_thread_hook_ = is_sensitive_thread_hook;
 }
 
-Monitor::Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
+Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
     : monitor_lock_("a monitor lock", kMonitorLock),
       monitor_contenders_("monitor contenders", monitor_lock_),
       num_waiters_(0),
@@ -89,10 +89,11 @@
       wait_set_(NULL),
       hash_code_(hash_code),
       locking_method_(NULL),
-      locking_dex_pc_(0) {
+      locking_dex_pc_(0),
+      monitor_id_(MonitorPool::CreateMonitorId(self, this)) {
   // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
   // with the owner unlocking the thin-lock.
-  CHECK(owner == nullptr || owner == Thread::Current() || owner->IsSuspended());
+  CHECK(owner == nullptr || owner == self || owner->IsSuspended());
   // The identity hash code is set for the life time of the monitor.
 }
 
@@ -145,6 +146,7 @@
 }
 
 Monitor::~Monitor() {
+  MonitorPool::ReleaseMonitorId(monitor_id_);
   // Deflated monitors have a null object.
 }
 
@@ -219,7 +221,7 @@
     // Contended.
     const bool log_contention = (lock_profiling_threshold_ != 0);
     uint64_t wait_start_ms = log_contention ? 0 : MilliTime();
-    const mirror::ArtMethod* owners_method = locking_method_;
+    mirror::ArtMethod* owners_method = locking_method_;
     uint32_t owners_dex_pc = locking_dex_pc_;
     monitor_lock_.Unlock(self);  // Let go of locks in order.
     {
@@ -411,7 +413,7 @@
   if (ms < 0 || ns < 0 || ns > 999999) {
     ThrowLocation throw_location = self->GetCurrentLocationForThrow();
     self->ThrowNewExceptionF(throw_location, "Ljava/lang/IllegalArgumentException;",
-                             "timeout arguments out of range: ms=%lld ns=%d", ms, ns);
+                             "timeout arguments out of range: ms=%" PRId64 " ns=%d", ms, ns);
     monitor_lock_.Unlock(self);
     return;
   }
@@ -430,7 +432,7 @@
   int prev_lock_count = lock_count_;
   lock_count_ = 0;
   owner_ = NULL;
-  const mirror::ArtMethod* saved_method = locking_method_;
+  mirror::ArtMethod* saved_method = locking_method_;
   locking_method_ = NULL;
   uintptr_t saved_dex_pc = locking_dex_pc_;
   locking_dex_pc_ = 0;
@@ -611,7 +613,7 @@
   DCHECK(self != NULL);
   DCHECK(obj != NULL);
   // Allocate and acquire a new monitor.
-  UniquePtr<Monitor> m(new Monitor(owner, obj, hash_code));
+  UniquePtr<Monitor> m(new Monitor(self, owner, obj, hash_code));
   if (m->Install(self)) {
     VLOG(monitor) << "monitor: thread " << owner->GetThreadId()
                     << " created monitor " << m.get() << " for object " << obj;
@@ -1008,7 +1010,7 @@
   return owner_ != nullptr;
 }
 
-void Monitor::TranslateLocation(const mirror::ArtMethod* method, uint32_t dex_pc,
+void Monitor::TranslateLocation(mirror::ArtMethod* method, uint32_t dex_pc,
                                 const char** source_file, uint32_t* line_number) const {
   // If method is null, location is unknown
   if (method == NULL) {
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 16e9410..85a8c48 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -24,7 +24,7 @@
 #include <list>
 #include <vector>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "base/mutex.h"
 #include "root_visitor.h"
 #include "sirt_ref.h"
@@ -40,6 +40,8 @@
 class Thread;
 class StackVisitor;
 
+typedef uint32_t MonitorId;
+
 class Monitor {
  public:
   // The default number of spins that are done before thread suspension is used to forcibly inflate
@@ -108,6 +110,10 @@
     return hash_code_.Load() != 0;
   }
 
+  MonitorId GetMonitorId() const {
+    return monitor_id_;
+  }
+
   static void InflateThinLocked(Thread* self, SirtRef<mirror::Object>& obj, LockWord lock_word,
                                 uint32_t hash_code) NO_THREAD_SAFETY_ANALYSIS;
 
@@ -115,7 +121,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
  private:
-  explicit Monitor(Thread* owner, mirror::Object* obj, int32_t hash_code)
+  explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Install the monitor into its object, may fail if another thread installs a different monitor
@@ -162,7 +168,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Translates the provided method and pc into its declaring class' source file and line number.
-  void TranslateLocation(const mirror::ArtMethod* method, uint32_t pc,
+  void TranslateLocation(mirror::ArtMethod* method, uint32_t pc,
                          const char** source_file, uint32_t* line_number) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -195,9 +201,12 @@
   // Method and dex pc where the lock owner acquired the lock, used when lock
   // sampling is enabled. locking_method_ may be null if the lock is currently
   // unlocked, or if the lock is acquired by the system when the stack is empty.
-  const mirror::ArtMethod* locking_method_ GUARDED_BY(monitor_lock_);
+  mirror::ArtMethod* locking_method_ GUARDED_BY(monitor_lock_);
   uint32_t locking_dex_pc_ GUARDED_BY(monitor_lock_);
 
+  // The dense encoding of this monitor that is stored in the lock word.
+  MonitorId monitor_id_;
+
   friend class MonitorInfo;
   friend class MonitorList;
   friend class mirror::Object;
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
new file mode 100644
index 0000000..eadd7a6
--- /dev/null
+++ b/runtime/monitor_pool.cc
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "monitor_pool.h"
+
+#include "base/logging.h"
+#include "base/mutex-inl.h"
+#include "monitor.h"
+
+namespace art {
+
+MonitorPool::MonitorPool() : allocated_ids_lock_("allocated monitor ids lock") {
+}
+
+Monitor* MonitorPool::LookupMonitorFromTable(MonitorId mon_id) {
+  ReaderMutexLock mu(Thread::Current(), allocated_ids_lock_);
+  return table_.Get(mon_id);
+}
+
+MonitorId MonitorPool::AllocMonitorIdFromTable(Thread* self, Monitor* mon) {
+  WriterMutexLock mu(self, allocated_ids_lock_);
+  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
+    if (!allocated_ids_[i]) {
+      allocated_ids_.set(i);
+      MonitorId mon_id = i + 1;  // Zero is reserved to mean "invalid".
+      table_.Put(mon_id, mon);
+      return mon_id;
+    }
+  }
+  LOG(FATAL) << "Out of internal monitor ids";
+  return 0;
+}
+
+void MonitorPool::ReleaseMonitorIdFromTable(MonitorId mon_id) {
+  WriterMutexLock mu(Thread::Current(), allocated_ids_lock_);
+  DCHECK(table_.Get(mon_id) != nullptr);
+  table_.erase(mon_id);
+  --mon_id;  // Zero is reserved to mean "invalid".
+  DCHECK(allocated_ids_[mon_id]) << mon_id;
+  allocated_ids_.reset(mon_id);
+}
+
+}  // namespace art
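
AllocMonitorIdFromTable above hands out the lowest clear bit in a fixed-size bitset, biases it by one so that id 0 can mean "invalid", and records the id-to-monitor mapping; ReleaseMonitorIdFromTable undoes both steps. A reduced sketch of the same allocate/lookup/release discipline using only the standard library follows (locking omitted; class and member names are illustrative, not ART's).

    #include <bitset>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>

    using MonitorId = uint32_t;
    struct Monitor {};  // placeholder for the real monitor

    class IdTable {
     public:
      // Lowest free bit wins; ids are biased by one so that 0 always means "invalid".
      MonitorId Alloc(Monitor* mon) {
        for (std::size_t i = 0; i < allocated_.size(); ++i) {
          if (!allocated_[i]) {
            allocated_.set(i);
            MonitorId id = static_cast<MonitorId>(i) + 1;
            table_[id] = mon;
            return id;
          }
        }
        assert(false && "out of monitor ids");
        return 0;
      }

      void Release(MonitorId id) {
        assert(id != 0 && table_.count(id) == 1);
        table_.erase(id);
        allocated_.reset(id - 1);  // undo the +1 bias
      }

      Monitor* Lookup(MonitorId id) const { return table_.at(id); }

     private:
      static constexpr uint32_t kMaxIds = 0xFFFF;        // matches kMaxMonitorId above
      std::bitset<kMaxIds> allocated_;                    // which ids are live
      std::unordered_map<MonitorId, Monitor*> table_;     // id -> monitor, like SafeMap above
    };

    int main() {
      IdTable ids;
      Monitor m;
      MonitorId id = ids.Alloc(&m);             // first allocation yields id 1
      assert(id == 1 && ids.Lookup(id) == &m);
      ids.Release(id);
      return 0;
    }
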
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
new file mode 100644
index 0000000..32f3f4e
--- /dev/null
+++ b/runtime/monitor_pool.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MONITOR_POOL_H_
+#define ART_RUNTIME_MONITOR_POOL_H_
+
+#include "monitor.h"
+
+#include "safe_map.h"
+
+#include <stdint.h>
+
+namespace art {
+
+// Abstraction to keep monitors small enough to fit in a lock word (32 bits). On 32-bit systems the
+// monitor id loses the alignment bits of the Monitor*.
+class MonitorPool {
+ public:
+  static MonitorPool* Create() {
+#ifndef __LP64__
+    return nullptr;
+#else
+    return new MonitorPool();
+#endif
+  }
+
+  static Monitor* MonitorFromMonitorId(MonitorId mon_id) {
+#ifndef __LP64__
+    return reinterpret_cast<Monitor*>(mon_id << 3);
+#else
+    return Runtime::Current()->GetMonitorPool()->LookupMonitorFromTable(mon_id);
+#endif
+  }
+
+  static MonitorId MonitorIdFromMonitor(Monitor* mon) {
+#ifndef __LP64__
+    return reinterpret_cast<MonitorId>(mon) >> 3;
+#else
+    return mon->GetMonitorId();
+#endif
+  }
+
+  static MonitorId CreateMonitorId(Thread* self, Monitor* mon) {
+#ifndef __LP64__
+    UNUSED(self);
+    return MonitorIdFromMonitor(mon);
+#else
+    return Runtime::Current()->GetMonitorPool()->AllocMonitorIdFromTable(self, mon);
+#endif
+  }
+
+  static void ReleaseMonitorId(MonitorId mon_id) {
+#ifndef __LP64__
+    UNUSED(mon_id);
+#else
+    Runtime::Current()->GetMonitorPool()->ReleaseMonitorIdFromTable(mon_id);
+#endif
+  }
+
+ private:
+#ifdef __LP64__
+  MonitorPool();
+
+  Monitor* LookupMonitorFromTable(MonitorId mon_id);
+
+  MonitorId LookupMonitorIdFromTable(Monitor* mon);
+
+  MonitorId AllocMonitorIdFromTable(Thread* self, Monitor* mon);
+
+  void ReleaseMonitorIdFromTable(MonitorId mon_id);
+
+  ReaderWriterMutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  static constexpr uint32_t kMaxMonitorId = 0xFFFF;
+  std::bitset<kMaxMonitorId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
+  SafeMap<MonitorId, Monitor*> table_ GUARDED_BY(allocated_ids_lock_);
+#endif
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_MONITOR_POOL_H_
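
On 32-bit builds the monitor id is simply the Monitor* with its three alignment bits shifted away, so it round-trips through the 32-bit lock word without any table; only LP64 builds pay for the pool. A compile-time sketch of that pointer/id round trip, assuming (as the >> 3 encoding above does) that monitors are at least 8-byte aligned:

    #include <cassert>
    #include <cstdint>

    struct alignas(8) Monitor {};  // the >> 3 encoding assumes at least 8-byte alignment

    using MonitorId = uint32_t;

    MonitorId IdFromMonitor(Monitor* mon) {
      // Drop the three always-zero alignment bits so the id fits the 32-bit lock word.
      return static_cast<MonitorId>(reinterpret_cast<uintptr_t>(mon) >> 3);
    }

    Monitor* MonitorFromId(MonitorId id) {
      return reinterpret_cast<Monitor*>(static_cast<uintptr_t>(id) << 3);
    }

    int main() {
      Monitor m;
      if (sizeof(void*) == 4) {
        // Lossless on 32-bit: a pointer with its three zero alignment bits dropped fits in 32 bits.
        assert(MonitorFromId(IdFromMonitor(&m)) == &m);
      }
      // On LP64 a pointer can exceed 35 bits, which is why the table-based pool exists there.
      return 0;
    }
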
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 45a2eed..1a3ceb8 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -84,7 +84,7 @@
   void operator=(const NullableScopedUtfChars&);
 };
 
-static jint DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceName, jstring javaOutputName, jint) {
+static jlong DexFile_openDexFileNative(JNIEnv* env, jclass, jstring javaSourceName, jstring javaOutputName, jint) {
   ScopedUtfChars sourceName(env, javaSourceName);
   if (sourceName.c_str() == NULL) {
     return 0;
@@ -125,10 +125,10 @@
     ThrowIOException("%s", error_msg.c_str());
     return 0;
   }
-  return static_cast<jint>(reinterpret_cast<uintptr_t>(dex_file));
+  return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file));
 }
 
-static const DexFile* toDexFile(int dex_file_address, JNIEnv* env) {
+static const DexFile* toDexFile(jlong dex_file_address, JNIEnv* env) {
   const DexFile* dex_file = reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(dex_file_address));
   if (UNLIKELY(dex_file == nullptr)) {
     ScopedObjectAccess soa(env);
@@ -137,7 +137,7 @@
   return dex_file;
 }
 
-static void DexFile_closeDexFile(JNIEnv* env, jclass, jint cookie) {
+static void DexFile_closeDexFile(JNIEnv* env, jclass, jlong cookie) {
   const DexFile* dex_file;
   dex_file = toDexFile(cookie, env);
   if (dex_file == nullptr) {
@@ -150,7 +150,7 @@
 }
 
 static jclass DexFile_defineClassNative(JNIEnv* env, jclass, jstring javaName, jobject javaLoader,
-                                        jint cookie) {
+                                        jlong cookie) {
   const DexFile* dex_file = toDexFile(cookie, env);
   if (dex_file == NULL) {
     VLOG(class_linker) << "Failed to find dex_file";
@@ -177,7 +177,7 @@
   return soa.AddLocalReference<jclass>(result);
 }
 
-static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jint cookie) {
+static jobjectArray DexFile_getClassNameList(JNIEnv* env, jclass, jlong cookie) {
   const DexFile* dex_file;
   dex_file = toDexFile(cookie, env);
   if (dex_file == nullptr) {
@@ -290,7 +290,7 @@
         return JNI_TRUE;
       }
       if (oat_file->GetOatHeader().GetImageFileLocationOatDataBegin()
-          != reinterpret_cast<uint32_t>(image_header.GetOatDataBegin())) {
+          != reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin())) {
         if (kDebugLogging) {
           ScopedObjectAccess soa(env);
           LOG(INFO) << "DexFile_isDexOptNeeded cache file " << cache_location
@@ -330,11 +330,11 @@
 }
 
 static JNINativeMethod gMethods[] = {
-  NATIVE_METHOD(DexFile, closeDexFile, "(I)V"),
-  NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;I)Ljava/lang/Class;"),
-  NATIVE_METHOD(DexFile, getClassNameList, "(I)[Ljava/lang/String;"),
+  NATIVE_METHOD(DexFile, closeDexFile, "(J)V"),
+  NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;J)Ljava/lang/Class;"),
+  NATIVE_METHOD(DexFile, getClassNameList, "(J)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
-  NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)I"),
+  NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)J"),
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
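
The DexFile cookie widens from jint to jlong (and the JNI signatures from I to J) so that the native const DexFile* survives the trip through Java unchanged on 64-bit targets, where a pointer no longer fits in 32 bits. A tiny sketch of the pointer/cookie round trip the code above relies on; jlong and DexFileStub below are stand-ins, not the real JNI or ART types.

    #include <cassert>
    #include <cstdint>

    using jlong = int64_t;              // stand-in for the JNI typedef

    struct DexFileStub { int dummy; };  // stand-in for const DexFile

    // What openDexFileNative now returns: the pointer widened losslessly to 64 bits.
    jlong CookieFromDexFile(const DexFileStub* dex_file) {
      return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file));
    }

    // What toDexFile(jlong, ...) does on the way back in.
    const DexFileStub* DexFileFromCookie(jlong cookie) {
      return reinterpret_cast<const DexFileStub*>(static_cast<uintptr_t>(cookie));
    }

    int main() {
      DexFileStub stub{0};
      jlong cookie = CookieFromDexFile(&stub);
      // Round-trips on both 32-bit and 64-bit hosts; a jint cookie would truncate on 64-bit.
      assert(DexFileFromCookie(cookie) == &stub);
      return 0;
    }
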
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 4a84cfe..d9baaaf 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -24,6 +24,7 @@
 #include "gc/space/dlmalloc_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
+#include "gc/space/zygote_space.h"
 #include "hprof/hprof.h"
 #include "jni_internal.h"
 #include "mirror/class.h"
@@ -232,14 +233,19 @@
 static jlong VMDebug_countInstancesOfClass(JNIEnv* env, jclass, jclass javaClass,
                                            jboolean countAssignable) {
   ScopedObjectAccess soa(env);
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  // We only want reachable instances, so do a GC. This also ensures that the alloc stack
+  // is empty, so the live bitmap is the only place we need to look. Need to do GC before decoding
+  // any jobjects.
+  heap->CollectGarbage(false);
   mirror::Class* c = soa.Decode<mirror::Class*>(javaClass);
-  if (c == NULL) {
+  if (c == nullptr) {
     return 0;
   }
   std::vector<mirror::Class*> classes;
   classes.push_back(c);
   uint64_t count = 0;
-  Runtime::Current()->GetHeap()->CountInstances(classes, countAssignable, &count);
+  heap->CountInstances(classes, countAssignable, &count);
   return count;
 }
 
@@ -265,9 +271,9 @@
     if (space->IsImageSpace()) {
       // Currently don't include the image space.
     } else if (space->IsZygoteSpace()) {
-      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
-      zygoteSize += malloc_space->GetFootprint();
-      zygoteUsed += malloc_space->GetBytesAllocated();
+      gc::space::ZygoteSpace* zygote_space = space->AsZygoteSpace();
+      zygoteSize += zygote_space->Size();
+      zygoteUsed += zygote_space->GetBytesAllocated();
     } else if (space->IsMallocSpace()) {
       // This is a malloc space.
       gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index c9e255c..e1b5f97 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -90,7 +90,7 @@
     ThrowRuntimeException("Trying to get address of movable array object");
     return 0;
   }
-  return reinterpret_cast<uintptr_t>(array->GetRawData(array->GetClass()->GetComponentSize()));
+  return reinterpret_cast<uintptr_t>(array->GetRawData(array->GetClass()->GetComponentSize(), 0));
 }
 
 static void VMRuntime_clearGrowthLimit(JNIEnv*, jobject) {
@@ -181,7 +181,8 @@
 
 typedef std::map<std::string, mirror::String*> StringTable;
 
-static mirror::Object* PreloadDexCachesStringsVisitor(mirror::Object* root, void* arg) {
+static mirror::Object* PreloadDexCachesStringsVisitor(mirror::Object* root, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   StringTable& table = *reinterpret_cast<StringTable*>(arg);
   mirror::String* string = const_cast<mirror::Object*>(root)->AsString();
   // LOG(INFO) << "VMRuntime.preloadDexCaches interned=" << string->ToModifiedUtf8();
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index f915365..7e02e29 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -79,7 +79,7 @@
     ClosestUserClassLoaderVisitor(Thread* thread, mirror::Object* bootstrap, mirror::Object* system)
       : StackVisitor(thread, NULL), bootstrap(bootstrap), system(system), class_loader(NULL) {}
 
-    bool VisitFrame() {
+    bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
       DCHECK(class_loader == NULL);
       mirror::Class* c = GetMethod()->GetDeclaringClass();
       mirror::Object* cl = c->GetClassLoader();
diff --git a/runtime/native/dalvik_system_Zygote.cc b/runtime/native/dalvik_system_Zygote.cc
index 7fa9457..22c5430 100644
--- a/runtime/native/dalvik_system_Zygote.cc
+++ b/runtime/native/dalvik_system_Zygote.cc
@@ -47,8 +47,10 @@
 #if defined(__linux__)
 #include <sys/personality.h>
 #include <sys/utsname.h>
+#if defined(HAVE_ANDROID_OS)
 #include <sys/capability.h>
 #endif
+#endif
 
 namespace art {
 
diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc
index e969fcf..0629f4d 100644
--- a/runtime/native/java_lang_Runtime.cc
+++ b/runtime/native/java_lang_Runtime.cc
@@ -64,7 +64,8 @@
   std::string detail;
   {
     ScopedObjectAccess soa(env);
-    mirror::ClassLoader* classLoader = soa.Decode<mirror::ClassLoader*>(javaLoader);
+    SirtRef<mirror::ClassLoader> classLoader(soa.Self(),
+                                             soa.Decode<mirror::ClassLoader*>(javaLoader));
     JavaVMExt* vm = Runtime::Current()->GetJavaVM();
     bool success = vm->LoadNativeLibrary(filename.c_str(), classLoader, &detail);
     if (success) {
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index ea78e04..6bbe642 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -24,151 +24,15 @@
 #include "mirror/object_array-inl.h"
 #include "scoped_fast_native_object_access.h"
 
-/*
- * We make guarantees about the atomicity of accesses to primitive
- * variables.  These guarantees also apply to elements of arrays.
- * In particular, 8-bit, 16-bit, and 32-bit accesses must be atomic and
- * must not cause "word tearing".  Accesses to 64-bit array elements must
- * either be atomic or treated as two 32-bit operations.  References are
- * always read and written atomically, regardless of the number of bits
- * used to represent them.
- *
- * We can't rely on standard libc functions like memcpy(3) and memmove(3)
- * in our implementation of System.arraycopy, because they may copy
- * byte-by-byte (either for the full run or for "unaligned" parts at the
- * start or end).  We need to use functions that guarantee 16-bit or 32-bit
- * atomicity as appropriate.
- *
- * System.arraycopy() is heavily used, so having an efficient implementation
- * is important.  The bionic libc provides a platform-optimized memory move
- * function that should be used when possible.  If it's not available,
- * the trivial "reference implementation" versions below can be used until
- * a proper version can be written.
- *
- * For these functions, The caller must guarantee that dst/src are aligned
- * appropriately for the element type, and that n is a multiple of the
- * element size.
- */
-
-/*
- * Works like memmove(), except:
- * - if all arguments are at least 32-bit aligned, we guarantee that we
- *   will use operations that preserve atomicity of 32-bit values
- * - if not, we guarantee atomicity of 16-bit values
- *
- * If all three arguments are not at least 16-bit aligned, the behavior
- * of this function is undefined.  (We could remove this restriction by
- * testing for unaligned values and punting to memmove(), but that's
- * not currently useful.)
- *
- * TODO: add loop for 64-bit alignment
- * TODO: use __builtin_prefetch
- * TODO: write ARM/MIPS/x86 optimized versions
- */
-void MemmoveWords(void* dst, const void* src, size_t n) {
-  DCHECK_EQ((((uintptr_t) dst | (uintptr_t) src | n) & 0x01), 0U);
-
-  char* d = reinterpret_cast<char*>(dst);
-  const char* s = reinterpret_cast<const char*>(src);
-  size_t copyCount;
-
-  // If the source and destination pointers are the same, this is
-  // an expensive no-op.  Testing for an empty move now allows us
-  // to skip a check later.
-  if (n == 0 || d == s) {
-    return;
-  }
-
-  // Determine if the source and destination buffers will overlap if
-  // we copy data forward (i.e. *dst++ = *src++).
-  //
-  // It's okay if the destination buffer starts before the source and
-  // there is some overlap, because the reader is always ahead of the
-  // writer.
-  if (LIKELY((d < s) || ((size_t)(d - s) >= n))) {
-    // Copy forward.  We prefer 32-bit loads and stores even for 16-bit
-    // data, so sort that out.
-    if (((reinterpret_cast<uintptr_t>(d) | reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-      // Not 32-bit aligned.  Two possibilities:
-      // (1) Congruent, we can align to 32-bit by copying one 16-bit val
-      // (2) Non-congruent, we can do one of:
-      //   a. copy whole buffer as a series of 16-bit values
-      //   b. load/store 32 bits, using shifts to ensure alignment
-      //   c. just copy the as 32-bit values and assume the CPU
-      //      will do a reasonable job
-      //
-      // We're currently using (a), which is suboptimal.
-      if (((reinterpret_cast<uintptr_t>(d) ^ reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-        copyCount = n;
-      } else {
-        copyCount = 2;
-      }
-      n -= copyCount;
-      copyCount /= sizeof(uint16_t);
-
-      while (copyCount--) {
-        *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-        d += sizeof(uint16_t);
-        s += sizeof(uint16_t);
-      }
-    }
-
-    // Copy 32-bit aligned words.
-    copyCount = n / sizeof(uint32_t);
-    while (copyCount--) {
-      *reinterpret_cast<uint32_t*>(d) = *reinterpret_cast<const uint32_t*>(s);
-      d += sizeof(uint32_t);
-      s += sizeof(uint32_t);
-    }
-
-    // Check for leftovers.  Either we finished exactly, or we have one remaining 16-bit chunk.
-    if ((n & 0x02) != 0) {
-      *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-    }
-  } else {
-    // Copy backward, starting at the end.
-    d += n;
-    s += n;
-
-    if (((reinterpret_cast<uintptr_t>(d) | reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-      // try for 32-bit alignment.
-      if (((reinterpret_cast<uintptr_t>(d) ^ reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) {
-        copyCount = n;
-      } else {
-        copyCount = 2;
-      }
-      n -= copyCount;
-      copyCount /= sizeof(uint16_t);
-
-      while (copyCount--) {
-        d -= sizeof(uint16_t);
-        s -= sizeof(uint16_t);
-        *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-      }
-    }
-
-    // Copy 32-bit aligned words.
-    copyCount = n / sizeof(uint32_t);
-    while (copyCount--) {
-      d -= sizeof(uint32_t);
-      s -= sizeof(uint32_t);
-      *reinterpret_cast<uint32_t*>(d) = *reinterpret_cast<const uint32_t*>(s);
-    }
-
-    // Copy leftovers.
-    if ((n & 0x02) != 0) {
-      d -= sizeof(uint16_t);
-      s -= sizeof(uint16_t);
-      *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s);
-    }
-  }
-}
-
-#define move16 MemmoveWords
-#define move32 MemmoveWords
-
 namespace art {
 
+/*
+ * We make guarantees about the atomicity of accesses to primitive variables.  These guarantees
+ * also apply to elements of arrays. In particular, 8-bit, 16-bit, and 32-bit accesses must not
+ * cause "word tearing".  Accesses to 64-bit array elements may be two 32-bit operations.
+ * References are never torn regardless of the number of bits used to represent them.
+ */
+
 static void ThrowArrayStoreException_NotAnArray(const char* identifier, mirror::Object* array)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string actualType(PrettyTypeOf(array));
@@ -178,168 +42,132 @@
                            "%s of type %s is not an array", identifier, actualType.c_str());
 }
 
-static void System_arraycopy(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst, jint dstPos, jint length) {
+static void System_arraycopy(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst,
+                             jint dstPos, jint length) {
+  // The API is defined in terms of length, but length is somewhat overloaded so we use count.
+  const jint count = length;
   ScopedFastNativeObjectAccess soa(env);
 
   // Null pointer checks.
-  if (UNLIKELY(javaSrc == NULL)) {
-    ThrowNullPointerException(NULL, "src == null");
+  if (UNLIKELY(javaSrc == nullptr)) {
+    ThrowNullPointerException(nullptr, "src == null");
     return;
   }
-  if (UNLIKELY(javaDst == NULL)) {
-    ThrowNullPointerException(NULL, "dst == null");
+  if (UNLIKELY(javaDst == nullptr)) {
+    ThrowNullPointerException(nullptr, "dst == null");
     return;
   }
 
   // Make sure source and destination are both arrays.
   mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
-  mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
   if (UNLIKELY(!srcObject->IsArrayInstance())) {
     ThrowArrayStoreException_NotAnArray("source", srcObject);
     return;
   }
+  mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
   if (UNLIKELY(!dstObject->IsArrayInstance())) {
     ThrowArrayStoreException_NotAnArray("destination", dstObject);
     return;
   }
   mirror::Array* srcArray = srcObject->AsArray();
   mirror::Array* dstArray = dstObject->AsArray();
-  mirror::Class* srcComponentType = srcArray->GetClass()->GetComponentType();
-  mirror::Class* dstComponentType = dstArray->GetClass()->GetComponentType();
 
   // Bounds checking.
-  if (UNLIKELY(srcPos < 0 || dstPos < 0 || length < 0 || srcPos > srcArray->GetLength() - length || dstPos > dstArray->GetLength() - length)) {
+  if (UNLIKELY(srcPos < 0) || UNLIKELY(dstPos < 0) || UNLIKELY(count < 0) ||
+      UNLIKELY(srcPos > srcArray->GetLength() - count) ||
+      UNLIKELY(dstPos > dstArray->GetLength() - count)) {
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayIndexOutOfBoundsException;",
                                    "src.length=%d srcPos=%d dst.length=%d dstPos=%d length=%d",
-                                   srcArray->GetLength(), srcPos, dstArray->GetLength(), dstPos, length);
+                                   srcArray->GetLength(), srcPos, dstArray->GetLength(), dstPos,
+                                   count);
     return;
   }
 
-  // Handle primitive arrays.
-  if (srcComponentType->IsPrimitive() || dstComponentType->IsPrimitive()) {
-    // If one of the arrays holds a primitive type the other array must hold the exact same type.
-    if (UNLIKELY(srcComponentType != dstComponentType)) {
-      std::string srcType(PrettyTypeOf(srcArray));
-      std::string dstType(PrettyTypeOf(dstArray));
-      ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
-      soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayStoreException;",
-                                     "Incompatible types: src=%s, dst=%s",
-                                     srcType.c_str(), dstType.c_str());
-      return;
-    }
+  mirror::Class* dstComponentType = dstArray->GetClass()->GetComponentType();
+  mirror::Class* srcComponentType = srcArray->GetClass()->GetComponentType();
+  Primitive::Type dstComponentPrimitiveType = dstComponentType->GetPrimitiveType();
 
-    size_t width = srcArray->GetClass()->GetComponentSize();
-    uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(width));
-    const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(width));
-
-    switch (width) {
-    case 1:
-      memmove(dstBytes + dstPos, srcBytes + srcPos, length);
-      break;
-    case 2:
-      move16(dstBytes + dstPos * 2, srcBytes + srcPos * 2, length * 2);
-      break;
-    case 4:
-      move32(dstBytes + dstPos * 4, srcBytes + srcPos * 4, length * 4);
-      break;
-    case 8:
-      // We don't need to guarantee atomicity of the entire 64-bit word.
-      move32(dstBytes + dstPos * 8, srcBytes + srcPos * 8, length * 8);
-      break;
-    default:
-      LOG(FATAL) << "Unknown primitive array type: " << PrettyTypeOf(srcArray);
-    }
-
-    return;
-  }
-
-  // Neither class is primitive. Are the types trivially compatible?
-  const size_t width = sizeof(mirror::Object*);
-  uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(width));
-  const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(width));
-  if (dstArray == srcArray || dstComponentType->IsAssignableFrom(srcComponentType)) {
-    // Yes. Bulk copy.
-    COMPILE_ASSERT(sizeof(width) == sizeof(uint32_t), move32_assumes_Object_references_are_32_bit);
-    move32(dstBytes + dstPos * width, srcBytes + srcPos * width, length * width);
-    Runtime::Current()->GetHeap()->WriteBarrierArray(dstArray, dstPos, length);
-    return;
-  }
-
-  // The arrays are not trivially compatible. However, we may still be able to copy some or all of
-  // the elements if the source objects are compatible (for example, copying an Object[] to
-  // String[], the Objects being copied might actually be Strings).
-  // We can't do a bulk move because that would introduce a check-use race condition, so we copy
-  // elements one by one.
-
-  // We already dealt with overlapping copies, so we don't need to cope with that case below.
-  CHECK_NE(dstArray, srcArray);
-
-  mirror::Object* const * srcObjects =
-      reinterpret_cast<mirror::Object* const *>(srcBytes + srcPos * width);
-  mirror::Object** dstObjects = reinterpret_cast<mirror::Object**>(dstBytes + dstPos * width);
-  mirror::Class* dstClass = dstArray->GetClass()->GetComponentType();
-
-  // We want to avoid redundant IsAssignableFrom checks where possible, so we cache a class that
-  // we know is assignable to the destination array's component type.
-  mirror::Class* lastAssignableElementClass = dstClass;
-
-  mirror::Object* o = NULL;
-  int i = 0;
-  for (; i < length; ++i) {
-    o = srcObjects[i];
-    if (o != NULL) {
-      mirror::Class* oClass = o->GetClass();
-      if (lastAssignableElementClass == oClass) {
-        dstObjects[i] = o;
-      } else if (dstClass->IsAssignableFrom(oClass)) {
-        lastAssignableElementClass = oClass;
-        dstObjects[i] = o;
-      } else {
-        // Can't put this element into the array.
-        break;
+  if (LIKELY(srcComponentType == dstComponentType)) {
+    // Trivial assignability.
+    switch (dstComponentPrimitiveType) {
+      case Primitive::kPrimVoid:
+        LOG(FATAL) << "Unreachable, cannot have arrays of type void";
+        return;
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 1U);
+        dstArray->AsByteSizedArray()->Memmove(dstPos, srcArray->AsByteSizedArray(), srcPos, count);
+        return;
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 2U);
+        dstArray->AsShortSizedArray()->Memmove(dstPos, srcArray->AsShortSizedArray(), srcPos, count);
+        return;
+      case Primitive::kPrimInt:
+      case Primitive::kPrimFloat:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 4U);
+        dstArray->AsIntArray()->Memmove(dstPos, srcArray->AsIntArray(), srcPos, count);
+        return;
+      case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 8U);
+        dstArray->AsLongArray()->Memmove(dstPos, srcArray->AsLongArray(), srcPos, count);
+        return;
+      case Primitive::kPrimNot: {
+        mirror::ObjectArray<mirror::Object>* dstObjArray = dstArray->AsObjectArray<mirror::Object>();
+        mirror::ObjectArray<mirror::Object>* srcObjArray = srcArray->AsObjectArray<mirror::Object>();
+        dstObjArray->AssignableMemmove(dstPos, srcObjArray, srcPos, count);
+        return;
       }
-    } else {
-      dstObjects[i] = NULL;
+      default:
+        LOG(FATAL) << "Unknown array type: " << PrettyTypeOf(srcArray);
+        return;
     }
   }
-
-  Runtime::Current()->GetHeap()->WriteBarrierArray(dstArray, dstPos, length);
-  if (UNLIKELY(i != length)) {
-    std::string actualSrcType(PrettyTypeOf(o));
+  // If one of the arrays holds a primitive type the other array must hold the exact same type.
+  if (UNLIKELY((dstComponentPrimitiveType != Primitive::kPrimNot) ||
+               srcComponentType->IsPrimitive())) {
+    std::string srcType(PrettyTypeOf(srcArray));
     std::string dstType(PrettyTypeOf(dstArray));
     ThrowLocation throw_location = soa.Self()->GetCurrentLocationForThrow();
     soa.Self()->ThrowNewExceptionF(throw_location, "Ljava/lang/ArrayStoreException;",
-                                   "source[%d] of type %s cannot be stored in destination array of type %s",
-                                   srcPos + i, actualSrcType.c_str(), dstType.c_str());
+                                   "Incompatible types: src=%s, dst=%s",
+                                   srcType.c_str(), dstType.c_str());
     return;
   }
+  // Arrays hold distinct types and therefore can't alias - use memcpy instead of memmove.
+  mirror::ObjectArray<mirror::Object>* dstObjArray = dstArray->AsObjectArray<mirror::Object>();
+  mirror::ObjectArray<mirror::Object>* srcObjArray = srcArray->AsObjectArray<mirror::Object>();
+  // If we're assigning into, say, Object[] then we don't need per-element checks.
+  if (dstComponentType->IsAssignableFrom(srcComponentType)) {
+    dstObjArray->AssignableMemcpy(dstPos, srcObjArray, srcPos, count);
+    return;
+  }
+  dstObjArray->AssignableCheckingMemcpy(dstPos, srcObjArray, srcPos, count, true);
 }
 
-static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos, jobject javaDst, jint dstPos, jint length) {
+static void System_arraycopyCharUnchecked(JNIEnv* env, jclass, jobject javaSrc, jint srcPos,
+                                          jobject javaDst, jint dstPos, jint count) {
   ScopedFastNativeObjectAccess soa(env);
-  DCHECK(javaSrc != NULL);
-  DCHECK(javaDst != NULL);
   mirror::Object* srcObject = soa.Decode<mirror::Object*>(javaSrc);
   mirror::Object* dstObject = soa.Decode<mirror::Object*>(javaDst);
-  DCHECK(srcObject->IsArrayInstance());
-  DCHECK(dstObject->IsArrayInstance());
+  DCHECK(srcObject != nullptr);
+  DCHECK(dstObject != nullptr);
   mirror::Array* srcArray = srcObject->AsArray();
   mirror::Array* dstArray = dstObject->AsArray();
-  DCHECK(srcPos >= 0 && dstPos >= 0 && length >= 0 &&
-         srcPos + length <= srcArray->GetLength() && dstPos + length <= dstArray->GetLength());
-  DCHECK_EQ(srcArray->GetClass()->GetComponentType(), dstArray->GetClass()->GetComponentType());
-  DCHECK(srcArray->GetClass()->GetComponentType()->IsPrimitive());
-  DCHECK(dstArray->GetClass()->GetComponentType()->IsPrimitive());
-  DCHECK_EQ(srcArray->GetClass()->GetComponentSize(), static_cast<size_t>(2));
-  DCHECK_EQ(dstArray->GetClass()->GetComponentSize(), static_cast<size_t>(2));
-  uint8_t* dstBytes = reinterpret_cast<uint8_t*>(dstArray->GetRawData(2));
-  const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(srcArray->GetRawData(2));
-  move16(dstBytes + dstPos * 2, srcBytes + srcPos * 2, length * 2);
+  DCHECK_GE(srcPos, 0);
+  DCHECK_GE(dstPos, 0);
+  DCHECK_GE(count, 0);
+  DCHECK_LE(srcPos + count, srcArray->GetLength());
+  DCHECK_LE(dstPos + count, dstArray->GetLength());
+  DCHECK_EQ(srcArray->GetClass(), dstArray->GetClass());
+  DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), Primitive::kPrimChar);
+  dstArray->AsCharArray()->Memmove(dstPos, srcArray->AsCharArray(), srcPos, count);
 }
 
 static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
-  if (javaObject == nullptr) {
+  if (UNLIKELY(javaObject == nullptr)) {
     return 0;
   }
   ScopedFastNativeObjectAccess soa(env);
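
The rewritten System_arraycopy dispatches on the component type: primitive arrays with identical component types go through element-sized Memmove (the source and destination may be the same array), reference arrays of the same type use AssignableMemmove, and only distinct-but-assignable reference arrays take the memcpy path, with per-element checks reserved for the non-assignable case. A reduced sketch of the aliasing reasoning on plain C++ arrays; the function names here are illustrative, not ART's array API.

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Same component type: src and dst may be the very same array, so use memmove semantics.
    void PrimitiveCopy(int32_t* dst, size_t dst_pos, const int32_t* src, size_t src_pos, size_t count) {
      std::memmove(dst + dst_pos, src + src_pos, count * sizeof(int32_t));
    }

    struct Base { virtual ~Base() = default; };
    struct Derived : Base {};

    // Distinct reference types can never name the same array object, so a plain forward
    // copy (memcpy-like) is safe; per-element type checks are only needed when the source
    // component type is not assignable to the destination component type.
    void AssignableCopy(Base** dst, size_t dst_pos, Derived** src, size_t src_pos, size_t count) {
      std::copy(src + src_pos, src + src_pos + count, dst + dst_pos);
    }

    int main() {
      int32_t a[6] = {0, 1, 2, 3, 4, 5};
      PrimitiveCopy(a, 1, a, 0, 4);          // overlapping self-copy stays correct
      assert(a[1] == 0 && a[4] == 3);

      Derived d1, d2;
      Derived* src[2] = {&d1, &d2};
      Base* dst[2] = {nullptr, nullptr};
      AssignableCopy(dst, 0, src, 0, 2);      // Derived[] into Base[] needs no per-element check
      assert(dst[0] == &d1 && dst[1] == &d2);
      return 0;
    }
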
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 52cdb59..2197597 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -29,14 +29,16 @@
 static jobject Array_createMultiArray(JNIEnv* env, jclass, jclass javaElementClass, jobject javaDimArray) {
   ScopedFastNativeObjectAccess soa(env);
   DCHECK(javaElementClass != NULL);
-  mirror::Class* element_class = soa.Decode<mirror::Class*>(javaElementClass);
+  SirtRef<mirror::Class> element_class(soa.Self(), soa.Decode<mirror::Class*>(javaElementClass));
   DCHECK(element_class->IsClass());
   DCHECK(javaDimArray != NULL);
   mirror::Object* dimensions_obj = soa.Decode<mirror::Object*>(javaDimArray);
   DCHECK(dimensions_obj->IsArrayInstance());
   DCHECK_STREQ(ClassHelper(dimensions_obj->GetClass()).GetDescriptor(), "[I");
-  mirror::IntArray* dimensions_array = down_cast<mirror::IntArray*>(dimensions_obj);
-  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class, dimensions_array);
+  SirtRef<mirror::IntArray> dimensions_array(soa.Self(),
+                                             down_cast<mirror::IntArray*>(dimensions_obj));
+  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class,
+                                                             dimensions_array);
   return soa.AddLocalReference<jobject>(new_array);
 }
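
Several call sites in this change (CreateMultiArray above, LoadNativeLibrary earlier) now wrap raw mirror pointers in SirtRef before calling code that may allocate or suspend, so the object stays registered as a GC root rather than being held only in a raw local that a moving collector could leave stale. A toy RAII sketch of that "root for the duration of a scope" idea; the ToyThread/ScopedRootRef API is invented for illustration.

    #include <cassert>
    #include <vector>

    struct Object {};

    // Toy stand-in for the thread's stack-indirect-reference-table (SIRT).
    struct ToyThread {
      std::vector<Object**> roots;  // slots a GC would visit and update
    };

    // RAII root registration, loosely modeled on SirtRef<T>: the slot lives on the native
    // stack, a (hypothetical) moving GC can rewrite it, and Get() always returns the
    // current, possibly relocated, address.
    template <typename T>
    class ScopedRootRef {
     public:
      ScopedRootRef(ToyThread* self, T* obj) : self_(self), obj_(obj) {
        self_->roots.push_back(reinterpret_cast<Object**>(&obj_));
      }
      ~ScopedRootRef() { self_->roots.pop_back(); }
      T* Get() const { return obj_; }

     private:
      ToyThread* self_;
      T* obj_;
    };

    int main() {
      ToyThread self;
      Object o;
      {
        ScopedRootRef<Object> ref(&self, &o);
        assert(self.roots.size() == 1);   // the slot is visible to the collector in this scope
        assert(ref.Get() == &o);
      }
      assert(self.roots.empty());
      return 0;
    }
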
 
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index b5fc7e7..6c22003 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "atomic.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/object.h"
@@ -23,40 +22,30 @@
 
 namespace art {
 
-static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint expectedValue, jint newValue) {
+static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                         jint expectedValue, jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
-  volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
-  // Note: android_atomic_release_cas() returns 0 on success, not failure.
-  int result = android_atomic_release_cas(expectedValue, newValue, address);
-  return (result == 0) ? JNI_TRUE : JNI_FALSE;
-}
-
-static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong expectedValue, jlong newValue) {
-  ScopedFastNativeObjectAccess soa(env);
-  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
-  volatile int64_t* address = reinterpret_cast<volatile int64_t*>(raw_addr);
-  // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-  bool success = QuasiAtomic::Cas64(expectedValue, newValue, address);
+  bool success = obj->CasField32(MemberOffset(offset), expectedValue, newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
-static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaExpectedValue, jobject javaNewValue) {
+static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                          jlong expectedValue, jlong newValue) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
+  bool success = obj->CasField64(MemberOffset(offset), expectedValue, newValue);
+  return success ? JNI_TRUE : JNI_FALSE;
+}
+
+static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                            jobject javaExpectedValue, jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* expectedValue = soa.Decode<mirror::Object*>(javaExpectedValue);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
-  byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
-  int32_t* address = reinterpret_cast<int32_t*>(raw_addr);
-  // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-  int result = android_atomic_release_cas(reinterpret_cast<int32_t>(expectedValue),
-      reinterpret_cast<int32_t>(newValue), address);
-  if (result == 0) {
-    Runtime::Current()->GetHeap()->WriteBarrierField(obj, MemberOffset(offset), newValue);
-  }
-  return (result == 0) ? JNI_TRUE : JNI_FALSE;
+  bool success = obj->CasFieldObject(MemberOffset(offset), expectedValue, newValue);
+  return success ? JNI_TRUE : JNI_FALSE;
 }
 
 static jint Unsafe_getInt(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
@@ -77,13 +66,15 @@
   obj->SetField32(MemberOffset(offset), newValue, false);
 }
 
-static void Unsafe_putIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
+static void Unsafe_putIntVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                  jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   obj->SetField32(MemberOffset(offset), newValue, true);
 }
 
-static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong offset, jint newValue) {
+static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                 jint newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   QuasiAtomic::MembarStoreStore();
@@ -108,13 +99,15 @@
   obj->SetField64(MemberOffset(offset), newValue, false);
 }
 
-static void Unsafe_putLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
+static void Unsafe_putLongVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                   jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   obj->SetField64(MemberOffset(offset), newValue, true);
 }
 
-static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong newValue) {
+static void Unsafe_putOrderedLong(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                  jlong newValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   QuasiAtomic::MembarStoreStore();
@@ -124,32 +117,35 @@
 static jobject Unsafe_getObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  mirror::Object* value = obj->GetFieldObject<mirror::Object*>(MemberOffset(offset), true);
+  mirror::Object* value = obj->GetFieldObject<mirror::Object>(MemberOffset(offset), true);
   return soa.AddLocalReference<jobject>(value);
 }
 
 static jobject Unsafe_getObject(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
-  mirror::Object* value = obj->GetFieldObject<mirror::Object*>(MemberOffset(offset), false);
+  mirror::Object* value = obj->GetFieldObject<mirror::Object>(MemberOffset(offset), false);
   return soa.AddLocalReference<jobject>(value);
 }
 
-static void Unsafe_putObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
+static void Unsafe_putObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                             jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   obj->SetFieldObject(MemberOffset(offset), newValue, false);
 }
 
-static void Unsafe_putObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
+static void Unsafe_putObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                     jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
   obj->SetFieldObject(MemberOffset(offset), newValue, true);
 }
 
-static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaNewValue) {
+static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
+                                    jobject javaNewValue) {
   ScopedFastNativeObjectAccess soa(env);
   mirror::Object* obj = soa.Decode<mirror::Object*>(javaObj);
   mirror::Object* newValue = soa.Decode<mirror::Object*>(javaNewValue);
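The sun_misc_Unsafe changes above replace hand-rolled android_atomic_release_cas calls with the object's own CasField32/CasField64/CasFieldObject helpers, which also keep the write barrier for reference fields inside one place. The sketch below shows the shape of a CasField32-style helper using std::atomic; it is not ART's implementation (the real one also chooses memory ordering and, for object fields, issues the heap write barrier).

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    struct Obj { std::atomic<int32_t> field{5}; };

    // Compare-and-swap a 32-bit field located at a byte offset inside an object.
    static bool CasField32(Obj* obj, size_t offset, int32_t expected, int32_t desired) {
      auto* addr = reinterpret_cast<std::atomic<int32_t>*>(
          reinterpret_cast<uint8_t*>(obj) + offset);
      return addr->compare_exchange_strong(expected, desired);
    }

    int main() {
      Obj o;
      bool ok = CasField32(&o, 0, /*expected=*/5, /*desired=*/7);
      std::printf("cas %s, field=%d\n", ok ? "succeeded" : "failed", o.field.load());
      return 0;
    }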
diff --git a/runtime/oat.cc b/runtime/oat.cc
index caf18f1..81d4540 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '1', '3', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '1', '4', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
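Bumping kOatVersion from "013" to "014" marks the on-disk oat format as incompatible (the OatMethod entry points are being split into portable and quick below), so older files are rejected instead of misread. A small sketch of the kind of check the version feeds; HeaderLooksValid is a hypothetical helper, not the actual OatHeader::IsValid.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static const uint8_t kMagic[4]   = { 'o', 'a', 't', '\n' };
    static const uint8_t kVersion[4] = { '0', '1', '4', '\0' };

    static bool HeaderLooksValid(const uint8_t* header) {
      return std::memcmp(header, kMagic, 4) == 0 &&
             std::memcmp(header + 4, kVersion, 4) == 0;
    }

    int main() {
      uint8_t stale[8] = { 'o', 'a', 't', '\n', '0', '1', '3', '\0' };
      std::printf("stale oat accepted? %s\n", HeaderLooksValid(stale) ? "yes" : "no");
      return 0;
    }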
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index fa2b485..0f380ad 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -66,16 +66,16 @@
                        std::string* error_msg) {
   CHECK(!filename.empty()) << location;
   CheckLocation(filename);
-#ifdef ART_USE_PORTABLE_COMPILER
-  // If we are using PORTABLE, use dlopen to deal with relocations.
-  //
-  // We use our own ELF loader for Quick to deal with legacy apps that
-  // open a generated dex file by name, remove the file, then open
-  // another generated dex file with the same name. http://b/10614658
-  if (executable) {
-    return OpenDlopen(filename, location, requested_base, error_msg);
+  if (kUsePortableCompiler) {
+    // If we are using PORTABLE, use dlopen to deal with relocations.
+    //
+    // We use our own ELF loader for Quick to deal with legacy apps that
+    // open a generated dex file by name, remove the file, then open
+    // another generated dex file with the same name. http://b/10614658
+    if (executable) {
+      return OpenDlopen(filename, location, requested_base, error_msg);
+    }
   }
-#endif
   // If we aren't trying to execute, we just use our own ElfFile loader for a couple reasons:
   //
   // On target, dlopen may fail when compiling due to selinux restrictions on installd.
@@ -338,12 +338,17 @@
   }
 
   if (warn_if_not_found) {
+    std::string checksum("<unspecified>");
+    if (dex_location_checksum != NULL) {
+      checksum = StringPrintf("0x%08x", *dex_location_checksum);
+    }
     LOG(WARNING) << "Failed to find OatDexFile for DexFile " << dex_location
-                 << " in OatFile " << GetLocation();
+                 << " with checksum " << checksum << " in OatFile " << GetLocation();
     if (kIsDebugBuild) {
       for (Table::const_iterator it = oat_dex_files_.begin(); it != oat_dex_files_.end(); ++it) {
         LOG(WARNING) << "OatFile " << GetLocation()
-                     << " contains OatDexFile " << it->second->GetDexFileLocation();
+                     << " contains OatDexFile " << it->second->GetDexFileLocation()
+                     << " with checksum 0x" << std::hex << it->second->GetDexFileLocationChecksum();
       }
     }
   }
@@ -498,51 +503,40 @@
     mapping_table_offset_(mapping_table_offset),
     vmap_table_offset_(vmap_table_offset),
     native_gc_map_offset_(gc_map_offset) {
-#ifndef NDEBUG
-  if (mapping_table_offset_ != 0) {  // implies non-native, non-stub code
-    if (vmap_table_offset_ == 0) {
-      DCHECK_EQ(0U, static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
-                                          __builtin_popcount(fp_spill_mask_)));
-    } else {
-      VmapTable vmap_table(reinterpret_cast<const uint8_t*>(begin_ + vmap_table_offset_));
+  if (kIsDebugBuild) {
+    if (mapping_table_offset_ != 0) {  // implies non-native, non-stub code
+      if (vmap_table_offset_ == 0) {
+        CHECK_EQ(0U, static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
+                                           __builtin_popcount(fp_spill_mask_)));
+      } else {
+        VmapTable vmap_table(reinterpret_cast<const uint8_t*>(begin_ + vmap_table_offset_));
 
-      DCHECK_EQ(vmap_table.Size(), static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
-                                                         __builtin_popcount(fp_spill_mask_)));
+        CHECK_EQ(vmap_table.Size(), static_cast<uint32_t>(__builtin_popcount(core_spill_mask_) +
+                                                          __builtin_popcount(fp_spill_mask_)));
+      }
+    } else {
+      CHECK_EQ(vmap_table_offset_, 0U);
     }
-  } else {
-    DCHECK_EQ(vmap_table_offset_, 0U);
   }
-#endif
 }
 
 OatFile::OatMethod::~OatMethod() {}
 
-const void* OatFile::OatMethod::GetCode() const {
-  return GetOatPointer<const void*>(code_offset_);
-}
 
-uint32_t OatFile::OatMethod::GetCodeSize() const {
-#if defined(ART_USE_PORTABLE_COMPILER)
-  // TODO: With Quick, we store the size before the code. With
-  // Portable, the code is in a .o file we don't manage ourselves. ELF
-  // symbols do have a concept of size, so we could capture that and
-  // store it somewhere, such as the OatMethod.
-  return 0;
-#else
-  uintptr_t code = reinterpret_cast<uint32_t>(GetCode());
-
+uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
+  uintptr_t code = reinterpret_cast<uintptr_t>(GetQuickCode());
   if (code == 0) {
     return 0;
   }
   // TODO: make this Thumb2 specific
   code &= ~0x1;
   return reinterpret_cast<uint32_t*>(code)[-1];
-#endif
 }
 
 void OatFile::OatMethod::LinkMethod(mirror::ArtMethod* method) const {
   CHECK(method != NULL);
-  method->SetEntryPointFromCompiledCode(GetCode());
+  method->SetEntryPointFromPortableCompiledCode(GetPortableCode());
+  method->SetEntryPointFromQuickCompiledCode(GetQuickCode());
   method->SetFrameSizeInBytes(frame_size_in_bytes_);
   method->SetCoreSpillMask(core_spill_mask_);
   method->SetFpSpillMask(fp_spill_mask_);
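GetQuickCodeSize above keeps the old convention that the 32-bit code size is stored in the word immediately before the quick code, and that a Thumb interworking bit may be set in the code pointer. A toy sketch of that lookup, assuming nothing about the real oat layout beyond "size word, then code":

    #include <cstdint>
    #include <cstdio>

    int main() {
      // [ size word ][ code bytes ... ]
      alignas(4) uint8_t blob[4 + 8] = {};
      *reinterpret_cast<uint32_t*>(blob) = 8;                        // code size
      uintptr_t code = reinterpret_cast<uintptr_t>(blob + 4) | 0x1;  // Thumb bit set

      code &= ~static_cast<uintptr_t>(0x1);        // clear the Thumb bit first
      uint32_t size = reinterpret_cast<uint32_t*>(code)[-1];
      std::printf("code size = %u\n", size);
      return 0;
    }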
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 887a9d1..d6e8dc0 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -97,8 +97,30 @@
       return native_gc_map_offset_;
     }
 
-    const void* GetCode() const;
-    uint32_t GetCodeSize() const;
+    const void* GetPortableCode() const {
+      // TODO: encode whether code is portable/quick in flags within OatMethod.
+      if (kUsePortableCompiler) {
+        return GetOatPointer<const void*>(code_offset_);
+      } else {
+        return nullptr;
+      }
+    }
+
+    const void* GetQuickCode() const {
+      if (kUsePortableCompiler) {
+        return nullptr;
+      } else {
+        return GetOatPointer<const void*>(code_offset_);
+      }
+    }
+
+    uint32_t GetPortableCodeSize() const {
+      // TODO: With Quick, we store the size before the code. With Portable, the code is in a .o
+      // file we don't manage ourselves. ELF symbols do have a concept of size, so we could capture
+      // that and store it somewhere, such as the OatMethod.
+      return 0;
+    }
+    uint32_t GetQuickCodeSize() const;
 
     const uint8_t* GetMappingTable() const {
       return GetOatPointer<const uint8_t*>(mapping_table_offset_);
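GetPortableCode/GetQuickCode dispatch on the kUsePortableCompiler constant instead of an #ifdef, so both branches are always compiled and the dead one is folded away. A minimal sketch of that pattern, with kUseFeature standing in for kUsePortableCompiler:

    #include <cstdio>

    static constexpr bool kUseFeature = false;

    static const void* GetCodeForFeature(const void* code) {
      if (kUseFeature) {
        return code;      // feature-specific entry point
      } else {
        return nullptr;   // built without the feature: report "no code"
      }
    }

    int main() {
      int dummy = 0;
      std::printf("feature code: %p\n", GetCodeForFeature(&dummy));
      return 0;
    }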
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 407aa65..0451f5d 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -69,30 +69,29 @@
 
 class ClassHelper {
  public:
-  explicit ClassHelper(const mirror::Class* c )
+  explicit ClassHelper(mirror::Class* c )
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : interface_type_list_(NULL),
-        klass_(NULL) {
-    if (c != NULL) {
+      : interface_type_list_(nullptr), klass_(nullptr) {
+    if (c != nullptr) {
       ChangeClass(c);
     }
   }
 
-  void ChangeClass(const mirror::Class* new_c)
+  void ChangeClass(mirror::Class* new_c)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(new_c != NULL) << "klass_=" << klass_;  // Log what we were changing from if any
+    CHECK(new_c != nullptr) << "klass_=" << klass_;  // Log what we were changing from if any
     if (!new_c->IsClass()) {
       LOG(FATAL) << "new_c=" << new_c << " cc " << new_c->GetClass() << " ccc "
-          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : NULL);
+          << ((new_c->GetClass() != nullptr) ? new_c->GetClass()->GetClass() : nullptr);
     }
     klass_ = new_c;
-    interface_type_list_ = NULL;
+    interface_type_list_ = nullptr;
   }
 
   // The returned const char* is only guaranteed to be valid for the lifetime of the ClassHelper.
   // If you need it longer, copy it into a std::string.
   const char* GetDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK(klass_ != NULL);
+    CHECK(klass_ != nullptr);
     if (UNLIKELY(klass_->IsArrayClass())) {
       return GetArrayDescriptor();
     } else if (UNLIKELY(klass_->IsPrimitive())) {
@@ -109,8 +108,8 @@
 
   const char* GetArrayDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     std::string result("[");
-    const mirror::Class* saved_klass = klass_;
-    CHECK(saved_klass != NULL);
+    mirror::Class* saved_klass = klass_;
+    CHECK(saved_klass != nullptr);
     ChangeClass(klass_->GetComponentType());
     result += GetDescriptor();
     ChangeClass(saved_klass);
@@ -128,7 +127,7 @@
   }
 
   uint32_t NumDirectInterfaces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != NULL);
+    DCHECK(klass_ != nullptr);
     if (klass_->IsPrimitive()) {
       return 0;
     } else if (klass_->IsArrayClass()) {
@@ -137,7 +136,7 @@
       return klass_->GetIfTable()->GetLength();
     } else {
       const DexFile::TypeList* interfaces = GetInterfaceTypeList();
-      if (interfaces == NULL) {
+      if (interfaces == nullptr) {
         return 0;
       } else {
         return interfaces->Size();
@@ -147,7 +146,7 @@
 
   uint16_t GetDirectInterfaceTypeIdx(uint32_t idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != NULL);
+    DCHECK(klass_ != nullptr);
     DCHECK(!klass_->IsPrimitive());
     DCHECK(!klass_->IsArrayClass());
     return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
@@ -155,7 +154,7 @@
 
   mirror::Class* GetDirectInterface(uint32_t idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(klass_ != NULL);
+    DCHECK(klass_ != nullptr);
     DCHECK(!klass_->IsPrimitive());
     if (klass_->IsArrayClass()) {
       if (idx == 0) {
@@ -169,9 +168,9 @@
     } else {
       uint16_t type_idx = GetDirectInterfaceTypeIdx(idx);
       mirror::Class* interface = GetDexCache()->GetResolvedType(type_idx);
-      if (interface == NULL) {
+      if (interface == nullptr) {
         interface = GetClassLinker()->ResolveType(GetDexFile(), type_idx, klass_);
-        CHECK(interface != NULL || Thread::Current()->IsExceptionPending());
+        CHECK(interface != nullptr || Thread::Current()->IsExceptionPending());
       }
       return interface;
     }
@@ -181,13 +180,13 @@
     std::string descriptor(GetDescriptor());
     const DexFile& dex_file = GetDexFile();
     const DexFile::ClassDef* dex_class_def = GetClassDef();
-    CHECK(dex_class_def != NULL);
+    CHECK(dex_class_def != nullptr);
     return dex_file.GetSourceFile(*dex_class_def);
   }
 
   std::string GetLocation() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::DexCache* dex_cache = GetDexCache();
-    if (dex_cache != NULL && !klass_->IsProxyClass()) {
+    if (dex_cache != nullptr && !klass_->IsProxyClass()) {
       return dex_cache->GetLocation()->ToModifiedUtf8();
     } else {
       // Arrays and proxies are generated and have no corresponding dex file location.
@@ -207,9 +206,9 @@
   const DexFile::TypeList* GetInterfaceTypeList()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile::TypeList* result = interface_type_list_;
-    if (result == NULL) {
+    if (result == nullptr) {
       const DexFile::ClassDef* class_def = GetClassDef();
-      if (class_def != NULL) {
+      if (class_def != nullptr) {
         result =  GetDexFile().GetInterfacesList(*class_def);
         interface_type_list_ = result;
       }
@@ -222,7 +221,7 @@
   }
 
   const DexFile::TypeList* interface_type_list_;
-  const mirror::Class* klass_;
+  mirror::Class* klass_;
   std::string descriptor_;
 
   DISALLOW_COPY_AND_ASSIGN(ClassHelper);
@@ -230,11 +229,11 @@
 
 class FieldHelper {
  public:
-  FieldHelper() : field_(NULL) {}
-  explicit FieldHelper(const mirror::ArtField* f) : field_(f) {}
+  FieldHelper() : field_(nullptr) {}
+  explicit FieldHelper(mirror::ArtField* f) : field_(f) {}
 
-  void ChangeField(const mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_f != NULL);
+  void ChangeField(mirror::ArtField* new_f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(new_f != nullptr);
     field_ = new_f;
   }
 
@@ -257,9 +256,9 @@
     const DexFile& dex_file = GetDexFile();
     const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
     mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
-    if (resolve && (type == NULL)) {
+    if (resolve && (type == nullptr)) {
       type = GetClassLinker()->ResolveType(field_id.type_idx_, field_);
-      CHECK(type != NULL || Thread::Current()->IsExceptionPending());
+      CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
@@ -320,7 +319,7 @@
   const DexFile& GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return *GetDexCache()->GetDexFile();
   }
-  const mirror::ArtField* field_;
+  mirror::ArtField* field_;
   std::string declaring_class_descriptor_;
 
   DISALLOW_COPY_AND_ASSIGN(FieldHelper);
@@ -328,20 +327,18 @@
 
 class MethodHelper {
  public:
-  MethodHelper()
-     : method_(NULL), shorty_(NULL),
-       shorty_len_(0) {}
+  MethodHelper() : method_(nullptr), shorty_(nullptr), shorty_len_(0) {}
 
-  explicit MethodHelper(const mirror::ArtMethod* m)
+  explicit MethodHelper(mirror::ArtMethod* m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      : method_(NULL), shorty_(NULL), shorty_len_(0) {
+      : method_(nullptr), shorty_(nullptr), shorty_len_(0) {
     SetMethod(m);
   }
 
   void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_m != NULL);
+    DCHECK(new_m != nullptr);
     SetMethod(new_m);
-    shorty_ = NULL;
+    shorty_ = nullptr;
   }
 
   const mirror::ArtMethod* GetMethod() const {
@@ -381,7 +378,7 @@
 
   const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const char* result = shorty_;
-    if (result == NULL) {
+    if (result == nullptr) {
       const DexFile& dex_file = GetDexFile();
       result = dex_file.GetMethodShorty(dex_file.GetMethodId(method_->GetDexMethodIndex()),
                                         &shorty_len_);
@@ -391,7 +388,7 @@
   }
 
   uint32_t GetShortyLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (shorty_ == NULL) {
+    if (shorty_ == nullptr) {
       GetShorty();
     }
     return shorty_len_;
@@ -529,15 +526,15 @@
 
   bool IsResolvedTypeIdx(uint16_t type_idx) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetDexCacheResolvedTypes()->Get(type_idx) != NULL;
+    return method_->GetDexCacheResolvedTypes()->Get(type_idx) != nullptr;
   }
 
   mirror::Class* GetClassFromTypeIdx(uint16_t type_idx, bool resolve = true)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Class* type = method_->GetDexCacheResolvedTypes()->Get(type_idx);
-    if (type == NULL && resolve) {
+    if (type == nullptr && resolve) {
       type = GetClassLinker()->ResolveType(type_idx, method_);
-      CHECK(type != NULL || Thread::Current()->IsExceptionPending());
+      CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
     }
     return type;
   }
@@ -563,7 +560,7 @@
 
   mirror::String* ResolveString(uint32_t string_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::String* s = method_->GetDexCacheStrings()->Get(string_idx);
-    if (UNLIKELY(s == NULL)) {
+    if (UNLIKELY(s == nullptr)) {
       SirtRef<mirror::DexCache> dex_cache(Thread::Current(), GetDexCache());
       s = GetClassLinker()->ResolveString(GetDexFile(), string_idx, dex_cache);
     }
@@ -613,13 +610,13 @@
  private:
   // Set the method_ field, for proxy methods looking up the interface method via the resolved
   // methods table.
-  void SetMethod(const mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (method != NULL) {
+  void SetMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (method != nullptr) {
       mirror::Class* klass = method->GetDeclaringClass();
       if (UNLIKELY(klass->IsProxyClass())) {
         mirror::ArtMethod* interface_method =
             method->GetDexCacheResolvedMethods()->Get(method->GetDexMethodIndex());
-        DCHECK(interface_method != NULL);
+        DCHECK(interface_method != nullptr);
         DCHECK(interface_method == GetClassLinker()->FindMethodForProxy(klass, method));
         method = interface_method;
       }
@@ -631,7 +628,7 @@
     return Runtime::Current()->GetClassLinker();
   }
 
-  const mirror::ArtMethod* method_;
+  mirror::ArtMethod* method_;
   const char* shorty_;
   uint32_t shorty_len_;
 
diff --git a/runtime/offsets.h b/runtime/offsets.h
index 94ae805..e2dba9d 100644
--- a/runtime/offsets.h
+++ b/runtime/offsets.h
@@ -22,7 +22,7 @@
 
 namespace art {
 
-// Allow the meaning of offsets to be strongly typed
+// Allow the meaning of offsets to be strongly typed.
 class Offset {
  public:
   explicit Offset(size_t val) : val_(val) {}
@@ -37,7 +37,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const Offset& offs);
 
-// Offsets relative to the current frame
+// Offsets relative to the current frame.
 class FrameOffset : public Offset {
  public:
   explicit FrameOffset(size_t val) : Offset(val) {}
@@ -45,13 +45,13 @@
   bool operator<(FrameOffset other) const { return val_ < other.val_; }
 };
 
-// Offsets relative to the current running thread
+// Offsets relative to the current running thread.
 class ThreadOffset : public Offset {
  public:
   explicit ThreadOffset(size_t val) : Offset(val) {}
 };
 
-// Offsets relative to an object
+// Offsets relative to an object.
 class MemberOffset : public Offset {
  public:
   explicit MemberOffset(size_t val) : Offset(val) {}
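The comment touch-ups above are cosmetic, but the point of these wrappers is worth spelling out: each kind of offset gets its own type, so passing a frame offset where a member offset is expected fails to compile. A simplified sketch mirroring the shape of the real classes:

    #include <cstddef>
    #include <cstdio>

    class Offset {
     public:
      explicit Offset(size_t val) : val_(val) {}
      size_t SizeValue() const { return val_; }
     protected:
      size_t val_;
    };

    class FrameOffset  : public Offset { public: explicit FrameOffset(size_t v)  : Offset(v) {} };
    class MemberOffset : public Offset { public: explicit MemberOffset(size_t v) : Offset(v) {} };

    static void StoreToField(MemberOffset offs) {
      std::printf("storing at member offset %zu\n", offs.SizeValue());
    }

    int main() {
      StoreToField(MemberOffset(16));
      // StoreToField(FrameOffset(16));  // would not compile: wrong offset kind
      return 0;
    }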
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 5e07311..b436bd2 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -21,6 +21,7 @@
 
 #include "base/logging.h"
 #include "base/macros.h"
+#include "mirror/object_reference.h"
 
 namespace art {
 namespace mirror {
@@ -78,7 +79,7 @@
       case kPrimFloat:   return 4;
       case kPrimLong:
       case kPrimDouble:  return 8;
-      case kPrimNot:     return sizeof(mirror::Object*);
+      case kPrimNot:     return sizeof(mirror::HeapReference<mirror::Object>);
       default:
         LOG(FATAL) << "Invalid type " << static_cast<int>(type);
         return 0;
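Sizing kPrimNot as sizeof(mirror::HeapReference<mirror::Object>) rather than sizeof(Object*) decouples field layout from the native pointer width, which matters once heap references are stored as fixed 32-bit values on 64-bit hosts. A toy sketch of that distinction; HeapReference here is a stand-in, and the truncating cast is only illustrative "compression".

    #include <cstdint>
    #include <cstdio>

    struct Object {};

    template <typename T>
    class HeapReference {              // stand-in for mirror::HeapReference<T>
     public:
      static HeapReference FromPtr(T* p) {
        HeapReference r;
        r.ref_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p));  // toy compression
        return r;
      }
     private:
      uint32_t ref_;                   // always 4 bytes, regardless of pointer width
    };

    int main() {
      Object obj;
      HeapReference<Object> ref = HeapReference<Object>::FromPtr(&obj);
      (void)ref;
      std::printf("sizeof(Object*) = %zu, sizeof(HeapReference<Object>) = %zu\n",
                  sizeof(Object*), sizeof(HeapReference<Object>));
      return 0;
    }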
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 0e73812..20e08b8 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -36,6 +36,11 @@
 #include "ScopedLocalRef.h"
 #include "thread.h"
 #include "thread_list.h"
+
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
 #if !defined(ART_USE_PORTABLE_COMPILER)
 #include "entrypoints/quick/quick_entrypoints.h"
 #endif
@@ -259,6 +264,17 @@
     }
   }
 
+  // Only on target...
+#ifdef HAVE_ANDROID_OS
+  // Switch off profiler if the dalvik.vm.profiler property has value 0.
+  char buf[PROP_VALUE_MAX];
+  property_get("dalvik.vm.profiler", buf, "0");
+  if (strcmp(buf, "0") == 0) {
+    LOG(INFO) << "Profiler disabled.  To enable setprop dalvik.vm.profiler 1";
+    return;
+  }
+#endif
+
   LOG(INFO) << "Starting profile with period " << period << "s, duration " << duration <<
       "s, interval " << interval_us << "us.  Profile file " << profile_file_name;
 
@@ -439,9 +455,7 @@
 }
 
 uint32_t ProfileSampleResults::Hash(mirror::ArtMethod* method) {
-  uint32_t value = reinterpret_cast<uint32_t>(method);
-  value >>= 2;
-  return value % kHashSize;
+  return (PointerToLowMemUInt32(method) >> 3) % kHashSize;
 }
 
 }  // namespace art
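The ProfileSampleResults::Hash change routes the method pointer through a low-memory 32-bit conversion and drops the alignment bits before taking the modulus. A sketch of the same idea using plain uintptr_t (kHashSize below is just an illustrative constant, not the real table size): the hash stays well defined on 64-bit builds and aligned pointers spread across buckets instead of clustering.

    #include <cstdint>
    #include <cstdio>

    static constexpr uint32_t kHashSize = 17;

    static uint32_t HashPointer(const void* p) {
      uintptr_t value = reinterpret_cast<uintptr_t>(p);
      return static_cast<uint32_t>((value >> 3) % kHashSize);  // drop 8-byte alignment bits
    }

    int main() {
      int a, b;
      std::printf("bucket(a)=%u bucket(b)=%u\n", HashPointer(&a), HashPointer(&b));
      return 0;
    }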
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 6f65bff..b5ef735 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -59,7 +59,7 @@
 
 // If "obj" is an array, return the number of elements in the array.
 // Otherwise, return zero.
-static size_t GetElementCount(const mirror::Object* obj) {
+static size_t GetElementCount(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (obj == NULL || obj == kClearedJniWeakGlobal || !obj->IsArrayInstance()) {
     return 0;
   }
@@ -67,7 +67,7 @@
 }
 
 struct ObjectComparator {
-  bool operator()(const mirror::Object* obj1, const mirror::Object* obj2)
+  bool operator()(mirror::Object* obj1, mirror::Object* obj2)
     // TODO: enable analysis when analysis can work with the STL.
       NO_THREAD_SAFETY_ANALYSIS {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
@@ -105,7 +105,7 @@
 // Pass in the number of elements in the array (or 0 if this is not an
 // array object), and the number of additional objects that are identical
 // or equivalent to the original.
-static void DumpSummaryLine(std::ostream& os, const mirror::Object* obj, size_t element_count,
+static void DumpSummaryLine(std::ostream& os, mirror::Object* obj, size_t element_count,
                             int identical, int equiv)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (obj == NULL) {
@@ -159,7 +159,7 @@
   }
   os << "  Last " << (count - first) << " entries (of " << count << "):\n";
   for (int idx = count - 1; idx >= first; --idx) {
-    const mirror::Object* ref = entries[idx];
+    mirror::Object* ref = entries[idx];
     if (ref == NULL) {
       continue;
     }
@@ -212,8 +212,8 @@
   size_t equiv = 0;
   size_t identical = 0;
   for (size_t idx = 1; idx < count; idx++) {
-    const mirror::Object* prev = sorted_entries[idx-1];
-    const mirror::Object* current = sorted_entries[idx];
+    mirror::Object* prev = sorted_entries[idx-1];
+    mirror::Object* current = sorted_entries[idx];
     size_t element_count = GetElementCount(prev);
     if (current == prev) {
       // Same reference, added more than once.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2af569a..09d05d1 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -31,6 +31,7 @@
 #include "arch/arm/registers_arm.h"
 #include "arch/mips/registers_mips.h"
 #include "arch/x86/registers_x86.h"
+#include "arch/x86_64/registers_x86_64.h"
 #include "atomic.h"
 #include "class_linker.h"
 #include "debugger.h"
@@ -75,19 +76,27 @@
       is_zygote_(false),
       is_concurrent_gc_enabled_(true),
       is_explicit_gc_disabled_(false),
+      compiler_filter_(kSpeed),
+      huge_method_threshold_(0),
+      large_method_threshold_(0),
+      small_method_threshold_(0),
+      tiny_method_threshold_(0),
+      num_dex_methods_threshold_(0),
+      sea_ir_mode_(false),
       default_stack_size_(0),
-      heap_(NULL),
+      heap_(nullptr),
       max_spins_before_thin_lock_inflation_(Monitor::kDefaultMaxSpinsBeforeThinLockInflation),
-      monitor_list_(NULL),
-      thread_list_(NULL),
-      intern_table_(NULL),
-      class_linker_(NULL),
-      signal_catcher_(NULL),
-      java_vm_(NULL),
-      pre_allocated_OutOfMemoryError_(NULL),
-      resolution_method_(NULL),
-      imt_conflict_method_(NULL),
-      default_imt_(NULL),
+      monitor_list_(nullptr),
+      monitor_pool_(nullptr),
+      thread_list_(nullptr),
+      intern_table_(nullptr),
+      class_linker_(nullptr),
+      signal_catcher_(nullptr),
+      java_vm_(nullptr),
+      pre_allocated_OutOfMemoryError_(nullptr),
+      resolution_method_(nullptr),
+      imt_conflict_method_(nullptr),
+      default_imt_(nullptr),
       method_verifiers_lock_("Method verifiers lock"),
       threads_being_born_(0),
       shutdown_cond_(new ConditionVariable("Runtime shutdown", *Locks::runtime_shutdown_lock_)),
@@ -95,19 +104,25 @@
       shutting_down_started_(false),
       started_(false),
       finished_starting_(false),
-      vfprintf_(NULL),
-      exit_(NULL),
-      abort_(NULL),
+      vfprintf_(nullptr),
+      exit_(nullptr),
+      abort_(nullptr),
       stats_enabled_(false),
-      method_trace_(0),
+      profile_(false),
+      profile_period_s_(0),
+      profile_duration_s_(0),
+      profile_interval_us_(0),
+      profile_backoff_coefficient_(0),
+      method_trace_(false),
       method_trace_file_size_(0),
       instrumentation_(),
       use_compile_time_class_path_(false),
-      main_thread_group_(NULL),
-      system_thread_group_(NULL),
-      system_class_loader_(NULL) {
+      main_thread_group_(nullptr),
+      system_thread_group_(nullptr),
+      system_class_loader_(nullptr),
+      dump_gc_performance_on_shutdown_(false) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    callee_save_methods_[i] = NULL;
+    callee_save_methods_[i] = nullptr;
   }
 }
 
@@ -141,6 +156,7 @@
   // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended.
   delete thread_list_;
   delete monitor_list_;
+  delete monitor_pool_;
   delete class_linker_;
   delete heap_;
   delete intern_table_;
@@ -149,8 +165,8 @@
   QuasiAtomic::Shutdown();
   verifier::MethodVerifier::Shutdown();
   // TODO: acquire a static mutex on Runtime to avoid racing.
-  CHECK(instance_ == NULL || instance_ == this);
-  instance_ = NULL;
+  CHECK(instance_ == nullptr || instance_ == this);
+  instance_ = nullptr;
 }
 
 struct AbortState {
@@ -318,10 +334,19 @@
   return 0;
 }
 
-size_t ParseIntegerOrDie(const std::string& s) {
-  std::string::size_type colon = s.find(':');
+static const std::string StringAfterChar(const std::string& s, char c) {
+  std::string::size_type colon = s.find(c);
   if (colon == std::string::npos) {
-    LOG(FATAL) << "Missing integer: " << s;
+    LOG(FATAL) << "Missing char " << c << " in string " << s;
+  }
+  // Add one to remove the char we were trimming until.
+  return s.substr(colon + 1);
+}
+
+static size_t ParseIntegerOrDie(const std::string& s, char after_char) {
+  std::string::size_type colon = s.find(after_char);
+  if (colon == std::string::npos) {
+    LOG(FATAL) << "Missing char " << after_char << " in string " << s;
   }
   const char* begin = &s[colon + 1];
   char* end;
@@ -332,10 +357,10 @@
   return result;
 }
 
-double ParseDoubleOrDie(const std::string& option, const char* prefix,
-                        double min, double max, bool ignore_unrecognized,
-                        double defval) {
-  std::istringstream iss(option.substr(strlen(prefix)));
+
+static double ParseDoubleOrDie(const std::string& option, char after_char, double min, double max,
+                               bool ignore_unrecognized, double defval) {
+  std::istringstream iss(StringAfterChar(option, after_char));
   double value;
   iss >> value;
   // Ensure that we have a value, there was no cruft after it and it satisfies a sensible range.
@@ -354,27 +379,21 @@
   GetInternTable()->SweepInternTableWeaks(visitor, arg);
   GetMonitorList()->SweepMonitorList(visitor, arg);
   GetJavaVM()->SweepJniWeakGlobals(visitor, arg);
+  Dbg::UpdateObjectPointers(visitor, arg);
 }
 
 static gc::CollectorType ParseCollectorType(const std::string& option) {
-  std::vector<std::string> gc_options;
-  Split(option, ',', gc_options);
-  gc::CollectorType collector_type = gc::kCollectorTypeNone;
-  for (size_t i = 0; i < gc_options.size(); ++i) {
-    if (gc_options[i] == "MS" || gc_options[i] == "nonconcurrent") {
-      collector_type = gc::kCollectorTypeMS;
-    } else if (gc_options[i] == "CMS" || gc_options[i] == "concurrent") {
-      collector_type = gc::kCollectorTypeCMS;
-    } else if (gc_options[i] == "SS") {
-      collector_type = gc::kCollectorTypeSS;
-    } else if (gc_options[i] == "GSS") {
-      collector_type = gc::kCollectorTypeGSS;
-    } else {
-      LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_options[i];
-      return gc::kCollectorTypeNone;
-    }
+  if (option == "MS" || option == "nonconcurrent") {
+    return gc::kCollectorTypeMS;
+  } else if (option == "CMS" || option == "concurrent") {
+    return gc::kCollectorTypeCMS;
+  } else if (option == "SS") {
+    return gc::kCollectorTypeSS;
+  } else if (option == "GSS") {
+    return gc::kCollectorTypeGSS;
+  } else {
+    return gc::kCollectorTypeNone;
   }
-  return collector_type;
 }
 
 Runtime::ParsedOptions* Runtime::ParsedOptions::Create(const Options& options, bool ignore_unrecognized) {
@@ -409,6 +428,10 @@
   parsed->max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   parsed->low_memory_mode_ = false;
   parsed->use_tlab_ = false;
+  parsed->verify_pre_gc_heap_ = false;
+  parsed->verify_post_gc_heap_ = kIsDebugBuild;
+  parsed->verify_pre_gc_rosalloc_ = kIsDebugBuild;
+  parsed->verify_post_gc_rosalloc_ = false;
 
   parsed->compiler_callbacks_ = nullptr;
   parsed->is_zygote_ = false;
@@ -478,7 +501,7 @@
       parsed->boot_class_path_
           = reinterpret_cast<const std::vector<const DexFile*>*>(options[i].second);
     } else if (StartsWith(option, "-Ximage:")) {
-      parsed->image_ = option.substr(strlen("-Ximage:")).data();
+      parsed->image_ = StringAfterChar(option, ':');
     } else if (StartsWith(option, "-Xcheck:jni")) {
       parsed->check_jni_ = true;
     } else if (StartsWith(option, "-Xrunjdwp:") || StartsWith(option, "-agentlib:jdwp=")) {
@@ -544,15 +567,12 @@
       }
       parsed->heap_max_free_ = size;
     } else if (StartsWith(option, "-XX:HeapTargetUtilization=")) {
-      parsed->heap_target_utilization_ = ParseDoubleOrDie(option, "-XX:HeapTargetUtilization=",
-          0.1, 0.9, ignore_unrecognized,
-          parsed->heap_target_utilization_);
+      parsed->heap_target_utilization_ = ParseDoubleOrDie(
+          option, '=', 0.1, 0.9, ignore_unrecognized, parsed->heap_target_utilization_);
     } else if (StartsWith(option, "-XX:ParallelGCThreads=")) {
-      parsed->parallel_gc_threads_ =
-          ParseMemoryOption(option.substr(strlen("-XX:ParallelGCThreads=")).c_str(), 1024);
+      parsed->parallel_gc_threads_ = ParseIntegerOrDie(option, '=');
     } else if (StartsWith(option, "-XX:ConcGCThreads=")) {
-      parsed->conc_gc_threads_ =
-          ParseMemoryOption(option.substr(strlen("-XX:ConcGCThreads=")).c_str(), 1024);
+      parsed->conc_gc_threads_ = ParseIntegerOrDie(option, '=');
     } else if (StartsWith(option, "-Xss")) {
       size_t size = ParseMemoryOption(option.substr(strlen("-Xss")).c_str(), 1);
       if (size == 0) {
@@ -565,15 +585,11 @@
       }
       parsed->stack_size_ = size;
     } else if (StartsWith(option, "-XX:MaxSpinsBeforeThinLockInflation=")) {
-      parsed->max_spins_before_thin_lock_inflation_ =
-          strtoul(option.substr(strlen("-XX:MaxSpinsBeforeThinLockInflation=")).c_str(),
-                  nullptr, 10);
-    } else if (option == "-XX:LongPauseLogThreshold") {
-      parsed->long_pause_log_threshold_ =
-          ParseMemoryOption(option.substr(strlen("-XX:LongPauseLogThreshold=")).c_str(), 1024);
-    } else if (option == "-XX:LongGCLogThreshold") {
-          parsed->long_gc_log_threshold_ =
-              ParseMemoryOption(option.substr(strlen("-XX:LongGCLogThreshold")).c_str(), 1024);
+      parsed->max_spins_before_thin_lock_inflation_ = ParseIntegerOrDie(option, '=');
+    } else if (StartsWith(option, "-XX:LongPauseLogThreshold=")) {
+      parsed->long_pause_log_threshold_ = MsToNs(ParseIntegerOrDie(option, '='));
+    } else if (StartsWith(option, "-XX:LongGCLogThreshold=")) {
+      parsed->long_gc_log_threshold_ = MsToNs(ParseIntegerOrDie(option, '='));
     } else if (option == "-XX:DumpGCPerformanceOnShutdown") {
       parsed->dump_gc_performance_on_shutdown_ = true;
     } else if (option == "-XX:IgnoreMaxFootprint") {
@@ -594,15 +610,39 @@
     } else if (option == "-Xint") {
       parsed->interpreter_only_ = true;
     } else if (StartsWith(option, "-Xgc:")) {
-      gc::CollectorType collector_type = ParseCollectorType(option.substr(strlen("-Xgc:")));
-      if (collector_type != gc::kCollectorTypeNone) {
-        parsed->collector_type_ = collector_type;
+      std::vector<std::string> gc_options;
+      Split(option.substr(strlen("-Xgc:")), ',', gc_options);
+      for (const std::string& gc_option : gc_options) {
+        gc::CollectorType collector_type = ParseCollectorType(gc_option);
+        if (collector_type != gc::kCollectorTypeNone) {
+          parsed->collector_type_ = collector_type;
+        } else if (gc_option == "preverify") {
+          parsed->verify_pre_gc_heap_ = true;
+        } else if (gc_option == "nopreverify") {
+          parsed->verify_pre_gc_heap_ = false;
+        }  else if (gc_option == "postverify") {
+          parsed->verify_post_gc_heap_ = true;
+        } else if (gc_option == "nopostverify") {
+          parsed->verify_post_gc_heap_ = false;
+        } else if (gc_option == "preverify_rosalloc") {
+          parsed->verify_pre_gc_rosalloc_ = true;
+        } else if (gc_option == "nopreverify_rosalloc") {
+          parsed->verify_pre_gc_rosalloc_ = false;
+        } else if (gc_option == "postverify_rosalloc") {
+          parsed->verify_post_gc_rosalloc_ = true;
+        } else if (gc_option == "nopostverify_rosalloc") {
+          parsed->verify_post_gc_rosalloc_ = false;
+        } else {
+          LOG(WARNING) << "Ignoring unknown -Xgc option: " << gc_option;
+        }
       }
     } else if (StartsWith(option, "-XX:BackgroundGC=")) {
-      gc::CollectorType collector_type = ParseCollectorType(
-          option.substr(strlen("-XX:BackgroundGC=")));
+      const std::string substring = StringAfterChar(option, '=');
+      gc::CollectorType collector_type = ParseCollectorType(substring);
       if (collector_type != gc::kCollectorTypeNone) {
         parsed->background_collector_type_ = collector_type;
+      } else {
+        LOG(WARNING) << "Ignoring unknown -XX:BackgroundGC option: " << substring;
       }
     } else if (option == "-XX:+DisableExplicitGC") {
       parsed->is_explicit_gc_disabled_ = true;
@@ -639,9 +679,9 @@
     } else if (StartsWith(option, "-Xjnigreflimit:")) {
       // Silently ignored for backwards compatibility.
     } else if (StartsWith(option, "-Xlockprofthreshold:")) {
-      parsed->lock_profiling_threshold_ = ParseIntegerOrDie(option);
+      parsed->lock_profiling_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xstacktracefile:")) {
-      parsed->stack_trace_file_ = option.substr(strlen("-Xstacktracefile:"));
+      parsed->stack_trace_file_ = StringAfterChar(option, ':');
     } else if (option == "sensitiveThread") {
       parsed->hook_is_sensitive_thread_ = reinterpret_cast<bool (*)()>(const_cast<void*>(options[i].second));
     } else if (option == "vfprintf") {
@@ -660,7 +700,7 @@
     } else if (StartsWith(option, "-Xmethod-trace-file:")) {
       parsed->method_trace_file_ = option.substr(strlen("-Xmethod-trace-file:"));
     } else if (StartsWith(option, "-Xmethod-trace-file-size:")) {
-      parsed->method_trace_file_size_ = ParseIntegerOrDie(option);
+      parsed->method_trace_file_size_ = ParseIntegerOrDie(option, ':');
     } else if (option == "-Xprofile:threadcpuclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceThreadCpu);
     } else if (option == "-Xprofile:wallclock") {
@@ -668,18 +708,17 @@
     } else if (option == "-Xprofile:dualclock") {
       Trace::SetDefaultClockSource(kProfilerClockSourceDual);
     } else if (StartsWith(option, "-Xprofile:")) {
-      parsed->profile_output_filename_ = option.substr(strlen("-Xprofile:"));
+      parsed->profile_output_filename_ = StringAfterChar(option, ';');
       parsed->profile_ = true;
     } else if (StartsWith(option, "-Xprofile-period:")) {
-      parsed->profile_period_s_ = ParseIntegerOrDie(option);
+      parsed->profile_period_s_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-duration:")) {
-      parsed->profile_duration_s_ = ParseIntegerOrDie(option);
+      parsed->profile_duration_s_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-interval:")) {
-      parsed->profile_interval_us_ = ParseIntegerOrDie(option);
+      parsed->profile_interval_us_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-Xprofile-backoff:")) {
-      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(option, "-Xprofile-backoff:",
-          1.0, 10.0, ignore_unrecognized,
-          parsed->profile_backoff_coefficient_);
+      parsed->profile_backoff_coefficient_ = ParseDoubleOrDie(
+          option, ':', 1.0, 10.0, ignore_unrecognized, parsed->profile_backoff_coefficient_);
     } else if (option == "-compiler-filter:interpret-only") {
       parsed->compiler_filter_ = kInterpretOnly;
     } else if (option == "-compiler-filter:space") {
@@ -693,15 +732,15 @@
     } else if (option == "-sea_ir") {
       parsed->sea_ir_mode_ = true;
     } else if (StartsWith(option, "-huge-method-max:")) {
-      parsed->huge_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->huge_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-large-method-max:")) {
-      parsed->large_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->large_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-small-method-max:")) {
-      parsed->small_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->small_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-tiny-method-max:")) {
-      parsed->tiny_method_threshold_ = ParseIntegerOrDie(option);
+      parsed->tiny_method_threshold_ = ParseIntegerOrDie(option, ':');
     } else if (StartsWith(option, "-num-dex-methods-max:")) {
-      parsed->num_dex_methods_threshold_ = ParseIntegerOrDie(option);
+      parsed->num_dex_methods_threshold_ = ParseIntegerOrDie(option, ':');
     } else {
       if (!ignore_unrecognized) {
         // TODO: print usage via vfprintf
@@ -964,6 +1003,7 @@
   max_spins_before_thin_lock_inflation_ = options->max_spins_before_thin_lock_inflation_;
 
   monitor_list_ = new MonitorList;
+  monitor_pool_ = MonitorPool::Create();
   thread_list_ = new ThreadList;
   intern_table_ = new InternTable;
 
@@ -987,7 +1027,11 @@
                        options->long_pause_log_threshold_,
                        options->long_gc_log_threshold_,
                        options->ignore_max_footprint_,
-                       options->use_tlab_);
+                       options->use_tlab_,
+                       options->verify_pre_gc_heap_,
+                       options->verify_post_gc_heap_,
+                       options->verify_pre_gc_rosalloc_,
+                       options->verify_post_gc_rosalloc_);
 
   dump_gc_performance_on_shutdown_ = options->dump_gc_performance_on_shutdown_;
 
@@ -1073,7 +1117,8 @@
     std::string mapped_name(StringPrintf(OS_SHARED_LIB_FORMAT_STR, "javacore"));
     std::string reason;
     self->TransitionFromSuspendedToRunnable();
-    if (!instance_->java_vm_->LoadNativeLibrary(mapped_name, NULL, &reason)) {
+    SirtRef<mirror::ClassLoader> class_loader(self, nullptr);
+    if (!instance_->java_vm_->LoadNativeLibrary(mapped_name, class_loader, &reason)) {
       LOG(FATAL) << "LoadNativeLibrary failed for \"" << mapped_name << "\": " << reason;
     }
     self->TransitionFromRunnableToSuspended(kNative);
@@ -1330,40 +1375,53 @@
 
 mirror::ArtMethod* Runtime::CreateImtConflictMethod() {
   Thread* self = Thread::Current();
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
-  // TODO: use a special method for imt conflict method saves
+  // TODO: use a special method for imt conflict method saves.
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetImtConflictTrampoline(cl));
+  if (runtime->IsCompiler()) {
+    method->SetEntryPointFromPortableCompiledCode(nullptr);
+    method->SetEntryPointFromQuickCompiledCode(nullptr);
+  } else {
+    method->SetEntryPointFromPortableCompiledCode(GetPortableImtConflictTrampoline(class_linker));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickImtConflictTrampoline(class_linker));
+  }
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateResolutionMethod() {
   Thread* self = Thread::Current();
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for resolution method saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
   // When compiling, the code pointer will get set later when the image is loaded.
-  method->SetEntryPointFromCompiledCode(r->IsCompiler() ? NULL : GetResolutionTrampoline(cl));
+  if (runtime->IsCompiler()) {
+    method->SetEntryPointFromPortableCompiledCode(nullptr);
+    method->SetEntryPointFromQuickCompiledCode(nullptr);
+  } else {
+    method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(class_linker));
+    method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(class_linker));
+  }
   return method.get();
 }
 
 mirror::ArtMethod* Runtime::CreateCalleeSaveMethod(InstructionSet instruction_set,
                                                    CalleeSaveType type) {
   Thread* self = Thread::Current();
-  Runtime* r = Runtime::Current();
-  ClassLinker* cl = r->GetClassLinker();
-  SirtRef<mirror::ArtMethod> method(self, cl->AllocArtMethod(self));
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  SirtRef<mirror::ArtMethod> method(self, class_linker->AllocArtMethod(self));
   method->SetDeclaringClass(mirror::ArtMethod::GetJavaLangReflectArtMethod());
   // TODO: use a special method for callee saves
   method->SetDexMethodIndex(DexFile::kDexNoIndex);
-  method->SetEntryPointFromCompiledCode(NULL);
+  method->SetEntryPointFromPortableCompiledCode(nullptr);
+  method->SetEntryPointFromQuickCompiledCode(nullptr);
   if ((instruction_set == kThumb2) || (instruction_set == kArm)) {
     uint32_t ref_spills = (1 << art::arm::R5) | (1 << art::arm::R6)  | (1 << art::arm::R7) |
                           (1 << art::arm::R8) | (1 << art::arm::R10) | (1 << art::arm::R11);
@@ -1413,8 +1471,23 @@
     method->SetFrameSizeInBytes(frame_size);
     method->SetCoreSpillMask(core_spills);
     method->SetFpSpillMask(0);
+  } else if (instruction_set == kX86_64) {
+    uint32_t ref_spills =
+        (1 << art::x86_64::RBP) | (1 << art::x86_64::RSI) | (1 << art::x86_64::RDI) |
+        (1 << art::x86_64::R8)  | (1 << art::x86_64::R9)  | (1 << art::x86_64::R10) |
+        (1 << art::x86_64::R11) | (1 << art::x86_64::R12) | (1 << art::x86_64::R13) |
+        (1 << art::x86_64::R14) | (1 << art::x86_64::R15);
+    uint32_t arg_spills =
+        (1 << art::x86_64::RCX) | (1 << art::x86_64::RDX) | (1 << art::x86_64::RBX);
+    uint32_t core_spills = ref_spills | (type == kRefsAndArgs ? arg_spills : 0) |
+                         (1 << art::x86::kNumberOfCpuRegisters);  // fake return address callee save
+    size_t frame_size = RoundUp((__builtin_popcount(core_spills) /* gprs */ +
+                                 1 /* Method* */) * kPointerSize, kStackAlignment);
+    method->SetFrameSizeInBytes(frame_size);
+    method->SetCoreSpillMask(core_spills);
+    method->SetFpSpillMask(0);
   } else {
-    UNIMPLEMENTED(FATAL);
+    UNIMPLEMENTED(FATAL) << instruction_set;
   }
   return method.get();
 }
@@ -1423,12 +1496,14 @@
   monitor_list_->DisallowNewMonitors();
   intern_table_->DisallowNewInterns();
   java_vm_->DisallowNewWeakGlobals();
+  Dbg::DisallowNewObjectRegistryObjects();
 }
 
 void Runtime::AllowNewSystemWeaks() {
   monitor_list_->AllowNewMonitors();
   intern_table_->AllowNewInterns();
   java_vm_->AllowNewWeakGlobals();
+  Dbg::AllowNewObjectRegistryObjects();
 }
 
 void Runtime::SetCalleeSaveMethod(mirror::ArtMethod* method, CalleeSaveType type) {
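A recurring theme in the runtime.cc hunks is the generalized option parsing: instead of one substring helper per "-X..." prefix, ParseIntegerOrDie and StringAfterChar now take the separator character. A minimal sketch of the same approach; error handling is reduced to a bool here rather than LOG(FATAL), and the helper names mirror but are not the ART functions.

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    static bool StringAfterChar(const std::string& s, char c, std::string* out) {
      std::string::size_type pos = s.find(c);
      if (pos == std::string::npos) return false;
      *out = s.substr(pos + 1);          // skip the separator itself
      return true;
    }

    static bool ParseIntegerAfterChar(const std::string& s, char c, size_t* out) {
      std::string tail;
      if (!StringAfterChar(s, c, &tail)) return false;
      char* end = nullptr;
      unsigned long value = std::strtoul(tail.c_str(), &end, 10);
      if (end == tail.c_str() || *end != '\0') return false;  // reject trailing cruft
      *out = static_cast<size_t>(value);
      return true;
    }

    int main() {
      size_t threads = 0;
      if (ParseIntegerAfterChar("-XX:ParallelGCThreads=4", '=', &threads)) {
        std::printf("parallel gc threads: %zu\n", threads);
      }
      return 0;
    }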
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 30ab787..896a18b 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -61,6 +61,7 @@
 class InternTable;
 struct JavaVMExt;
 class MonitorList;
+class MonitorPool;
 class SignalCatcher;
 class ThreadList;
 class Trace;
@@ -106,6 +107,10 @@
     bool interpreter_only_;
     bool is_explicit_gc_disabled_;
     bool use_tlab_;
+    bool verify_pre_gc_heap_;
+    bool verify_post_gc_heap_;
+    bool verify_pre_gc_rosalloc_;
+    bool verify_post_gc_rosalloc_;
     size_t long_pause_log_threshold_;
     size_t long_gc_log_threshold_;
     bool dump_gc_performance_on_shutdown_;
@@ -312,6 +317,10 @@
     return monitor_list_;
   }
 
+  MonitorPool* GetMonitorPool() const {
+    return monitor_pool_;
+  }
+
   mirror::Throwable* GetPreAllocatedOutOfMemoryError() const
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -522,6 +531,7 @@
   // The number of spins that are done before thread suspension is used to forcibly inflate.
   size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;
+  MonitorPool* monitor_pool_;
 
   ThreadList* thread_list_;
 
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 47b72e9..d8f408a 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -133,7 +133,7 @@
 
   void Dump(std::ostream& os) {
     // TODO: support non-x86 hosts (not urgent because this code doesn't run on targets).
-#if defined(__APPLE__)
+#if defined(__APPLE__) && defined(__i386__)
     DumpRegister32(os, "eax", context->__ss.__eax);
     DumpRegister32(os, "ebx", context->__ss.__ebx);
     DumpRegister32(os, "ecx", context->__ss.__ecx);
@@ -159,7 +159,7 @@
     os << '\n';
     DumpRegister32(os, "gs",  context->__ss.__gs);
     DumpRegister32(os, "ss",  context->__ss.__ss);
-#else
+#elif defined(__linux__) && defined(__i386__)
     DumpRegister32(os, "eax", context.gregs[REG_EAX]);
     DumpRegister32(os, "ebx", context.gregs[REG_EBX]);
     DumpRegister32(os, "ecx", context.gregs[REG_ECX]);
@@ -185,6 +185,8 @@
     os << '\n';
     DumpRegister32(os, "gs",  context.gregs[REG_GS]);
     DumpRegister32(os, "ss",  context.gregs[REG_SS]);
+#else
+    os << "Unknown architecture/word size/OS in ucontext dump";
 #endif
   }
 
diff --git a/runtime/sirt_ref.h b/runtime/sirt_ref.h
index 56d81ec..3c5e4f8 100644
--- a/runtime/sirt_ref.h
+++ b/runtime/sirt_ref.h
@@ -35,13 +35,17 @@
     DCHECK_EQ(top_sirt, &sirt_);
   }
 
-  T& operator*() const { return *get(); }
-  T* operator->() const { return get(); }
-  T* get() const {
+  T& operator*() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return *get();
+  }
+  T* operator->() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return get();
+  }
+  T* get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return down_cast<T*>(sirt_.GetReference(0));
   }
 
-  void reset(T* object = NULL) {
+  void reset(T* object = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     sirt_.SetReference(0, object);
   }
 
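The SirtRef accessors now carry SHARED_LOCKS_REQUIRED(Locks::mutator_lock_), so Clang's thread-safety analysis can flag dereferences made without the mutator lock. A rough, self-contained sketch of that annotation style under -Wthread-safety; the macro layer and the MutatorLock/Handle types below are simplified stand-ins, not ART's Locks machinery.

    #if defined(__clang__)
    #define CAPABILITY(x)        __attribute__((capability(x)))
    #define REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__)))
    #else
    #define CAPABILITY(x)
    #define REQUIRES_SHARED(...)
    #endif

    struct CAPABILITY("mutex") MutatorLock {} mutator_lock;

    template <typename T>
    class Handle {
     public:
      explicit Handle(T* obj) : obj_(obj) {}
      // Callers must hold mutator_lock (shared) to look at the reference.
      T* get() const REQUIRES_SHARED(mutator_lock) { return obj_; }
     private:
      T* obj_;
    };

    int main() {
      Handle<int> h(nullptr);
      (void)h;
      // h.get();  // with clang -Wthread-safety: warning, mutator_lock not held
      return 0;
    }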
diff --git a/runtime/stack.cc b/runtime/stack.cc
index e583ced..fd7d981 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -75,7 +75,7 @@
   return count;
 }
 
-bool ManagedStack::ShadowFramesContain(mirror::Object** shadow_frame_entry) const {
+bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const {
   for (const ManagedStack* current_fragment = this; current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_; current_frame != NULL;
@@ -251,7 +251,7 @@
     return "upcall";
   }
   result += PrettyMethod(m);
-  result += StringPrintf("' at dex PC 0x%04zx", GetDexPc());
+  result += StringPrintf("' at dex PC 0x%04x", GetDexPc());
   if (!IsShadowFrame()) {
     result += StringPrintf(" (native PC %p)", reinterpret_cast<void*>(GetCurrentQuickFramePc()));
   }
@@ -264,23 +264,23 @@
 }
 
 void StackVisitor::SanityCheckFrame() const {
-#ifndef NDEBUG
-  mirror::ArtMethod* method = GetMethod();
-  CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
-  if (cur_quick_frame_ != NULL) {
-    method->AssertPcIsWithinCode(cur_quick_frame_pc_);
-    // Frame sanity.
-    size_t frame_size = method->GetFrameSizeInBytes();
-    CHECK_NE(frame_size, 0u);
-    // A rough guess at an upper size we expect to see for a frame. The 256 is
-    // a dex register limit. The 16 incorporates callee save spills and
-    // outgoing argument set up.
-    const size_t kMaxExpectedFrameSize = 256 * sizeof(word) + 16;
-    CHECK_LE(frame_size, kMaxExpectedFrameSize);
-    size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
-    CHECK_LT(return_pc_offset, frame_size);
+  if (kIsDebugBuild) {
+    mirror::ArtMethod* method = GetMethod();
+    CHECK(method->GetClass() == mirror::ArtMethod::GetJavaLangReflectArtMethod());
+    if (cur_quick_frame_ != nullptr) {
+      method->AssertPcIsWithinQuickCode(cur_quick_frame_pc_);
+      // Frame sanity.
+      size_t frame_size = method->GetFrameSizeInBytes();
+      CHECK_NE(frame_size, 0u);
+      // A rough guess at an upper size we expect to see for a frame. The 256 is
+      // a dex register limit. The 16 incorporates callee save spills and
+      // outgoing argument set up.
+      const size_t kMaxExpectedFrameSize = 256 * sizeof(word) + 16;
+      CHECK_LE(frame_size, kMaxExpectedFrameSize);
+      size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
+      CHECK_LT(return_pc_offset, frame_size);
+    }
   }
-#endif
 }
 
 void StackVisitor::WalkStack(bool include_transitions) {
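SanityCheckFrame above trades "#ifndef NDEBUG" for "if (kIsDebugBuild)", the same pattern used in oat_file.cc: the checks are parsed and type-checked in every build, and the optimizer removes the block when the constant is false. A minimal sketch of that rewrite (the frame-size bound is taken from the comment in the original checks):

    #include <cassert>
    #include <cstdio>

    static constexpr bool kIsDebugBuild =
    #ifdef NDEBUG
        false;
    #else
        true;
    #endif

    static void SanityCheck(size_t frame_size) {
      if (kIsDebugBuild) {
        // Always compiled, so it cannot bit-rot; folded away in release builds.
        assert(frame_size != 0u);
        assert(frame_size <= 256 * sizeof(void*) + 16);
      }
    }

    int main() {
      SanityCheck(64);
      std::printf("frame ok\n");
      return 0;
    }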
diff --git a/runtime/stack.h b/runtime/stack.h
index 590f406..8466069 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -19,8 +19,10 @@
 
 #include "dex_file.h"
 #include "instrumentation.h"
+#include "base/casts.h"
 #include "base/macros.h"
 #include "arch/context.h"
+#include "mirror/object_reference.h"
 
 #include <stdint.h>
 #include <string>
@@ -52,16 +54,77 @@
   kUndefined,
 };
 
+/**
+ * @brief Represents the virtual register numbers that denote special meaning.
+ * @details This is used to make some virtual register numbers to have specific
+ * semantic meaning. This is done so that the compiler can treat all virtual
+ * registers the same way and only special case when needed. For example,
+ * calculating SSA does not care whether a virtual register is a normal one or
+ * a compiler temporary, so it can deal with them in a consistent manner. But,
+ * for example if backend cares about temporaries because it has custom spill
+ * location, then it can special case them only then.
+ */
+enum VRegBaseRegNum : int {
+  /**
+   * @brief Virtual registers originating from dex have number >= 0.
+   */
+  kVRegBaseReg = 0,
+
+  /**
+   * @brief Invalid virtual register number.
+   */
+  kVRegInvalid = -1,
+
+  /**
+   * @brief Used to denote the base register for compiler temporaries.
+   * @details Compiler temporaries are virtual registers not originating
+   * from dex but that are created by compiler.  All virtual register numbers
+   * that are <= kVRegTempBaseReg are categorized as compiler temporaries.
+   */
+  kVRegTempBaseReg = -2,
+
+  /**
+   * @brief Base register of temporary that holds the method pointer.
+   * @details This is a special compiler temporary because it has a specific
+   * location on stack.
+   */
+  kVRegMethodPtrBaseReg = kVRegTempBaseReg,
+
+  /**
+   * @brief Base register of non-special compiler temporary.
+   * @details A non-special compiler temporary is one whose spill location
+   * is flexible.
+   */
+  kVRegNonSpecialTempBaseReg = -3,
+};
+
+// A reference from the shadow stack to a MirrorType object within the Java heap.
+template<class MirrorType>
+class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
+ public:
+  StackReference<MirrorType>() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(nullptr) {}
+
+  static StackReference<MirrorType> FromMirrorPtr(MirrorType* p)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return StackReference<MirrorType>(p);
+  }
+
+ private:
+  StackReference<MirrorType>(MirrorType* p) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      : mirror::ObjectReference<false, MirrorType>(p) {}
+};
+
 // ShadowFrame has 3 possible layouts:
 //  - portable - a unified array of VRegs and references. Precise references need GC maps.
 //  - interpreter - separate VRegs and reference arrays. References are in the reference array.
 //  - JNI - just VRegs, but where every VReg holds a reference.
 class ShadowFrame {
  public:
-  // Compute size of ShadowFrame in bytes.
+  // Compute size of ShadowFrame in bytes assuming it has a reference array.
   static size_t ComputeSize(uint32_t num_vregs) {
     return sizeof(ShadowFrame) + (sizeof(uint32_t) * num_vregs) +
-           (sizeof(mirror::Object*) * num_vregs);
+           (sizeof(StackReference<mirror::Object>) * num_vregs);
   }
 
   // Create ShadowFrame in heap for deoptimization.
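ComputeSize now sizes the reference array in terms of StackReference<mirror::Object> rather than raw Object*, so each slot stays 32 bits wide even on 64-bit hosts. A rough standalone model of that calculation (the struct names and the 4-byte compressed reference are assumptions made for illustration, not quotes from the tree):

    #include <cstddef>
    #include <cstdint>

    // Hypothetical stand-ins for the ART types used by ShadowFrame::ComputeSize().
    struct StackReference { uint32_t ref; };  // compressed reference, always 4 bytes
    struct ShadowFrameHeader { void* link; void* method; uint32_t dex_pc; uint32_t num_vregs; };

    // Header + primitive vreg slots + one reference slot per vreg.
    constexpr size_t ComputeSize(uint32_t num_vregs) {
      return sizeof(ShadowFrameHeader) +
             sizeof(uint32_t) * num_vregs +        // primitive vreg payload
             sizeof(StackReference) * num_vregs;   // reference mirror of each vreg
    }

    static_assert(sizeof(StackReference) == 4, "slots stay 32-bit on 64-bit builds");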
@@ -151,22 +214,19 @@
   }
 
   template <bool kChecked = false>
-  mirror::Object* GetVRegReference(size_t i) const {
+  mirror::Object* GetVRegReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, NumberOfVRegs());
     if (HasReferenceArray()) {
-      mirror::Object* ref = References()[i];
+      mirror::Object* ref = References()[i].AsMirrorPtr();
       if (kChecked) {
         CHECK(VerifyReference(ref)) << "VReg " << i << "(" << ref
                                     << ") is in protected space, reference array " << true;
       }
-      // If the vreg reference is not equal to the vreg then the vreg reference is stale.
-      if (UNLIKELY(reinterpret_cast<uint32_t>(ref) != vregs_[i])) {
-        return nullptr;
-      }
       return ref;
     } else {
-      const uint32_t* vreg = &vregs_[i];
-      mirror::Object* ref = *reinterpret_cast<mirror::Object* const*>(vreg);
+      const uint32_t* vreg_ptr = &vregs_[i];
+      mirror::Object* ref =
+          reinterpret_cast<const StackReference<mirror::Object>*>(vreg_ptr)->AsMirrorPtr();
       if (kChecked) {
         CHECK(VerifyReference(ref)) << "VReg " << i
             << "(" << ref << ") is in protected space, reference array " << false;
@@ -187,7 +247,7 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
+      References()[i].Clear();
     }
   }
 
@@ -198,7 +258,7 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
+      References()[i].Clear();
     }
   }
 
@@ -211,8 +271,8 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
-      References()[i + 1] = nullptr;
+      References()[i].Clear();
+      References()[i + 1].Clear();
     }
   }
 
@@ -225,24 +285,24 @@
     // This is needed for moving collectors since these can update the vreg references if they
     // happen to agree with references in the reference array.
     if (kMovingCollector && HasReferenceArray()) {
-      References()[i] = nullptr;
-      References()[i + 1] = nullptr;
+      References()[i].Clear();
+      References()[i + 1].Clear();
     }
   }
 
-  void SetVRegReference(size_t i, mirror::Object* val) {
+  void SetVRegReference(size_t i, mirror::Object* val) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, NumberOfVRegs());
     DCHECK(!kMovingCollector || VerifyReference(val))
         << "VReg " << i << "(" << val << ") is in protected space";
     uint32_t* vreg = &vregs_[i];
-    *reinterpret_cast<mirror::Object**>(vreg) = val;
+    reinterpret_cast<StackReference<mirror::Object>*>(vreg)->Assign(val);
     if (HasReferenceArray()) {
-      References()[i] = val;
+      References()[i].Assign(val);
     }
   }
 
-  mirror::ArtMethod* GetMethod() const {
-    DCHECK_NE(method_, static_cast<void*>(NULL));
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(method_ != nullptr);
     return method_;
   }
 
@@ -254,7 +314,7 @@
 
   void SetMethod(mirror::ArtMethod* method) {
 #if defined(ART_USE_PORTABLE_COMPILER)
-    DCHECK_NE(method, static_cast<void*>(NULL));
+    DCHECK(method != nullptr);
     method_ = method;
 #else
     UNUSED(method);
@@ -262,7 +322,7 @@
 #endif
   }
 
-  bool Contains(mirror::Object** shadow_frame_entry_obj) const {
+  bool Contains(StackReference<mirror::Object>* shadow_frame_entry_obj) const {
     if (HasReferenceArray()) {
       return ((&References()[0] <= shadow_frame_entry_obj) &&
               (shadow_frame_entry_obj <= (&References()[NumberOfVRegs() - 1])));
@@ -302,22 +362,22 @@
       CHECK_LT(num_vregs, static_cast<uint32_t>(kHasReferenceArray));
       number_of_vregs_ |= kHasReferenceArray;
 #endif
-      memset(vregs_, 0, num_vregs * (sizeof(uint32_t) + sizeof(mirror::Object*)));
+      memset(vregs_, 0, num_vregs * (sizeof(uint32_t) + sizeof(StackReference<mirror::Object>)));
     } else {
       memset(vregs_, 0, num_vregs * sizeof(uint32_t));
     }
   }
 
-  mirror::Object* const* References() const {
+  const StackReference<mirror::Object>* References() const {
     DCHECK(HasReferenceArray());
     const uint32_t* vreg_end = &vregs_[NumberOfVRegs()];
-    return reinterpret_cast<mirror::Object* const*>(vreg_end);
+    return reinterpret_cast<const StackReference<mirror::Object>*>(vreg_end);
   }
 
   bool VerifyReference(const mirror::Object* val) const;
 
-  mirror::Object** References() {
-    return const_cast<mirror::Object**>(const_cast<const ShadowFrame*>(this)->References());
+  StackReference<mirror::Object>* References() {
+    return const_cast<StackReference<mirror::Object>*>(const_cast<const ShadowFrame*>(this)->References());
   }
 
 #if defined(ART_USE_PORTABLE_COMPILER)
@@ -426,9 +486,9 @@
     return OFFSETOF_MEMBER(ManagedStack, top_shadow_frame_);
   }
 
-  size_t NumJniShadowFrameReferences() const;
+  size_t NumJniShadowFrameReferences() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool ShadowFramesContain(mirror::Object** shadow_frame_entry) const;
+  bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
 
  private:
   ManagedStack* link_;
@@ -450,18 +510,18 @@
   void WalkStack(bool include_transitions = false)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtMethod* GetMethod() const {
-    if (cur_shadow_frame_ != NULL) {
+  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    if (cur_shadow_frame_ != nullptr) {
       return cur_shadow_frame_->GetMethod();
-    } else if (cur_quick_frame_ != NULL) {
+    } else if (cur_quick_frame_ != nullptr) {
       return *cur_quick_frame_;
     } else {
-      return NULL;
+      return nullptr;
     }
   }
 
   bool IsShadowFrame() const {
-    return cur_shadow_frame_ != NULL;
+    return cur_shadow_frame_ != nullptr;
   }
 
   uint32_t GetDexPc() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -470,9 +530,10 @@
 
   size_t GetNativePcOffset() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uintptr_t* CalleeSaveAddress(int num, size_t frame_size) const {
+  uintptr_t* CalleeSaveAddress(int num, size_t frame_size) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     // Callee saves are held at the top of the frame
-    DCHECK(GetMethod() != NULL);
+    DCHECK(GetMethod() != nullptr);
     byte* save_addr =
         reinterpret_cast<byte*>(cur_quick_frame_) + frame_size - ((num + 1) * kPointerSize);
 #if defined(__i386__)
@@ -509,23 +570,30 @@
 
   // This is a fast-path for getting/setting values in a quick frame.
   uint32_t* GetVRegAddr(mirror::ArtMethod** cur_quick_frame, const DexFile::CodeItem* code_item,
-                   uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
-                   uint16_t vreg) const {
+                        uint32_t core_spills, uint32_t fp_spills, size_t frame_size,
+                        uint16_t vreg) const {
     int offset = GetVRegOffset(code_item, core_spills, fp_spills, frame_size, vreg);
     DCHECK_EQ(cur_quick_frame, GetCurrentQuickFrame());
     byte* vreg_addr = reinterpret_cast<byte*>(cur_quick_frame) + offset;
     return reinterpret_cast<uint32_t*>(vreg_addr);
   }
 
-  uintptr_t GetReturnPc() const;
+  uintptr_t GetReturnPc() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetReturnPc(uintptr_t new_ret_pc);
+  void SetReturnPc(uintptr_t new_ret_pc) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   /*
    * Return sp-relative offset for a Dalvik virtual register, compiler
    * spill or Method* in bytes using Method*.
-   * Note that (reg >= 0) refers to a Dalvik register, (reg == -2)
-   * denotes Method* and (reg <= -3) denotes a compiler temp.
+   * Note that (reg >= 0) refers to a Dalvik register, (reg == -1)
+   * denotes an invalid Dalvik register, (reg == -2) denotes Method*
+   * and (reg <= -3) denotes a compiler temporary. A compiler temporary
+   * can be thought of as a virtual register that does not exist in the
+   * dex but holds intermediate values to help optimizations and code
+   * generation. A special compiler temporary is one whose location in the
+   * frame is well known, while non-special ones have no fixed frame location
+   * as long as the code generator itself knows how to access them.
    *
    *     +------------------------+
    *     | IN[ins-1]              |  {Note: resides in caller's frame}
@@ -546,9 +614,9 @@
    *     | V[1]                   |  ... (reg == 1)
    *     | V[0]                   |  ... (reg == 0) <---- "locals_start"
    *     +------------------------+
-   *     | Compiler temps         |  ... (reg == -2)
-   *     |                        |  ... (reg == -3)
-   *     |                        |  ... (reg == -4)
+   *     | Compiler temp region   |  ... (reg <= -3)
+   *     |                        |
+   *     |                        |
    *     +------------------------+
    *     | stack alignment padding|  {0 to (kStackAlignWords-1) of padding}
    *     +------------------------+
@@ -556,23 +624,35 @@
    *     | OUT[outs-2]            |
    *     |       .                |
    *     | OUT[0]                 |
-   *     | curMethod*             |  ... (reg == -1) <<== sp, 16-byte aligned
+   *     | curMethod*             |  ... (reg == -2) <<== sp, 16-byte aligned
    *     +========================+
    */
   static int GetVRegOffset(const DexFile::CodeItem* code_item,
                            uint32_t core_spills, uint32_t fp_spills,
                            size_t frame_size, int reg) {
     DCHECK_EQ(frame_size & (kStackAlignment - 1), 0U);
+    DCHECK_NE(reg, static_cast<int>(kVRegInvalid));
+
     int num_spills = __builtin_popcount(core_spills) + __builtin_popcount(fp_spills) + 1;  // Filler.
     int num_ins = code_item->ins_size_;
     int num_regs = code_item->registers_size_ - num_ins;
     int locals_start = frame_size - ((num_spills + num_regs) * sizeof(uint32_t));
-    if (reg == -2) {
-      return 0;  // Method*
-    } else if (reg <= -3) {
-      return locals_start - ((reg + 1) * sizeof(uint32_t));  // Compiler temp.
-    } else if (reg < num_regs) {
-      return locals_start + (reg * sizeof(uint32_t));        // Dalvik local reg.
+    if (reg == static_cast<int>(kVRegMethodPtrBaseReg)) {
+      // The current method pointer corresponds to special location on stack.
+      return 0;
+    } else if (reg <= static_cast<int>(kVRegNonSpecialTempBaseReg)) {
+      /*
+       * Special temporaries may have custom locations and the logic above deals with that.
+       * However, non-special temporaries are placed relative to the locals. Since the
+       * virtual register numbers for temporaries "grow" in the negative direction, the reg
+       * number will always be <= the temp base reg. Thus, the logic ensures that the first
+       * temp is at offset -4 bytes from locals, the second is at -8 bytes from locals,
+       * and so on.
+       */
+      int relative_offset = (reg + std::abs(static_cast<int>(kVRegNonSpecialTempBaseReg)) - 1) * sizeof(uint32_t);
+      return locals_start + relative_offset;
+    } else if (reg < num_regs) {
+      return locals_start + (reg * sizeof(uint32_t));
     } else {
       return frame_size + ((reg - num_regs) * sizeof(uint32_t)) + sizeof(uint32_t);  // Dalvik in.
     }
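To make the register-numbering convention concrete, here is a small standalone re-implementation of the offset arithmetic above, exercised with made-up spill counts and frame size (purely illustrative, not values from the runtime):

    #include <cassert>
    #include <cstdlib>

    // Mirrors the special vreg numbers introduced above.
    enum : int { kMethodPtrReg = -2, kNonSpecialTempBase = -3 };
    constexpr int kWordBytes = 4;  // sizeof(uint32_t) on the targets in question

    int GetVRegOffset(int num_spills, int num_ins, int registers_size,
                      int frame_size, int reg) {
      int num_regs = registers_size - num_ins;
      int locals_start = frame_size - (num_spills + num_regs) * kWordBytes;
      if (reg == kMethodPtrReg) {
        return 0;                                        // Method* lives at sp.
      } else if (reg <= kNonSpecialTempBase) {
        // First temp (-3) -> locals_start - 4, next (-4) -> locals_start - 8, ...
        return locals_start + (reg + std::abs(kNonSpecialTempBase) - 1) * kWordBytes;
      } else if (reg < num_regs) {
        return locals_start + reg * kWordBytes;          // Dalvik local register.
      } else {
        return frame_size + (reg - num_regs) * kWordBytes + kWordBytes;  // Dalvik in.
      }
    }

    int main() {
      // Made-up frame: 3 spill slots (incl. filler), 2 ins, 8 dex registers, 64-byte frame.
      assert(GetVRegOffset(3, 2, 8, 64, 0) == 28);   // first local
      assert(GetVRegOffset(3, 2, 8, 64, -3) == 24);  // first compiler temp
      assert(GetVRegOffset(3, 2, 8, 64, -2) == 0);   // Method*
      return 0;
    }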
diff --git a/runtime/stack_indirect_reference_table.h b/runtime/stack_indirect_reference_table.h
index 4c9b038..d22650b 100644
--- a/runtime/stack_indirect_reference_table.h
+++ b/runtime/stack_indirect_reference_table.h
@@ -19,6 +19,7 @@
 
 #include "base/logging.h"
 #include "base/macros.h"
+#include "stack.h"
 
 namespace art {
 namespace mirror {
@@ -33,7 +34,7 @@
  public:
   explicit StackIndirectReferenceTable(mirror::Object* object) :
       number_of_references_(1), link_(NULL) {
-    references_[0] = object;
+    references_[0].Assign(object);
   }
 
   ~StackIndirectReferenceTable() {}
@@ -53,17 +54,17 @@
     link_ = sirt;
   }
 
-  mirror::Object* GetReference(size_t i) const {
+  mirror::Object* GetReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, number_of_references_);
-    return references_[i];
+    return references_[i].AsMirrorPtr();
   }
 
-  void SetReference(size_t i, mirror::Object* object) {
+  void SetReference(size_t i, mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK_LT(i, number_of_references_);
-    references_[i] = object;
+    references_[i].Assign(object);
   }
 
-  bool Contains(mirror::Object** sirt_entry) const {
+  bool Contains(StackReference<mirror::Object>* sirt_entry) const {
     // A SIRT should always contain something. One created by the
     // jni_compiler should have a jobject/jclass as a native method is
     // passed in a this pointer or a class
@@ -89,7 +90,7 @@
   StackIndirectReferenceTable* link_;
 
   // number_of_references_ are available if this is allocated and filled in by jni_compiler.
-  mirror::Object* references_[1];
+  StackReference<mirror::Object> references_[1];
 
   DISALLOW_COPY_AND_ASSIGN(StackIndirectReferenceTable);
 };
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 621e350..c649765 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -178,7 +178,7 @@
         receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(soa.DecodeMethod(mid));
     JValue result;
     ArgArray arg_array(nullptr, 0);
-    arg_array.Append(reinterpret_cast<uint32_t>(receiver));
+    arg_array.Append(receiver);
     m->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), &result, 'V');
   }
   // Detach and delete self.
@@ -190,7 +190,7 @@
 Thread* Thread::FromManagedThread(const ScopedObjectAccessUnchecked& soa,
                                   mirror::Object* thread_peer) {
   mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
-  Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetInt(thread_peer)));
+  Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
   if (kIsDebugBuild) {
@@ -260,8 +260,8 @@
 
   // Thread.start is synchronized, so we know that nativePeer is 0, and know that we're not racing to
   // assign it.
-  env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
-                   reinterpret_cast<jint>(child_thread));
+  env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer,
+                    reinterpret_cast<jlong>(child_thread));
 
   pthread_t new_pthread;
   pthread_attr_t attr;
@@ -283,7 +283,7 @@
     delete child_thread;
     child_thread = nullptr;
     // TODO: remove from thread group?
-    env->SetIntField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
+    env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
     {
       std::string msg(StringPrintf("pthread_create (%s stack) failed: %s",
                                    PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
@@ -388,8 +388,8 @@
 
   Thread* self = this;
   DCHECK_EQ(self, Thread::Current());
-  jni_env_->SetIntField(peer.get(), WellKnownClasses::java_lang_Thread_nativePeer,
-                        reinterpret_cast<jint>(self));
+  jni_env_->SetLongField(peer.get(), WellKnownClasses::java_lang_Thread_nativePeer,
+                         reinterpret_cast<jlong>(self));
 
   ScopedObjectAccess soa(self);
   SirtRef<mirror::String> peer_thread_name(soa.Self(), GetThreadName(soa));
@@ -767,7 +767,7 @@
     }
     const int kMaxRepetition = 3;
     mirror::Class* c = m->GetDeclaringClass();
-    const mirror::DexCache* dex_cache = c->GetDexCache();
+    mirror::DexCache* dex_cache = c->GetDexCache();
     int line_number = -1;
     if (dex_cache != nullptr) {  // be tolerant of bad input
       const DexFile& dex_file = *dex_cache->GetDexFile();
@@ -1017,7 +1017,7 @@
     RemoveFromThreadGroup(soa);
 
     // this.nativePeer = 0;
-    soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)->SetInt(opeer_, 0);
+    soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)->SetLong(opeer_, 0);
     Dbg::PostThreadDeath(self);
 
     // Thread.join() is implemented as an Object.wait() on the Thread.lock object. Signal anyone
@@ -1046,9 +1046,11 @@
   }
   opeer_ = nullptr;
 
-  delete jni_env_;
-  jni_env_ = nullptr;
-
+  bool initialized = (jni_env_ != nullptr);  // Did Thread::Init run?
+  if (initialized) {
+    delete jni_env_;
+    jni_env_ = nullptr;
+  }
   CHECK_NE(GetState(), kRunnable);
   CHECK_NE(ReadFlag(kCheckpointRequest), true);
   CHECK(checkpoint_functions_[0] == nullptr);
@@ -1065,6 +1067,10 @@
     delete long_jump_context_;
   }
 
+  if (initialized) {
+    CleanupCpu();
+  }
+
   delete debug_invoke_req_;
   delete single_step_control_;
   delete instrumentation_stack_;
@@ -1127,7 +1133,8 @@
 }
 
 bool Thread::SirtContains(jobject obj) const {
-  mirror::Object** sirt_entry = reinterpret_cast<mirror::Object**>(obj);
+  StackReference<mirror::Object>* sirt_entry =
+      reinterpret_cast<StackReference<mirror::Object>*>(obj);
   for (StackIndirectReferenceTable* cur = top_sirt_; cur; cur = cur->GetLink()) {
     if (cur->Contains(sirt_entry)) {
       return true;
@@ -1166,10 +1173,11 @@
     IndirectReferenceTable& locals = jni_env_->locals;
     result = const_cast<mirror::Object*>(locals.Get(ref));
   } else if (kind == kSirtOrInvalid) {
-    // TODO: make stack indirect reference table lookup more efficient
-    // Check if this is a local reference in the SIRT
+    // TODO: make stack indirect reference table lookup more efficient.
+    // Check if this is a local reference in the SIRT.
     if (LIKELY(SirtContains(obj))) {
-      result = *reinterpret_cast<mirror::Object**>(obj);  // Read from SIRT
+      // Read from SIRT.
+      result = reinterpret_cast<StackReference<mirror::Object>*>(obj)->AsMirrorPtr();
     } else if (Runtime::Current()->GetJavaVM()->work_around_app_jni_bugs) {
       // Assume an invalid local reference is actually a direct pointer.
       result = reinterpret_cast<mirror::Object*>(obj);
@@ -1410,8 +1418,8 @@
     mirror::ArtMethod* method = down_cast<mirror::ArtMethod*>(method_trace->Get(i));
     MethodHelper mh(method);
     int32_t line_number;
-    SirtRef<mirror::String> class_name_object(soa.Self(), NULL);
-    SirtRef<mirror::String> source_name_object(soa.Self(), NULL);
+    SirtRef<mirror::String> class_name_object(soa.Self(), nullptr);
+    SirtRef<mirror::String> source_name_object(soa.Self(), nullptr);
     if (method->IsProxyMethod()) {
       line_number = -1;
       class_name_object.reset(method->GetDeclaringClass()->GetName());
@@ -1423,16 +1431,18 @@
       // Allocate element, potentially triggering GC
       // TODO: reuse class_name_object via Class::name_?
       const char* descriptor = mh.GetDeclaringClassDescriptor();
-      CHECK(descriptor != NULL);
+      CHECK(descriptor != nullptr);
       std::string class_name(PrettyDescriptor(descriptor));
       class_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), class_name.c_str()));
-      if (class_name_object.get() == NULL) {
-        return NULL;
+      if (class_name_object.get() == nullptr) {
+        return nullptr;
       }
       const char* source_file = mh.GetDeclaringClassSourceFile();
-      source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
-      if (source_name_object.get() == NULL) {
-        return NULL;
+      if (source_file != nullptr) {
+        source_name_object.reset(mirror::String::AllocFromModifiedUtf8(soa.Self(), source_file));
+        if (source_name_object.get() == nullptr) {
+          return nullptr;
+        }
       }
     }
     const char* method_name = mh.GetName();
@@ -1561,12 +1571,12 @@
     SetException(gc_safe_throw_location, exception.get());
   } else {
     ArgArray args("VLL", 3);
-    args.Append(reinterpret_cast<uint32_t>(exception.get()));
+    args.Append(exception.get());
     if (msg != nullptr) {
-      args.Append(reinterpret_cast<uint32_t>(msg_string.get()));
+      args.Append(msg_string.get());
     }
     if (cause.get() != nullptr) {
-      args.Append(reinterpret_cast<uint32_t>(cause.get()));
+      args.Append(cause.get());
     }
     JValue result;
     exception_init_method->Invoke(this, args.GetArray(), args.GetNumBytes(), &result, 'V');
@@ -1624,8 +1634,11 @@
   PORTABLE_ENTRY_POINT_INFO(pPortableResolutionTrampoline),
   PORTABLE_ENTRY_POINT_INFO(pPortableToInterpreterBridge),
   QUICK_ENTRY_POINT_INFO(pAllocArray),
+  QUICK_ENTRY_POINT_INFO(pAllocArrayResolved),
   QUICK_ENTRY_POINT_INFO(pAllocArrayWithAccessCheck),
   QUICK_ENTRY_POINT_INFO(pAllocObject),
+  QUICK_ENTRY_POINT_INFO(pAllocObjectResolved),
+  QUICK_ENTRY_POINT_INFO(pAllocObjectInitialized),
   QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck),
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray),
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck),
@@ -1855,7 +1868,7 @@
         // Java method.
         // Portable path use DexGcMap and store in Method.native_gc_map_.
         const uint8_t* gc_map = m->GetNativeGcMap();
-        CHECK(gc_map != NULL) << PrettyMethod(m);
+        CHECK(gc_map != nullptr) << PrettyMethod(m);
         verifier::DexPcToReferenceMap dex_gc_map(gc_map);
         uint32_t dex_pc = GetDexPc();
         const uint8_t* reg_bitmap = dex_gc_map.FindBitMap(dex_pc);
@@ -1909,13 +1922,13 @@
                   }
                 }
               } else {
-                uint32_t* reg_addr =
-                    GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size, reg);
+                uintptr_t* reg_addr = reinterpret_cast<uintptr_t*>(
+                    GetVRegAddr(cur_quick_frame, code_item, core_spills, fp_spills, frame_size, reg));
                 mirror::Object* ref = reinterpret_cast<mirror::Object*>(*reg_addr);
                 if (ref != nullptr) {
                   mirror::Object* new_ref = visitor_(ref, reg, this);
                   if (ref != new_ref) {
-                    *reg_addr = reinterpret_cast<uint32_t>(new_ref);
+                    *reg_addr = reinterpret_cast<uintptr_t>(new_ref);
                   }
                 }
               }
diff --git a/runtime/thread.h b/runtime/thread.h
index 0810909..b7f8bb0 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -487,23 +487,23 @@
                         ManagedStack::TopShadowFrameOffset());
   }
 
-  // Number of references allocated in JNI ShadowFrames on this thread
-  size_t NumJniShadowFrameReferences() const {
+  // Number of references allocated in JNI ShadowFrames on this thread.
+  size_t NumJniShadowFrameReferences() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return managed_stack_.NumJniShadowFrameReferences();
   }
 
-  // Number of references in SIRTs on this thread
+  // Number of references in SIRTs on this thread.
   size_t NumSirtReferences();
 
-  // Number of references allocated in SIRTs & JNI shadow frames on this thread
-  size_t NumStackReferences() {
+  // Number of references allocated in SIRTs & JNI shadow frames on this thread.
+  size_t NumStackReferences() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return NumSirtReferences() + NumJniShadowFrameReferences();
   };
 
   // Is the given obj in this thread's stack indirect reference table?
   bool SirtContains(jobject obj) const;
 
-  void SirtVisitRoots(RootVisitor* visitor, void* arg);
+  void SirtVisitRoots(RootVisitor* visitor, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void PushSirt(StackIndirectReferenceTable* sirt) {
     sirt->SetLink(top_sirt_);
@@ -619,6 +619,7 @@
   void Init(ThreadList*, JavaVMExt*) EXCLUSIVE_LOCKS_REQUIRED(Locks::runtime_shutdown_lock_);
   void InitCardTable();
   void InitCpu();
+  void CleanupCpu();
   void InitTlsEntryPoints();
   void InitTid();
   void InitPthreadKeySelf();
@@ -787,7 +788,7 @@
   // A positive value implies we're in a region where thread suspension isn't expected.
   uint32_t no_thread_suspension_;
 
-  // Cause for last suspension.
+  // If no_thread_suspension_ is > 0, what is causing that assertion.
   const char* last_no_thread_suspension_cause_;
 
   // Maximum number of checkpoint functions.
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index aca0561..e8c9ff8 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -31,7 +31,7 @@
       name_(name) {
   std::string error_msg;
   stack_.reset(MemMap::MapAnonymous(name.c_str(), nullptr, stack_size, PROT_READ | PROT_WRITE,
-                                    &error_msg));
+                                    false, &error_msg));
   CHECK(stack_.get() != nullptr) << error_msg;
   const char* reason = "new thread pool worker thread";
   pthread_attr_t attr;
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index 2029d4b..c6f0e92 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -17,7 +17,7 @@
 
 #include <string>
 
-#include "atomic_integer.h"
+#include "atomic.h"
 #include "common_test.h"
 #include "thread_pool.h"
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b0f6e37..18185d4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -89,7 +89,7 @@
   explicit BuildStackTraceVisitor(Thread* thread) : StackVisitor(thread, NULL),
       method_trace_(Trace::AllocStackTrace()) {}
 
-  bool VisitFrame() {
+  bool VisitFrame() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::ArtMethod* m = GetMethod();
     // Ignore runtime frames (in particular callee save).
     if (!m->IsRuntimeMethod()) {
@@ -133,9 +133,9 @@
   return static_cast<TraceAction>(tmid & kTraceMethodActionMask);
 }
 
-static uint32_t EncodeTraceMethodAndAction(const mirror::ArtMethod* method,
+static uint32_t EncodeTraceMethodAndAction(mirror::ArtMethod* method,
                                            TraceAction action) {
-  uint32_t tmid = reinterpret_cast<uint32_t>(method) | action;
+  uint32_t tmid = PointerToLowMemUInt32(method) | action;
   DCHECK_EQ(method, DecodeTraceMethodId(tmid));
   return tmid;
 }
@@ -298,7 +298,7 @@
 
 void* Trace::RunSamplingThread(void* arg) {
   Runtime* runtime = Runtime::Current();
-  int interval_us = reinterpret_cast<int>(arg);
+  intptr_t interval_us = reinterpret_cast<intptr_t>(arg);
   CHECK_GE(interval_us, 0);
   CHECK(runtime->AttachCurrentThread("Sampling Profiler", true, runtime->GetSystemThreadGroup(),
                                      !runtime->IsCompiler()));
@@ -508,7 +508,7 @@
   } else {
     os << StringPrintf("clock=wall\n");
   }
-  os << StringPrintf("elapsed-time-usec=%llu\n", elapsed);
+  os << StringPrintf("elapsed-time-usec=%" PRIu64 "\n", elapsed);
   size_t num_records = (final_offset - kTraceHeaderLength) / GetRecordSize(clock_source_);
   os << StringPrintf("num-method-calls=%zd\n", num_records);
   os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead_ns);
@@ -548,13 +548,13 @@
 }
 
 void Trace::DexPcMoved(Thread* thread, mirror::Object* this_object,
-                       const mirror::ArtMethod* method, uint32_t new_dex_pc) {
+                       mirror::ArtMethod* method, uint32_t new_dex_pc) {
   // We're not recorded to listen to this kind of event, so complain.
   LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc;
 };
 
 void Trace::MethodEntered(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t dex_pc) {
+                          mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
@@ -563,7 +563,7 @@
 }
 
 void Trace::MethodExited(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* method, uint32_t dex_pc,
+                         mirror::ArtMethod* method, uint32_t dex_pc,
                          const JValue& return_value) {
   UNUSED(return_value);
   uint32_t thread_clock_diff = 0;
@@ -574,7 +574,7 @@
 }
 
 void Trace::MethodUnwind(Thread* thread, mirror::Object* this_object,
-                         const mirror::ArtMethod* method, uint32_t dex_pc) {
+                         mirror::ArtMethod* method, uint32_t dex_pc) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
@@ -605,7 +605,7 @@
   }
 }
 
-void Trace::LogMethodTraceEvent(Thread* thread, const mirror::ArtMethod* method,
+void Trace::LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                                 instrumentation::Instrumentation::InstrumentationEvent event,
                                 uint32_t thread_clock_diff, uint32_t wall_clock_diff) {
   // Advance cur_offset_ atomically.
diff --git a/runtime/trace.h b/runtime/trace.h
index 9be015a..d810df0 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -73,17 +73,17 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
-                             const mirror::ArtMethod* method, uint32_t dex_pc)
+                             mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc,
+                            mirror::ArtMethod* method, uint32_t dex_pc,
                             const JValue& return_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            const mirror::ArtMethod* method, uint32_t dex_pc)
+                            mirror::ArtMethod* method, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          const mirror::ArtMethod* method, uint32_t new_dex_pc)
+                          mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   virtual void ExceptionCaught(Thread* thread, const ThrowLocation& throw_location,
                                mirror::ArtMethod* catch_method, uint32_t catch_dex_pc,
@@ -105,7 +105,7 @@
 
   void ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff);
 
-  void LogMethodTraceEvent(Thread* thread, const mirror::ArtMethod* method,
+  void LogMethodTraceEvent(Thread* thread, mirror::ArtMethod* method,
                            instrumentation::Instrumentation::InstrumentationEvent event,
                            uint32_t thread_clock_diff, uint32_t wall_clock_diff);
 
diff --git a/runtime/utf.cc b/runtime/utf.cc
index 5ec2ea1..e48d6d2 100644
--- a/runtime/utf.cc
+++ b/runtime/utf.cc
@@ -68,7 +68,7 @@
   }
 }
 
-int32_t ComputeUtf16Hash(const mirror::CharArray* chars, int32_t offset,
+int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset,
                          size_t char_count) {
   int32_t hash = 0;
   for (size_t i = 0; i < char_count; i++) {
diff --git a/runtime/utf.h b/runtime/utf.h
index cc5e6d4..5b2289e 100644
--- a/runtime/utf.h
+++ b/runtime/utf.h
@@ -73,7 +73,7 @@
 /*
  * The java.lang.String hashCode() algorithm.
  */
-int32_t ComputeUtf16Hash(const mirror::CharArray* chars, int32_t offset, size_t char_count)
+int32_t ComputeUtf16Hash(mirror::CharArray* chars, int32_t offset, size_t char_count)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count);
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index a293043..aad21bc 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -215,14 +215,14 @@
   }
 }
 
-std::string PrettyDescriptor(const mirror::String* java_descriptor) {
+std::string PrettyDescriptor(mirror::String* java_descriptor) {
   if (java_descriptor == NULL) {
     return "null";
   }
   return PrettyDescriptor(java_descriptor->ToModifiedUtf8());
 }
 
-std::string PrettyDescriptor(const mirror::Class* klass) {
+std::string PrettyDescriptor(mirror::Class* klass) {
   if (klass == NULL) {
     return "null";
   }
@@ -283,7 +283,7 @@
   return PrettyDescriptor(descriptor_string);
 }
 
-std::string PrettyField(const mirror::ArtField* f, bool with_type) {
+std::string PrettyField(mirror::ArtField* f, bool with_type) {
   if (f == NULL) {
     return "null";
   }
@@ -358,7 +358,7 @@
   return PrettyDescriptor(return_type);
 }
 
-std::string PrettyMethod(const mirror::ArtMethod* m, bool with_signature) {
+std::string PrettyMethod(mirror::ArtMethod* m, bool with_signature) {
   if (m == nullptr) {
     return "null";
   }
@@ -401,7 +401,7 @@
   return result;
 }
 
-std::string PrettyTypeOf(const mirror::Object* obj) {
+std::string PrettyTypeOf(mirror::Object* obj) {
   if (obj == NULL) {
     return "null";
   }
@@ -417,7 +417,7 @@
   return result;
 }
 
-std::string PrettyClass(const mirror::Class* c) {
+std::string PrettyClass(mirror::Class* c) {
   if (c == NULL) {
     return "null";
   }
@@ -428,7 +428,7 @@
   return result;
 }
 
-std::string PrettyClassAndClassLoader(const mirror::Class* c) {
+std::string PrettyClassAndClassLoader(mirror::Class* c) {
   if (c == NULL) {
     return "null";
   }
@@ -445,7 +445,7 @@
 std::string PrettySize(int64_t byte_count) {
   // The byte thresholds at which we display amounts.  A byte count is displayed
   // in unit U when kUnitThresholds[U] <= bytes < kUnitThresholds[U+1].
-  static const size_t kUnitThresholds[] = {
+  static const int64_t kUnitThresholds[] = {
     0,              // B up to...
     3*1024,         // KB up to...
     2*1024*1024,    // MB up to...
@@ -464,7 +464,7 @@
       break;
     }
   }
-  return StringPrintf("%s%lld%s", negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
+  return StringPrintf("%s%" PRId64 "%s", negative_str, byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
 
 std::string PrettyDuration(uint64_t nano_duration) {
@@ -534,18 +534,18 @@
   uint64_t whole_part = nano_duration / divisor;
   uint64_t fractional_part = nano_duration % divisor;
   if (fractional_part == 0) {
-    return StringPrintf("%llu%s", whole_part, unit);
+    return StringPrintf("%" PRIu64 "%s", whole_part, unit);
   } else {
     while ((fractional_part % 1000) == 0) {
       zero_fill -= 3;
       fractional_part /= 1000;
     }
     if (zero_fill == 3) {
-      return StringPrintf("%llu.%03llu%s", whole_part, fractional_part, unit);
+      return StringPrintf("%" PRIu64 ".%03" PRIu64 "%s", whole_part, fractional_part, unit);
     } else if (zero_fill == 6) {
-      return StringPrintf("%llu.%06llu%s", whole_part, fractional_part, unit);
+      return StringPrintf("%" PRIu64 ".%06" PRIu64 "%s", whole_part, fractional_part, unit);
     } else {
-      return StringPrintf("%llu.%09llu%s", whole_part, fractional_part, unit);
+      return StringPrintf("%" PRIu64 ".%09" PRIu64 "%s", whole_part, fractional_part, unit);
     }
   }
 }
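The %llu → %" PRIu64 " changes above swap hard-coded length modifiers for the <cinttypes> macros, which expand to the correct conversion specifier for a 64-bit integer on both 32- and 64-bit targets. A minimal sketch of the idiom, using plain printf in place of StringPrintf:

    #include <cinttypes>
    #include <cstdio>

    int main() {
      uint64_t elapsed_us = 1234567890123ULL;
      // PRIu64 expands to "llu" or "lu" as appropriate for the target ABI, so
      // the format string stays correct when uint64_t is 'unsigned long'.
      std::printf("elapsed-time-usec=%" PRIu64 "\n", elapsed_us);
      return 0;
    }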
@@ -627,7 +627,7 @@
   return descriptor;
 }
 
-std::string JniShortName(const mirror::ArtMethod* m) {
+std::string JniShortName(mirror::ArtMethod* m) {
   MethodHelper mh(m);
   std::string class_name(mh.GetDeclaringClassDescriptor());
   // Remove the leading 'L' and trailing ';'...
@@ -646,7 +646,7 @@
   return short_name;
 }
 
-std::string JniLongName(const mirror::ArtMethod* m) {
+std::string JniLongName(mirror::ArtMethod* m) {
   std::string long_name;
   long_name += JniShortName(m);
   long_name += "__";
diff --git a/runtime/utils.h b/runtime/utils.h
index f063c0a..e2d8966 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -166,8 +166,29 @@
   return static_cast<int>(x & 0x0000003F);
 }
 
-#define CLZ(x) __builtin_clz(x)
-#define CTZ(x) __builtin_ctz(x)
+template<typename T>
+static inline int CLZ(T x) {
+  if (sizeof(T) == sizeof(uint32_t)) {
+    return __builtin_clz(x);
+  } else {
+    return __builtin_clzll(x);
+  }
+}
+
+template<typename T>
+static inline int CTZ(T x) {
+  if (sizeof(T) == sizeof(uint32_t)) {
+    return __builtin_ctz(x);
+  } else {
+    return __builtin_ctzll(x);
+  }
+}
+
+static inline uint32_t PointerToLowMemUInt32(const void* p) {
+  uintptr_t intp = reinterpret_cast<uintptr_t>(p);
+  DCHECK_LE(intp, 0xFFFFFFFFU);
+  return intp & 0xFFFFFFFFU;
+}
 
 static inline bool NeedsEscaping(uint16_t ch) {
   return (ch < ' ' || ch > '~');
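The CLZ/CTZ macros become templates that dispatch to the 64-bit builtins for wider operands, and PointerToLowMemUInt32 makes explicit (via DCHECK) the assumption that a pointer being narrowed really fits in 32 bits, as the trace-id encoding in trace.cc relies on. A small usage sketch under those same assumptions:

    #include <cassert>
    #include <cstdint>

    template <typename T>
    static inline int CLZ(T x) {
      if (sizeof(T) == sizeof(uint32_t)) {
        return __builtin_clz(x);
      } else {
        return __builtin_clzll(x);
      }
    }

    static inline uint32_t PointerToLowMemUInt32(const void* p) {
      uintptr_t intp = reinterpret_cast<uintptr_t>(p);
      assert(intp <= 0xFFFFFFFFU);  // the caller must pass a pointer below 4 GiB
      return static_cast<uint32_t>(intp);
    }

    int main() {
      assert(CLZ<uint32_t>(1u) == 31);
      assert(CLZ<uint64_t>(1ull) == 63);
      // Not dereferenced; just demonstrates narrowing a known-low address.
      const void* low_addr = reinterpret_cast<const void*>(static_cast<uintptr_t>(0x1000));
      assert(PointerToLowMemUInt32(low_addr) == 0x1000u);
      return 0;
    }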
@@ -200,21 +221,22 @@
 // Returns a human-readable equivalent of 'descriptor'. So "I" would be "int",
 // "[[I" would be "int[][]", "[Ljava/lang/String;" would be
 // "java.lang.String[]", and so forth.
-std::string PrettyDescriptor(const mirror::String* descriptor);
+std::string PrettyDescriptor(mirror::String* descriptor)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyDescriptor(const std::string& descriptor);
 std::string PrettyDescriptor(Primitive::Type type);
-std::string PrettyDescriptor(const mirror::Class* klass)
+std::string PrettyDescriptor(mirror::Class* klass)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable signature for 'f'. Something like "a.b.C.f" or
 // "int a.b.C.f" (depending on the value of 'with_type').
-std::string PrettyField(const mirror::ArtField* f, bool with_type = true)
+std::string PrettyField(mirror::ArtField* f, bool with_type = true)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type = true);
 
 // Returns a human-readable signature for 'm'. Something like "a.b.C.m" or
 // "a.b.C.m(II)V" (depending on the value of 'with_signature').
-std::string PrettyMethod(const mirror::ArtMethod* m, bool with_signature = true)
+std::string PrettyMethod(mirror::ArtMethod* m, bool with_signature = true)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature = true);
 
@@ -222,7 +244,7 @@
 // So given an instance of java.lang.String, the output would
 // be "java.lang.String". Given an array of int, the output would be "int[]".
 // Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyTypeOf(const mirror::Object* obj)
+std::string PrettyTypeOf(mirror::Object* obj)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable form of the type at an index in the specified dex file.
@@ -231,11 +253,11 @@
 
 // Returns a human-readable form of the name of the given class.
 // Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyClass(const mirror::Class* c)
+std::string PrettyClass(mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable form of the name of the given class with its class loader.
-std::string PrettyClassAndClassLoader(const mirror::Class* c)
+std::string PrettyClassAndClassLoader(mirror::Class* c)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Returns a human-readable size string such as "1MB".
@@ -278,10 +300,10 @@
 bool IsValidMemberName(const char* s);
 
 // Returns the JNI native function name for the non-overloaded method 'm'.
-std::string JniShortName(const mirror::ArtMethod* m)
+std::string JniShortName(mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 // Returns the JNI native function name for the overloaded method 'm'.
-std::string JniLongName(const mirror::ArtMethod* m)
+std::string JniLongName(mirror::ArtMethod* m)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 bool ReadFileToString(const std::string& file_name, std::string* result);
diff --git a/runtime/verifier/method_verifier-inl.h b/runtime/verifier/method_verifier-inl.h
index 5cf234d..74c3e33 100644
--- a/runtime/verifier/method_verifier-inl.h
+++ b/runtime/verifier/method_verifier-inl.h
@@ -25,47 +25,47 @@
 namespace art {
 namespace verifier {
 
-const DexFile::CodeItem* MethodVerifier::CodeItem() const {
+inline const DexFile::CodeItem* MethodVerifier::CodeItem() const {
   return code_item_;
 }
 
-RegisterLine* MethodVerifier::GetRegLine(uint32_t dex_pc) {
+inline RegisterLine* MethodVerifier::GetRegLine(uint32_t dex_pc) {
   return reg_table_.GetLine(dex_pc);
 }
 
-const InstructionFlags& MethodVerifier::GetInstructionFlags(size_t index) const {
+inline const InstructionFlags& MethodVerifier::GetInstructionFlags(size_t index) const {
   return insn_flags_[index];
 }
 
-mirror::ClassLoader* MethodVerifier::GetClassLoader() {
+inline mirror::ClassLoader* MethodVerifier::GetClassLoader() {
   return class_loader_->get();
 }
 
-mirror::DexCache* MethodVerifier::GetDexCache() {
+inline mirror::DexCache* MethodVerifier::GetDexCache() {
   return dex_cache_->get();
 }
 
-MethodReference MethodVerifier::GetMethodReference() const {
+inline MethodReference MethodVerifier::GetMethodReference() const {
   return MethodReference(dex_file_, dex_method_idx_);
 }
 
-uint32_t MethodVerifier::GetAccessFlags() const {
+inline uint32_t MethodVerifier::GetAccessFlags() const {
   return method_access_flags_;
 }
 
-bool MethodVerifier::HasCheckCasts() const {
+inline bool MethodVerifier::HasCheckCasts() const {
   return has_check_casts_;
 }
 
-bool MethodVerifier::HasVirtualOrInterfaceInvokes() const {
+inline bool MethodVerifier::HasVirtualOrInterfaceInvokes() const {
   return has_virtual_or_interface_invokes_;
 }
 
-bool MethodVerifier::HasFailures() const {
+inline bool MethodVerifier::HasFailures() const {
   return !failure_messages_.empty();
 }
 
-const RegType& MethodVerifier::ResolveCheckedClass(uint32_t class_idx) {
+inline const RegType& MethodVerifier::ResolveCheckedClass(uint32_t class_idx) {
   DCHECK(!HasFailures());
   const RegType& result = ResolveClassAndCheckAccess(class_idx);
   DCHECK(!HasFailures());
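Marking these out-of-class definitions inline matters because method_verifier-inl.h is included from more than one translation unit; without inline, each inclusion would emit its own strong definition and the link would fail with duplicate symbols. A minimal sketch of the rule, with hypothetical names rather than the verifier's:

    // widget.h
    class Widget {
     public:
      int Count() const;
     private:
      int count_ = 0;
    };

    // widget-inl.h -- included by several .cc files.
    // 'inline' lets every translation unit carry an identical definition of
    // Widget::Count without violating the one-definition rule at link time.
    inline int Widget::Count() const {
      return count_;
    }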
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d2681df..30be36c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -85,7 +85,7 @@
   }
 }
 
-MethodVerifier::FailureKind MethodVerifier::VerifyClass(const mirror::Class* klass,
+MethodVerifier::FailureKind MethodVerifier::VerifyClass(mirror::Class* klass,
                                                         bool allow_soft_failures,
                                                         std::string* error) {
   if (klass->IsVerified()) {
@@ -837,7 +837,7 @@
   /* offset to array data table is a relative branch-style offset */
   array_data = insns + array_data_offset;
   /* make sure the table is 32-bit aligned */
-  if ((((uint32_t) array_data) & 0x03) != 0) {
+  if ((reinterpret_cast<uintptr_t>(array_data) & 0x03) != 0) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned array data table: at " << cur_offset
                                       << ", data offset " << array_data_offset;
     return false;
@@ -941,7 +941,7 @@
   /* offset to switch table is a relative branch-style offset */
   const uint16_t* switch_insns = insns + switch_offset;
   /* make sure the table is 32-bit aligned */
-  if ((((uint32_t) switch_insns) & 0x03) != 0) {
+  if ((reinterpret_cast<uintptr_t>(switch_insns) & 0x03) != 0) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned switch table: at " << cur_offset
                                       << ", switch offset " << switch_offset;
     return false;
@@ -3616,10 +3616,9 @@
 
 // Look for an instance field with this offset.
 // TODO: we may speed up the search if offsets are sorted by doing a quick search.
-static mirror::ArtField* FindInstanceFieldWithOffset(const mirror::Class* klass,
-                                                  uint32_t field_offset)
+static mirror::ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const mirror::ObjectArray<mirror::ArtField>* instance_fields = klass->GetIFields();
+  mirror::ObjectArray<mirror::ArtField>* instance_fields = klass->GetIFields();
   if (instance_fields != NULL) {
     for (int32_t i = 0, e = instance_fields->GetLength(); i < e; ++i) {
       mirror::ArtField* field = instance_fields->Get(i);
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 053cee5..7c75c9c 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -140,8 +140,7 @@
   };
 
   /* Verify a class. Returns "kNoFailure" on success. */
-  static FailureKind VerifyClass(const mirror::Class* klass, bool allow_soft_failures,
-                                 std::string* error)
+  static FailureKind VerifyClass(mirror::Class* klass, bool allow_soft_failures, std::string* error)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static FailureKind VerifyClass(const DexFile* dex_file, SirtRef<mirror::DexCache>& dex_cache,
                                  SirtRef<mirror::ClassLoader>& class_loader,
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 4be1d02..3818375 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -203,7 +203,7 @@
   bool IsObjectArrayTypes() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   Primitive::Type GetPrimitiveType() const;
   bool IsJavaLangObjectArray() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  bool IsInstantiableTypes() const;
+  bool IsInstantiableTypes() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   const std::string& GetDescriptor() const {
     DCHECK(HasClass() || (IsUnresolvedTypes() && !IsUnresolvedMergedReference() &&
                           !IsUnresolvedSuperClass()));
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index e3946f7..546eb40 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -179,7 +179,7 @@
   java_lang_Thread_name = CacheField(env, java_lang_Thread, false, "name", "Ljava/lang/String;");
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
   java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
-  java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "I");
+  java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
   java_lang_ThreadGroup_mainThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "mainThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_name = CacheField(env, java_lang_ThreadGroup, false, "name", "Ljava/lang/String;");
   java_lang_ThreadGroup_systemThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "systemThreadGroup", "Ljava/lang/ThreadGroup;");
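The nativePeer field changes its JNI descriptor from "I" to "J" so a Thread* still fits on 64-bit targets, and the thread.cc hunks earlier switch to the Long accessors to match. A reduced sketch of the pattern (function and field names here are illustrative, not the runtime's):

    #include <jni.h>
    #include <cstdint>

    struct Thread;  // stand-in for art::Thread

    // Store a native pointer in a Java 'long' field so it survives on 64-bit.
    void StoreNativePeer(JNIEnv* env, jobject java_peer, jfieldID native_peer_field,
                         Thread* self) {
      env->SetLongField(java_peer, native_peer_field, reinterpret_cast<jlong>(self));
    }

    Thread* LoadNativePeer(JNIEnv* env, jobject java_peer, jfieldID native_peer_field) {
      jlong raw = env->GetLongField(java_peer, native_peer_field);
      return reinterpret_cast<Thread*>(static_cast<uintptr_t>(raw));
    }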
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 8cb1993..ba0b91e 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -55,7 +55,7 @@
   name += entry_filename;
   UniquePtr<MemMap> map(MemMap::MapAnonymous(name.c_str(),
                                              NULL, GetUncompressedLength(),
-                                             PROT_READ | PROT_WRITE, error_msg));
+                                             PROT_READ | PROT_WRITE, false, error_msg));
   if (map.get() == nullptr) {
     DCHECK(!error_msg->empty());
     return nullptr;
diff --git a/test/003-omnibus-opcodes/src/InternedString.java b/test/003-omnibus-opcodes/src/InternedString.java
index 804564b..35812e6 100644
--- a/test/003-omnibus-opcodes/src/InternedString.java
+++ b/test/003-omnibus-opcodes/src/InternedString.java
@@ -34,14 +34,14 @@
 
     private static void testDeadInternedString() {
         WeakReference<String> strRef = makeWeakString();
-        System.gc();
+        Runtime.getRuntime().gc();
         // "blahblah" should disappear from the intern list
         Main.assertTrue(strRef.get() == null);
     }
 
     private static void testImmortalInternedString() {
         WeakReference strRef = new WeakReference<String>(CONST.intern());
-        System.gc();
+        Runtime.getRuntime().gc();
         // Class constant string should be entered to the interned table when
         // loaded
         Main.assertTrue(CONST == CONST.intern());
@@ -53,7 +53,7 @@
         strRef = new WeakReference<String>(s.intern());
         // Kill s, otherwise the string object is still accessible from root set
         s = "";
-        System.gc();
+        Runtime.getRuntime().gc();
         Main.assertTrue(strRef.get() == CONST);
     }
 }
diff --git a/test/003-omnibus-opcodes/src/Main.java b/test/003-omnibus-opcodes/src/Main.java
index 5dcc79c..25050df 100644
--- a/test/003-omnibus-opcodes/src/Main.java
+++ b/test/003-omnibus-opcodes/src/Main.java
@@ -26,7 +26,7 @@
         main.run();
 
         /* run through the heap to see if we trashed something */
-        System.gc();
+        Runtime.getRuntime().gc();
 
         System.out.println("Done!");
     }
diff --git a/test/023-many-interfaces/src/ManyInterfaces.java b/test/023-many-interfaces/src/ManyInterfaces.java
index c4cb4ab..d69a490 100644
--- a/test/023-many-interfaces/src/ManyInterfaces.java
+++ b/test/023-many-interfaces/src/ManyInterfaces.java
@@ -200,7 +200,7 @@
          * Clear the heap.  The various classes involved should already
          * be loaded and ready as a result of instantiating ManyInterfaces.
          */
-        System.gc();
+        Runtime.getRuntime().gc();
 
         start = System.nanoTime();
         testIface001(obj, iter);
diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java
index e19fd3e..330e344 100644
--- a/test/030-bad-finalizer/src/Main.java
+++ b/test/030-bad-finalizer/src/Main.java
@@ -25,12 +25,12 @@
         bf = null;
 
         System.out.println("Nulled. Requestion gc.");
-        System.gc();
+        Runtime.getRuntime().gc();
 
         for (int i = 0; i < 8; i++) {
             BadFinalizer.snooze(4000);
             System.out.println("Requesting another GC.");
-            System.gc();
+            Runtime.getRuntime().gc();
         }
 
         System.out.println("Done waiting.");
diff --git a/test/036-finalizer/src/Main.java b/test/036-finalizer/src/Main.java
index 4bc7d8d..6195aff 100644
--- a/test/036-finalizer/src/Main.java
+++ b/test/036-finalizer/src/Main.java
@@ -93,7 +93,7 @@
 
         /* this will try to collect and finalize ft */
         System.out.println("gc");
-        System.gc();
+        Runtime.getRuntime().gc();
 
         System.out.println("wimp: " + wimpString(wimp));
         System.out.println("finalize");
@@ -106,7 +106,7 @@
         System.out.println("reborn: " + FinalizerTest.mReborn);
         System.out.println("wimp: " + wimpString(wimp));
         System.out.println("reset reborn");
-        System.gc();
+        Runtime.getRuntime().gc();
         FinalizerTest.mReborn = FinalizerTest.mNothing;
         System.out.println("gc + finalize");
         System.gc();
diff --git a/test/067-preemptive-unpark/src/Main.java b/test/067-preemptive-unpark/src/Main.java
index a16219e..2c099b9 100644
--- a/test/067-preemptive-unpark/src/Main.java
+++ b/test/067-preemptive-unpark/src/Main.java
@@ -18,6 +18,7 @@
 
         System.out.println("GC'ing");
         System.gc();
+        System.runFinalization();
         System.gc();
 
         System.out.println("Asking thread to park");
diff --git a/test/072-precise-gc/src/Main.java b/test/072-precise-gc/src/Main.java
index e049221..f6cd8b1 100644
--- a/test/072-precise-gc/src/Main.java
+++ b/test/072-precise-gc/src/Main.java
@@ -88,7 +88,7 @@
          * Getting a zero result here isn't conclusive, but it's a strong
          * indicator that precise GC is having an impact.
          */
-        System.gc();
+        Runtime.getRuntime().gc();
 
         for (int i = 0; i < wrefs.length; i++) {
             if (wrefs[i].get() != null)
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index f85aa4b..78413f3 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -232,7 +232,7 @@
         for (int i = 0; i < MAX_DEPTH; i++)
             strong[i] = null;
 
-        System.gc();
+        Runtime.getRuntime().gc();
 
         for (int i = 0; i < MAX_DEPTH; i++) {
             if (weak[i].get() != null) {
diff --git a/test/079-phantom/src/Main.java b/test/079-phantom/src/Main.java
index 9c459c9..c54bc0b 100644
--- a/test/079-phantom/src/Main.java
+++ b/test/079-phantom/src/Main.java
@@ -38,29 +38,29 @@
     public void run() {
         createBitmaps();
 
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(250);
 
         mBitmap2.drawAt(0, 0);
 
         System.out.println("nulling 1");
         mBitmap1 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         System.out.println("nulling 2");
         mBitmap2 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         System.out.println("nulling 3");
         mBitmap3 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         System.out.println("nulling 4");
         mBitmap4 = null;
-        System.gc();
+        Runtime.getRuntime().gc();
         sleep(500);
 
         Bitmap.shutDown();
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 4d6aca3..3307e50 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -61,19 +61,19 @@
 
     static void wideIdentityTest() {
         Foo foo = new Foo();
-        long i = 1;
+        long i = 0x200000001L;
         i += foo.wideIdent0(i);
         i += foo.wideIdent1(0,i);
         i += foo.wideIdent2(0,0,i);
         i += foo.wideIdent3(0,0,0,i);
         i += foo.wideIdent4(0,0,0,0,i);
         i += foo.wideIdent5(0,0,0,0,0,i);
-        if (i == 64) {
+        if (i == 0x8000000040L) {
             System.out.println("wideIdentityTest passes");
         }
         else {
-            System.out.println("wideIdentityTest fails: " + i +
-                               " (expecting 64)");
+            System.out.println("wideIdentityTest fails: 0x" + Long.toHexString(i) +
+                               " (expecting 0x8000000040)");
         }
     }
 
@@ -90,12 +90,25 @@
         foo.wideSetBar4(0,0,0,sum);
         sum += foo.wideGetBar5(1,2,3,4,5);
         foo.wideSetBar5(0,0,0,0,sum);
-        if (foo.wideGetBar0() == 39488) {
+        long result1 = foo.wideGetBar0();
+        long expected1 = 1234L << 5;
+        sum += foo.wideGetBar0();
+        foo.wideSetBar2i(0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar3i(0,0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar4i(0,0,0,sum);
+        sum += foo.wideGetBar0();
+        foo.wideSetBar5i(0,0,0,0,sum);
+        long result2 = foo.wideGetBar0();
+        long expected2 = 1234L << 9;
+        if (result1 == expected1 && result2 == expected2) {
             System.out.println("wideGetterSetterTest passes");
         }
         else {
             System.out.println("wideGetterSetterTest fails: " +
-                                foo.wideGetBar0() + " (expecting 39488)");
+                                "result1: " + result1 + " (expecting " + expected1 + "), " +
+                                "result2: " + result2 + " (expecting " + expected2 + ")");
         }
     }
 
@@ -172,7 +185,7 @@
     }
 
     static void b2302318Test() {
-        System.gc();
+        Runtime.getRuntime().gc();
 
         SpinThread slow = new SpinThread(Thread.MIN_PRIORITY);
         SpinThread fast1 = new SpinThread(Thread.NORM_PRIORITY);
@@ -188,7 +201,7 @@
         try {
             Thread.sleep(3000);
         } catch (InterruptedException ie) {/*ignore */}
-        System.gc();
+        Runtime.getRuntime().gc();
 
         System.out.println("b2302318 passes");
     }
@@ -8374,6 +8387,18 @@
     public void wideSetBar5(long a1, long a2, long a3, long a4, long a5) {
         lbar = a5;
     }
+    public void wideSetBar2i(int a1, long a2) {
+        lbar = a2;
+    }
+    public void wideSetBar3i(int a1, int a2, long a3) {
+        lbar = a3;
+    }
+    public void wideSetBar4i(int a1, int a2, int a3, long a4) {
+        lbar = a4;
+    }
+    public void wideSetBar5i(int a1, int a2, int a3, int a4, long a5) {
+        lbar = a5;
+    }
     public long wideGetBar0() {
         return lbar;
     }
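
[Editor's note, not part of the patch] A quick arithmetic check of the new expected constants: each "i += foo.wideIdentN(..., i)" round doubles i, so six rounds multiply the 64-bit seed by 64, and each getter/setter round appears to double lbar, so the four added *i rounds lift the old expected value of 39488 (1234 << 5) to 1234 << 9. A throwaway verification sketch:

    // WideExpectationSketch.java -- illustrative only.
    public class WideExpectationSketch {
        public static void main(String[] args) {
            long i = 0x200000001L;
            for (int round = 0; round < 6; round++) {
                i += i;                                    // each identity round doubles i
            }
            System.out.println(Long.toHexString(i));       // prints 8000000040

            System.out.println(1234L << 5);                // 39488, the old expected value
            System.out.println(1234L << 9);                // 631808, the new expected2
        }
    }
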
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index c99a7d5..e0a187a 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -170,7 +170,7 @@
                 ite.printStackTrace();
             }
         }
-        System.gc();
+        Runtime.getRuntime().gc();
         System.out.println("GC complete.");
     }
 }
diff --git a/test/103-string-append/run b/test/103-string-append/run
new file mode 100755
index 0000000..e27a622
--- /dev/null
+++ b/test/103-string-append/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2012 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# As this is a performance test, we always use the non-debug build.
+exec ${RUN} "${@/#libartd.so/libart.so}"
diff --git a/test/109-suspend-check/src/Main.java b/test/109-suspend-check/src/Main.java
index d92b9e5..ae10576 100644
--- a/test/109-suspend-check/src/Main.java
+++ b/test/109-suspend-check/src/Main.java
@@ -28,7 +28,7 @@
         doWhileLoop.start();
         garbage.start();
         for (int i = 0; i < TEST_TIME; i++) {
-          System.gc();
+          Runtime.getRuntime().gc();
           System.out.println(".");
           sleep(1000);
         }
diff --git a/test/Android.mk b/test/Android.mk
index 5187724..d716f9b 100644
--- a/test/Android.mk
+++ b/test/Android.mk
@@ -71,10 +71,11 @@
     LOCAL_NO_STANDARD_LIBRARIES := true
     LOCAL_MODULE_PATH := $(3)
     LOCAL_DEX_PREOPT_IMAGE := $(TARGET_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
     include $(BUILD_JAVA_LIBRARY)
-    ART_TEST_TARGET_DEX_FILES += $(3)/$$(LOCAL_MODULE).jar
+    ART_TEST_TARGET_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
   endif
 
   ifeq ($(ART_BUILD_HOST),true)
@@ -84,10 +85,11 @@
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_NO_STANDARD_LIBRARIES := true
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_OUT)
+    LOCAL_DEX_PREOPT := false
     LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common.mk
     LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
-    ART_TEST_HOST_DEX_FILES += $$(LOCAL_MODULE_PATH)/$$(LOCAL_MODULE).jar
+    ART_TEST_HOST_DEX_FILES += $$(LOCAL_INSTALLED_MODULE)
   endif
 endef
 $(foreach dir,$(TEST_DEX_DIRECTORIES), $(eval $(call build-art-test-dex,art-test-dex,$(dir),$(ART_NATIVETEST_OUT))))