Merge "Migrated dexdump from Dalvik (libdex) into Art (libart)"
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 29b3573..77f39c4 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -33,6 +33,16 @@
 ART_BUILD_TARGET_DEBUG ?= true
 ART_BUILD_HOST_NDEBUG ?= true
 ART_BUILD_HOST_DEBUG ?= true
+ART_BUILD_HOST_STATIC ?= true
+
+# Asan does not support static linkage
+ifdef SANITIZE_HOST
+  ART_BUILD_HOST_STATIC := false
+endif
+
+ifneq ($(HOST_OS),linux)
+  ART_BUILD_HOST_STATIC := false
+endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),false)
 $(info Disabling ART_BUILD_TARGET_NDEBUG)
@@ -46,6 +56,9 @@
 ifeq ($(ART_BUILD_HOST_DEBUG),false)
 $(info Disabling ART_BUILD_HOST_DEBUG)
 endif
+ifeq ($(ART_BUILD_HOST_STATIC),true)
+$(info Enabling ART_BUILD_HOST_STATIC)
+endif
 
 #
 # Used to enable JIT
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 7b03682..a251c92 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -28,6 +28,7 @@
 # $(5): target or host
 # $(6): ndebug or debug
 # $(7): value for LOCAL_MULTILIB (empty means default)
+# $(8): static or shared (empty means shared, applies only for host)
 define build-art-executable
   ifneq ($(5),target)
     ifneq ($(5),host)
@@ -42,11 +43,12 @@
 
   art_executable := $(1)
   art_source := $(2)
-  art_shared_libraries := $(3)
+  art_libraries := $(3)
   art_c_includes := $(4)
   art_target_or_host := $(5)
   art_ndebug_or_debug := $(6)
   art_multilib := $(7)
+  art_static_or_shared := $(8)
   art_out_binary_name :=
 
   include $(CLEAR_VARS)
@@ -54,8 +56,12 @@
   LOCAL_MODULE_TAGS := optional
   LOCAL_SRC_FILES := $$(art_source)
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime art/cmdline $$(art_c_includes)
-  LOCAL_SHARED_LIBRARIES += $$(art_shared_libraries)
-  LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
+
+  ifeq ($$(art_static_or_shared),static)
+    LOCAL_STATIC_LIBRARIES += $$(art_libraries)
+  else
+    LOCAL_SHARED_LIBRARIES += $$(art_libraries)
+  endif
 
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := $$(art_executable)
@@ -63,6 +69,10 @@
     LOCAL_MODULE := $$(art_executable)d
   endif
 
+  ifeq ($$(art_static_or_shared),static)
+    LOCAL_MODULE := $$(LOCAL_MODULE)s
+  endif
+
   LOCAL_CFLAGS := $(ART_EXECUTABLES_CFLAGS)
   # Mac OS linker doesn't understand --export-dynamic.
   ifneq ($$(HOST_OS)-$$(art_target_or_host),darwin-host)
@@ -84,12 +94,29 @@
       LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS)
     endif
     LOCAL_LDLIBS += -lpthread -ldl
+    ifeq ($$(art_static_or_shared),static)
+      LOCAL_LDFLAGS += -static
+      # We need this because GC stress mode makes use of _Unwind_GetIP and _Unwind_Backtrace and
+      # the symbols are also defined in libgcc_eh.a(unwind-dw2.o)
+      # TODO: Having this is not ideal as it might obscure errors. Try to get rid of it.
+      LOCAL_LDFLAGS += -z muldefs
+      ifeq ($$(HOST_OS),linux)
+        LOCAL_LDLIBS += -lrt
+      endif
+    endif
+
   endif
 
+  # If dynamically linked, add libart by default. Statically linked executables
+  # need to specify it in art_libraries to ensure proper ordering.
   ifeq ($$(art_ndebug_or_debug),ndebug)
-    LOCAL_SHARED_LIBRARIES += libart
+    ifneq ($$(art_static_or_shared),static)
+      LOCAL_SHARED_LIBRARIES += libart
+    endif
   else # debug
-    LOCAL_SHARED_LIBRARIES += libartd
+    ifneq ($$(art_static_or_shared),static)
+      LOCAL_SHARED_LIBRARIES += libartd
+    endif
   endif
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
@@ -144,11 +171,12 @@
   # Clear out local variables now that we're done with them.
   art_executable :=
   art_source :=
-  art_shared_libraries :=
+  art_libraries :=
   art_c_includes :=
   art_target_or_host :=
   art_ndebug_or_debug :=
   art_multilib :=
+  art_static_or_shared :=
   art_out_binary_name :=
 
 endef
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index 2967e27..2e9f208 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -104,7 +104,9 @@
   options.push_back(
       std::make_pair("imageinstructionset",
                      reinterpret_cast<const void*>(GetInstructionSetString(instruction_set))));
-
+  // None of the command line tools need sig chain. If this changes we'll need
+  // to upgrade this option to a proper parameter.
+  options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
   if (!Runtime::Create(options, false)) {
     fprintf(stderr, "Failed to create runtime\n");
     return nullptr;
diff --git a/compiler/Android.mk b/compiler/Android.mk
index dd21406..3947078 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -181,6 +181,7 @@
 
 # $(1): target or host
 # $(2): ndebug or debug
+# $(3): static or shared (empty means shared, applies only for host)
 define build-libart-compiler
   ifneq ($(1),target)
     ifneq ($(1),host)
@@ -195,6 +196,7 @@
 
   art_target_or_host := $(1)
   art_ndebug_or_debug := $(2)
+  art_static_or_shared := $(3)
 
   include $(CLEAR_VARS)
   ifeq ($$(art_target_or_host),host)
@@ -203,17 +205,29 @@
   LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
   ifeq ($$(art_ndebug_or_debug),ndebug)
     LOCAL_MODULE := libart-compiler
-    LOCAL_SHARED_LIBRARIES += libart
+    ifeq ($$(art_static_or_shared), static)
+      LOCAL_STATIC_LIBRARIES += libart
+    else
+      LOCAL_SHARED_LIBRARIES += libart
+    endif
     ifeq ($$(art_target_or_host),target)
       LOCAL_FDO_SUPPORT := true
     endif
   else # debug
     LOCAL_MODULE := libartd-compiler
-    LOCAL_SHARED_LIBRARIES += libartd
+    ifeq ($$(art_static_or_shared), static)
+      LOCAL_STATIC_LIBRARIES += libartd
+    else
+      LOCAL_SHARED_LIBRARIES += libartd
+    endif
   endif
 
   LOCAL_MODULE_TAGS := optional
-  LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+  ifeq ($$(art_static_or_shared), static)
+    LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+  else
+    LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+  endif
 
   LOCAL_SRC_FILES := $$(LIBART_COMPILER_SRC_FILES)
 
@@ -237,6 +251,9 @@
     LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
     LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS)
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS)
+    ifeq ($$(art_static_or_shared),static)
+      LOCAL_LDFLAGS += -static
+    endif
     ifeq ($$(art_ndebug_or_debug),debug)
       LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS)
     else
@@ -254,9 +271,17 @@
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
   # Vixl assembly support for ARM64 targets.
   ifeq ($$(art_ndebug_or_debug),debug)
-    LOCAL_SHARED_LIBRARIES += libvixld
+    ifeq ($$(art_static_or_shared), static)
+      LOCAL_WHOLE_STATIC_LIBRARIES += libvixld
+    else
+      LOCAL_SHARED_LIBRARIES += libvixld
+    endif
   else
-    LOCAL_SHARED_LIBRARIES += libvixl
+    ifeq ($$(art_static_or_shared), static)
+      LOCAL_WHOLE_STATIC_LIBRARIES += libvixl
+    else
+      LOCAL_SHARED_LIBRARIES += libvixl
+    endif
   endif
 
   LOCAL_NATIVE_COVERAGE := $(ART_COVERAGE)
@@ -267,7 +292,11 @@
     include $(BUILD_SHARED_LIBRARY)
   else # host
     LOCAL_MULTILIB := both
-    include $(BUILD_HOST_SHARED_LIBRARY)
+    ifeq ($$(art_static_or_shared), static)
+      include $(BUILD_HOST_STATIC_LIBRARY)
+    else
+      include $(BUILD_HOST_SHARED_LIBRARY)
+    endif
   endif
 
   ifeq ($$(art_target_or_host),target)
@@ -278,20 +307,38 @@
     endif
   else # host
     ifeq ($$(art_ndebug_or_debug),debug)
-      $(HOST_OUT_EXECUTABLES)/dex2oatd: $$(LOCAL_INSTALLED_MODULE)
+      ifeq ($$(art_static_or_shared),static)
+        $(HOST_OUT_EXECUTABLES)/dex2oatds: $$(LOCAL_INSTALLED_MODULE)
+      else
+        $(HOST_OUT_EXECUTABLES)/dex2oatd: $$(LOCAL_INSTALLED_MODULE)
+      endif
     else
-      $(HOST_OUT_EXECUTABLES)/dex2oat: $$(LOCAL_INSTALLED_MODULE)
+      ifeq ($$(art_static_or_shared),static)
+        $(HOST_OUT_EXECUTABLES)/dex2oats: $$(LOCAL_INSTALLED_MODULE)
+      else
+        $(HOST_OUT_EXECUTABLES)/dex2oat: $$(LOCAL_INSTALLED_MODULE)
+      endif
     endif
   endif
 
+  # Clear locally defined variables.
+  art_target_or_host :=
+  art_ndebug_or_debug :=
+  art_static_or_shared :=
 endef
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-libart-compiler,host,ndebug))
+  ifeq ($(ART_BUILD_HOST_STATIC),true)
+    $(eval $(call build-libart-compiler,host,ndebug,static))
+  endif
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-libart-compiler,host,debug))
+  ifeq ($(ART_BUILD_HOST_STATIC),true)
+    $(eval $(call build-libart-compiler,host,debug,static))
+  endif
 endif
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
   $(eval $(call build-libart-compiler,target,ndebug))
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index b828fcf..d215662 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -108,6 +108,13 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
+// TODO: When heap reference poisoning works with all compilers in use, get rid of this.
+#define TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK() \
+  if (kPoisonHeapReferences && GetCompilerKind() == Compiler::kQuick) { \
+    printf("WARNING: TEST DISABLED FOR HEAP REFERENCE POISONING WITH QUICK\n"); \
+    return; \
+  }
+
 // TODO: When non-PIC works with all compilers in use, get rid of this.
 #define TEST_DISABLED_FOR_NON_PIC_COMPILING_WITH_OPTIMIZING() \
   if (GetCompilerKind() == Compiler::kOptimizing) { \
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index da12d8e..853980d 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -24,6 +24,7 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "mips_lir.h"
@@ -285,12 +286,25 @@
   RegStorage check_reg = AllocPtrSizeTemp();
   RegStorage new_sp = AllocPtrSizeTemp();
   const RegStorage rs_sp = TargetPtrReg(kSp);
+  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(target);
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
+  bool generate_explicit_stack_overflow_check = large_frame ||
+    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
+
   if (!skip_overflow_check) {
-    // Load stack limit.
-    if (cu_->target64) {
-      LoadWordDisp(TargetPtrReg(kSelf), Thread::StackEndOffset<8>().Int32Value(), check_reg);
+    if (generate_explicit_stack_overflow_check) {
+      // Load stack limit.
+      if (cu_->target64) {
+        LoadWordDisp(TargetPtrReg(kSelf), Thread::StackEndOffset<8>().Int32Value(), check_reg);
+      } else {
+        Load32Disp(TargetPtrReg(kSelf), Thread::StackEndOffset<4>().Int32Value(), check_reg);
+      }
     } else {
-      Load32Disp(TargetPtrReg(kSelf), Thread::StackEndOffset<4>().Int32Value(), check_reg);
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      Load32Disp(rs_sp, -kStackOverflowReservedUsableBytes, rs_rZERO);
+      MarkPossibleStackOverflowException();
     }
   }
   // Spill core callee saves.
@@ -298,7 +312,7 @@
   // NOTE: promotion of FP regs currently unsupported, thus no FP spill.
   DCHECK_EQ(num_fp_spills_, 0);
   const int frame_sub = frame_size_ - spill_count * ptr_size;
-  if (!skip_overflow_check) {
+  if (!skip_overflow_check && generate_explicit_stack_overflow_check) {
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
       StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace)
@@ -329,6 +343,8 @@
     OpRegCopy(rs_sp, new_sp);  // Establish stack.
     cfi_.AdjustCFAOffset(frame_sub);
   } else {
+    // Here if skip_overflow_check or doing implicit stack overflow check.
+    // Just make room on the stack for the frame now.
     OpRegImm(kOpSub, rs_sp, frame_sub);
     cfi_.AdjustCFAOffset(frame_sub);
   }
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index 713264e..43fbcbd 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -79,6 +79,7 @@
   OVERRIDE;
   LIR* CheckSuspendUsingLoad() OVERRIDE;
   RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
+  void ForceImplicitNullCheck(RegStorage reg, int opt_flags, bool is_wide);
   LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
                     VolatileKind is_volatile) OVERRIDE;
   LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale,
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index f5ad7c7..1099303 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -691,6 +691,9 @@
     reg_len = AllocTemp();
     // Get len.
     Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags, false);
   }
   // reg_ptr -> array data.
   OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
@@ -781,6 +784,9 @@
     // NOTE: max live temps(4) here.
     // Get len.
     Load32Disp(rl_array.reg, len_offset, reg_len);
+    MarkPossibleNullPointerException(opt_flags);
+  } else {
+    ForceImplicitNullCheck(rl_array.reg, opt_flags, false);
   }
   // reg_ptr -> array data.
   OpRegImm(kOpAdd, reg_ptr, data_offset);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 4c0bd83..b098bc2 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -791,6 +791,7 @@
   RegStorage reg_ptr = TargetReg(kArg0);
   OpRegRegImm(kOpAdd, reg_ptr, r_base, displacement);
   RegStorage r_tgt = LoadHelper(kQuickA64Load);
+  ForceImplicitNullCheck(reg_ptr, 0, true);  // is_wide = true
   LIR *ret = OpReg(kOpBlx, r_tgt);
   RegStorage reg_ret;
   if (cu_->target64) {
@@ -813,6 +814,7 @@
   LockCallTemps();  // Using fixed registers.
   RegStorage temp_ptr = AllocTemp();
   OpRegRegImm(kOpAdd, temp_ptr, r_base, displacement);
+  ForceImplicitNullCheck(temp_ptr, 0, true);  // is_wide = true
   RegStorage temp_value = AllocTempWide();
   OpRegCopyWide(temp_value, r_src);
   if (cu_->target64) {
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 95c61cd..37e5804 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -21,7 +21,9 @@
 #include "base/logging.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
+#include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "mips_lir.h"
 
 namespace art {
@@ -830,6 +832,22 @@
   return res;
 }
 
+void MipsMir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags, bool is_wide) {
+  if (cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
+    if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
+      return;
+    }
+    // Force an implicit null check by performing a memory operation (load) from the given
+    // register with offset 0.  This will cause a signal if the register contains 0 (null).
+    LIR* load = Load32Disp(reg, LOWORD_OFFSET, rs_rZERO);
+    MarkSafepointPC(load);
+    if (is_wide) {
+      load = Load32Disp(reg, HIWORD_OFFSET, rs_rZERO);
+      MarkSafepointPC(load);
+    }
+  }
+}
+
 LIR* MipsMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest, OpSize size,
                                VolatileKind is_volatile) {
   if (UNLIKELY(is_volatile == kVolatile && (size == k64 || size == kDouble))
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 28c485a..39496a4 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -653,6 +653,12 @@
                                        uint32_t method_idx,
                                        jobject class_loader,
                                        const DexFile& dex_file) const {
+  if (kPoisonHeapReferences) {
+    VLOG(compiler) << "Skipping method : " << PrettyMethod(method_idx, dex_file)
+                   << "  Reason = Quick does not support heap poisoning.";
+    return nullptr;
+  }
+
   // TODO: check method fingerprint here to determine appropriate backend type.  Until then, use
   // build default.
   CompilerDriver* driver = GetCompilerDriver();
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index ba03f5a..b358f4f 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -146,7 +146,7 @@
 }
 
 TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
-  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -192,6 +192,7 @@
 };
 
 TEST_F(CompilerDriverMethodsTest, Selection) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
   Thread* self = Thread::Current();
   jobject class_loader;
   {
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 85fd696..953dfcb 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -138,7 +138,7 @@
     FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     // Check handle scope offset is within frame
     CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
-    // Note this LoadRef() doesn't need heap poisoning since its from the ArtMethod.
+    // Note this LoadRef() doesn't need heap unpoisoning since it's from the ArtMethod.
     // Note this LoadRef() does not include read barrier. It will be handled below.
     __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
                mr_conv->MethodRegister(), ArtMethod::DeclaringClassOffset(), false);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 54155db..732630d 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -327,109 +327,118 @@
     return;
   }
 
-  for (size_t idx = 0; idx < code_item.tries_size_; ++idx) {
-    const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item, idx);
-    uint32_t try_start = try_item->start_addr_;
-    uint32_t try_end = try_start + try_item->insn_count_;
+  // Iterate over all blocks, find those covered by some TryItem and:
+  //   (a) split edges which enter/exit the try range,
+  //   (b) create TryBoundary instructions in the new blocks,
+  //   (c) link the new blocks to corresponding exception handlers.
+  // We cannot iterate only over blocks in `branch_targets_` because switch-case
+  // blocks share the same dex_pc.
+  for (size_t block_id = 1, e = graph_->GetBlocks().Size(); block_id < e; ++block_id) {
+    HBasicBlock* try_block = graph_->GetBlocks().Get(block_id);
 
-    // Iterate over all blocks in the dex pc range of the TryItem and:
-    //   (a) split edges which enter/exit the try range,
-    //   (b) create TryBoundary instructions in the new blocks,
-    //   (c) link the new blocks to corresponding exception handlers.
-    for (uint32_t inner_pc = try_start; inner_pc < try_end; ++inner_pc) {
-      HBasicBlock* try_block = FindBlockStartingAt(inner_pc);
-      if (try_block == nullptr) {
-        continue;
-      }
+    // Iteration starts from 1 to skip the entry block.
+    DCHECK_NE(try_block, entry_block_);
+    // Exit block has not yet been added to the graph at this point.
+    DCHECK_NE(try_block, exit_block_);
+    // TryBoundary blocks are added at the end of the list and not iterated over.
+    DCHECK(!try_block->IsSingleTryBoundary());
 
-      if (try_block->IsCatchBlock()) {
-        // Catch blocks are always considered an entry point into the TryItem in
-        // order to avoid splitting exceptional edges (they might not have been
-        // created yet). We separate the move-exception (if present) from the
-        // rest of the block and insert a TryBoundary after it, creating a
-        // landing pad for the exceptional edges.
-        HInstruction* first_insn = try_block->GetFirstInstruction();
-        HInstruction* split_position = nullptr;
-        if (first_insn->IsLoadException()) {
-          // Catch block starts with a LoadException. Split the block after the
-          // StoreLocal that must come after the load.
-          DCHECK(first_insn->GetNext()->IsStoreLocal());
-          split_position = first_insn->GetNext()->GetNext();
-        } else {
-          // Catch block does not obtain the exception. Split at the beginning
-          // to create an empty catch block.
-          split_position = first_insn;
-        }
-        DCHECK(split_position != nullptr);
-        HBasicBlock* catch_block = try_block;
-        try_block = catch_block->SplitBefore(split_position);
-        SplitTryBoundaryEdge(catch_block, try_block, HTryBoundary::kEntry, code_item, *try_item);
+    // Find the TryItem for this block.
+    int32_t try_item_idx = DexFile::FindTryItem(code_item, try_block->GetDexPc());
+    if (try_item_idx == -1) {
+      continue;
+    }
+    const DexFile::TryItem& try_item = *DexFile::GetTryItems(code_item, try_item_idx);
+    uint32_t try_start = try_item.start_addr_;
+    uint32_t try_end = try_start + try_item.insn_count_;
+
+    if (try_block->IsCatchBlock()) {
+      // Catch blocks are always considered an entry point into the TryItem in
+      // order to avoid splitting exceptional edges (they might not have been
+      // created yet). We separate the move-exception (if present) from the
+      // rest of the block and insert a TryBoundary after it, creating a
+      // landing pad for the exceptional edges.
+      HInstruction* first_insn = try_block->GetFirstInstruction();
+      HInstruction* split_position = nullptr;
+      if (first_insn->IsLoadException()) {
+        // Catch block starts with a LoadException. Split the block after the
+        // StoreLocal that must come after the load.
+        DCHECK(first_insn->GetNext()->IsStoreLocal());
+        split_position = first_insn->GetNext()->GetNext();
       } else {
-        // For non-catch blocks, find predecessors which are not covered by the
-        // same TryItem range. Such edges enter the try block and will have
-        // a TryBoundary inserted.
-        for (size_t i = 0; i < try_block->GetPredecessors().Size(); ++i) {
-          HBasicBlock* predecessor = try_block->GetPredecessors().Get(i);
-          if (predecessor->IsSingleTryBoundary()) {
-            // The edge was already split because of an exit from a neighbouring
-            // TryItem. We split it again and insert an entry point.
-            if (kIsDebugBuild) {
-              HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary();
-              DCHECK(!last_insn->IsEntry());
-              DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block);
-              DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i));
-              DCHECK(!IsBlockInPcRange(predecessor->GetSinglePredecessor(), try_start, try_end));
-            }
-          } else if (!IsBlockInPcRange(predecessor, try_start, try_end)) {
-            // This is an entry point into the TryItem and the edge has not been
-            // split yet. That means that `predecessor` is not in a TryItem, or
-            // it is in a different TryItem and we happened to iterate over this
-            // block first. We split the edge and insert an entry point.
-          } else {
-            // Not an edge on the boundary of the try block.
-            continue;
-          }
-          SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, *try_item);
-        }
+        // Catch block does not obtain the exception. Split at the beginning
+        // to create an empty catch block.
+        split_position = first_insn;
       }
-
-      // Find successors which are not covered by the same TryItem range. Such
-      // edges exit the try block and will have a TryBoundary inserted.
-      for (size_t i = 0; i < try_block->GetSuccessors().Size(); ++i) {
-        HBasicBlock* successor = try_block->GetSuccessors().Get(i);
-        if (successor->IsCatchBlock()) {
-          // A catch block is always considered an entry point into its TryItem.
-          // We therefore assume this is an exit point, regardless of whether
-          // the catch block is in a different TryItem or not.
-        } else if (successor->IsSingleTryBoundary()) {
-          // The edge was already split because of an entry into a neighbouring
-          // TryItem. We split it again and insert an exit.
+      DCHECK(split_position != nullptr);
+      HBasicBlock* catch_block = try_block;
+      try_block = catch_block->SplitBefore(split_position);
+      SplitTryBoundaryEdge(catch_block, try_block, HTryBoundary::kEntry, code_item, try_item);
+    } else {
+      // For non-catch blocks, find predecessors which are not covered by the
+      // same TryItem range. Such edges enter the try block and will have
+      // a TryBoundary inserted.
+      for (size_t i = 0; i < try_block->GetPredecessors().Size(); ++i) {
+        HBasicBlock* predecessor = try_block->GetPredecessors().Get(i);
+        if (predecessor->IsSingleTryBoundary()) {
+          // The edge was already split because of an exit from a neighbouring
+          // TryItem. We split it again and insert an entry point.
           if (kIsDebugBuild) {
-            HTryBoundary* last_insn = successor->GetLastInstruction()->AsTryBoundary();
-            DCHECK_EQ(try_block, successor->GetSinglePredecessor());
-            DCHECK(last_insn->IsEntry());
-            DCHECK(!IsBlockInPcRange(last_insn->GetNormalFlowSuccessor(), try_start, try_end));
+            HTryBoundary* last_insn = predecessor->GetLastInstruction()->AsTryBoundary();
+            DCHECK(!last_insn->IsEntry());
+            DCHECK_EQ(last_insn->GetNormalFlowSuccessor(), try_block);
+            DCHECK(try_block->IsFirstIndexOfPredecessor(predecessor, i));
+            DCHECK(!IsBlockInPcRange(predecessor->GetSinglePredecessor(), try_start, try_end));
           }
-        } else if (!IsBlockInPcRange(successor, try_start, try_end)) {
-          // This is an exit out of the TryItem and the edge has not been split
-          // yet. That means that either `successor` is not in a TryItem, or it
-          // is in a different TryItem and we happened to iterate over this
-          // block first. We split the edge and insert an exit.
-          HInstruction* last_instruction = try_block->GetLastInstruction();
-          if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
-            DCHECK_EQ(successor, exit_block_);
-            // Control flow exits the try block with a Return(Void). Because
-            // splitting the edge would invalidate the invariant that Return
-            // always jumps to Exit, we move the Return outside the try block.
-            successor = try_block->SplitBefore(last_instruction);
-          }
+        } else if (!IsBlockInPcRange(predecessor, try_start, try_end)) {
+          // This is an entry point into the TryItem and the edge has not been
+          // split yet. That means that `predecessor` is not in a TryItem, or
+          // it is in a different TryItem and we happened to iterate over this
+          // block first. We split the edge and insert an entry point.
         } else {
           // Not an edge on the boundary of the try block.
           continue;
         }
-        SplitTryBoundaryEdge(try_block, successor, HTryBoundary::kExit, code_item, *try_item);
+        SplitTryBoundaryEdge(predecessor, try_block, HTryBoundary::kEntry, code_item, try_item);
       }
     }
+
+    // Find successors which are not covered by the same TryItem range. Such
+    // edges exit the try block and will have a TryBoundary inserted.
+    for (size_t i = 0; i < try_block->GetSuccessors().Size(); ++i) {
+      HBasicBlock* successor = try_block->GetSuccessors().Get(i);
+      if (successor->IsCatchBlock()) {
+        // A catch block is always considered an entry point into its TryItem.
+        // We therefore assume this is an exit point, regardless of whether
+        // the catch block is in a different TryItem or not.
+      } else if (successor->IsSingleTryBoundary()) {
+        // The edge was already split because of an entry into a neighbouring
+        // TryItem. We split it again and insert an exit.
+        if (kIsDebugBuild) {
+          HTryBoundary* last_insn = successor->GetLastInstruction()->AsTryBoundary();
+          DCHECK_EQ(try_block, successor->GetSinglePredecessor());
+          DCHECK(last_insn->IsEntry());
+          DCHECK(!IsBlockInPcRange(last_insn->GetNormalFlowSuccessor(), try_start, try_end));
+        }
+      } else if (!IsBlockInPcRange(successor, try_start, try_end)) {
+        // This is an exit out of the TryItem and the edge has not been split
+        // yet. That means that either `successor` is not in a TryItem, or it
+        // is in a different TryItem and we happened to iterate over this
+        // block first. We split the edge and insert an exit.
+        HInstruction* last_instruction = try_block->GetLastInstruction();
+        if (last_instruction->IsReturn() || last_instruction->IsReturnVoid()) {
+          DCHECK_EQ(successor, exit_block_);
+          // Control flow exits the try block with a Return(Void). Because
+          // splitting the edge would invalidate the invariant that Return
+          // always jumps to Exit, we move the Return outside the try block.
+          successor = try_block->SplitBefore(last_instruction);
+        }
+      } else {
+        // Not an edge on the boundary of the try block.
+        continue;
+      }
+      SplitTryBoundaryEdge(try_block, successor, HTryBoundary::kExit, code_item, try_item);
+    }
   }
 }
 
@@ -563,11 +572,10 @@
         uint32_t target = dex_pc + table.GetEntryAt(i + offset);
         FindOrCreateBlockStartingAt(target);
 
-        // The next case gets its own block.
-        if (i < num_entries) {
-          block = new (arena_) HBasicBlock(graph_, target);
-          branch_targets_.Put(table.GetDexPcForIndex(i), block);
-        }
+        // Create a block for the switch-case logic. The block gets the dex_pc
+        // of the SWITCH instruction because it is part of its semantics.
+        block = new (arena_) HBasicBlock(graph_, dex_pc);
+        branch_targets_.Put(table.GetDexPcForIndex(i), block);
       }
 
       // Fall-through. Add a block if there is more code afterwards.
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4cecd61..eb63b49 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -294,6 +294,12 @@
     allocated_registers_.Add(location);
   }
 
+  bool HasAllocatedRegister(bool is_core, int reg) const {
+    return is_core
+        ? allocated_registers_.ContainsCoreRegister(reg)
+        : allocated_registers_.ContainsFloatingPointRegister(reg);
+  }
+
   void AllocateLocations(HInstruction* instruction);
 
   // Tells whether the stack frame of the compiled method is
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 39c316f..e3683ef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -332,8 +332,6 @@
 };
 
 #undef __
-
-#undef __
 #define __ down_cast<ArmAssembler*>(GetAssembler())->
 
 inline Condition ARMCondition(IfCondition cond) {
@@ -1383,6 +1381,7 @@
   DCHECK(receiver.IsRegister());
   __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
       kArmWordSize).Int32Value();
@@ -1422,6 +1421,7 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
   uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
       kArmWordSize).Int32Value();
@@ -2778,6 +2778,8 @@
 void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(),
                           instruction,
                           instruction->GetDexPc(),
@@ -2797,6 +2799,8 @@
 void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(),
                           instruction,
                           instruction->GetDexPc(),
@@ -3030,10 +3034,12 @@
   bool generate_volatile = field_info.IsVolatile()
       && is_wide
       && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
   // Temporary registers for the write barrier.
   // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-    locations->AddTemp(Location::RequiresRegister());
+  if (needs_write_barrier) {
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   } else if (generate_volatile) {
     // Arm encoding have some additional constraints for ldrexd/strexd:
@@ -3066,6 +3072,8 @@
   bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
   Primitive::Type field_type = field_info.GetFieldType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
@@ -3086,7 +3094,18 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      __ StoreToOffset(kStoreWord, value.AsRegister<Register>(), base, offset);
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(field_type, Primitive::kPrimNot);
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        __ Mov(temp, value.AsRegister<Register>());
+        __ PoisonHeapReference(temp);
+        __ StoreToOffset(kStoreWord, temp, base, offset);
+      } else {
+        __ StoreToOffset(kStoreWord, value.AsRegister<Register>(), base, offset);
+      }
       break;
     }
 
@@ -3265,6 +3284,10 @@
   if (is_volatile) {
     GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
+
+  if (field_type == Primitive::kPrimNot) {
+    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+  }
 }
 
 void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
@@ -3352,8 +3375,9 @@
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
+  Primitive::Type type = instruction->GetType();
 
-  switch (instruction->GetType()) {
+  switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       Register out = locations->Out().AsRegister<Register>();
@@ -3470,10 +3494,15 @@
     }
 
     case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
   codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+  if (type == Primitive::kPrimNot) {
+    Register out = locations->Out().AsRegister<Register>();
+    __ MaybeUnpoisonHeapReference(out);
+  }
 }
 
 void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
@@ -3501,7 +3530,7 @@
 
     if (needs_write_barrier) {
       // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -3552,14 +3581,25 @@
       if (!needs_runtime_call) {
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
         Register value = locations->InAt(2).AsRegister<Register>();
+        Register source = value;
+        if (kPoisonHeapReferences && needs_write_barrier) {
+          // Note that in the case where `value` is a null reference,
+          // we do not enter this block, as a null reference does not
+          // need poisoning.
+          DCHECK_EQ(value_type, Primitive::kPrimNot);
+          Register temp = locations->GetTemp(0).AsRegister<Register>();
+          __ Mov(temp, value);
+          __ PoisonHeapReference(temp);
+          source = temp;
+        }
         if (index.IsConstant()) {
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, value, obj, offset);
+          __ StoreToOffset(kStoreWord, source, obj, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
           __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
-          __ StoreToOffset(kStoreWord, value, IP, data_offset);
+          __ StoreToOffset(kStoreWord, source, IP, data_offset);
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
         if (needs_write_barrier) {
@@ -3570,6 +3610,8 @@
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
                                 instruction,
                                 instruction->GetDexPc(),
@@ -3994,6 +4036,7 @@
                       current_method,
                       ArtMethod::DexCacheResolvedTypesOffset().Int32Value());
     __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
+    __ MaybeUnpoisonHeapReference(out);
 
     SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -4053,7 +4096,9 @@
   __ LoadFromOffset(
       kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
+  __ MaybeUnpoisonHeapReference(out);
   __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
+  __ MaybeUnpoisonHeapReference(out);
   __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4111,6 +4156,7 @@
   }
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+  __ MaybeUnpoisonHeapReference(out);
   __ cmp(out, ShifterOperand(cls));
   if (instruction->IsClassFinal()) {
     // Classes must be equal for the instanceof to succeed.
@@ -4164,7 +4210,10 @@
   }
   // Compare the class of `obj` with `cls`.
   __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+  __ MaybeUnpoisonHeapReference(temp);
   __ cmp(temp, ShifterOperand(cls));
+  // The checkcast succeeds if the classes are equal (fast path).
+  // Otherwise, we need to go into the slow path to check the types.
   __ b(slow_path->GetEntryLabel(), NE);
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4316,5 +4365,8 @@
   LOG(FATAL) << "Unreachable";
 }
 
+#undef __
+#undef QUICK_ENTRY_POINT
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9b7124d..a9a95d3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1250,6 +1250,7 @@
 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
                                                    const FieldInfo& field_info) {
   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+  Primitive::Type field_type = field_info.GetFieldType();
   BlockPoolsScope block_pools(GetVIXLAssembler());
 
   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
@@ -1260,15 +1261,19 @@
       // NB: LoadAcquire will record the pc info if needed.
       codegen_->LoadAcquire(instruction, OutputCPURegister(instruction), field);
     } else {
-      codegen_->Load(field_info.GetFieldType(), OutputCPURegister(instruction), field);
+      codegen_->Load(field_type, OutputCPURegister(instruction), field);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
       // For IRIW sequential consistency kLoadAny is not sufficient.
       GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
     }
   } else {
-    codegen_->Load(field_info.GetFieldType(), OutputCPURegister(instruction), field);
+    codegen_->Load(field_type, OutputCPURegister(instruction), field);
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
+
+  if (field_type == Primitive::kPrimNot) {
+    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+  }
 }
 
 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
@@ -1290,23 +1295,38 @@
 
   Register obj = InputRegisterAt(instruction, 0);
   CPURegister value = InputCPURegisterAt(instruction, 1);
+  CPURegister source = value;
   Offset offset = field_info.GetFieldOffset();
   Primitive::Type field_type = field_info.GetFieldType();
   bool use_acquire_release = codegen_->GetInstructionSetFeatures().PreferAcquireRelease();
 
-  if (field_info.IsVolatile()) {
-    if (use_acquire_release) {
-      codegen_->StoreRelease(field_type, value, HeapOperand(obj, offset));
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
-    } else {
-      GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
-      codegen_->Store(field_type, value, HeapOperand(obj, offset));
-      codegen_->MaybeRecordImplicitNullCheck(instruction);
-      GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+  {
+    // We use a block to end the scratch scope before the write barrier, thus
+    // freeing the temporary registers so they can be used in `MarkGCCard`.
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+
+    if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
+      DCHECK(value.IsW());
+      Register temp = temps.AcquireW();
+      __ Mov(temp, value.W());
+      GetAssembler()->PoisonHeapReference(temp.W());
+      source = temp;
     }
-  } else {
-    codegen_->Store(field_type, value, HeapOperand(obj, offset));
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+    if (field_info.IsVolatile()) {
+      if (use_acquire_release) {
+        codegen_->StoreRelease(field_type, source, HeapOperand(obj, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      } else {
+        GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+        codegen_->Store(field_type, source, HeapOperand(obj, offset));
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+      }
+    } else {
+      codegen_->Store(field_type, source, HeapOperand(obj, offset));
+      codegen_->MaybeRecordImplicitNullCheck(instruction);
+    }
   }
 
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
@@ -1464,6 +1484,10 @@
 
   codegen_->Load(type, OutputCPURegister(instruction), source);
   codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+  if (type == Primitive::kPrimNot) {
+    GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+  }
 }
 
 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
@@ -1506,12 +1530,15 @@
   bool needs_runtime_call = locations->WillCall();
 
   if (needs_runtime_call) {
+    // Note: if heap poisoning is enabled, pAputObject takes care
+    // of poisoning the reference.
     codegen_->InvokeRuntime(
         QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc(), nullptr);
     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
   } else {
     Register obj = InputRegisterAt(instruction, 0);
     CPURegister value = InputCPURegisterAt(instruction, 2);
+    CPURegister source = value;
     Location index = locations->InAt(1);
     size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
     MemOperand destination = HeapOperand(obj);
@@ -1522,6 +1549,14 @@
       // freeing the temporary registers so they can be used in `MarkGCCard`.
       UseScratchRegisterScope temps(masm);
 
+      if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
+        DCHECK(value.IsW());
+        Register temp = temps.AcquireW();
+        __ Mov(temp, value.W());
+        GetAssembler()->PoisonHeapReference(temp.W());
+        source = temp;
+      }
+
       if (index.IsConstant()) {
         offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
         destination = HeapOperand(obj, offset);
@@ -1532,7 +1567,7 @@
         destination = HeapOperand(temp, offset);
       }
 
-      codegen_->Store(value_type, value, destination);
+      codegen_->Store(value_type, source, destination);
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     }
     if (CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue())) {
@@ -1585,7 +1620,10 @@
   }
   // Compare the class of `obj` with `cls`.
   __ Ldr(obj_cls, HeapOperand(obj, mirror::Object::ClassOffset()));
+  GetAssembler()->MaybeUnpoisonHeapReference(obj_cls.W());
   __ Cmp(obj_cls, cls);
+  // The checkcast succeeds if the classes are equal (fast path).
+  // Otherwise, we need to go into the slow path to check the types.
   __ B(ne, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -2152,6 +2190,7 @@
 
   // Compare the class of `obj` with `cls`.
   __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset()));
+  GetAssembler()->MaybeUnpoisonHeapReference(out.W());
   __ Cmp(out, cls);
   if (instruction->IsClassFinal()) {
     // Classes must be equal for the instanceof to succeed.
@@ -2225,6 +2264,7 @@
     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetImtEntryAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -2350,6 +2390,7 @@
   DCHECK(receiver.IsRegister());
   __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp, MemOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
@@ -2379,6 +2420,7 @@
     DCHECK(cls->CanCallRuntime());
     __ Ldr(out, MemOperand(current_method, ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
     __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+    GetAssembler()->MaybeUnpoisonHeapReference(out.W());
 
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -2428,7 +2470,9 @@
   Register current_method = InputRegisterAt(load, 0);
   __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   __ Ldr(out, HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
+  GetAssembler()->MaybeUnpoisonHeapReference(out.W());
   __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+  GetAssembler()->MaybeUnpoisonHeapReference(out.W());
   __ Cbz(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -2563,6 +2607,8 @@
   Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
   DCHECK(type_index.Is(w0));
   __ Mov(type_index, instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(
       GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
@@ -2586,6 +2632,8 @@
   Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
   DCHECK(type_index.Is(w0));
   __ Mov(type_index, instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(
       GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(),
       instruction,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 931d751..262b234 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1315,9 +1315,11 @@
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  // temp = object->GetClass();
   DCHECK(receiver.IsRegister());
   __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -1354,7 +1356,8 @@
   } else {
     __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
   }
-    codegen_->MaybeRecordImplicitNullCheck(invoke);
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -3001,6 +3004,8 @@
 void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
   InvokeRuntimeCallingConvention calling_convention;
   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint())));
 
   codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -3021,6 +3026,8 @@
   InvokeRuntimeCallingConvention calling_convention;
   __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
 
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint())));
 
   codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -3397,6 +3404,10 @@
   if (is_volatile) {
     GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
+
+  if (field_type == Primitive::kPrimNot) {
+    __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+  }
 }
 
 void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) {
@@ -3420,9 +3431,9 @@
   } else {
     locations->SetInAt(1, Location::RequiresRegister());
   }
-  // Temporary registers for the write barrier.
   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-    locations->AddTemp(Location::RequiresRegister());
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     // Ensure the card is in a byte register.
     locations->AddTemp(Location::RegisterLocation(ECX));
   } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
@@ -3447,6 +3458,8 @@
   bool is_volatile = field_info.IsVolatile();
   Primitive::Type field_type = field_info.GetFieldType();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   if (is_volatile) {
     GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
@@ -3467,7 +3480,18 @@
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      __ movl(Address(base, offset), value.AsRegister<Register>());
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as the reference does not
+        // need poisoning.
+        DCHECK_EQ(field_type, Primitive::kPrimNot);
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        __ movl(temp, value.AsRegister<Register>());
+        __ PoisonHeapReference(temp);
+        __ movl(Address(base, offset), temp);
+      } else {
+        __ movl(Address(base, offset), value.AsRegister<Register>());
+      }
       break;
     }
 
@@ -3508,7 +3532,7 @@
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+  if (needs_write_barrier) {
     Register temp = locations->GetTemp(0).AsRegister<Register>();
     Register card = locations->GetTemp(1).AsRegister<Register>();
     codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null);
@@ -3737,6 +3761,11 @@
   if (type != Primitive::kPrimLong) {
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
+
+  if (type == Primitive::kPrimNot) {
+    Register out = locations->Out().AsRegister<Register>();
+    __ MaybeUnpoisonHeapReference(out);
+  }
 }
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
@@ -3776,9 +3805,9 @@
     } else {
       locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
     }
-    // Temporary registers for the write barrier.
     if (needs_write_barrier) {
-      locations->AddTemp(Location::RequiresRegister());
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
       // Ensure the card is in a byte register.
       locations->AddTemp(Location::RegisterLocation(ECX));
     }
@@ -3852,21 +3881,43 @@
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
           if (value.IsRegister()) {
-            __ movl(Address(obj, offset), value.AsRegister<Register>());
+            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
+              Register temp = locations->GetTemp(0).AsRegister<Register>();
+              __ movl(temp, value.AsRegister<Register>());
+              __ PoisonHeapReference(temp);
+              __ movl(Address(obj, offset), temp);
+            } else {
+              __ movl(Address(obj, offset), value.AsRegister<Register>());
+            }
           } else {
             DCHECK(value.IsConstant()) << value;
-            __ movl(Address(obj, offset),
-                    Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+            // `value_type == Primitive::kPrimNot` implies `v == 0`.
+            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
+            // Note: if heap poisoning is enabled, no need to poison
+            // (negate) `v` if it is a reference, as it would be null.
+            __ movl(Address(obj, offset), Immediate(v));
           }
         } else {
           DCHECK(index.IsRegister()) << index;
           if (value.IsRegister()) {
-            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                    value.AsRegister<Register>());
+            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
+              Register temp = locations->GetTemp(0).AsRegister<Register>();
+              __ movl(temp, value.AsRegister<Register>());
+              __ PoisonHeapReference(temp);
+              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), temp);
+            } else {
+              __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
+                      value.AsRegister<Register>());
+            }
           } else {
             DCHECK(value.IsConstant()) << value;
-            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
-                    Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
+            int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+            // `value_type == Primitive::kPrimNot` implies `v == 0`.
+            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
+            // Note: if heap poisoning is enabled, no need to poison
+            // (negate) `v` if it is a reference, as it would be null.
+            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset), Immediate(v));
           }
         }
         codegen_->MaybeRecordImplicitNullCheck(instruction);
@@ -3880,6 +3931,8 @@
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
         DCHECK(!codegen_->IsLeafMethod());
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAputObject)));
         codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
       }
@@ -4343,6 +4396,7 @@
     __ movl(out, Address(
         current_method, ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
     __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+    __ MaybeUnpoisonHeapReference(out);
 
     SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -4400,7 +4454,9 @@
   Register current_method = locations->InAt(0).AsRegister<Register>();
   __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+  __ MaybeUnpoisonHeapReference(out);
   __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+  __ MaybeUnpoisonHeapReference(out);
   __ testl(out, out);
   __ j(kEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
@@ -4455,8 +4511,9 @@
     __ testl(obj, obj);
     __ j(kEqual, &zero);
   }
-  __ movl(out, Address(obj, class_offset));
   // Compare the class of `obj` with `cls`.
+  __ movl(out, Address(obj, class_offset));
+  __ MaybeUnpoisonHeapReference(out);
   if (cls.IsRegister()) {
     __ cmpl(out, cls.AsRegister<Register>());
   } else {
@@ -4514,16 +4571,17 @@
     __ testl(obj, obj);
     __ j(kEqual, slow_path->GetExitLabel());
   }
-
-  __ movl(temp, Address(obj, class_offset));
   // Compare the class of `obj` with `cls`.
+  __ movl(temp, Address(obj, class_offset));
+  __ MaybeUnpoisonHeapReference(temp);
   if (cls.IsRegister()) {
     __ cmpl(temp, cls.AsRegister<Register>());
   } else {
     DCHECK(cls.IsStackSlot()) << cls;
     __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
   }
-
+  // The checkcast succeeds if the classes are equal (fast path).
+  // Otherwise, we need to go into the slow path to check the types.
   __ j(kNotEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4687,5 +4745,7 @@
   LOG(FATAL) << "Unreachable";
 }
 
+#undef __
+
 }  // namespace x86
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index afffbe2..c9d19c8 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -213,7 +213,7 @@
     __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
     __ gs()->call(Address::Absolute((do_clinit_
           ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage)
-          : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true));
+          : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)), true));
     RecordPcInfo(codegen, at_, dex_pc_);
 
     Location out = locations->Out();
@@ -1429,6 +1429,7 @@
   DCHECK(receiver.IsRegister());
   __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -1466,6 +1467,7 @@
     __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetImtEntryAt(method_offset);
   __ movq(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
@@ -3060,6 +3062,8 @@
   InvokeRuntimeCallingConvention calling_convention;
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
                            instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   __ gs()->call(
       Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
 
@@ -3082,6 +3086,8 @@
   codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
                            instruction->GetTypeIndex());
 
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   __ gs()->call(
       Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
 
@@ -3270,6 +3276,10 @@
   if (is_volatile) {
     GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
+
+  if (field_type == Primitive::kPrimNot) {
+    __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
+  }
 }
 
 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
@@ -3278,8 +3288,9 @@
 
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  Primitive::Type field_type = field_info.GetFieldType();
   bool needs_write_barrier =
-      CodeGenerator::StoreNeedsWriteBarrier(field_info.GetFieldType(), instruction->InputAt(1));
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
@@ -3289,7 +3300,10 @@
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
+  } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
+    // Temporary register for the reference poisoning.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -3337,9 +3351,20 @@
     case Primitive::kPrimNot: {
       if (value.IsConstant()) {
         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        // `field_type == Primitive::kPrimNot` implies `v == 0`.
+        DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
+        // Note: if heap poisoning is enabled, no need to poison
+        // (negate) `v` if it is a reference, as it would be null.
         __ movl(Address(base, offset), Immediate(v));
       } else {
-        __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+        if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
+          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+          __ movl(temp, value.AsRegister<CpuRegister>());
+          __ PoisonHeapReference(temp);
+          __ movl(Address(base, offset), temp);
+        } else {
+          __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+        }
       }
       break;
     }
@@ -3483,8 +3508,9 @@
   LocationSummary* locations = instruction->GetLocations();
   CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
+  Primitive::Type type = instruction->GetType();
 
-  switch (instruction->GetType()) {
+  switch (type) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
       CpuRegister out = locations->Out().AsRegister<CpuRegister>();
@@ -3585,10 +3611,15 @@
     }
 
     case Primitive::kPrimVoid:
-      LOG(FATAL) << "Unreachable type " << instruction->GetType();
+      LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
   codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+  if (type == Primitive::kPrimNot) {
+    CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+    __ MaybeUnpoisonHeapReference(out);
+  }
 }
 
 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
@@ -3620,7 +3651,7 @@
 
     if (needs_write_barrier) {
       // Temporary registers for the write barrier.
-      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
       locations->AddTemp(Location::RequiresRegister());
     }
   }
@@ -3696,20 +3727,42 @@
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
           if (value.IsRegister()) {
-            __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
+            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
+              CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+              __ movl(temp, value.AsRegister<CpuRegister>());
+              __ PoisonHeapReference(temp);
+              __ movl(Address(obj, offset), temp);
+            } else {
+              __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
+            }
           } else {
             DCHECK(value.IsConstant()) << value;
             int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+            // `value_type == Primitive::kPrimNot` implies `v == 0`.
+            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
+            // Note: if heap poisoning is enabled, no need to poison
+            // (negate) `v` if it is a reference, as it would be null.
             __ movl(Address(obj, offset), Immediate(v));
           }
         } else {
           DCHECK(index.IsRegister()) << index;
           if (value.IsRegister()) {
-            __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
-                    value.AsRegister<CpuRegister>());
+            if (kPoisonHeapReferences && value_type == Primitive::kPrimNot) {
+              CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+              __ movl(temp, value.AsRegister<CpuRegister>());
+              __ PoisonHeapReference(temp);
+              __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset), temp);
+            } else {
+              __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
+                      value.AsRegister<CpuRegister>());
+            }
           } else {
             DCHECK(value.IsConstant()) << value;
             int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+            // `value_type == Primitive::kPrimNot` implies `v == 0`.
+            DCHECK((value_type != Primitive::kPrimNot) || (v == 0));
+            // Note: if heap poisoning is enabled, no need to poison
+            // (negate) `v` if it is a reference, as it would be null.
             __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
                     Immediate(v));
           }
@@ -3724,6 +3777,8 @@
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
+        // Note: if heap poisoning is enabled, pAputObject takes care
+        // of poisoning the reference.
         __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject),
                                         true));
         DCHECK(!codegen_->IsLeafMethod());
@@ -3876,7 +3931,7 @@
       Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
   __ movq(temp, object);
   __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
-  __ movb(Address(temp, card, TIMES_1, 0),  card);
+  __ movb(Address(temp, card, TIMES_1, 0), card);
   if (value_can_be_null) {
     __ Bind(&is_null);
   }
@@ -4187,6 +4242,8 @@
     __ movl(out, Address(
         current_method, ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
     __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+    __ MaybeUnpoisonHeapReference(out);
+
     SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
     codegen_->AddSlowPath(slow_path);
@@ -4234,7 +4291,9 @@
   CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
   __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
+  __ MaybeUnpoisonHeapReference(out);
   __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+  __ MaybeUnpoisonHeapReference(out);
   __ testl(out, out);
   __ j(kEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
@@ -4293,6 +4352,7 @@
   }
   // Compare the class of `obj` with `cls`.
   __ movl(out, Address(obj, class_offset));
+  __ MaybeUnpoisonHeapReference(out);
   if (cls.IsRegister()) {
     __ cmpl(out, cls.AsRegister<CpuRegister>());
   } else {
@@ -4351,13 +4411,15 @@
   }
   // Compare the class of `obj` with `cls`.
   __ movl(temp, Address(obj, class_offset));
+  __ MaybeUnpoisonHeapReference(temp);
   if (cls.IsRegister()) {
     __ cmpl(temp, cls.AsRegister<CpuRegister>());
   } else {
     DCHECK(cls.IsStackSlot()) << cls;
     __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
   }
-  // Classes must be equal for the checkcast to succeed.
+  // The checkcast succeeds if the classes are equal (fast path).
+  // Otherwise, we need to go into the slow path to check the types.
   __ j(kNotEqual, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -4576,5 +4638,7 @@
   return Address::RIP(fixup);
 }
 
+#undef __
+
 }  // namespace x86_64
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 71fadfb..b4dbf75 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -510,6 +510,11 @@
   if (is_volatile) {
     __ dmb(ISH);
   }
+
+  if (type == Primitive::kPrimNot) {
+    Register trg = locations->Out().AsRegister<Register>();
+    __ MaybeUnpoisonHeapReference(trg);
+  }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -649,8 +654,15 @@
       __ strd(value_lo, Address(IP));
     }
   } else {
-    value =  locations->InAt(3).AsRegister<Register>();
-    __ str(value, Address(base, offset));
+    value = locations->InAt(3).AsRegister<Register>();
+    Register source = value;
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
+      __ Mov(temp, value);
+      __ PoisonHeapReference(temp);
+      source = temp;
+    }
+    __ str(source, Address(base, offset));
   }
 
   if (is_volatile) {
@@ -738,6 +750,11 @@
 
   __ add(tmp_ptr, base, ShifterOperand(offset));
 
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->PoisonHeapReference(expected_lo);
+    codegen->GetAssembler()->PoisonHeapReference(value_lo);
+  }
+
   // do {
   //   tmp = [r_ptr] - expected;
   // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
@@ -761,6 +778,11 @@
   __ rsbs(out, tmp_lo, ShifterOperand(1));
   __ it(CC);
   __ mov(out, ShifterOperand(0), CC);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
+    codegen->GetAssembler()->UnpoisonHeapReference(expected_lo);
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1047,5 +1069,9 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
+#undef UNIMPLEMENTED_INTRINSIC
+
+#undef __
+
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 8bcb88b..78ac167 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -683,6 +683,11 @@
   } else {
     codegen->Load(type, trg, mem_op);
   }
+
+  if (type == Primitive::kPrimNot) {
+    DCHECK(trg.IsW());
+    codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg);
+  }
 }
 
 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -781,22 +786,37 @@
   Register base = WRegisterFrom(locations->InAt(1));    // Object pointer.
   Register offset = XRegisterFrom(locations->InAt(2));  // Long offset.
   Register value = RegisterFrom(locations->InAt(3), type);
+  Register source = value;
   bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
 
   MemOperand mem_op(base.X(), offset);
 
-  if (is_volatile || is_ordered) {
-    if (use_acquire_release) {
-      codegen->StoreRelease(type, value, mem_op);
-    } else {
-      __ Dmb(InnerShareable, BarrierAll);
-      codegen->Store(type, value, mem_op);
-      if (is_volatile) {
-        __ Dmb(InnerShareable, BarrierReads);
-      }
+  {
+    // We use a block to end the scratch scope before the write barrier, thus
+    // freeing the temporary registers so they can be used in `MarkGCCard`.
+    UseScratchRegisterScope temps(masm);
+
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      DCHECK(value.IsW());
+      Register temp = temps.AcquireW();
+      __ Mov(temp.W(), value.W());
+      codegen->GetAssembler()->PoisonHeapReference(temp.W());
+      source = temp;
     }
-  } else {
-    codegen->Store(type, value, mem_op);
+
+    if (is_volatile || is_ordered) {
+      if (use_acquire_release) {
+        codegen->StoreRelease(type, source, mem_op);
+      } else {
+        __ Dmb(InnerShareable, BarrierAll);
+        codegen->Store(type, source, mem_op);
+        if (is_volatile) {
+          __ Dmb(InnerShareable, BarrierReads);
+        }
+      }
+    } else {
+      codegen->Store(type, source, mem_op);
+    }
   }
 
   if (type == Primitive::kPrimNot) {
@@ -872,6 +892,11 @@
 
   __ Add(tmp_ptr, base.X(), Operand(offset));
 
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->PoisonHeapReference(expected);
+    codegen->GetAssembler()->PoisonHeapReference(value);
+  }
+
   // do {
   //   tmp_value = [tmp_ptr] - expected;
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
@@ -897,6 +922,11 @@
   }
   __ Bind(&exit_loop);
   __ Cset(out, eq);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->UnpoisonHeapReference(value);
+    codegen->GetAssembler()->UnpoisonHeapReference(expected);
+  }
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1173,5 +1203,9 @@
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
 
+#undef UNIMPLEMENTED_INTRINSIC
+
+#undef __
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index b04cc5c..0d6ca09 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1335,9 +1335,14 @@
 
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimNot:
-      __ movl(output.AsRegister<Register>(), Address(base, offset, ScaleFactor::TIMES_1, 0));
+    case Primitive::kPrimNot: {
+      Register output_reg = output.AsRegister<Register>();
+      __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      if (type == Primitive::kPrimNot) {
+        __ MaybeUnpoisonHeapReference(output_reg);
+      }
       break;
+    }
 
     case Primitive::kPrimLong: {
         Register output_lo = output.AsRegisterPairLow<Register>();
@@ -1436,7 +1441,7 @@
   locations->SetInAt(3, Location::RequiresRegister());
   if (type == Primitive::kPrimNot) {
     // Need temp registers for card-marking.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     // Ensure the value is in a byte register.
     locations->AddTemp(Location::RegisterLocation(ECX));
   } else if (type == Primitive::kPrimLong && is_volatile) {
@@ -1498,6 +1503,11 @@
       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
       __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
     }
+  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    Register temp = locations->GetTemp(0).AsRegister<Register>();
+    __ movl(temp, value_loc.AsRegister<Register>());
+    __ PoisonHeapReference(temp);
+    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
   } else {
     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
   }
@@ -1604,7 +1614,8 @@
     __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
   } else {
     // Integer or object.
-    DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+    Register expected = locations->InAt(3).AsRegister<Register>();
+    DCHECK_EQ(expected, EAX);
     Register value = locations->InAt(4).AsRegister<Register>();
     if (type == Primitive::kPrimNot) {
       // Mark card for object assuming new value is stored.
@@ -1614,6 +1625,11 @@
                           base,
                           value,
                           value_can_be_null);
+
+      if (kPoisonHeapReferences) {
+        __ PoisonHeapReference(expected);
+        __ PoisonHeapReference(value);
+      }
     }
 
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
@@ -1625,6 +1641,13 @@
   // Convert ZF into the boolean result.
   __ setb(kZero, out.AsRegister<Register>());
   __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    Register value = locations->InAt(4).AsRegister<Register>();
+    __ UnpoisonHeapReference(value);
+    // Do not unpoison the reference contained in register `expected`,
+    // as it is the same as register `out`.
+  }
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1734,5 +1757,9 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
+#undef UNIMPLEMENTED_INTRINSIC
+
+#undef __
+
 }  // namespace x86
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 888c7b8..ea342e9 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1251,6 +1251,9 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
       __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+      if (type == Primitive::kPrimNot) {
+        __ MaybeUnpoisonHeapReference(trg);
+      }
       break;
 
     case Primitive::kPrimLong:
@@ -1325,7 +1328,7 @@
   locations->SetInAt(3, Location::RequiresRegister());
   if (type == Primitive::kPrimNot) {
     // Need temp registers for card-marking.
-    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -1369,6 +1372,11 @@
 
   if (type == Primitive::kPrimLong) {
     __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
+  } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+    __ movl(temp, value);
+    __ PoisonHeapReference(temp);
+    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
   } else {
     __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
   }
@@ -1471,6 +1479,11 @@
                           base,
                           value,
                           value_can_be_null);
+
+      if (kPoisonHeapReferences) {
+        __ PoisonHeapReference(expected);
+        __ PoisonHeapReference(value);
+      }
     }
 
     __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
@@ -1482,6 +1495,11 @@
   // Convert ZF into the boolean result.
   __ setcc(kZero, out);
   __ movzxb(out, out);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(value);
+    __ UnpoisonHeapReference(expected);
+  }
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1598,5 +1616,9 @@
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
+#undef UNIMPLEMENTED_INTRINSIC
+
+#undef __
+
 }  // namespace x86_64
 }  // namespace art
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index f41a782..4b25046 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -427,11 +427,11 @@
     }
   }
 
-  bool ContainsCoreRegister(uint32_t id) {
+  bool ContainsCoreRegister(uint32_t id) const {
     return Contains(core_registers_, id);
   }
 
-  bool ContainsFloatingPointRegister(uint32_t id) {
+  bool ContainsFloatingPointRegister(uint32_t id) const {
     return Contains(floating_point_registers_, id);
   }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 04c3963..2cffe02 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -3709,6 +3709,7 @@
   uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
   uint16_t GetTypeIndex() const { return type_index_; }
   bool IsReferrersClass() const { return is_referrers_class_; }
+  bool CanBeNull() const OVERRIDE { return false; }
 
   bool NeedsEnvironment() const OVERRIDE {
     // Will call runtime and load the class if the class is not loaded yet.
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 3d6606b..68316c2 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -99,6 +99,12 @@
     return;
   }
 
+  if (!obj->CanBeNull() || obj->IsNullConstant()) {
+    // Null check is dead code and will be removed by DCE.
+    return;
+  }
+  DCHECK(!obj->IsLoadClass()) << "We should not replace HLoadClass instructions";
+
   // We only need to bound the type if we have uses in the relevant block.
   // So start with null and create the HBoundType lazily, only if it's needed.
   HBoundType* bound_type = nullptr;
@@ -160,6 +166,7 @@
     // input.
     return;
   }
+  DCHECK(!obj->IsLoadClass()) << "We should not replace HLoadClass instructions";
   for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
     HInstruction* user = it.Current()->GetUser();
     if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 7b23d02..72ddabe 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -209,6 +209,8 @@
     Location temp = locations->GetTemp(i);
     if (temp.IsRegister() || temp.IsFpuRegister()) {
       BlockRegister(temp, position, position + 1);
+      // Ensure that an explicit temporary register is marked as being allocated.
+      codegen_->AddAllocatedRegister(temp);
     } else {
       DCHECK(temp.IsUnallocated());
       switch (temp.GetPolicy()) {
@@ -507,6 +509,11 @@
       }
 
       if (current->HasRegister()) {
+        if (kIsDebugBuild && log_fatal_on_failure && !current->IsFixed()) {
+          // Only check when an error is fatal. Only test code asks for non-fatal failures,
+          // and test code may not properly provide the right information to the code generator.
+          CHECK(codegen.HasAllocatedRegister(processing_core_registers, current->GetRegister()));
+        }
         BitVector* liveness_of_register = liveness_of_values.Get(current->GetRegister());
         for (size_t j = it.CurrentRange()->GetStart(); j < it.CurrentRange()->GetEnd(); ++j) {
           if (liveness_of_register->IsBitSet(j)) {
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 0086fe8..09d2270 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -529,13 +529,13 @@
 }
 
 void ArmAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool poison_reference) {
+                           bool unpoison_reference) {
   ArmManagedRegister dst = mdest.AsArm();
   CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst;
   LoadFromOffset(kLoadWord, dst.AsCoreRegister(),
                  base.AsArm().AsCoreRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && poison_reference) {
-    rsb(dst.AsCoreRegister(), dst.AsCoreRegister(), ShifterOperand(0));
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dst.AsCoreRegister());
   }
 }
 
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index f8ca48e..5d85d11 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -774,7 +774,7 @@
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool poison_reference) OVERRIDE;
+               bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
 
@@ -857,6 +857,27 @@
      return r >= R8;
   }
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(Register reg) {
+    // reg = -reg.
+    rsb(reg, reg, ShifterOperand(0));
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(Register reg) {
+    // reg = -reg.
+    rsb(reg, reg, ShifterOperand(0));
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
  protected:
   // Returns whether or not the given register is used for passing parameters.
   static int RegisterCompare(const Register* reg1, const Register* reg2) {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 077579c..0e17512 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -298,15 +298,15 @@
 }
 
 void Arm64Assembler::LoadRef(ManagedRegister m_dst, ManagedRegister m_base, MemberOffset offs,
-                             bool poison_reference) {
+                             bool unpoison_reference) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
   Arm64ManagedRegister base = m_base.AsArm64();
   CHECK(dst.IsXRegister() && base.IsXRegister());
   LoadWFromOffset(kLoadWord, dst.AsOverlappingWRegister(), base.AsXRegister(),
                   offs.Int32Value());
-  if (kPoisonHeapReferences && poison_reference) {
+  if (unpoison_reference) {
     WRegister ref_reg = dst.AsOverlappingWRegister();
-    ___ Neg(reg_w(ref_reg), vixl::Operand(reg_w(ref_reg)));
+    MaybeUnpoisonHeapReference(reg_w(ref_reg));
   }
 }
 
@@ -784,5 +784,25 @@
   cfi_.DefCFAOffset(frame_size);
 }
 
+void Arm64Assembler::PoisonHeapReference(vixl::Register reg) {
+  DCHECK(reg.IsW());
+  // reg = -reg.
+  ___ Neg(reg, vixl::Operand(reg));
+}
+
+void Arm64Assembler::UnpoisonHeapReference(vixl::Register reg) {
+  DCHECK(reg.IsW());
+  // reg = -reg.
+  ___ Neg(reg, vixl::Operand(reg));
+}
+
+void Arm64Assembler::MaybeUnpoisonHeapReference(vixl::Register reg) {
+  if (kPoisonHeapReferences) {
+    UnpoisonHeapReference(reg);
+  }
+}
+
+#undef ___
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index db95537..05882a3 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -10,7 +10,7 @@
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
+ * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 
@@ -116,7 +116,7 @@
   void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size) OVERRIDE;
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
   void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool poison_reference) OVERRIDE;
+               bool unpoison_reference) OVERRIDE;
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
   void LoadRawPtrFromThread64(ManagedRegister dest, ThreadOffset<8> offs) OVERRIDE;
 
@@ -182,6 +182,17 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(vixl::Register reg);
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(vixl::Register reg);
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(vixl::Register reg);
+
  private:
   static vixl::Register reg_x(int code) {
     CHECK(code < kNumberOfXRegisters) << code;
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index ee2d594..3097cd5 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -441,9 +441,9 @@
   virtual void LoadFromThread64(ManagedRegister dest, ThreadOffset<8> src, size_t size);
 
   virtual void LoadRef(ManagedRegister dest, FrameOffset src) = 0;
-  // If poison_reference is true and kPoisonReference is true, then we negate the read reference.
+  // If unpoison_reference and kPoisonHeapReferences are both true, negate the read reference.
   virtual void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-                       bool poison_reference) = 0;
+                       bool unpoison_reference) = 0;
 
   virtual void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) = 0;
 
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index e55b461..c09dfcc 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -697,12 +697,12 @@
 }
 
 void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                            bool poison_reference) {
+                            bool unpoison_reference) {
   MipsManagedRegister dest = mdest.AsMips();
   CHECK(dest.IsCoreRegister() && dest.IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && poison_reference) {
+  if (kPoisonHeapReferences && unpoison_reference) {
     Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister());
   }
 }
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 7b0fc39..0d1b82c 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -192,7 +192,7 @@
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
   void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-               bool poison_reference) OVERRIDE;
+               bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 3333cd2..24ea9e2 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1242,12 +1242,12 @@
 }
 
 void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                              bool poison_reference) {
+                              bool unpoison_reference) {
   Mips64ManagedRegister dest = mdest.AsMips64();
   CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
   LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && poison_reference) {
+  if (kPoisonHeapReferences && unpoison_reference) {
     // TODO: review
     // Negate the 32-bit ref
     Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 88cc4bc..47b146a 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -265,7 +265,7 @@
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
   void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-               bool poison_reference) OVERRIDE;
+               bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 390d46e..fa85ada 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1910,12 +1910,12 @@
 }
 
 void X86Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                           bool poison_reference) {
+                           bool unpoison_reference) {
   X86ManagedRegister dest = mdest.AsX86();
   CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
   movl(dest.AsCpuRegister(), Address(base.AsX86().AsCpuRegister(), offs));
-  if (kPoisonHeapReferences && poison_reference) {
-    negl(dest.AsCpuRegister());
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
   }
 }
 
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 1c1c023..d1b4e1d 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -541,7 +541,7 @@
   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool poison_reference) OVERRIDE;
+               bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
 
@@ -616,6 +616,21 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(Register reg) { negl(reg); }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(Register reg) { negl(reg); }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
  private:
   inline void EmitUint8(uint8_t value);
   inline void EmitInt32(int32_t value);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ac95c71..f35f51c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2597,12 +2597,12 @@
 }
 
 void X86_64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
-                              bool poison_reference) {
+                              bool unpoison_reference) {
   X86_64ManagedRegister dest = mdest.AsX86_64();
   CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
   movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
-  if (kPoisonHeapReferences && poison_reference) {
-    negl(dest.AsCpuRegister());
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsCpuRegister());
   }
 }
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6b2b65d..61ffeab 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -669,7 +669,7 @@
   void LoadRef(ManagedRegister dest, FrameOffset  src) OVERRIDE;
 
   void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs,
-               bool poison_reference) OVERRIDE;
+               bool unpoison_reference) OVERRIDE;
 
   void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE;
 
@@ -767,6 +767,21 @@
   // Is the constant area empty? Return true if there are no literals in the constant area.
   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(CpuRegister reg) { negl(reg); }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(CpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
  private:
   void EmitUint8(uint8_t value);
   void EmitInt32(int32_t value);
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 321cd75..d27ee3e 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -38,16 +38,24 @@
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler,art/compiler,target,ndebug,$(dex2oat_target_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain,art/compiler,target,ndebug,$(dex2oat_target_arch)))
 endif
+
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler,art/compiler,target,debug,$(dex2oat_target_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain,art/compiler,target,debug,$(dex2oat_target_arch)))
 endif
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libart-compiler libsigchain libziparchive-host,art/compiler,host,ndebug,$(dex2oat_host_arch)))
+  ifeq ($(ART_BUILD_HOST_STATIC),true)
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart libart-compiler libart libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixl liblog libz libbacktrace libcutils libunwindbacktrace libutils libbase,art/compiler,host,ndebug,$(dex2oat_host_arch),static))
+  endif
 endif
+
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch)))
+  $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libcutils libartd-compiler libsigchain libziparchive-host,art/compiler,host,debug,$(dex2oat_host_arch)))
+  ifeq ($(ART_BUILD_HOST_STATIC),true)
+    $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd libartd-compiler libartd libziparchive-host libnativehelper libnativebridge libsigchain_dummy libvixld liblog libz libbacktrace libcutils libunwindbacktrace libutils libbase,art/compiler,host,debug,$(dex2oat_host_arch),static))
+  endif
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 74d5c0c..a4e74d4 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -975,6 +975,8 @@
       case kArm64:
       case kX86:
       case kX86_64:
+      case kMips:
+      case kMips64:
         implicit_null_checks = true;
         implicit_so_checks = true;
         break;
@@ -1038,10 +1040,6 @@
   bool OpenFile() {
     bool create_file = !oat_unstripped_.empty();  // as opposed to using open file descriptor
     if (create_file) {
-      // We're supposed to create this file. If the file already exists, it may be in use currently.
-      // We must not change the content of that file, then. So unlink it first.
-      unlink(oat_unstripped_.c_str());
-
       oat_file_.reset(OS::CreateEmptyFile(oat_unstripped_.c_str()));
       if (oat_location_.empty()) {
         oat_location_ = oat_filename_;
@@ -1129,6 +1127,9 @@
     if (!image_) {
       runtime_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr));
     }
+    // Disable libsigchain. We don't need it during compilation and it prevents us
+    // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT).
+    runtime_options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
 
     if (!CreateRuntime(runtime_options)) {
       return false;
diff --git a/patchoat/Android.mk b/patchoat/Android.mk
index 68ca923..8f9ffca 100644
--- a/patchoat/Android.mk
+++ b/patchoat/Android.mk
@@ -30,16 +30,16 @@
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
-  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils,art/compiler,target,ndebug,$(patchoat_arch)))
+  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils libsigchain,art/compiler,target,ndebug,$(patchoat_arch)))
 endif
 ifeq ($(ART_BUILD_TARGET_DEBUG),true)
-  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils,art/compiler,target,debug,$(patchoat_arch)))
+  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils libsigchain,art/compiler,target,debug,$(patchoat_arch)))
 endif
 
 # We always build patchoat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
-  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils,art/compiler,host,ndebug))
+  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils libsigchain,art/compiler,host,ndebug))
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
-  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils,art/compiler,host,debug))
+  $(eval $(call build-art-executable,patchoat,$(PATCHOAT_SRC_FILES),libcutils libsigchain,art/compiler,host,debug))
 endif
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 0401727..3a155be 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -138,6 +138,7 @@
   std::string img = "-Ximage:" + image_location;
   options.push_back(std::make_pair(img.c_str(), nullptr));
   options.push_back(std::make_pair("imageinstructionset", reinterpret_cast<const void*>(isa_name)));
+  options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
   if (!Runtime::Create(options, false)) {
     LOG(ERROR) << "Unable to initialize runtime";
     return false;
@@ -233,6 +234,7 @@
   std::string img = "-Ximage:" + image_location;
   options.push_back(std::make_pair(img.c_str(), nullptr));
   options.push_back(std::make_pair("imageinstructionset", reinterpret_cast<const void*>(isa_name)));
+  options.push_back(std::make_pair("-Xno-sig-chain", nullptr));
   if (!Runtime::Create(options, false)) {
     LOG(ERROR) << "Unable to initialize runtime";
     return false;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 19079cb..7f103a4 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -369,6 +369,7 @@
 
 # $(1): target or host
 # $(2): ndebug or debug
+# $(3): static or shared (empty means shared, applies only for host)
 define build-libart
   ifneq ($(1),target)
     ifneq ($(1),host)
@@ -383,6 +384,7 @@
 
   art_target_or_host := $(1)
   art_ndebug_or_debug := $(2)
+  art_static_or_shared := $(3)
 
   include $$(CLEAR_VARS)
   # Clang assembler has problem with macros in asm_support_x86.S, http://b/17443165,
@@ -403,7 +405,12 @@
   endif
 
   LOCAL_MODULE_TAGS := optional
-  LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+
+  ifeq ($$(art_static_or_shared),static)
+    LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+  else
+    LOCAL_MODULE_CLASS := SHARED_LIBRARIES
+  endif
 
   ifeq ($$(art_target_or_host),target)
     LOCAL_SRC_FILES := $$(LIBART_TARGET_SRC_FILES)
@@ -431,8 +438,11 @@
   LOCAL_LDFLAGS := $$(LIBART_LDFLAGS)
   ifeq ($$(art_target_or_host),target)
     LOCAL_LDFLAGS += $$(LIBART_TARGET_LDFLAGS)
-  else
+  else #host
     LOCAL_LDFLAGS += $$(LIBART_HOST_LDFLAGS)
+    ifeq ($$(art_static_or_shared),static)
+      LOCAL_LDFLAGS += -static
+    endif
   endif
   $$(foreach arch,$$(ART_TARGET_SUPPORTED_ARCH), \
     $$(eval LOCAL_LDFLAGS_$$(arch) := $$(LIBART_TARGET_LDFLAGS_$$(arch))))
@@ -467,8 +477,12 @@
   LOCAL_C_INCLUDES += art/sigchainlib
   LOCAL_C_INCLUDES += art
 
-  LOCAL_SHARED_LIBRARIES := libnativehelper libnativebridge libsigchain
-  LOCAL_SHARED_LIBRARIES += libbacktrace
+  ifeq ($$(art_static_or_shared),static)
+    LOCAL_STATIC_LIBRARIES := libnativehelper libnativebridge libsigchain_dummy libbacktrace
+  else
+    LOCAL_SHARED_LIBRARIES := libnativehelper libnativebridge libsigchain libbacktrace
+  endif
+
   ifeq ($$(art_target_or_host),target)
     LOCAL_SHARED_LIBRARIES += libdl
     # ZipArchive support, the order matters here to get all symbols.
@@ -478,9 +492,15 @@
     # For liblog, atrace, properties, ashmem, set_sched_policy and socket_peer_is_trusted.
     LOCAL_SHARED_LIBRARIES += libcutils
   else # host
-    LOCAL_SHARED_LIBRARIES += libziparchive-host libz-host
-    # For ashmem_create_region.
-    LOCAL_SHARED_LIBRARIES += libcutils
+    ifeq ($$(art_static_or_shared),static)
+      LOCAL_STATIC_LIBRARIES += libziparchive-host libz
+      # For ashmem_create_region.
+      LOCAL_STATIC_LIBRARIES += libcutils
+    else
+      LOCAL_SHARED_LIBRARIES += libziparchive-host libz-host
+      # For ashmem_create_region.
+      LOCAL_SHARED_LIBRARIES += libcutils
+    endif
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
@@ -499,7 +519,11 @@
     endif
     include $$(BUILD_SHARED_LIBRARY)
   else # host
-    include $$(BUILD_HOST_SHARED_LIBRARY)
+    ifeq ($$(art_static_or_shared),static)
+      include $$(BUILD_HOST_STATIC_LIBRARY)
+    else
+      include $$(BUILD_HOST_SHARED_LIBRARY)
+    endif
   endif
 
   # Clear locally defined variables.
@@ -508,15 +532,22 @@
   ENUM_OPERATOR_OUT_GEN :=
   art_target_or_host :=
   art_ndebug_or_debug :=
+  art_static_or_shared :=
 endef
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since
 # they are used to cross compile for the target.
 ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-libart,host,ndebug))
+  ifeq ($(ART_BUILD_HOST_STATIC),true)
+    $(eval $(call build-libart,host,ndebug,static))
+  endif
 endif
 ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-libart,host,debug))
+  ifeq ($(ART_BUILD_HOST_STATIC),true)
+    $(eval $(call build-libart,host,debug,static))
+  endif
 endif
 
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index abe495b..8ea78eb 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -17,11 +17,17 @@
 
 #include "fault_handler.h"
 #include <sys/ucontext.h>
+#include "art_method-inl.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "registers_mips.h"
+#include "thread.h"
+#include "thread-inl.h"
 
+extern "C" void art_quick_throw_stack_overflow();
+extern "C" void art_quick_throw_null_pointer_exception();
 
 //
 // Mips specific fault handler functions.
@@ -33,16 +39,52 @@
                                       void* context ATTRIBUTE_UNUSED) {
 }
 
-void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED,
-                                             void* context ATTRIBUTE_UNUSED,
-                                             ArtMethod** out_method ATTRIBUTE_UNUSED,
-                                             uintptr_t* out_return_pc ATTRIBUTE_UNUSED,
-                                             uintptr_t* out_sp ATTRIBUTE_UNUSED) {
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
+                                             ArtMethod** out_method,
+                                             uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  VLOG(signals) << "sp: " << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in a0.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);  // BVA addr
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+  } else {
+    // The method is at the top of the stack.
+    *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
+  }
+
+  // Work out the return PC.  This will be the address of the instruction
+  // following the faulting load/store instruction.
+
+  VLOG(signals) << "pc: " << std::hex
+      << static_cast<void*>(reinterpret_cast<uint8_t*>(sc->sc_pc));
+
+  *out_return_pc = sc->sc_pc + 4;
 }
 
 bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context ATTRIBUTE_UNUSED) {
-  return false;
+                                void* context) {
+  // The code that looks for the catch location needs to know the value of the
+  // PC at the point of call.  For Null checks we insert a GC map that is immediately after
+  // the load/store instruction that might cause the fault.
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+
+  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
 }
 
 bool SuspensionHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
@@ -50,8 +92,51 @@
   return false;
 }
 
-bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                  void* context ATTRIBUTE_UNUSED) {
-  return false;
+// Stack overflow fault handler.
+//
+// This checks that the fault address is equal to the current stack pointer
+// minus the overflow region size (16K typically). The instruction that
+// generates this signal is:
+//
+// lw zero, -16384(sp)
+//
+// It will fault if sp is inside the protected region on the stack.
+//
+// If we determine this is a stack overflow we need to move the stack pointer
+// to the overflow region below the protected region.
+
+bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
+
+  uintptr_t sp = sc->sc_regs[29];  // SP register
+  VLOG(signals) << "sp: " << std::hex << sp;
+
+  uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+    ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kMips);
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  VLOG(signals) << "Stack overflow found";
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  // The value of RA must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow).
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+
+  // The kernel will now return to the address in sc->sc_pc.
+  return true;
 }
 }       // namespace art
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 277c2b2..4abfcf1 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -17,11 +17,17 @@
 
 #include "fault_handler.h"
 #include <sys/ucontext.h>
+#include "art_method-inl.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "registers_mips64.h"
+#include "thread.h"
+#include "thread-inl.h"
 
+extern "C" void art_quick_throw_stack_overflow();
+extern "C" void art_quick_throw_null_pointer_exception();
 
 //
 // Mips64 specific fault handler functions.
@@ -33,16 +39,52 @@
                                       void* context ATTRIBUTE_UNUSED) {
 }
 
-void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED,
-                                             void* context ATTRIBUTE_UNUSED,
-                                             ArtMethod** out_method ATTRIBUTE_UNUSED,
-                                             uintptr_t* out_return_pc ATTRIBUTE_UNUSED,
-                                             uintptr_t* out_sp ATTRIBUTE_UNUSED) {
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
+                                             ArtMethod** out_method,
+                                             uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  *out_sp = static_cast<uintptr_t>(sc->sc_regs[29]);   // SP register
+  VLOG(signals) << "sp: " << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in a0.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);  // BVA addr
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kMips64));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<ArtMethod*>(sc->sc_regs[4]);  // A0 register
+  } else {
+    // The method is at the top of the stack.
+    *out_method = *reinterpret_cast<ArtMethod**>(*out_sp);
+  }
+
+  // Work out the return PC.  This will be the address of the instruction
+  // following the faulting load/store instruction.
+
+  VLOG(signals) << "pc: " << std::hex
+      << static_cast<void*>(reinterpret_cast<uint8_t*>(sc->sc_pc));
+
+  *out_return_pc = sc->sc_pc + 4;
 }
 
 bool NullPointerHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                void* context ATTRIBUTE_UNUSED) {
-  return false;
+                                void* context) {
+  // The code that looks for the catch location needs to know the value of the
+  // PC at the point of call.  For Null checks we insert a GC map that is immediately after
+  // the load/store instruction that might cause the fault.
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+
+  sc->sc_regs[31] = sc->sc_pc + 4;      // RA needs to point to gc map location
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
 }
 
 bool SuspensionHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
@@ -50,8 +92,51 @@
   return false;
 }
 
-bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                  void* context ATTRIBUTE_UNUSED) {
-  return false;
+// Stack overflow fault handler.
+//
+// This checks that the fault address is equal to the current stack pointer
+// minus the overflow region size (16K typically). The instruction that
+// generates this signal is:
+//
+// lw zero, -16384(sp)
+//
+// It will fault if sp is inside the protected region on the stack.
+//
+// If we determine this is a stack overflow we need to move the stack pointer
+// to the overflow region below the protected region.
+
+bool StackOverflowHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* info, void* context) {
+  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
+
+  uintptr_t sp = sc->sc_regs[29];  // SP register
+  VLOG(signals) << "sp: " << std::hex << sp;
+
+  uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);  // BVA addr
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+    ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kMips64);
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  VLOG(signals) << "Stack overflow found";
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  // The value of RA must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow).
+  sc->sc_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  sc->sc_regs[25] = sc->sc_pc;          // make sure T9 points to the function
+
+  // The kernel will now return to the address in sc->sc_pc.
+  return true;
 }
 }       // namespace art
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 0987c00..2332f97 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -174,13 +174,6 @@
   DISALLOW_COPY_AND_ASSIGN(CheckJniAbortCatcher);
 };
 
-// TODO: When heap reference poisoning works with the compiler, get rid of this.
-#define TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING() \
-  if (kPoisonHeapReferences) { \
-    printf("WARNING: TEST DISABLED FOR HEAP REFERENCE POISONING\n"); \
-    return; \
-  }
-
 // TODO: When read barrier works with the compiler, get rid of this.
 #define TEST_DISABLED_FOR_READ_BARRIER() \
   if (kUseReadBarrier) { \
diff --git a/runtime/os.h b/runtime/os.h
index 6248d5f..befe2e8 100644
--- a/runtime/os.h
+++ b/runtime/os.h
@@ -35,7 +35,8 @@
   // Open an existing file with read/write access.
   static File* OpenFileReadWrite(const char* name);
 
-  // Create an empty file with read/write access.
+  // Create an empty file with read/write access. This is a *new* file, that is, if the file
+  // already exists, it is *not* overwritten, but unlinked, and a new inode will be used.
   static File* CreateEmptyFile(const char* name);
 
   // Open a file with the specified open(2) flags.
diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc
index 2282789..675699d 100644
--- a/runtime/os_linux.cc
+++ b/runtime/os_linux.cc
@@ -36,6 +36,10 @@
 }
 
 File* OS::CreateEmptyFile(const char* name) {
+  // In case the file exists, unlink it so we get a new file. This is necessary as the previous
+  // file may be in use and must not be changed.
+  unlink(name);
+
   return OpenFileWithFlags(name, O_RDWR | O_CREAT | O_TRUNC);
 }
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 5e84df5..d08af71 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -257,6 +257,8 @@
           .IntoKey(M::ZygoteMaxFailedBoots)
       .Define("-Xno-dex-file-fallback")
           .IntoKey(M::NoDexFileFallback)
+      .Define("-Xno-sig-chain")
+          .IntoKey(M::NoSigChain)
       .Define("--cpu-abilist=_")
           .WithType<std::string>()
           .IntoKey(M::CpuAbiList)
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 6f17e7d..9707fb8 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -505,6 +505,7 @@
 };
 
 TEST_F(ReflectionTest, StaticMainMethod) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
   StackHandleScope<1> hs(soa.Self());
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 96c15ea..20e4149 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -191,6 +191,7 @@
       implicit_null_checks_(false),
       implicit_so_checks_(false),
       implicit_suspend_checks_(false),
+      no_sig_chain_(false),
       is_native_bridge_loaded_(false),
       zygote_max_failed_boots_(0),
       experimental_lambdas_(false) {
@@ -487,6 +488,8 @@
 bool Runtime::Start() {
   VLOG(startup) << "Runtime::Start entering";
 
+  CHECK(!no_sig_chain_) << "A started runtime should have sig chain enabled";
+
   // Restore main thread state to kNative as expected by native code.
   Thread* self = Thread::Current();
 
@@ -838,6 +841,8 @@
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
+  no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
+
   Split(runtime_options.GetOrDefault(Opt::CpuAbiList), ',', &cpu_abilist_);
 
   if (runtime_options.GetOrDefault(Opt::Interpret)) {
@@ -924,6 +929,8 @@
     case kX86:
     case kArm64:
     case kX86_64:
+    case kMips:
+    case kMips64:
       implicit_null_checks_ = true;
       // Installing stack protection does not play well with valgrind.
       implicit_so_checks_ = (RUNNING_ON_VALGRIND == 0);
@@ -933,33 +940,37 @@
       break;
   }
 
-  // Always initialize the signal chain so that any calls to sigaction get
-  // correctly routed to the next in the chain regardless of whether we
-  // have claimed the signal or not.
-  InitializeSignalChain();
+  if (!no_sig_chain_) {
+    // Dex2Oat's Runtime does not need the signal chain or the fault handler.
 
-  if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
-    fault_manager.Init();
+    // Initialize the signal chain so that any calls to sigaction get
+    // correctly routed to the next in the chain regardless of whether we
+    // have claimed the signal or not.
+    InitializeSignalChain();
 
-    // These need to be in a specific order.  The null point check handler must be
-    // after the suspend check and stack overflow check handlers.
-    //
-    // Note: the instances attach themselves to the fault manager and are handled by it. The manager
-    //       will delete the instance on Shutdown().
-    if (implicit_suspend_checks_) {
-      new SuspensionHandler(&fault_manager);
-    }
+    if (implicit_null_checks_ || implicit_so_checks_ || implicit_suspend_checks_) {
+      fault_manager.Init();
 
-    if (implicit_so_checks_) {
-      new StackOverflowHandler(&fault_manager);
-    }
+      // These need to be in a specific order.  The null pointer check handler must be
+      // after the suspend check and stack overflow check handlers.
+      //
+      // Note: the instances attach themselves to the fault manager and are handled by it. The manager
+      //       will delete the instance on Shutdown().
+      if (implicit_suspend_checks_) {
+        new SuspensionHandler(&fault_manager);
+      }
 
-    if (implicit_null_checks_) {
-      new NullPointerHandler(&fault_manager);
-    }
+      if (implicit_so_checks_) {
+        new StackOverflowHandler(&fault_manager);
+      }
 
-    if (kEnableJavaStackTraceHandler) {
-      new JavaStackTraceHandler(&fault_manager);
+      if (implicit_null_checks_) {
+        new NullPointerHandler(&fault_manager);
+      }
+
+      if (kEnableJavaStackTraceHandler) {
+        new JavaStackTraceHandler(&fault_manager);
+      }
     }
   }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 3cd7404..bcc7118 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -715,6 +715,11 @@
   bool implicit_so_checks_;         // StackOverflow checks are implicit.
   bool implicit_suspend_checks_;    // Thread suspension checks are implicit.
 
+  // Whether or not the sig chain (and implicitly the fault handler) should be
+  // disabled. Tools like dex2oat or patchoat don't need them. This enables
+  // building a statically linked version of dex2oat.
+  bool no_sig_chain_;
+
   // Whether or not a native bridge has been loaded.
   //
   // The native bridge allows running native code compiled for a foreign ISA. The way it works is,
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index fc527b5..dc4c0c7 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -79,10 +79,10 @@
 RUNTIME_OPTIONS_KEY (bool,                Relocate,                       kDefaultMustRelocate)
 RUNTIME_OPTIONS_KEY (bool,                Dex2Oat,                        true)
 RUNTIME_OPTIONS_KEY (bool,                ImageDex2Oat,                   true)
-                                                        // kPoisonHeapReferences currently works with
+                                                        // kUseReadBarrier currently works with
                                                         // the interpreter only.
                                                         // TODO: make it work with the compiler.
-RUNTIME_OPTIONS_KEY (bool,                Interpret,                      (kPoisonHeapReferences || kUseReadBarrier)) // -Xint
+RUNTIME_OPTIONS_KEY (bool,                Interpret,                      kUseReadBarrier) // -Xint
                                                         // Disable the compiler for CC (for now).
 RUNTIME_OPTIONS_KEY (XGcOption,           GcOption)  // -Xgc:
 RUNTIME_OPTIONS_KEY (gc::space::LargeObjectSpaceType, \
@@ -91,6 +91,7 @@
 RUNTIME_OPTIONS_KEY (BackgroundGcOption,  BackgroundGc)
 
 RUNTIME_OPTIONS_KEY (Unit,                DisableExplicitGC)
+RUNTIME_OPTIONS_KEY (Unit,                NoSigChain)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 8a8b455..92c9eb8 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -1023,12 +1023,21 @@
   }
   /* offset to array data table is a relative branch-style offset */
   array_data = insns + array_data_offset;
-  /* make sure the table is 32-bit aligned */
-  if ((reinterpret_cast<uintptr_t>(array_data) & 0x03) != 0) {
+  // Make sure the table is at an even dex pc, that is, 32-bit aligned.
+  if (!IsAligned<4>(array_data)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned array data table: at " << cur_offset
                                       << ", data offset " << array_data_offset;
     return false;
   }
+  // Make sure the array-data is marked as an opcode. This ensures that it was reached when
+  // traversing the code item linearly. It is an approximation for a by-spec padding value.
+  if (!insn_flags_[cur_offset + array_data_offset].IsOpcode()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "array data table at " << cur_offset
+                                      << ", data offset " << array_data_offset
+                                      << " not correctly visited, probably bad padding.";
+    return false;
+  }
+
   uint32_t value_width = array_data[1];
   uint32_t value_count = *reinterpret_cast<const uint32_t*>(&array_data[2]);
   uint32_t table_size = 4 + (value_width * value_count + 1) / 2;
@@ -1126,12 +1135,21 @@
   }
   /* offset to switch table is a relative branch-style offset */
   const uint16_t* switch_insns = insns + switch_offset;
-  /* make sure the table is 32-bit aligned */
-  if ((reinterpret_cast<uintptr_t>(switch_insns) & 0x03) != 0) {
+  // Make sure the table is at an even dex pc, that is, 32-bit aligned.
+  if (!IsAligned<4>(switch_insns)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned switch table: at " << cur_offset
                                       << ", switch offset " << switch_offset;
     return false;
   }
+  // Make sure the switch data is marked as an opcode. This ensures that it was reached when
+  // traversing the code item linearly. It is an approximation for a by-spec padding value.
+  if (!insn_flags_[cur_offset + switch_offset].IsOpcode()) {
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "switch table at " << cur_offset
+                                      << ", switch offset " << switch_offset
+                                      << " not correctly visited, probably bad padding.";
+    return false;
+  }
+
   uint32_t switch_count = switch_insns[1];
   int32_t keys_offset, targets_offset;
   uint16_t expected_signature;
diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk
index 11f44fe..b9e37a1 100644
--- a/sigchainlib/Android.mk
+++ b/sigchainlib/Android.mk
@@ -76,3 +76,19 @@
 LOCAL_LDLIBS = -ldl
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_STATIC_LIBRARY)
+
+# Create a dummy version of libsigchain which exposes the necessary symbols
+# but throws when called. This can be used to get static binaries which don't
+# need the real functionality of the sig chain but need to please the linker.
+include $(CLEAR_VARS)
+LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION)
+LOCAL_MODULE_TAGS := optional
+LOCAL_IS_HOST_MODULE := true
+LOCAL_CFLAGS += $(ART_HOST_CFLAGS)
+LOCAL_CLANG = $(ART_HOST_CLANG)
+LOCAL_SRC_FILES := sigchain_dummy.cc
+LOCAL_MODULE:= libsigchain_dummy
+LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
+LOCAL_LDLIBS = -ldl
+LOCAL_MULTILIB := both
+include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/test/510-checker-try-catch/smali/Builder.smali b/test/510-checker-try-catch/smali/Builder.smali
index 95708a2..4ea7b61 100644
--- a/test/510-checker-try-catch/smali/Builder.smali
+++ b/test/510-checker-try-catch/smali/Builder.smali
@@ -630,6 +630,165 @@
     goto :return
 .end method
 
+## CHECK-START: int Builder.testSwitchTryEnter(int, int, int, int) builder (after)
+
+## CHECK:  name             "B0"
+## CHECK:  successors       "<<BPSwitch0:B\d+>>"
+
+## CHECK:  name             "<<BPSwitch0>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BEnterTry2:B\d+>>" "<<BPSwitch1:B\d+>>"
+## CHECK:  If
+
+## CHECK:  name             "<<BPSwitch1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BOutside:B\d+>>" "<<BEnterTry1:B\d+>>"
+## CHECK:  If
+
+## CHECK:  name             "<<BTry1:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry1>>"
+## CHECK:  successors       "<<BTry2:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BTry2>>"
+## CHECK:  predecessors     "<<BEnterTry2>>" "<<BTry1>>"
+## CHECK:  successors       "<<BExitTry:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BOutside>>"
+## CHECK:  predecessors     "<<BPSwitch1>>" "<<BExitTry>>"
+## CHECK:  successors       "<<BCatchReturn:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BCatchReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry1>>" "<<BEnterTry2>>" "<<BExitTry>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Return
+
+## CHECK:  name             "<<BEnterTry1>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BTry1>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:entry
+
+## CHECK:  name             "<<BEnterTry2>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BTry2>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:entry
+
+## CHECK:  name             "<<BExitTry>>"
+## CHECK:  predecessors     "<<BTry2>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:exit
+
+.method public static testSwitchTryEnter(IIII)I
+    .registers 4
+
+    packed-switch p0, :pswitch_data
+
+    :try_start
+    div-int/2addr p0, p1
+
+    :pswitch1
+    div-int/2addr p0, p2
+    goto :pswitch2
+
+    :pswitch_data
+    .packed-switch 0x0
+        :pswitch1
+        :pswitch2
+    .end packed-switch
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :pswitch2
+    div-int/2addr p0, p3
+
+    :catch_all
+    return p0
+.end method
+
+## CHECK-START: int Builder.testSwitchTryExit(int, int, int, int) builder (after)
+
+## CHECK:  name             "B0"
+## CHECK:  successors       "<<BEnterTry:B\d+>>"
+
+## CHECK:  name             "<<BPSwitch0:B\d+>>"
+## CHECK:  predecessors     "<<BEnterTry>>"
+## CHECK:  successors       "<<BTry2:B\d+>>" "<<BPSwitch1:B\d+>>"
+## CHECK:  If
+
+## CHECK:  name             "<<BPSwitch1>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BExitTry1:B\d+>>" "<<BTry1:B\d+>>"
+## CHECK:  If
+
+## CHECK:  name             "<<BTry1>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BTry2>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BTry2>>"
+## CHECK:  predecessors     "<<BPSwitch0>>"
+## CHECK:  successors       "<<BExitTry2:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BOutside:B\d+>>"
+## CHECK:  predecessors     "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  successors       "<<BCatchReturn:B\d+>>"
+## CHECK:  Div
+
+## CHECK:  name             "<<BCatchReturn>>"
+## CHECK:  predecessors     "<<BOutside>>" "<<BEnterTry>>" "<<BExitTry1>>" "<<BExitTry2>>"
+## CHECK:  flags            "catch_block"
+## CHECK:  Return
+
+## CHECK:  name             "<<BEnterTry>>"
+## CHECK:  predecessors     "B0"
+## CHECK:  successors       "<<BPSwitch0>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:entry
+
+## CHECK:  name             "<<BExitTry1>>"
+## CHECK:  predecessors     "<<BPSwitch1>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:exit
+
+## CHECK:  name             "<<BExitTry2>>"
+## CHECK:  predecessors     "<<BTry2>>"
+## CHECK:  successors       "<<BOutside>>"
+## CHECK:  xhandlers        "<<BCatchReturn>>"
+## CHECK:  TryBoundary      kind:exit
+
+.method public static testSwitchTryExit(IIII)I
+    .registers 4
+
+    :try_start
+    packed-switch p0, :pswitch_data
+
+    div-int/2addr p0, p1
+
+    :pswitch1
+    div-int/2addr p0, p2
+    :try_end
+    .catchall {:try_start .. :try_end} :catch_all
+
+    :pswitch2
+    div-int/2addr p0, p3
+
+    :catch_all
+    return p0
+
+    :pswitch_data
+    .packed-switch 0x0
+        :pswitch1
+        :pswitch2
+    .end packed-switch
+.end method
+
 # Test that a TryBoundary is inserted between a Throw instruction and the exit
 # block when covered by a try range.
 
diff --git a/test/519-bound-load-class/src/Main.java b/test/519-bound-load-class/src/Main.java
index 41bb951..cddeb09 100644
--- a/test/519-bound-load-class/src/Main.java
+++ b/test/519-bound-load-class/src/Main.java
@@ -16,9 +16,24 @@
 
 public class Main {
   public static void main(String[] args) {
+    testInstanceOf();
+    try {
+      testNull();
+      throw new Error("Expected ClassCastException");
+    } catch (ClassCastException e) { /* expected: testNull() casts Main.class to Main */ }
+  }
+
+  public static void testInstanceOf() {
     Object o = Main.class;
     if (o instanceof Main) {
       System.out.println((Main)o);
     }
   }
+
+  public static void testNull() {
+    Object o = Main.class;
+    if (o != null) {
+      System.out.println((Main)o);
+    }
+  }
 }
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 5b5c368..c4111f6 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -412,9 +412,6 @@
 
 # Known broken tests for the MIPS64 optimizing compiler backend in 64-bit mode.  b/21555893
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS := \
-  004-SignalTest \
-  018-stack-overflow \
-  107-int-math2 \
   449-checker-bce
 
 ifeq ($(TARGET_ARCH),mips64)
@@ -427,20 +424,6 @@
 
 TEST_ART_BROKEN_OPTIMIZING_MIPS64_64BIT_RUN_TESTS :=
 
-# Known broken tests for the MIPS64 optimizing compiler backend in 32-bit mode.  b/21555893
-TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS := \
-  496-checker-inlining-and-class-loader
-
-ifeq ($(TARGET_ARCH),mips64)
-  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS),32)
-  endif
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_MIPS64_32BIT_RUN_TESTS :=
-
 # Known broken tests for the optimizing compiler.
 TEST_ART_BROKEN_OPTIMIZING_RUN_TESTS :=
 
@@ -498,18 +481,45 @@
 
 TEST_ART_BROKEN_READ_BARRIER_RUN_TESTS :=
 
-# Tests that should fail in the heap poisoning configuration.
-# 137: Heap poisoning forces interpreter. Cannot run this with the interpreter.
-TEST_ART_BROKEN_HEAP_POISONING_RUN_TESTS := \
+# Tests that should fail in the heap poisoning configuration with the default (Quick) compiler.
+# 137: Quick punts to the interpreter, and this test cannot run with the interpreter.
+TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS := \
+  137-cfi
+# Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
+# 055-enum-performance: Exceeds run time limits due to heap poisoning instrumentation.
+TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
+  055-enum-performance
+# Tests that should fail in the heap poisoning configuration with the interpreter.
+# 137: Cannot run this with the interpreter.
+TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS := \
   137-cfi
 
 ifeq ($(ART_HEAP_POISONING),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  ifneq (,$(filter default,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),default,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+
+  ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
+
+  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
+    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
+        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+        $(TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
+  endif
 endif
 
-TEST_ART_BROKEN_HEAP_POISONING_RUN_TESTS :=
+TEST_ART_BROKEN_INTERPRETER_HEAP_POISONING_RUN_TESTS :=
+TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
+TEST_ART_BROKEN_DEFAULT_HEAP_POISONING_RUN_TESTS :=
 
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 682a27b..65c3fed 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -137,5 +137,12 @@
   result: EXEC_FAILED,
   bug:22106064,
   name: "libcore.java.lang.OldThreadGroupTest#test_enumerateLThreadArrayLZtest_enumerateLThreadArrayLZ"
+},
+{
+  description: "test_xattr fails on arm64 on the buildbots only: needs investigation",
+  result: EXEC_FAILED,
+  modes: [device],
+  names: ["libcore.io.OsTest#test_xattr"],
+  bug: 22258911
 }
 ]