Merge "Fix build without -Bsymbolic."
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 7e38157..52d1ee3 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -26,18 +26,6 @@
 # List of known broken tests that we won't attempt to execute. The test name must be the full
 # rule name such as test-art-host-oat-optimizing-HelloWorld64.
 ART_TEST_KNOWN_BROKEN := \
-  test-art-host-run-test-gcstress-optimizing-no-prebuild-004-SignalTest32 \
-  test-art-host-run-test-gcstress-optimizing-prebuild-004-SignalTest32 \
-  test-art-host-run-test-gcstress-optimizing-norelocate-004-SignalTest32 \
-  test-art-host-run-test-gcstress-optimizing-relocate-004-SignalTest32 \
-  test-art-host-run-test-gcverify-optimizing-no-prebuild-004-SignalTest32 \
-  test-art-host-run-test-gcverify-optimizing-prebuild-004-SignalTest32 \
-  test-art-host-run-test-gcverify-optimizing-norelocate-004-SignalTest32 \
-  test-art-host-run-test-gcverify-optimizing-relocate-004-SignalTest32 \
-  test-art-host-run-test-optimizing-no-prebuild-004-SignalTest32 \
-  test-art-host-run-test-optimizing-prebuild-004-SignalTest32 \
-  test-art-host-run-test-optimizing-norelocate-004-SignalTest32 \
-  test-art-host-run-test-optimizing-relocate-004-SignalTest32 \
   test-art-target-run-test-gcstress-optimizing-prebuild-004-SignalTest32 \
   test-art-target-run-test-gcstress-optimizing-norelocate-004-SignalTest32 \
   test-art-target-run-test-gcstress-default-prebuild-004-SignalTest32 \
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 17c478c..dd9f414 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -61,6 +61,9 @@
 # The elf writer test has dependencies on core.oat.
 ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_OAT_OUT) $(2ND_HOST_CORE_OAT_OUT)
 ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_OAT_OUT) $(2ND_TARGET_CORE_OAT_OUT)
+ART_GTEST_jni_internal_test_TARGET_DEPS := $(TARGET_CORE_JARS)
+ART_GTEST_proxy_test_TARGET_DEPS := $(TARGET_CORE_JARS)
+ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_OAT_OUT) $(2ND_HOST_CORE_OAT_OUT)
 
 # The path for which all the source files are relative, not actually the current directory.
 LOCAL_PATH := art
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 7983040..5c5163d 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -107,6 +107,7 @@
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
 	utils/assembler.cc \
+	utils/dwarf_cfi.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 	utils/x86/assembler_x86.cc \
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index f098a34..f2a8d84 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -165,14 +165,15 @@
                                const std::vector<uint8_t>& code,
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
-                               const uint32_t fp_spill_mask)
+                               const uint32_t fp_spill_mask,
+                               const std::vector<uint8_t>* cfi_info)
     : CompiledCode(driver, instruction_set, code),
       frame_size_in_bytes_(frame_size_in_bytes),
       core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask),
       mapping_table_(driver->DeduplicateMappingTable(std::vector<uint8_t>())),
       vmap_table_(driver->DeduplicateVMapTable(std::vector<uint8_t>())),
       gc_map_(driver->DeduplicateGCMap(std::vector<uint8_t>())),
-      cfi_info_(nullptr) {
+      cfi_info_(driver->DeduplicateCFIInfo(cfi_info)) {
 }
 
 // Constructs a CompiledMethod for the Portable compiler.
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index b8cd851..c98d06a 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -120,7 +120,8 @@
                  const std::vector<uint8_t>& quick_code,
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
-                 const uint32_t fp_spill_mask);
+                 const uint32_t fp_spill_mask,
+                 const std::vector<uint8_t>* cfi_info);
 
   // Constructs a CompiledMethod for the Portable compiler.
   CompiledMethod(CompilerDriver* driver, InstructionSet instruction_set, const std::string& code,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 072acbe..cd6c9cc 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -108,6 +108,7 @@
     bool GenInlinedSqrt(CallInfo* info);
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
+    bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 6711ab3..b9a17cc 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -949,6 +949,100 @@
   return true;
 }
 
+bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
+  constexpr int kLargeArrayThreshold = 256;
+
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_src_pos = info->args[1];
+  RegLocation rl_dst = info->args[2];
+  RegLocation rl_dst_pos = info->args[3];
+  RegLocation rl_length = info->args[4];
+  // Compile-time check: if an argument is a known-negative constant, bail out and let the
+  // non-inlined method throw, which reduces the related metadata we emit.
+  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
+      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
+      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
+    return false;
+  }
+
+  ClobberCallerSave();
+  LockCallTemps();  // Prepare for explicit register usage.
+  LockTemp(rs_r12);
+  RegStorage rs_src = rs_r0;
+  RegStorage rs_dst = rs_r1;
+  LoadValueDirectFixed(rl_src, rs_src);
+  LoadValueDirectFixed(rl_dst, rs_dst);
+
+  // Handle null pointer exception in slow-path.
+  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
+  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
+  // Handle potential overlapping in slow-path.
+  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
+  // Handle a negative or too-large length in the slow path (the unsigned compare catches both).
+  RegStorage rs_length = rs_r2;
+  LoadValueDirectFixed(rl_length, rs_length);
+  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
+  // Src bounds check.
+  RegStorage rs_pos = rs_r3;
+  RegStorage rs_arr_length = rs_r12;
+  LoadValueDirectFixed(rl_src_pos, rs_pos);
+  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
+  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
+  OpRegReg(kOpSub, rs_arr_length, rs_pos);
+  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
+  // Dst bounds check.
+  LoadValueDirectFixed(rl_dst_pos, rs_pos);
+  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr);
+  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
+  OpRegReg(kOpSub, rs_arr_length, rs_pos);
+  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
+
+  // Everything is checked now.
+  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
+  OpRegReg(kOpAdd, rs_dst, rs_pos);
+  OpRegReg(kOpAdd, rs_dst, rs_pos);
+  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
+  LoadValueDirectFixed(rl_src_pos, rs_pos);
+  OpRegReg(kOpAdd, rs_src, rs_pos);
+  OpRegReg(kOpAdd, rs_src, rs_pos);
+
+  RegStorage rs_tmp = rs_pos;
+  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
+
+  // Copy one element.
+  OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2);
+  LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+  OpRegImm(kOpSub, rs_length, 2);
+  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
+  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
+
+  // Copy two elements.
+  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
+  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
+  OpRegImm(kOpSub, rs_length, 4);
+  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
+  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
+  OpUnconditionalBranch(begin_loop);
+
+  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
+  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
+  LIR* return_point = NewLIR0(kPseudoTargetLabel);
+
+  src_check_branch->target = check_failed;
+  dst_check_branch->target = check_failed;
+  src_dst_same->target = check_failed;
+  len_neg_or_too_big->target = check_failed;
+  src_pos_negative->target = check_failed;
+  src_bad_len->target = check_failed;
+  dst_pos_negative->target = check_failed;
+  dst_bad_len->target = check_failed;
+  jmp_to_begin_loop->target = begin_loop;
+  jmp_to_ret->target = return_point;
+
+  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
+
+  return true;
+}
+
 LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target);
 }
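
Editorial note: the fast path emitted by this LIR sequence corresponds roughly to the C++
sketch below. The struct layout and names are illustrative assumptions, not ART source; the
backward copy direction and every bail-out match the branches wired up above.

  #include <cstdint>

  struct CharArray { int32_t length; uint16_t data[1]; };  // Assumed array layout.

  bool TryInlineCharArrayCopy(const CharArray* src, int32_t src_pos,
                              CharArray* dst, int32_t dst_pos, int32_t length) {
    constexpr int32_t kLargeArrayThreshold = 256;  // ARM value; the arm64 version uses 512.
    if (src == nullptr || dst == nullptr) return false;  // Null checks.
    if (src == dst) return false;                        // Possible overlap.
    if (static_cast<uint32_t>(length) > kLargeArrayThreshold) return false;  // Negative or too big.
    if (src_pos < 0 || src->length - src_pos < length) return false;  // Src bounds check.
    if (dst_pos < 0 || dst->length - dst_pos < length) return false;  // Dst bounds check.
    const uint8_t* s = reinterpret_cast<const uint8_t*>(src->data + src_pos);
    uint8_t* d = reinterpret_cast<uint8_t*>(dst->data + dst_pos);
    int32_t bytes = length * 2;  // rs_length << 1 in the LIR.
    if (bytes & 2) {             // Odd element count: copy one char first.
      bytes -= 2;
      *reinterpret_cast<uint16_t*>(d + bytes) = *reinterpret_cast<const uint16_t*>(s + bytes);
    }
    while (bytes != 0) {         // Main loop: two chars (4 bytes) per iteration, end to start.
      bytes -= 4;
      *reinterpret_cast<uint32_t*>(d + bytes) = *reinterpret_cast<const uint32_t*>(s + bytes);
    }
    return true;  // The "return false" paths correspond to the intrinsic slow path.
  }
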
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 2cd24c6..3e1c18b 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -168,6 +168,7 @@
   bool GenInlinedPeek(CallInfo* info, OpSize size) OVERRIDE;
   bool GenInlinedPoke(CallInfo* info, OpSize size) OVERRIDE;
   bool GenInlinedAbsLong(CallInfo* info) OVERRIDE;
+  bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
   void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                       RegLocation rl_src2) OVERRIDE;
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 147fee8..d00c57d 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -272,6 +272,7 @@
       ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
       branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
     }
+    // TODO: Use tbz/tbnz for < 0 or >= 0.
   }
 
   if (branch == nullptr) {
@@ -788,6 +789,121 @@
   return true;
 }
 
+bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
+  constexpr int kLargeArrayThreshold = 512;
+
+  RegLocation rl_src = info->args[0];
+  RegLocation rl_src_pos = info->args[1];
+  RegLocation rl_dst = info->args[2];
+  RegLocation rl_dst_pos = info->args[3];
+  RegLocation rl_length = info->args[4];
+  // Compile-time check: if an argument is a known-negative constant, bail out and let the
+  // non-inlined method throw, which reduces the related metadata we emit.
+  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
+      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
+      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
+    return false;
+  }
+
+  ClobberCallerSave();
+  LockCallTemps();  // Prepare for explicit register usage.
+  RegStorage rs_src = rs_x0;
+  RegStorage rs_dst = rs_x1;
+  LoadValueDirectFixed(rl_src, rs_src);
+  LoadValueDirectFixed(rl_dst, rs_dst);
+
+  // Handle null pointer exception in slow-path.
+  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
+  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
+  // Handle potential overlapping in slow-path.
+  // TUNING: Support overlapping cases.
+  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
+  // Handle a negative or too-large length in the slow path (the unsigned compare catches both).
+  RegStorage rs_length = rs_w2;
+  LoadValueDirectFixed(rl_length, rs_length);
+  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
+  // Src bounds check.
+  RegStorage rs_src_pos = rs_w3;
+  RegStorage rs_arr_length = rs_w4;
+  LoadValueDirectFixed(rl_src_pos, rs_src_pos);
+  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
+  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
+  OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
+  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
+  // Dst bounds check.
+  RegStorage rs_dst_pos = rs_w5;
+  LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
+  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
+  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
+  OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
+  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
+
+  // Everything is checked now.
+  // Set rs_src to the address of the first element to be copied.
+  rs_src_pos = As64BitReg(rs_src_pos);
+  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
+  OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
+  OpRegReg(kOpAdd, rs_src, rs_src_pos);
+  // Set rs_dst to the address of the first element to be copied.
+  rs_dst_pos = As64BitReg(rs_dst_pos);
+  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
+  OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
+  OpRegReg(kOpAdd, rs_dst, rs_dst_pos);
+
+  // rs_arr_length won't be used anymore.
+  RegStorage rs_tmp = rs_arr_length;
+  // Use a 64-bit view since rs_length will be used as an index.
+  rs_length = As64BitReg(rs_length);
+  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);
+
+  // Copy one element.
+  OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2);
+  LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+  OpRegImm(kOpSub, rs_length, 2);
+  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
+  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);
+
+  // Copy two elements.
+  LIR *copy_two = NewLIR0(kPseudoTargetLabel);
+  OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4);
+  LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr);
+  OpRegImm(kOpSub, rs_length, 4);
+  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
+  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);
+
+  // Copy four elements.
+  LIR *copy_four = NewLIR0(kPseudoTargetLabel);
+  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
+  LIR *begin_loop = NewLIR0(kPseudoTargetLabel);
+  OpRegImm(kOpSub, rs_length, 8);
+  rs_tmp = As64BitReg(rs_tmp);
+  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
+  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
+  LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
+  LIR* loop_finished = OpUnconditionalBranch(nullptr);
+
+  LIR *check_failed = NewLIR0(kPseudoTargetLabel);
+  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
+  LIR* return_point = NewLIR0(kPseudoTargetLabel);
+
+  src_check_branch->target = check_failed;
+  dst_check_branch->target = check_failed;
+  src_dst_same->target = check_failed;
+  len_neg_or_too_big->target = check_failed;
+  src_pos_negative->target = check_failed;
+  src_bad_len->target = check_failed;
+  dst_pos_negative->target = check_failed;
+  dst_bad_len->target = check_failed;
+  jmp_to_copy_two->target = copy_two;
+  jmp_to_copy_four->target = copy_four;
+  jmp_to_ret->target = return_point;
+  jmp_to_loop->target = begin_loop;
+  loop_finished->target = return_point;
+
+  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
+
+  return true;
+}
+
 LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
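
Editorial note: a quick worked example of the arm64 remainder splitting above. For
length = 7 chars the byte count is 14 = 0b1110: bit 1 is set, so one halfword (2 bytes)
is copied first; bit 2 is set, so one word (4 bytes) follows; the remaining 8 bytes take
exactly one iteration of the 8-byte loop. Every copy indexes from the end of the region
downward using the shrinking byte count, which is why rs_length is widened to a 64-bit
view before being used as an index.
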
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 511297c..be79b63 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1085,7 +1085,7 @@
     vmap_encoder.PushBackUnsigned(0u);  // Size is 0.
   }
 
-  std::unique_ptr<std::vector<uint8_t>> cfi_info(ReturnCallFrameInformation());
+  std::unique_ptr<std::vector<uint8_t>> cfi_info(ReturnFrameDescriptionEntry());
   CompiledMethod* result =
       new CompiledMethod(cu_->compiler_driver, cu_->instruction_set, code_buffer_, frame_size_,
                          core_spill_mask_, fp_spill_mask_, encoded_mapping_table_,
@@ -1250,7 +1250,7 @@
   AppendLIR(load_pc_rel);
 }
 
-std::vector<uint8_t>* Mir2Lir::ReturnCallFrameInformation() {
+std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
   // Default case is to do nothing.
   return nullptr;
 }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 2221bb5..4b8f794 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1528,10 +1528,10 @@
                                     uint32_t type_idx, RegLocation rl_dest,
                                     RegLocation rl_src);
     /*
-     * @brief Generate the debug_frame FDE information if possible.
-     * @returns pointer to vector containg CFE information, or NULL.
+     * @brief Generate the eh_frame FDE information if possible.
+     * @returns pointer to vector containing FDE information, or NULL.
      */
-    virtual std::vector<uint8_t>* ReturnCallFrameInformation();
+    virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry();
 
     /**
      * @brief Used to insert marker that can be used to associate MIR with LIR.
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index d3ed48d..24a3fe3 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -360,7 +360,7 @@
    * @brief Generate the debug_frame FDE information.
    * @returns pointer to vector containing CFE information
    */
-  std::vector<uint8_t>* ReturnCallFrameInformation() OVERRIDE;
+  std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE;
 
   LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 69f3e67..fb68f45 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -24,6 +24,7 @@
 #include "mirror/array.h"
 #include "mirror/string.h"
 #include "x86_lir.h"
+#include "utils/dwarf_cfi.h"
 
 namespace art {
 
@@ -880,9 +881,13 @@
 LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
   // First load the pointer in fs:[suspend-trigger] into eax
   // Then use a test instruction to indirect via that address.
-  NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),   cu_->target64 ?
-      Thread::ThreadSuspendTriggerOffset<8>().Int32Value() :
-      Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
+  if (cu_->target64) {
+    NewLIR2(kX86Mov64RT, rs_rAX.GetReg(),
+        Thread::ThreadSuspendTriggerOffset<8>().Int32Value());
+  } else {
+    NewLIR2(kX86Mov32RT, rs_rAX.GetReg(),
+        Thread::ThreadSuspendTriggerOffset<4>().Int32Value());
+  }
   return NewLIR3(kX86Test32RM, rs_rAX.GetReg(), rs_rAX.GetReg(), 0);
 }
 
@@ -1009,19 +1014,6 @@
   return call;
 }
 
-/*
- * @brief Enter a 32 bit quantity into a buffer
- * @param buf buffer.
- * @param data Data value.
- */
-
-static void PushWord(std::vector<uint8_t>&buf, int32_t data) {
-  buf.push_back(data & 0xff);
-  buf.push_back((data >> 8) & 0xff);
-  buf.push_back((data >> 16) & 0xff);
-  buf.push_back((data >> 24) & 0xff);
-}
-
 void X86Mir2Lir::InstallLiteralPools() {
   // These are handled differently for x86.
   DCHECK(code_literal_list_ == nullptr);
@@ -1042,10 +1034,10 @@
       align_size--;
     }
     for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
-      PushWord(code_buffer_, p->operands[0]);
-      PushWord(code_buffer_, p->operands[1]);
-      PushWord(code_buffer_, p->operands[2]);
-      PushWord(code_buffer_, p->operands[3]);
+      PushWord(&code_buffer_, p->operands[0]);
+      PushWord(&code_buffer_, p->operands[1]);
+      PushWord(&code_buffer_, p->operands[2]);
+      PushWord(&code_buffer_, p->operands[3]);
     }
   }
 
@@ -1293,14 +1285,14 @@
   // Compute the number of words to search in to rCX.
   Load32Disp(rs_rDX, count_offset, rs_rCX);
 
-  if (!cu_->target64) {
-    // Possible signal here due to null pointer dereference.
-    // Note that the signal handler will expect the top word of
-    // the stack to be the ArtMethod*.  If the PUSH edi instruction
-    // below is ahead of the load above then this will not be true
-    // and the signal handler will not work.
-    MarkPossibleNullPointerException(0);
+  // Possible signal here due to null pointer dereference.
+  // Note that the signal handler will expect the top word of
+  // the stack to be the ArtMethod*.  If the PUSH edi instruction
+  // below is ahead of the load above then this will not be true
+  // and the signal handler will not work.
+  MarkPossibleNullPointerException(0);
 
+  if (!cu_->target64) {
     // EDI is callee-save register in 32-bit mode.
     NewLIR1(kX86Push32R, rs_rDI.GetReg());
   }
@@ -1418,47 +1410,6 @@
   return true;
 }
 
-/*
- * @brief Enter an 'advance LOC' into the FDE buffer
- * @param buf FDE buffer.
- * @param increment Amount by which to increase the current location.
- */
-static void AdvanceLoc(std::vector<uint8_t>&buf, uint32_t increment) {
-  if (increment < 64) {
-    // Encoding in opcode.
-    buf.push_back(0x1 << 6 | increment);
-  } else if (increment < 256) {
-    // Single byte delta.
-    buf.push_back(0x02);
-    buf.push_back(increment);
-  } else if (increment < 256 * 256) {
-    // Two byte delta.
-    buf.push_back(0x03);
-    buf.push_back(increment & 0xff);
-    buf.push_back((increment >> 8) & 0xff);
-  } else {
-    // Four byte delta.
-    buf.push_back(0x04);
-    PushWord(buf, increment);
-  }
-}
-
-static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
-  uint8_t buffer[12];
-  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
-  for (uint8_t *p = buffer; p < ptr; p++) {
-    buf.push_back(*p);
-  }
-}
-
-static void EncodeSignedLeb128(std::vector<uint8_t>& buf, int32_t value) {
-  uint8_t buffer[12];
-  uint8_t *ptr = EncodeSignedLeb128(buffer, value);
-  for (uint8_t *p = buffer; p < ptr; p++) {
-    buf.push_back(*p);
-  }
-}
-
 static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) {
   if (is_x86_64) {
     switch (art_reg_id) {
@@ -1481,36 +1432,23 @@
   }
 }
 
-std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
-  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
+std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() {
+  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
 
   // Generate the FDE for the method.
   DCHECK_NE(data_offset_, 0U);
 
-  // Length (will be filled in later in this routine).
-  PushWord(*cfi_info, 0);
-
-  // 'CIE_pointer' (filled in by linker).
-  PushWord(*cfi_info, 0);
-
-  // 'initial_location' (filled in by linker).
-  PushWord(*cfi_info, 0);
-
-  // 'address_range' (number of bytes in the method).
-  PushWord(*cfi_info, data_offset_);
-
-  // Augmentation length: 0
-  cfi_info->push_back(0);
+  WriteFDEHeader(cfi_info);
+  WriteFDEAddressRange(cfi_info, data_offset_);
 
   // The instructions in the FDE.
   if (stack_decrement_ != nullptr) {
     // Advance LOC to just past the stack decrement.
     uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
-    AdvanceLoc(*cfi_info, pc);
+    DW_CFA_advance_loc(cfi_info, pc);
 
     // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
-    cfi_info->push_back(0x0e);
-    EncodeUnsignedLeb128(*cfi_info, frame_size_);
+    DW_CFA_def_cfa_offset(cfi_info, frame_size_);
 
     // Handle register spills
     const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4;
@@ -1522,14 +1460,12 @@
         pc += kSpillInstLen;
 
         // Advance LOC to pass this instruction
-        AdvanceLoc(*cfi_info, kSpillInstLen);
+        DW_CFA_advance_loc(cfi_info, kSpillInstLen);
 
         int dwarf_reg_id;
         if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) {
-          // DW_CFA_offset_extended_sf reg_no offset
-          cfi_info->push_back(0x11);
-          EncodeUnsignedLeb128(*cfi_info, dwarf_reg_id);
-          EncodeSignedLeb128(*cfi_info, offset / kDataAlignmentFactor);
+          // DW_CFA_offset_extended_sf reg offset
+          DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor);
         }
 
         offset += GetInstructionSetPointerSize(cu_->instruction_set);
@@ -1539,16 +1475,15 @@
     // We continue with that stack until the epilogue.
     if (stack_increment_ != nullptr) {
       uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
-      AdvanceLoc(*cfi_info, new_pc - pc);
+      DW_CFA_advance_loc(cfi_info, new_pc - pc);
 
       // We probably have code snippets after the epilogue, so save the
       // current state: DW_CFA_remember_state.
-      cfi_info->push_back(0x0a);
+      DW_CFA_remember_state(cfi_info);
 
       // We have now popped the stack: DW_CFA_def_cfa_offset 4/8.
       // There is only the return PC on the stack now.
-      cfi_info->push_back(0x0e);
-      EncodeUnsignedLeb128(*cfi_info, GetInstructionSetPointerSize(cu_->instruction_set));
+      DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set));
 
       // Everything after that is the same as before the epilogue.
       // Stack bump was followed by RET instruction.
@@ -1556,25 +1491,16 @@
       if (post_ret_insn != nullptr) {
         pc = new_pc;
         new_pc = post_ret_insn->offset;
-        AdvanceLoc(*cfi_info, new_pc - pc);
+        DW_CFA_advance_loc(cfi_info, new_pc - pc);
         // Restore the state: DW_CFA_restore_state.
-        cfi_info->push_back(0x0b);
+        DW_CFA_restore_state(cfi_info);
       }
     }
   }
 
-  // Padding to a multiple of 4
-  while ((cfi_info->size() & 3) != 0) {
-    // DW_CFA_nop is encoded as 0.
-    cfi_info->push_back(0);
-  }
+  PadCFI(cfi_info);
+  WriteCFILength(cfi_info);
 
-  // Set the length of the FDE inside the generated bytes.
-  uint32_t length = cfi_info->size() - 4;
-  (*cfi_info)[0] = length;
-  (*cfi_info)[1] = length >> 8;
-  (*cfi_info)[2] = length >> 16;
-  (*cfi_info)[3] = length >> 24;
   return cfi_info;
 }
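
Editorial note: read top to bottom, the rewritten ReturnFrameDescriptionEntry() now emits
the stream below (a summary of the code above; <...> stands for values computed there):

  FDE header                 (length / CIE_pointer / initial_location placeholders, augmentation 0)
  address_range = data_offset_
  DW_CFA_advance_loc         <end of the stack decrement>
  DW_CFA_def_cfa_offset      <frame_size_>
  per spilled core register:
    DW_CFA_advance_loc       <kSpillInstLen>
    DW_CFA_offset_extended_sf <DWARF reg> <offset / kDataAlignmentFactor>
  DW_CFA_advance_loc         <start of the epilogue>
  DW_CFA_remember_state
  DW_CFA_def_cfa_offset      <pointer size>      ; only the return PC remains on the stack
  DW_CFA_advance_loc         <past the ret>
  DW_CFA_restore_state
  DW_CFA_nop padding to a 4-byte multiple; then the length field is patched in
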
 
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 12e9401..bb5f7e0 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -46,13 +46,6 @@
   (*buf)[offset+3] = data >> 24;
 }
 
-static void PushWord(std::vector<uint8_t>* buf, int data) {
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
-  buf->push_back((data >> 16) & 0xff);
-  buf->push_back((data >> 24) & 0xff);
-}
-
 static void PushHalf(std::vector<uint8_t>* buf, int data) {
   buf->push_back(data & 0xff);
   buf->push_back((data >> 8) & 0xff);
@@ -842,24 +835,6 @@
   }
 }
 
-static void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) {
-  size_t encoded_size = UnsignedLeb128Size(data);
-  size_t cur_index = dst->size();
-  dst->resize(dst->size() + encoded_size);
-  uint8_t* write_pos = &((*dst)[cur_index]);
-  uint8_t* write_pos_after = EncodeUnsignedLeb128(write_pos, data);
-  DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size);
-}
-
-static void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) {
-  size_t encoded_size = SignedLeb128Size(data);
-  size_t cur_index = dst->size();
-  dst->resize(dst->size() + encoded_size);
-  uint8_t* write_pos = &((*dst)[cur_index]);
-  uint8_t* write_pos_after = EncodeSignedLeb128(write_pos, data);
-  DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size);
-}
-
 std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
   std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
 
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index c38cfaf..1a35da0 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -90,6 +90,7 @@
 
   // Assembler that holds generated instructions
   std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
+  jni_asm->InitializeFrameDescriptionEntry();
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
@@ -432,12 +433,14 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
+  jni_asm->FinalizeFrameDescriptionEntry();
   return new CompiledMethod(driver,
                             instruction_set,
                             managed_code,
                             frame_size,
                             main_jni_conv->CoreSpillMask(),
-                            main_jni_conv->FpSpillMask());
+                            main_jni_conv->FpSpillMask(),
+                            jni_asm->GetFrameDescriptionEntry());
 }
 
 // Copy a single parameter from the managed to the JNI calling convention
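
Editorial note: the FDE hooks used here follow a simple lifecycle; the sketch below
paraphrases the calls in this file against the virtuals declared in assembler.h by this
patch (non-x86 assemblers keep the empty defaults and return nullptr):

  std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
  jni_asm->InitializeFrameDescriptionEntry();  // Reserve the FDE header before emitting code.
  // ... BuildFrame() and friends append CFI opcodes as instructions are emitted ...
  jni_asm->FinalizeFrameDescriptionEntry();    // Patch address_range, pad, patch length.
  std::vector<uint8_t>* fde = jni_asm->GetFrameDescriptionEntry();  // nullptr if unsupported.
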
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2264638..ab53b17 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -15,17 +15,17 @@
  */
 
 #include "code_generator_x86.h"
+
+#include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "mirror/array.h"
+#include "mirror/art_method.h"
+#include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/managed_register_x86.h"
 
-#include "entrypoints/quick/quick_entrypoints.h"
-#include "mirror/array.h"
-#include "mirror/art_method.h"
-#include "thread.h"
-
 namespace art {
 
 x86::X86ManagedRegister Location::AsX86() const {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2ff2a17..e4259f5 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -35,7 +35,7 @@
 
 namespace x86_64 {
 
-static constexpr bool kExplicitStackOverflowCheck = true;
+static constexpr bool kExplicitStackOverflowCheck = false;
 
 // Some x86_64 instructions require a register to be available as temp.
 static constexpr Register TMP = R11;
@@ -208,25 +208,26 @@
   static const int kFakeReturnRegister = 16;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
+  bool skip_overflow_check = IsLeafMethod()
+      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
+
+  if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
+    __ testq(CpuRegister(RAX), Address(
+        CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
+    RecordPcInfo(0);
+  }
+
   // The return PC has already been pushed on the stack.
   __ subq(CpuRegister(RSP),
           Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
 
-  bool skip_overflow_check = IsLeafMethod()
-      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
+  if (!skip_overflow_check && kExplicitStackOverflowCheck) {
+    SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
+    AddSlowPath(slow_path);
 
-  if (!skip_overflow_check) {
-    if (kExplicitStackOverflowCheck) {
-      SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
-      AddSlowPath(slow_path);
-
-      __ gs()->cmpq(CpuRegister(RSP),
-                    Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
-      __ j(kLess, slow_path->GetEntryLabel());
-    } else {
-      __ testq(CpuRegister(RAX), Address(
-          CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
-    }
+    __ gs()->cmpq(CpuRegister(RSP),
+                  Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
+    __ j(kLess, slow_path->GetEntryLabel());
   }
 
   __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
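
(Editorial note, an inference from this patch rather than its commit message: the implicit
probe appears to be hoisted ahead of the frame setup because, on x86-64, the fault handler
below recovers the ArtMethod* from RDI via CTX_METHOD, and RDI only holds the method at
entry; RecordPcInfo(0) likewise maps the faulting probe back to dex pc 0.)
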
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index f72f5e5..4addfa0 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -499,6 +499,10 @@
   // and branch to a ExceptionSlowPath if it is.
   virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
+  virtual void InitializeFrameDescriptionEntry() {}
+  virtual void FinalizeFrameDescriptionEntry() {}
+  virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; }
+
   virtual ~Assembler() {}
 
  protected:
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
new file mode 100644
index 0000000..b3d1a47
--- /dev/null
+++ b/compiler/utils/dwarf_cfi.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "leb128.h"
+#include "utils.h"
+
+#include "dwarf_cfi.h"
+
+namespace art {
+
+void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) {
+  if (increment < 64) {
+    // Encoding in opcode.
+    buf->push_back(0x1 << 6 | increment);
+  } else if (increment < 256) {
+    // Single byte delta.
+    buf->push_back(0x02);
+    buf->push_back(increment);
+  } else if (increment < 256 * 256) {
+    // Two byte delta.
+    buf->push_back(0x03);
+    buf->push_back(increment & 0xff);
+    buf->push_back((increment >> 8) & 0xff);
+  } else {
+    // Four byte delta.
+    buf->push_back(0x04);
+    PushWord(buf, increment);
+  }
+}
+
+void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) {
+  buf->push_back(0x11);
+  EncodeUnsignedLeb128(reg, buf);
+  EncodeSignedLeb128(offset, buf);
+}
+
+void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) {
+  buf->push_back((0x2 << 6) | reg);
+  EncodeUnsignedLeb128(offset, buf);
+}
+
+void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) {
+  buf->push_back(0x0e);
+  EncodeUnsignedLeb128(offset, buf);
+}
+
+void DW_CFA_remember_state(std::vector<uint8_t>* buf) {
+  buf->push_back(0x0a);
+}
+
+void DW_CFA_restore_state(std::vector<uint8_t>* buf) {
+  buf->push_back(0x0b);
+}
+
+void WriteFDEHeader(std::vector<uint8_t>* buf) {
+  // 'length' (filled in by other functions).
+  PushWord(buf, 0);
+
+  // 'CIE_pointer' (filled in by linker).
+  PushWord(buf, 0);
+
+  // 'initial_location' (filled in by linker).
+  PushWord(buf, 0);
+
+  // 'address_range' (filled in by other functions).
+  PushWord(buf, 0);
+
+  // Augmentation length: 0
+  buf->push_back(0);
+}
+
+void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint32_t data) {
+  const int kOffsetOfAddressRange = 12;
+  CHECK(buf->size() >= kOffsetOfAddressRange + sizeof(uint32_t));
+
+  uint8_t *p = buf->data() + kOffsetOfAddressRange;
+  p[0] = data;
+  p[1] = data >> 8;
+  p[2] = data >> 16;
+  p[3] = data >> 24;
+}
+
+void WriteCFILength(std::vector<uint8_t>* buf) {
+  uint32_t length = buf->size() - 4;
+  DCHECK_EQ((length & 0x3), 0U);
+  DCHECK_GT(length, 4U);
+
+  uint8_t *p = buf->data();
+  p[0] = length;
+  p[1] = length >> 8;
+  p[2] = length >> 16;
+  p[3] = length >> 24;
+}
+
+void PadCFI(std::vector<uint8_t>* buf) {
+  while (buf->size() & 0x3) {
+    buf->push_back(0);
+  }
+}
+
+}  // namespace art
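
Editorial note: the encodings above follow the DWARF call-frame instruction format; a few
concrete byte sequences, worked by hand from the code above:

  DW_CFA_advance_loc(buf, 12)     ->  0x4c            (0x1 << 6 | 12, delta in the opcode)
  DW_CFA_advance_loc(buf, 200)    ->  0x02 0xc8       (single byte delta)
  DW_CFA_advance_loc(buf, 1000)   ->  0x03 0xe8 0x03  (two byte delta, little-endian)
  DW_CFA_def_cfa_offset(buf, 64)  ->  0x0e 0x40       (offset as unsigned LEB128)
  DW_CFA_offset(buf, 5, 2)        ->  0x85 0x02       (0x2 << 6 | reg, offset as ULEB128)
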
diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h
new file mode 100644
index 0000000..e5acc0e
--- /dev/null
+++ b/compiler/utils/dwarf_cfi.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_
+#define ART_COMPILER_UTILS_DWARF_CFI_H_
+
+#include <vector>
+
+namespace art {
+
+/**
+ * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer
+ * @param buf FDE buffer.
+ * @param increment Amount by which to increase the current location.
+ */
+void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment);
+
+/**
+ * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer
+ * @param buf FDE buffer.
+ * @param reg Register number.
+ * @param offset Offset of register address from CFA.
+ */
+void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset);
+
+/**
+ * @brief Enter a 'DW_CFA_offset' into an FDE buffer
+ * @param buf FDE buffer.
+ * @param reg Register number.
+ * @param offset Offset of register address from CFA.
+ */
+void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset);
+
+/**
+ * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer
+ * @param buf FDE buffer.
+ * @param offset New offset of CFA.
+ */
+void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset);
+
+/**
+ * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer
+ * @param buf FDE buffer.
+ */
+void DW_CFA_remember_state(std::vector<uint8_t>* buf);
+
+/**
+ * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer
+ * @param buf FDE buffer.
+ */
+void DW_CFA_restore_state(std::vector<uint8_t>* buf);
+
+/**
+ * @brief Write FDE header into an FDE buffer
+ * @param buf FDE buffer.
+ */
+void WriteFDEHeader(std::vector<uint8_t>* buf);
+
+/**
+ * @brief Set 'address_range' field of an FDE buffer
+ * @param buf FDE buffer.
+ */
+void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint32_t data);
+
+/**
+ * @brief Set 'length' field of an FDE buffer
+ * @param buf FDE buffer.
+ */
+void WriteCFILength(std::vector<uint8_t>* buf);
+
+/**
+ * @brief Pad an FDE buffer with 0 until its size is a multiple of 4
+ * @param buf FDE buffer.
+ */
+void PadCFI(std::vector<uint8_t>* buf);
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_DWARF_CFI_H_
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b6a5c20..48edb15 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -20,6 +20,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "memory_region.h"
 #include "thread.h"
+#include "utils/dwarf_cfi.h"
 
 namespace art {
 namespace x86 {
@@ -1407,20 +1408,61 @@
   EmitOperand(reg_or_opcode, Operand(operand));
 }
 
+void X86Assembler::InitializeFrameDescriptionEntry() {
+  WriteFDEHeader(&cfi_info_);
+}
+
+void X86Assembler::FinalizeFrameDescriptionEntry() {
+  WriteFDEAddressRange(&cfi_info_, buffer_.Size());
+  PadCFI(&cfi_info_);
+  WriteCFILength(&cfi_info_);
+}
+
 constexpr size_t kFramePointerSize = 4;
 
 void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& spill_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
+  cfi_cfa_offset_ = kFramePointerSize;  // Only return address on stack
+  cfi_pc_ = buffer_.Size();  // Nothing emitted yet
+  DCHECK_EQ(cfi_pc_, 0U);
+
+  uint32_t reg_offset = 1;
   CHECK_ALIGNED(frame_size, kStackAlignment);
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
     pushl(spill_regs.at(i).AsX86().AsCpuRegister());
+
+    // DW_CFA_advance_loc
+    DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+    cfi_pc_ = buffer_.Size();
+    // DW_CFA_def_cfa_offset
+    cfi_cfa_offset_ += kFramePointerSize;
+    DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+    // DW_CFA_offset reg offset
+    reg_offset++;
+    DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset);
   }
+
   // return address then method on stack
-  addl(ESP, Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
-                      sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
-                      kFramePointerSize /*return address*/));
+  int32_t adjust = frame_size - (spill_regs.size() * kFramePointerSize) -
+                   sizeof(StackReference<mirror::ArtMethod>) /*method*/ -
+                   kFramePointerSize /*return address*/;
+  addl(ESP, Immediate(-adjust));
+  // DW_CFA_advance_loc
+  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+  cfi_pc_ = buffer_.Size();
+  // DW_CFA_def_cfa_offset
+  cfi_cfa_offset_ += adjust;
+  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+
   pushl(method_reg.AsX86().AsCpuRegister());
+  // DW_CFA_advance_loc
+  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+  cfi_pc_ = buffer_.Size();
+  // DW_CFA_def_cfa_offset
+  cfi_cfa_offset_ += kFramePointerSize;
+  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) +
                  (i * kFramePointerSize)),
@@ -1442,6 +1484,12 @@
 void X86Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addl(ESP, Immediate(-adjust));
+  // DW_CFA_advance_loc
+  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+  cfi_pc_ = buffer_.Size();
+  // DW_CFA_def_cfa_offset
+  cfi_cfa_offset_ += adjust;
+  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 }
 
 void X86Assembler::DecreaseFrameSize(size_t adjust) {
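
Editorial note: a hand-worked example of the prologue bookkeeping above, assuming a 32-bit
frame with frame_size = 32 that spills EDI and ESI (a pushl of a register is one byte, so
each DW_CFA_advance_loc advances by 1; the addl length is left symbolic):

  pushl %edi         ->  DW_CFA_advance_loc 1; DW_CFA_def_cfa_offset 8;  DW_CFA_offset 7, 2
  pushl %esi         ->  DW_CFA_advance_loc 1; DW_CFA_def_cfa_offset 12; DW_CFA_offset 6, 3
  addl $-16, %esp    ->  DW_CFA_advance_loc <len>; DW_CFA_def_cfa_offset 28
  pushl method_reg   ->  DW_CFA_advance_loc 1; DW_CFA_def_cfa_offset 32   ; == frame_size

Here adjust = 32 - 2*4 (spills) - 4 (method) - 4 (return address) = 16, and the final CFA
offset lands back on frame_size as expected.
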
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index ce20768..5c4e34f 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -571,6 +571,12 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  void InitializeFrameDescriptionEntry() OVERRIDE;
+  void FinalizeFrameDescriptionEntry() OVERRIDE;
+  std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
+    return &cfi_info_;
+  }
+
  private:
   inline void EmitUint8(uint8_t value);
   inline void EmitInt32(int32_t value);
@@ -589,6 +595,9 @@
   void EmitGenericShift(int rm, Register reg, const Immediate& imm);
   void EmitGenericShift(int rm, Register operand, Register shifter);
 
+  std::vector<uint8_t> cfi_info_;
+  uint32_t cfi_cfa_offset_, cfi_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
 };
 
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 09d2b49..5d46ee2 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -88,6 +88,14 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
+  int DWARFRegId() const {
+    CHECK(IsCpuRegister());
+    // For all the X86 registers we care about:
+    // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
+    // DWARF register id is the same as id_.
+    return static_cast<int>(id_);
+  }
+
   ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 7684271..62b72c2 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -20,6 +20,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "memory_region.h"
 #include "thread.h"
+#include "utils/dwarf_cfi.h"
 
 namespace art {
 namespace x86_64 {
@@ -1714,11 +1715,26 @@
   }
 }
 
+void X86_64Assembler::InitializeFrameDescriptionEntry() {
+  WriteFDEHeader(&cfi_info_);
+}
+
+void X86_64Assembler::FinalizeFrameDescriptionEntry() {
+  WriteFDEAddressRange(&cfi_info_, buffer_.Size());
+  PadCFI(&cfi_info_);
+  WriteCFILength(&cfi_info_);
+}
+
 constexpr size_t kFramePointerSize = 8;
 
 void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                                  const std::vector<ManagedRegister>& spill_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
+  cfi_cfa_offset_ = kFramePointerSize;  // Only return address on stack
+  cfi_pc_ = buffer_.Size();  // Nothing emitted yet
+  DCHECK_EQ(cfi_pc_, 0U);
+
+  uint32_t reg_offset = 1;
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
@@ -1726,6 +1742,16 @@
     if (spill.IsCpuRegister()) {
       pushq(spill.AsCpuRegister());
       gpr_count++;
+
+      // DW_CFA_advance_loc
+      DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+      cfi_pc_ = buffer_.Size();
+      // DW_CFA_def_cfa_offset
+      cfi_cfa_offset_ += kFramePointerSize;
+      DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+      // DW_CFA_offset reg offset
+      reg_offset++;
+      DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset);
     }
   }
   // return address then method on stack
@@ -1733,6 +1759,13 @@
                           - (gpr_count * kFramePointerSize)
                           - kFramePointerSize /*return address*/;
   subq(CpuRegister(RSP), Immediate(rest_of_frame));
+  // DW_CFA_advance_loc
+  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+  cfi_pc_ = buffer_.Size();
+  // DW_CFA_def_cfa_offset
+  cfi_cfa_offset_ += rest_of_frame;
+  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+
   // spill xmms
   int64_t offset = rest_of_frame;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
@@ -1796,6 +1829,12 @@
 void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
+  // DW_CFA_advance_loc
+  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
+  cfi_pc_ = buffer_.Size();
+  // DW_CFA_def_cfa_offset
+  cfi_cfa_offset_ += adjust;
+  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
 }
 
 void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 2f814df..ee11575 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -614,6 +614,12 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  void InitializeFrameDescriptionEntry() OVERRIDE;
+  void FinalizeFrameDescriptionEntry() OVERRIDE;
+  std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
+    return &cfi_info_;
+  }
+
  private:
   void EmitUint8(uint8_t value);
   void EmitInt32(int32_t value);
@@ -655,6 +661,9 @@
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
 
+  std::vector<uint8_t> cfi_info_;
+  uint32_t cfi_cfa_offset_, cfi_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
 };
 
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index 822659f..3a96ad0 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -87,6 +87,21 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
+  int DWARFRegId() const {
+    CHECK(IsCpuRegister());
+    switch (id_) {
+      case RAX: return  0;
+      case RDX: return  1;
+      case RCX: return  2;
+      case RBX: return  3;
+      case RSI: return  4;
+      case RDI: return  5;
+      case RBP: return  6;
+      case RSP: return  7;
+      default: return static_cast<int>(id_);  // R8 ~ R15
+    }
+  }
+
   CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ac3eb39..bb86a74 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1169,6 +1169,7 @@
     case kThumb2:
     case kArm64:
     case kX86:
+    case kX86_64:
       implicit_null_checks = true;
       implicit_so_checks = true;
       break;
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 302e835..09ec004 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -248,6 +248,8 @@
 LIBART_TARGET_SRC_FILES_x86 := \
   $(LIBART_SRC_FILES_x86)
 
+# Note that fault_handler_x86.cc below is not a mistake.  This file is
+# shared between the x86 and x86_64 architectures.
 LIBART_SRC_FILES_x86_64 := \
   arch/x86_64/context_x86_64.cc \
   arch/x86_64/entrypoints_init_x86_64.cc \
@@ -257,7 +259,7 @@
   arch/x86_64/quick_entrypoints_x86_64.S \
   arch/x86_64/thread_x86_64.cc \
   monitor_pool.cc \
-  arch/x86_64/fault_handler_x86_64.cc
+  arch/x86/fault_handler_x86.cc
 
 LIBART_TARGET_SRC_FILES_x86_64 := \
   $(LIBART_SRC_FILES_x86_64) \
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 48582f4..be28544 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -46,7 +46,7 @@
   return instr_size;
 }
 
-void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index dc82cc2..3a7e689 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -37,7 +37,7 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 5a64a69..0e76aab 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -29,7 +29,7 @@
 
 namespace art {
 
-void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
 }
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 435f280..8b6c9b1 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -31,14 +31,21 @@
 #define CTX_ESP uc_mcontext->__ss.__esp
 #define CTX_EIP uc_mcontext->__ss.__eip
 #define CTX_EAX uc_mcontext->__ss.__eax
+#define CTX_METHOD uc_mcontext->__ss.__eax
+#elif defined(__x86_64__)
+#define CTX_ESP uc_mcontext.gregs[REG_RSP]
+#define CTX_EIP uc_mcontext.gregs[REG_RIP]
+#define CTX_EAX uc_mcontext.gregs[REG_RAX]
+#define CTX_METHOD uc_mcontext.gregs[REG_RDI]
 #else
 #define CTX_ESP uc_mcontext.gregs[REG_ESP]
 #define CTX_EIP uc_mcontext.gregs[REG_EIP]
 #define CTX_EAX uc_mcontext.gregs[REG_EAX]
+#define CTX_METHOD uc_mcontext.gregs[REG_EAX]
 #endif
 
 //
-// X86 specific fault handler functions.
+// X86 (and X86_64) specific fault handler functions.
 //
 
 namespace art {
@@ -47,129 +54,146 @@
 extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_test_suspend();
 
-// From the x86 disassembler...
-enum SegmentPrefix {
-  kCs = 0x2e,
-  kSs = 0x36,
-  kDs = 0x3e,
-  kEs = 0x26,
-  kFs = 0x64,
-  kGs = 0x65,
-};
-
 // Get the size of an instruction in bytes.
-static uint32_t GetInstructionSize(uint8_t* pc) {
-  uint8_t* instruction_start = pc;
-  bool have_prefixes = true;
-  bool two_byte = false;
-
-  // Skip all the prefixes.
-  do {
-    switch (*pc) {
-        // Group 1 - lock and repeat prefixes:
-      case 0xF0:
-      case 0xF2:
-      case 0xF3:
-        // Group 2 - segment override prefixes:
-      case kCs:
-      case kSs:
-      case kDs:
-      case kEs:
-      case kFs:
-      case kGs:
-        // Group 3 - operand size override:
-      case 0x66:
-        // Group 4 - address size override:
-      case 0x67:
-        break;
-      default:
-        have_prefixes = false;
-        break;
-    }
-    if (have_prefixes) {
-      pc++;
-    }
-  } while (have_prefixes);
-
-#if defined(__x86_64__)
-  // Skip REX is present.
-  if (*pc >= 0x40 && *pc <= 0x4F) {
-    ++pc;
-  }
+// Return 0 if the instruction is not handled.
+static uint32_t GetInstructionSize(const uint8_t* pc) {
+#if defined(__x86_64__)
+  const bool x86_64 = true;
+#else
+  const bool x86_64 = false;
 #endif
 
-  // Check for known instructions.
-  uint32_t known_length = 0;
-  switch (*pc) {
-  case 0x83:                // cmp [r + v], b: 4 byte instruction
-    known_length = 4;
-    break;
-  }
+  const uint8_t* startpc = pc;
 
-  if (known_length > 0) {
-    VLOG(signals) << "known instruction with length " << known_length;
-    return known_length;
-  }
-
-  // Unknown instruction, work out length.
-
-  // Work out if we have a ModR/M byte.
   uint8_t opcode = *pc++;
-  if (opcode == 0xf) {
+  uint8_t modrm;
+  bool has_modrm = false;
+  bool two_byte = false;
+  uint32_t displacement_size = 0;
+  uint32_t immediate_size = 0;
+
+  // Prefixes.
+  while (true) {
+    bool prefix_present = false;
+    switch (opcode) {
+      // Group 1
+      case 0xf0:
+      case 0xf2:
+      case 0xf3:
+
+      // Group 2
+      case 0x2e:
+      case 0x36:
+      case 0x3e:
+      case 0x26:
+      case 0x64:
+      case 0x65:
+
+      // Group 3
+      case 0x66:
+
+      // Group 4
+      case 0x67:
+        opcode = *pc++;
+        prefix_present = true;
+        break;
+    }
+    if (!prefix_present) {
+      break;
+    }
+  }
+
+  if (x86_64 && opcode >= 0x40 && opcode <= 0x4f) {
+    opcode = *pc++;
+  }
+
+  if (opcode == 0x0f) {
+    // Two byte opcode
     two_byte = true;
     opcode = *pc++;
   }
 
-  bool has_modrm = false;         // Is ModR/M byte present?
-  uint8_t hi = opcode >> 4;       // Opcode high nybble.
-  uint8_t lo = opcode & 0b1111;   // Opcode low nybble.
+  bool unhandled_instruction = false;
 
-  // From the Intel opcode tables.
   if (two_byte) {
-    has_modrm = true;   // TODO: all of these?
-  } else if (hi < 4) {
-    has_modrm = lo < 4 || (lo >= 8 && lo <= 0xb);
-  } else if (hi == 6) {
-    has_modrm = lo == 3 || lo == 9 || lo == 0xb;
-  } else if (hi == 8) {
-    has_modrm = lo != 0xd;
-  } else if (hi == 0xc) {
-    has_modrm = lo == 1 || lo == 2 || lo == 6 || lo == 7;
-  } else if (hi == 0xd) {
-    has_modrm = lo < 4;
-  } else if (hi == 0xf) {
-    has_modrm = lo == 6 || lo == 7;
-  }
+    switch (opcode) {
+      case 0x10:  // vmovsd/ss
+      case 0x11:  // vmovsd/ss
+      case 0xb6:  // movzx
+      case 0xb7:
+      case 0xbe:  // movsx
+      case 0xbf:
+        modrm = *pc++;
+        has_modrm = true;
+        break;
+      default:
+        unhandled_instruction = true;
+        break;
+    }
+  } else {
+    switch (opcode) {
+      case 0x89:  // mov
+      case 0x8b:
+      case 0x38:  // cmp with memory.
+      case 0x39:
+      case 0x3a:
+      case 0x3b:
+      case 0x3c:
+      case 0x3d:
+      case 0x85:  // test.
+        modrm = *pc++;
+        has_modrm = true;
+        break;
 
-  if (has_modrm) {
-    uint8_t modrm = *pc++;
-    uint8_t mod = (modrm >> 6) & 0b11;
-    uint8_t reg = (modrm >> 3) & 0b111;
-    switch (mod) {
-      case 0:
+      case 0x80:        // Group 1, 8-bit immediate.
+      case 0x83:        // Group 1, sign-extended 8-bit immediate.
+        modrm = *pc++;
+        has_modrm = true;
+        immediate_size = 1;
         break;
-      case 1:
-        if (reg == 4) {
-          // SIB + 1 byte displacement.
-          pc += 2;
-        } else {
-          pc += 1;
-        }
+
+      case 0x81:        // Group 1, 32-bit immediate.
+        modrm = *pc++;
+        has_modrm = true;
+        immediate_size = 4;
         break;
-      case 2:
-        // SIB + 4 byte displacement.
-        pc += 5;
-        break;
-      case 3:
+
+      default:
+        unhandled_instruction = true;
         break;
     }
   }
 
-  VLOG(signals) << "calculated X86 instruction size is " << (pc - instruction_start);
-  return pc - instruction_start;
+  if (unhandled_instruction) {
+    VLOG(signals) << "Unhandled x86 instruction with opcode " << static_cast<int>(opcode);
+    return 0;
+  }
+
+  if (has_modrm) {
+    uint8_t mod = (modrm >> 6) & 0b11;
+
+    // An R/M field of 0b100 (with mod != 0b11) means a SIB byte follows.
+    if (mod != 0b11 && (modrm & 0b111) == 4) {
+      ++pc;     // Skip the SIB byte.
+    }
+
+    switch (mod) {
+      case 0b00:
+        // Usually no displacement, but R/M == 0b101 encodes a 32-bit
+        // displacement with no base register (RIP-relative on x86-64).
+        if ((modrm & 0b111) == 5) {
+          displacement_size = 4;
+        }
+        break;
+      case 0b01: displacement_size = 1; break;
+      case 0b10: displacement_size = 4; break;
+      case 0b11: break;  // Register operand: no displacement.
+    }
+  }
+
+  // Skip displacement and immediate.
+  pc += displacement_size + immediate_size;
+
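+  // Example: "83 78 10 00" (cmp dword ptr [eax + 0x10], 0) is opcode 0x83,
+  // ModR/M 0x78 (mod == 0b01 -> disp8), disp 0x10, imm8 0x00: 4 bytes total.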
+  VLOG(signals) << "x86 instruction length calculated as " << (pc - startpc);
+  return pc - startpc;
 }
 
-void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
+void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
   struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
@@ -180,21 +204,30 @@
   }
 
   // In the case of a stack overflow, the stack is not valid and we can't
-  // get the method from the top of the stack.  However it's in EAX.
+  // get the method from the top of the stack.  However it's in EAX (x86) or RDI (x86_64).
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(siginfo->si_addr);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+#if defined(__x86_64__)
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86_64));
+#else
       reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kX86));
+#endif
   if (overflow_addr == fault_addr) {
-    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->CTX_EAX);
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(uc->CTX_METHOD);
   } else {
     // The method is at the top of the stack.
-    *out_method = reinterpret_cast<mirror::ArtMethod*>(reinterpret_cast<uintptr_t*>(*out_sp)[0]);
+    *out_method = (reinterpret_cast<StackReference<mirror::ArtMethod>* >(*out_sp)[0]).AsMirrorPtr();
   }
 
   uint8_t* pc = reinterpret_cast<uint8_t*>(uc->CTX_EIP);
   VLOG(signals) << HexDump(pc, 32, true, "PC ");
 
   uint32_t instr_size = GetInstructionSize(pc);
+  if (instr_size == 0) {
+    // Unknown instruction, tell caller it's not ours.
+    *out_method = nullptr;
+    return;
+  }
   *out_return_pc = reinterpret_cast<uintptr_t>(pc + instr_size);
 }
 
@@ -204,16 +237,21 @@
   uint8_t* sp = reinterpret_cast<uint8_t*>(uc->CTX_ESP);
 
   uint32_t instr_size = GetInstructionSize(pc);
+  if (instr_size == 0) {
+    // Unknown instruction: we cannot work out the return address, so we cannot handle this fault.
+    return false;
+  }
+
   // We need to arrange for the signal handler to return to the null pointer
   // exception generator.  The return address must be the address of the
   // next instruction (this instruction + instruction size).  The return address
   // is on the stack at the top address of the current frame.
 
   // Push the return address onto the stack.
-  uint32_t retaddr = reinterpret_cast<uint32_t>(pc + instr_size);
-  uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+  uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + instr_size);
+  uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
   *next_sp = retaddr;
-  uc->CTX_ESP = reinterpret_cast<uint32_t>(next_sp);
+  uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
   uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
   VLOG(signals) << "Generating null pointer exception";
@@ -221,9 +259,14 @@
 }
 
 // A suspend check is done using the following instruction sequence:
+// (x86)
 // 0xf720f1df:         648B058C000000      mov     eax, fs:[0x8c]  ; suspend_trigger
 // .. some intervening instructions.
 // 0xf720f1e6:                   8500      test    eax, [eax]
+// (x86_64)
+// 0x7f579de45d9e: 65488B0425A8000000      movq    rax, gs:[0xa8]  ; suspend_trigger
+// .. some intervening instructions.
+// 0x7f579de45da7:               8500      test    eax, [eax]
 
 // The offset from fs (x86) or gs (x86_64) is Thread::ThreadSuspendTriggerOffset().
 // To check for a suspend check, we examine the instructions that caused
@@ -231,11 +274,20 @@
 bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
   // These are the instructions to check for.  The first one is the mov eax, fs:[xxx]
   // (x86) or movq rax, gs:[xxx] (x86_64), where xxx is the offset of the suspend trigger.
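+  // ThreadSuspendTriggerOffset is templated on the pointer size of the target
+  // (8 bytes on x86_64, 4 on x86).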
+#if defined(__x86_64__)
+  uint32_t trigger = Thread::ThreadSuspendTriggerOffset<8>().Int32Value();
+#else
   uint32_t trigger = Thread::ThreadSuspendTriggerOffset<4>().Int32Value();
+#endif
 
   VLOG(signals) << "Checking for suspension point";
+#if defined(__x86_64__)
+  uint8_t checkinst1[] = {0x65, 0x48, 0x8b, 0x04, 0x25, static_cast<uint8_t>(trigger & 0xff),
+      static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
+#else
   uint8_t checkinst1[] = {0x64, 0x8b, 0x05, static_cast<uint8_t>(trigger & 0xff),
       static_cast<uint8_t>((trigger >> 8) & 0xff), 0, 0};
+#endif
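+  // checkinst1 is the byte encoding of the trigger load shown in the comment
+  // above this function; checkinst2 encodes "test eax, [eax]".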
   uint8_t checkinst2[] = {0x85, 0x00};
 
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
@@ -270,10 +322,10 @@
     // is on the stack at the top address of the current frame.
 
     // Push the return address onto the stack.
-    uint32_t retaddr = reinterpret_cast<uint32_t>(pc + 2);
-    uint32_t* next_sp = reinterpret_cast<uint32_t*>(sp - 4);
+    uintptr_t retaddr = reinterpret_cast<uintptr_t>(pc + 2);
+    uintptr_t* next_sp = reinterpret_cast<uintptr_t*>(sp - sizeof(uintptr_t));
     *next_sp = retaddr;
-    uc->CTX_ESP = reinterpret_cast<uint32_t>(next_sp);
+    uc->CTX_ESP = reinterpret_cast<uintptr_t>(next_sp);
 
     uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_quick_test_suspend);
 
@@ -302,7 +354,11 @@
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
 
+#if defined(__x86_64__)
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86_64);
+#else
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
+#endif
 
   Thread* self = Thread::Current();
   uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
diff --git a/runtime/arch/x86_64/fault_handler_x86_64.cc b/runtime/arch/x86_64/fault_handler_x86_64.cc
deleted file mode 100644
index 88ae7f3..0000000
--- a/runtime/arch/x86_64/fault_handler_x86_64.cc
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-#include "fault_handler.h"
-#include <sys/ucontext.h>
-#include "base/macros.h"
-#include "globals.h"
-#include "base/logging.h"
-#include "base/hex_dump.h"
-
-
-//
-// X86_64 specific fault handler functions.
-//
-
-namespace art {
-
-void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
-                                             mirror::ArtMethod** out_method,
-                                             uintptr_t* out_return_pc, uintptr_t* out_sp) {
-}
-
-bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
-}
-
-bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
-}
-
-bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
-}
-}       // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 48bc240..f021ada 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -284,6 +284,18 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
+// On entry to this function, RAX contains the RSP value for the overflow region.
+DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
+    // Here, the RSP is above the protected region.  We need to create a
+    // callee save frame and then move RSP down to the overflow region.
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov %rsp, %rsi                    // get current stack pointer, pass SP as second arg
+    mov %rax, %rsp                    // move RSP to the overflow region.
+    mov %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current() as first arg
+    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
+    int3                              // unreached
+END_FUNCTION art_quick_throw_stack_overflow_from_signal
+
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 1b91628..8ddaf5c 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -76,6 +76,7 @@
   // Also, there is only an 8K stack available here, so logging can cause memory
   // overwrite issues if you are unlucky.  If you want to enable logging and
   // are getting crashes, allocate more space for the alternate signal stack.
+
   VLOG(signals) << "Handling fault";
   if (IsInGeneratedCode(info, context, true)) {
     VLOG(signals) << "in generated code, looking for handler";
@@ -91,6 +92,7 @@
       return;
     }
   }
+
   art_sigsegv_fault();
 
   // Pass this on to the next handler in the chain, or the default if none.
@@ -150,7 +152,7 @@
 
   // Get the architecture specific method address and return address.  These
   // are in architecture specific files in arch/<arch>/fault_handler_<arch>.
-  GetMethodAndReturnPCAndSP(siginfo, context, &method_obj, &return_pc, &sp);
+  GetMethodAndReturnPcAndSp(siginfo, context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
   VLOG(signals) << "potential method: " << method_obj;
@@ -236,7 +238,7 @@
     mirror::ArtMethod* method = nullptr;
     uintptr_t return_pc = 0;
     uintptr_t sp = 0;
-    manager_->GetMethodAndReturnPCAndSP(siginfo, context, &method, &return_pc, &sp);
+    manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
     Thread* self = Thread::Current();
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     StackReference<mirror::ArtMethod>* frame =
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 71c9977..1acd024 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -43,8 +43,14 @@
   void HandleFault(int sig, siginfo_t* info, void* context);
   void AddHandler(FaultHandler* handler, bool generated_code);
   void RemoveHandler(FaultHandler* handler);
-  void GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context, mirror::ArtMethod** out_method,
-                                 uintptr_t* out_return_pc, uintptr_t* out_sp);
+
+  // Note that the following two functions are called in the context of a signal handler.
+  // The IsInGeneratedCode() function checks that the mutator lock is held before it
+  // calls GetMethodAndReturnPcAndSp().
+  // TODO: think about adding lock assertions and fake lock and unlock functions.
+  void GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context, mirror::ArtMethod** out_method,
+                                 uintptr_t* out_return_pc, uintptr_t* out_sp)
+                                 NO_THREAD_SAFETY_ANALYSIS;
   bool IsInGeneratedCode(siginfo_t* siginfo, void *context, bool check_dex_pc)
                          NO_THREAD_SAFETY_ANALYSIS;
 
diff --git a/runtime/gc/accounting/card_table_test.cc b/runtime/gc/accounting/card_table_test.cc
index a88b2c9..433855a 100644
--- a/runtime/gc/accounting/card_table_test.cc
+++ b/runtime/gc/accounting/card_table_test.cc
@@ -33,14 +33,16 @@
   class Object;
 }  // namespace mirror
 
+namespace gc {
+namespace accounting {
+
 class CardTableTest : public CommonRuntimeTest {
  public:
-  std::unique_ptr<gc::accounting::CardTable> card_table_;
-  static constexpr size_t kCardSize = gc::accounting::CardTable::kCardSize;
+  std::unique_ptr<CardTable> card_table_;
 
   void CommonSetup() {
     if (card_table_.get() == nullptr) {
-      card_table_.reset(gc::accounting::CardTable::Create(heap_begin_, heap_size_));
+      card_table_.reset(CardTable::Create(heap_begin_, heap_size_));
       EXPECT_TRUE(card_table_.get() != nullptr);
     } else {
       ClearCardTable();
@@ -58,15 +60,16 @@
   byte* HeapLimit() const {
     return HeapBegin() + heap_size_;
   }
-  byte PRandCard(const byte* addr) const {
-    size_t offset = RoundDown(addr - heap_begin_, kCardSize);
+  // Return a pseudo random card for an address.
+  byte PseudoRandomCard(const byte* addr) const {
+    size_t offset = RoundDown(addr - heap_begin_, CardTable::kCardSize);
     return 1 + offset % 254;
   }
   void FillRandom() {
-    for (const byte* addr = HeapBegin(); addr != HeapLimit(); addr += kCardSize) {
+    for (const byte* addr = HeapBegin(); addr != HeapLimit(); addr += CardTable::kCardSize) {
       EXPECT_TRUE(card_table_->AddrIsInCardTable(addr));
       byte* card = card_table_->CardFromAddr(addr);
-      *card = PRandCard(addr);
+      *card = PseudoRandomCard(addr);
     }
   }
 
@@ -79,15 +82,15 @@
   CommonSetup();
   for (const byte* addr = HeapBegin(); addr < HeapLimit(); addr += kObjectAlignment) {
     auto obj = reinterpret_cast<const mirror::Object*>(addr);
-    EXPECT_EQ(card_table_->GetCard(obj), gc::accounting::CardTable::kCardClean);
+    EXPECT_EQ(card_table_->GetCard(obj), CardTable::kCardClean);
     EXPECT_TRUE(!card_table_->IsDirty(obj));
     card_table_->MarkCard(addr);
     EXPECT_TRUE(card_table_->IsDirty(obj));
-    EXPECT_EQ(card_table_->GetCard(obj), gc::accounting::CardTable::kCardDirty);
+    EXPECT_EQ(card_table_->GetCard(obj), CardTable::kCardDirty);
     byte* card_addr = card_table_->CardFromAddr(addr);
-    EXPECT_EQ(*card_addr, gc::accounting::CardTable::kCardDirty);
-    *card_addr = gc::accounting::CardTable::kCardClean;
-    EXPECT_EQ(*card_addr, gc::accounting::CardTable::kCardClean);
+    EXPECT_EQ(*card_addr, CardTable::kCardDirty);
+    *card_addr = CardTable::kCardClean;
+    EXPECT_EQ(*card_addr, CardTable::kCardClean);
   }
 }
 
@@ -103,33 +106,36 @@
 TEST_F(CardTableTest, TestModifyCardsAtomic) {
   CommonSetup();
   FillRandom();
-  const size_t delta = std::min(static_cast<size_t>(HeapLimit() - HeapBegin()), 8U * kCardSize);
+  const size_t delta = std::min(static_cast<size_t>(HeapLimit() - HeapBegin()),
+                                8U * CardTable::kCardSize);
   UpdateVisitor visitor;
   size_t start_offset = 0;
-  for (byte* cstart = HeapBegin(); cstart < HeapBegin() + delta; cstart += kCardSize) {
-    start_offset = (start_offset + kObjectAlignment) % kCardSize;
+  for (byte* cstart = HeapBegin(); cstart < HeapBegin() + delta; cstart += CardTable::kCardSize) {
+    start_offset = (start_offset + kObjectAlignment) % CardTable::kCardSize;
     size_t end_offset = 0;
-    for (byte* cend = HeapLimit() - delta; cend < HeapLimit(); cend += kCardSize) {
+    for (byte* cend = HeapLimit() - delta; cend < HeapLimit(); cend += CardTable::kCardSize) {
       // Don't always start at a card boundary.
       byte* start = cstart + start_offset;
       byte* end = cend - end_offset;
-      end_offset = (end_offset + kObjectAlignment) % kCardSize;
+      end_offset = (end_offset + kObjectAlignment) % CardTable::kCardSize;
       // Modify cards.
       card_table_->ModifyCardsAtomic(start, end, visitor, visitor);
       // Check adjacent cards not modified.
-      for (byte* cur = start - kCardSize; cur >= HeapBegin(); cur -= kCardSize) {
-        EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)), PRandCard(cur));
+      for (byte* cur = start - CardTable::kCardSize; cur >= HeapBegin();
+          cur -= CardTable::kCardSize) {
+        EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)),
+                  PseudoRandomCard(cur));
       }
-      for (byte* cur = end + kCardSize; cur < HeapLimit(); cur += kCardSize) {
-        EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)), PRandCard(cur));
+      for (byte* cur = end + CardTable::kCardSize; cur < HeapLimit();
+          cur += CardTable::kCardSize) {
+        EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)),
+                  PseudoRandomCard(cur));
       }
       // Verify Range.
-      for (byte* cur = start; cur < AlignUp(end, kCardSize); cur += kCardSize) {
+      for (byte* cur = start; cur < AlignUp(end, CardTable::kCardSize);
+          cur += CardTable::kCardSize) {
         byte* card = card_table_->CardFromAddr(cur);
-        byte value = PRandCard(cur);
-        if (visitor(value) != *card) {
-          LOG(ERROR) << reinterpret_cast<void*>(start) << " " << reinterpret_cast<void*>(cur) << " " << reinterpret_cast<void*>(end);
-        }
+        byte value = PseudoRandomCard(cur);
         EXPECT_EQ(visitor(value), *card);
         // Restore for next iteration.
         *card = value;
@@ -139,5 +145,6 @@
 }
 
 // TODO: Add test for CardTable::Scan.
-
+}  // namespace accounting
+}  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 46d79bf..646c032 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -55,7 +55,8 @@
     : heap_(heap),
       name_(name),
       pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
-      cumulative_timings_(name) {
+      cumulative_timings_(name),
+      pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true) {
   ResetCumulativeStatistics();
 }
 
@@ -65,10 +66,11 @@
 
 void GarbageCollector::ResetCumulativeStatistics() {
   cumulative_timings_.Reset();
-  pause_histogram_.Reset();
   total_time_ns_ = 0;
   total_freed_objects_ = 0;
   total_freed_bytes_ = 0;
+  MutexLock mu(Thread::Current(), pause_histogram_lock_);
+  pause_histogram_.Reset();
 }
 
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
@@ -95,6 +97,7 @@
   }
   total_time_ns_ += current_iteration->GetDurationNs();
   for (uint64_t pause_time : current_iteration->GetPauseTimes()) {
+    MutexLock mu(self, pause_histogram_lock_);
     pause_histogram_.AddValue(pause_time / 1000);
   }
   ATRACE_END();
@@ -137,8 +140,11 @@
 }
 
 void GarbageCollector::ResetMeasurements() {
+  {
+    MutexLock mu(Thread::Current(), pause_histogram_lock_);
+    pause_histogram_.Reset();
+  }
   cumulative_timings_.Reset();
-  pause_histogram_.Reset();
   total_time_ns_ = 0;
   total_freed_objects_ = 0;
   total_freed_bytes_ = 0;
@@ -171,6 +177,36 @@
   heap_->RecordFree(freed.objects, freed.bytes);
 }
 
+uint64_t GarbageCollector::GetTotalPausedTimeNs() {
+  MutexLock mu(Thread::Current(), pause_histogram_lock_);
+  return pause_histogram_.AdjustedSum();
+}
+
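+// Print the cumulative timings, pause histogram, and throughput statistics
+// for this collector.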
+void GarbageCollector::DumpPerformanceInfo(std::ostream& os) {
+  const CumulativeLogger& logger = GetCumulativeTimings();
+  const size_t iterations = logger.GetIterations();
+  if (iterations == 0) {
+    return;
+  }
+  os << ConstDumpable<CumulativeLogger>(logger);
+  const uint64_t total_ns = logger.GetTotalNs();
+  const double seconds = NsToMs(total_ns) / 1000.0;
+  const uint64_t freed_bytes = GetTotalFreedBytes();
+  const uint64_t freed_objects = GetTotalFreedObjects();
+  {
+    MutexLock mu(Thread::Current(), pause_histogram_lock_);
+    Histogram<uint64_t>::CumulativeData cumulative_data;
+    pause_histogram_.CreateHistogram(&cumulative_data);
+    pause_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
+  }
+  os << GetName() << " total time: " << PrettyDuration(total_ns)
+     << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
+     << GetName() << " freed: " << freed_objects
+     << " objects with total size " << PrettySize(freed_bytes) << "\n"
+     << GetName() << " throughput: " << freed_objects / seconds << "/s / "
+     << PrettySize(freed_bytes / seconds) << "/s\n";
+}
+
 }  // namespace collector
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 885569e..b809469 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -119,18 +119,13 @@
 
   GarbageCollector(Heap* heap, const std::string& name);
   virtual ~GarbageCollector() { }
-
   const char* GetName() const {
     return name_.c_str();
   }
-
   virtual GcType GetGcType() const = 0;
-
   virtual CollectorType GetCollectorType() const = 0;
-
   // Run the garbage collector.
   void Run(GcCause gc_cause, bool clear_soft_references);
-
   Heap* GetHeap() const {
     return heap_;
   }
@@ -138,24 +133,17 @@
   const CumulativeLogger& GetCumulativeTimings() const {
     return cumulative_timings_;
   }
-
   void ResetCumulativeStatistics();
-
   // Swap the live and mark bitmaps of spaces that are active for the collector. For partial GC,
   // this is the allocation space, for full GC then we swap the zygote bitmaps too.
   void SwapBitmaps() EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_);
-  uint64_t GetTotalPausedTimeNs() const {
-    return pause_histogram_.AdjustedSum();
-  }
+  uint64_t GetTotalPausedTimeNs() LOCKS_EXCLUDED(pause_histogram_lock_);
   int64_t GetTotalFreedBytes() const {
     return total_freed_bytes_;
   }
   uint64_t GetTotalFreedObjects() const {
     return total_freed_objects_;
   }
-  const Histogram<uint64_t>& GetPauseHistogram() const {
-    return pause_histogram_;
-  }
   // Reset the cumulative timings and pause histogram.
   void ResetMeasurements();
   // Returns the estimated throughput in bytes / second.
@@ -174,11 +162,11 @@
   void RecordFree(const ObjectBytePair& freed);
   // Record a free of large objects.
   void RecordFreeLOS(const ObjectBytePair& freed);
+  void DumpPerformanceInfo(std::ostream& os) LOCKS_EXCLUDED(pause_histogram_lock_);
 
  protected:
   // Run all of the GC phases.
   virtual void RunPhases() = 0;
-
   // Revoke all the thread-local buffers.
   virtual void RevokeAllThreadLocalBuffers() = 0;
 
@@ -188,11 +176,12 @@
   Heap* const heap_;
   std::string name_;
   // Cumulative statistics.
-  Histogram<uint64_t> pause_histogram_;
+  Histogram<uint64_t> pause_histogram_ GUARDED_BY(pause_histogram_lock_);
   uint64_t total_time_ns_;
   uint64_t total_freed_objects_;
   int64_t total_freed_bytes_;
   CumulativeLogger cumulative_timings_;
+  mutable Mutex pause_histogram_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 };
 
 }  // namespace collector
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index bf8cca7..b61105f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -772,28 +772,9 @@
   // Dump cumulative loggers for each GC type.
   uint64_t total_paused_time = 0;
   for (auto& collector : garbage_collectors_) {
-    const CumulativeLogger& logger = collector->GetCumulativeTimings();
-    const size_t iterations = logger.GetIterations();
-    const Histogram<uint64_t>& pause_histogram = collector->GetPauseHistogram();
-    if (iterations != 0 && pause_histogram.SampleSize() != 0) {
-      os << ConstDumpable<CumulativeLogger>(logger);
-      const uint64_t total_ns = logger.GetTotalNs();
-      const uint64_t total_pause_ns = collector->GetTotalPausedTimeNs();
-      double seconds = NsToMs(logger.GetTotalNs()) / 1000.0;
-      const uint64_t freed_bytes = collector->GetTotalFreedBytes();
-      const uint64_t freed_objects = collector->GetTotalFreedObjects();
-      Histogram<uint64_t>::CumulativeData cumulative_data;
-      pause_histogram.CreateHistogram(&cumulative_data);
-      pause_histogram.PrintConfidenceIntervals(os, 0.99, cumulative_data);
-      os << collector->GetName() << " total time: " << PrettyDuration(total_ns)
-         << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
-         << collector->GetName() << " freed: " << freed_objects
-         << " objects with total size " << PrettySize(freed_bytes) << "\n"
-         << collector->GetName() << " throughput: " << freed_objects / seconds << "/s / "
-         << PrettySize(freed_bytes / seconds) << "/s\n";
-      total_duration += total_ns;
-      total_paused_time += total_pause_ns;
-    }
+    total_duration += collector->GetCumulativeTimings().GetTotalNs();
+    total_paused_time += collector->GetTotalPausedTimeNs();
+    collector->DumpPerformanceInfo(os);
     collector->ResetMeasurements();
   }
   uint64_t allocation_time =
@@ -1634,9 +1615,16 @@
         RemoveSpace(bump_pointer_space_);
         bump_pointer_space_ = nullptr;
         const char* name = kUseRosAlloc ? kRosAllocSpaceName[1] : kDlMallocSpaceName[1];
+        // Temporarily unprotect the backup mem map so rosalloc can write the debug magic number.
+        if (kIsDebugBuild && kUseRosAlloc) {
+          mem_map->Protect(PROT_READ | PROT_WRITE);
+        }
         main_space_backup_.reset(CreateMallocSpaceFromMemMap(mem_map.get(), kDefaultInitialSize,
                                                              mem_map->Size(), mem_map->Size(),
                                                              name, true));
+        if (kIsDebugBuild && kUseRosAlloc) {
+          mem_map->Protect(PROT_NONE);
+        }
         mem_map.release();
       }
       break;
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6c7ee5b..c281b22 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -135,7 +135,7 @@
                              uintptr_t end,
                              std::string* error_msg) {
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (!map->Build()) {
+  if (map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to build process map");
     return false;
   }
@@ -158,7 +158,7 @@
                                 uintptr_t end,
                                 std::string* error_msg) {
   std::unique_ptr<BacktraceMap> map(BacktraceMap::Create(getpid(), true));
-  if (!map->Build()) {
+  if (map.get() == nullptr) {
     *error_msg = StringPrintf("Failed to build process map");
     return false;
   }
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index fe877d5..e0c0d63 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -621,6 +621,7 @@
     case kThumb2:
     case kX86:
     case kArm64:
+    case kX86_64:
       implicit_null_checks_ = true;
       implicit_so_checks_ = true;
       break;
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index ee66ccc..9aacb30 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -32,11 +32,18 @@
   }
 }
 
+// The default SIGSTKSZ on Linux is 8K.  If we do any logging in a signal
+// handler, this is too small, so we allocate 16K instead.
+static constexpr int kHostAltSigStackSize = 16 * 1024;    // 16K signal stack.
+
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
+#ifdef HAVE_ANDROID_OS
+  LOG(FATAL) << "Invalid use of alternate signal stack on Android";
+#endif
   stack_t ss;
-  ss.ss_sp = new uint8_t[SIGSTKSZ];
-  ss.ss_size = SIGSTKSZ;
+  ss.ss_sp = new uint8_t[kHostAltSigStackSize];
+  ss.ss_size = kHostAltSigStackSize;
   ss.ss_flags = 0;
   CHECK(ss.ss_sp != NULL);
   SigAltStack(&ss, NULL);
@@ -56,7 +63,7 @@
   // Tell the kernel to stop using it.
   ss.ss_sp = NULL;
   ss.ss_flags = SS_DISABLE;
-  ss.ss_size = SIGSTKSZ;  // Avoid ENOMEM failure with Mac OS' buggy libc.
+  ss.ss_size = kHostAltSigStackSize;  // Avoid ENOMEM failure with Mac OS' buggy libc.
   SigAltStack(&ss, NULL);
 
   // Free it.
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 4d49809..f966fbd 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1354,4 +1354,29 @@
   return true;
 }
 
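+// Append |data| to |dst| in unsigned LEB128 form: 7 data bits per byte, with
+// the high bit set on every byte except the last.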
+void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) {
+  size_t encoded_size = UnsignedLeb128Size(data);
+  size_t cur_index = dst->size();
+  dst->resize(dst->size() + encoded_size);
+  uint8_t* write_pos = &((*dst)[cur_index]);
+  uint8_t* write_pos_after = EncodeUnsignedLeb128(write_pos, data);
+  DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size);
+}
+
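+// Append |data| to |dst| in signed LEB128 form.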
+void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) {
+  size_t encoded_size = SignedLeb128Size(data);
+  size_t cur_index = dst->size();
+  dst->resize(dst->size() + encoded_size);
+  uint8_t* write_pos = &((*dst)[cur_index]);
+  uint8_t* write_pos_after = EncodeSignedLeb128(write_pos, data);
+  DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size);
+}
+
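+// Append |data| to |buf| as four bytes, least significant byte first.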
+void PushWord(std::vector<uint8_t>* buf, int32_t data) {
+  buf->push_back(data & 0xff);
+  buf->push_back((data >> 8) & 0xff);
+  buf->push_back((data >> 16) & 0xff);
+  buf->push_back((data >> 24) & 0xff);
+}
+
 }  // namespace art
diff --git a/runtime/utils.h b/runtime/utils.h
index 73872d3..49bcbf9 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -497,6 +497,11 @@
   }
 };
 
+void PushWord(std::vector<uint8_t>* buf, int32_t data);
+
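+// Append LEB128-encoded values to |buf|.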
+void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* buf);
+void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* buf);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_