Merge "Fix GetDexCanonicalLocation test for relative paths."
diff --git a/Android.mk b/Android.mk
index 451edf1..15e8308 100644
--- a/Android.mk
+++ b/Android.mk
@@ -67,8 +67,13 @@
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/JAVA_LIBRARIES/*_intermediates/javalib.odex
 	rm -f $(2ND_TARGET_OUT_INTERMEDIATES)/APPS/*_intermediates/*.odex
 endif
+ifneq ($(TMPDIR),)
+	rm -rf $(TMPDIR)/$(USER)/test-*/dalvik-cache/*
+	rm -rf $(TMPDIR)/android-data/dalvik-cache/*
+else
 	rm -rf /tmp/$(USER)/test-*/dalvik-cache/*
 	rm -rf /tmp/android-data/dalvik-cache/*
+endif
 
 .PHONY: clean-oat-target
 clean-oat-target:
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 6e27190..17c478c 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -91,6 +91,7 @@
   runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc \
   runtime/entrypoints_order_test.cc \
   runtime/exception_test.cc \
+  runtime/gc/accounting/card_table_test.cc \
   runtime/gc/accounting/space_bitmap_test.cc \
   runtime/gc/heap_test.cc \
   runtime/gc/space/dlmalloc_space_base_test.cc \
@@ -113,6 +114,7 @@
   runtime/monitor_pool_test.cc \
   runtime/monitor_test.cc \
   runtime/parsed_options_test.cc \
+  runtime/proxy_test.cc \
   runtime/reference_table_test.cc \
   runtime/thread_pool_test.cc \
   runtime/transaction_test.cc \
@@ -123,7 +125,6 @@
 
 COMPILER_GTEST_COMMON_SRC_FILES := \
   runtime/jni_internal_test.cc \
-  runtime/proxy_test.cc \
   runtime/reflection_test.cc \
   compiler/dex/global_value_numbering_test.cc \
   compiler/dex/local_value_numbering_test.cc \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 69f9387..7983040 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -282,10 +282,10 @@
 endef
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-ifeq ($(ART_BUILD_NDEBUG),true)
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-libart-compiler,host,ndebug))
 endif
-ifeq ($(ART_BUILD_DEBUG),true)
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-libart-compiler,host,debug))
 endif
 ifeq ($(ART_BUILD_TARGET_NDEBUG),true)
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 250924a..5cf846f 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -38,9 +38,6 @@
                                                          uint32_t access_flags, uint32_t method_idx,
                                                          const art::DexFile& dex_file);
 
-// Hack for CFI CIE initialization
-extern std::vector<uint8_t>* X86CFIInitialization(bool is_x86_64);
-
 void QuickCompiler::Init() const {
   ArtInitQuickCompilerContext(GetCompilerDriver());
 }
@@ -126,17 +123,6 @@
   return mir_to_lir;
 }
 
-std::vector<uint8_t>* QuickCompiler::GetCallFrameInformationInitialization(
-    const CompilerDriver& driver) const {
-  if (driver.GetInstructionSet() == kX86) {
-    return X86CFIInitialization(false);
-  }
-  if (driver.GetInstructionSet() == kX86_64) {
-    return X86CFIInitialization(true);
-  }
-  return nullptr;
-}
-
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
diff --git a/compiler/compilers.h b/compiler/compilers.h
index 2c231e1..151bf6f 100644
--- a/compiler/compilers.h
+++ b/compiler/compilers.h
@@ -56,17 +56,6 @@
 
   void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE {}
 
-  /*
-   * @brief Generate and return Dwarf CFI initialization, if supported by the
-   * backend.
-   * @param driver CompilerDriver for this compile.
-   * @returns nullptr if not supported by backend or a vector of bytes for CFI DWARF
-   * information.
-   * @note This is used for backtrace information in generated code.
-   */
-  std::vector<uint8_t>* GetCallFrameInformationInitialization(const CompilerDriver& driver) const
-      OVERRIDE;
-
  private:
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
 };
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 3de4483..0b05bbe 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -841,6 +841,54 @@
 
   // 113 MIR_SELECT
   AN_NONE,
+
+  // 114 MirOpConstVector
+  AN_NONE,
+
+  // 115 MirOpMoveVector
+  AN_NONE,
+
+  // 116 MirOpPackedMultiply
+  AN_NONE,
+
+  // 117 MirOpPackedAddition
+  AN_NONE,
+
+  // 118 MirOpPackedSubtract
+  AN_NONE,
+
+  // 119 MirOpPackedShiftLeft
+  AN_NONE,
+
+  // 120 MirOpPackedSignedShiftRight
+  AN_NONE,
+
+  // 121 MirOpPackedUnsignedShiftRight
+  AN_NONE,
+
+  // 122 MirOpPackedAnd
+  AN_NONE,
+
+  // 123 MirOpPackedOr
+  AN_NONE,
+
+  // 124 MirOpPackedXor
+  AN_NONE,
+
+  // 125 MirOpPackedAddReduce
+  AN_NONE,
+
+  // 126 MirOpPackedReduce
+  AN_NONE,
+
+  // 127 MirOpPackedSet
+  AN_NONE,
+
+  // 128 MirOpReserveVectorRegisters
+  AN_NONE,
+
+  // 129 MirOpReturnVectorRegisters
+  AN_NONE,
 };
 
 struct MethodStats {
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5059c5f..b133991 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -43,8 +43,7 @@
  *   add   rARM_PC, r_disp   ; This is the branch from which we compute displacement
  *   cbnz  r_idx, lp
  */
-void ArmMir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
-                                 RegLocation rl_src) {
+void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpSparseSwitchTable(table);
@@ -92,8 +91,7 @@
 }
 
 
-void ArmMir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
-                                 RegLocation rl_src) {
+void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 21322a6..072acbe 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -130,8 +130,8 @@
                                        int first_bit, int second_bit);
     void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
-    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
 
     // Required for target - single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 6fa8a4a..28b747b 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -43,8 +43,7 @@
  *   br    r_base
  * quit:
  */
-void Arm64Mir2Lir::GenSparseSwitch(MIR* mir, uint32_t table_offset,
-                                   RegLocation rl_src) {
+void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpSparseSwitchTable(table);
@@ -96,8 +95,7 @@
 }
 
 
-void Arm64Mir2Lir::GenPackedSwitch(MIR* mir, uint32_t table_offset,
-                                   RegLocation rl_src) {
+void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
@@ -322,6 +320,11 @@
   LockTemp(rs_xIP0);
   LockTemp(rs_xIP1);
 
+  /* TUNING:
+   * Use AllocTemp() and reuse LR if possible to give us the freedom on adjusting the number
+   * of temp registers.
+   */
+
   /*
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
@@ -339,16 +342,15 @@
       // Load stack limit
       LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
     } else {
-      // TODO(Arm64) Implement implicit checks.
       // Implicit stack overflow check.
       // Generate a load from [sp, #-framesize].  If this is in the stack
       // redzone we will get a segmentation fault.
-      // Load32Disp(rs_wSP, -Thread::kStackOverflowReservedBytes, rs_wzr);
-      // MarkPossibleStackOverflowException();
-      //
+
       // TODO: If the frame size is small enough, is it possible to make this a pre-indexed load,
       //       so that we can avoid the following "sub sp" when spilling?
-      LOG(FATAL) << "Implicit stack overflow checks not implemented.";
+      OpRegRegImm(kOpSub, rs_x8, rs_sp, GetStackOverflowReservedBytes(kArm64));
+      LoadWordDisp(rs_x8, 0, rs_x8);
+      MarkPossibleStackOverflowException();
     }
   }
 
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index b182cc0..2cd24c6 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -196,8 +196,8 @@
                                      int first_bit, int second_bit) OVERRIDE;
   void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
-  void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
 
   // Required for target - single operation generators.
   LIR* OpUnconditionalBranch(LIR* target) OVERRIDE;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 2a51b49..511297c 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -394,6 +394,18 @@
   return nullptr;
 }
 
+/* Search the existing constants in the literal pool for an exact class match */
+LIR* Mir2Lir::ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx) {
+  while (data_target) {
+    if (static_cast<uint32_t>(data_target->operands[0]) == type_idx &&
+        UnwrapPointer(data_target->operands[1]) == &dex_file) {
+      return data_target;
+    }
+    data_target = data_target->next;
+  }
+  return nullptr;
+}
+
 /*
  * The following are building blocks to insert constants into the pool or
  * instruction streams.
@@ -492,12 +504,15 @@
   data_lir = class_literal_list_;
   while (data_lir != NULL) {
     uint32_t target_method_idx = data_lir->operands[0];
+    const DexFile* class_dex_file =
+      reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
     cu_->compiler_driver->AddClassPatch(cu_->dex_file,
                                         cu_->class_def_idx,
                                         cu_->method_idx,
                                         target_method_idx,
+                                        class_dex_file,
                                         code_buffer_.size());
-    const DexFile::TypeId& target_method_id = cu_->dex_file->GetTypeId(target_method_idx);
+    const DexFile::TypeId& target_method_id = class_dex_file->GetTypeId(target_method_idx);
     // unique value based on target to ensure code deduplication works
     PushPointer(code_buffer_, &target_method_id, cu_->target64);
     data_lir = NEXT_LIR(data_lir);
@@ -1222,11 +1237,13 @@
   DCHECK_NE(cu_->instruction_set, kMips) << reinterpret_cast<void*>(data_target);
 }
 
-void Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+void Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                            SpecialTargetRegister symbolic_reg) {
   // Use the literal pool and a PC-relative load from a data word.
-  LIR* data_target = ScanLiteralPool(class_literal_list_, type_idx, 0);
+  LIR* data_target = ScanLiteralPoolClass(class_literal_list_, dex_file, type_idx);
   if (data_target == nullptr) {
     data_target = AddWordData(&class_literal_list_, type_idx);
+    data_target->operands[1] = WrapPointer(const_cast<DexFile*>(&dex_file));
   }
   // Loads a Class pointer, which is a reference as it lives in the heap.
   LIR* load_pc_rel = OpPcRelLoad(TargetReg(symbolic_reg, kRef), data_target);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 7abf3e7..dbceaff 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -53,7 +53,7 @@
     true,   // kIntrinsicRint
     true,   // kIntrinsicRoundFloat
     true,   // kIntrinsicRoundDouble
-    false,  // kIntrinsicGet
+    false,  // kIntrinsicReferenceGet
     false,  // kIntrinsicCharAt
     false,  // kIntrinsicCompareTo
     false,  // kIntrinsicIsEmptyOrLength
@@ -85,7 +85,7 @@
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRint], Rint_must_be_static);
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRoundFloat], RoundFloat_must_be_static);
 COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicRoundDouble], RoundDouble_must_be_static);
-COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicGet], Get_must_not_be_static);
+COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicReferenceGet], Get_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCharAt], CharAt_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCompareTo], CompareTo_must_not_be_static);
 COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], IsEmptyOrLength_must_not_be_static);
@@ -169,7 +169,7 @@
     "floor",                 // kNameCacheFloor
     "rint",                  // kNameCacheRint
     "round",                 // kNameCacheRound
-    "get",                   // kNameCacheGet
+    "get",                   // kNameCacheReferenceGet
     "charAt",                // kNameCacheCharAt
     "compareTo",             // kNameCacheCompareTo
     "isEmpty",               // kNameCacheIsEmpty
@@ -339,7 +339,7 @@
     INTRINSIC(JavaLangMath,       Round, D_J, kIntrinsicRoundDouble, 0),
     INTRINSIC(JavaLangStrictMath, Round, D_J, kIntrinsicRoundDouble, 0),
 
-    INTRINSIC(JavaLangRefReference, Get, _Object, kIntrinsicGet, 0),
+    INTRINSIC(JavaLangRefReference, ReferenceGet, _Object, kIntrinsicReferenceGet, 0),
 
     INTRINSIC(JavaLangString, CharAt, I_C, kIntrinsicCharAt, 0),
     INTRINSIC(JavaLangString, CompareTo, String_I, kIntrinsicCompareTo, 0),
@@ -471,8 +471,8 @@
       return backend->GenInlinedRound(info, false /* is_double */);
     case kIntrinsicRoundDouble:
       return backend->GenInlinedRound(info, true /* is_double */);
-    case kIntrinsicGet:
-      return backend->GenInlinedGet(info);
+    case kIntrinsicReferenceGet:
+      return backend->GenInlinedReferenceGet(info);
     case kIntrinsicCharAt:
       return backend->GenInlinedCharAt(info);
     case kIntrinsicCompareTo:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index 1bd3c48..b875e2b 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -145,7 +145,7 @@
       kNameCacheFloor,
       kNameCacheRint,
       kNameCacheRound,
-      kNameCacheGet,
+      kNameCacheReferenceGet,
       kNameCacheCharAt,
       kNameCacheCompareTo,
       kNameCacheIsEmpty,
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 0054f34..3f22913 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -361,7 +361,7 @@
                                    &direct_type_ptr, &is_finalizable)) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        LoadClassType(type_idx, kArg0);
+        LoadClassType(*dex_file, type_idx, kArg0);
         CallRuntimeHelperRegMethodRegLocation(kQuickAllocArrayResolved, TargetReg(kArg0, kNotWide),
                                               rl_src, true);
       } else {
@@ -961,7 +961,7 @@
                                    !is_finalizable) {
       // The fast path.
       if (!use_direct_type_ptr) {
-        LoadClassType(type_idx, kArg0);
+        LoadClassType(*dex_file, type_idx, kArg0);
         if (!is_type_initialized) {
           CallRuntimeHelperRegMethod(kQuickAllocObjectResolved, TargetReg(kArg0, kRef), true);
         } else {
@@ -2008,4 +2008,92 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void Mir2Lir::GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  const uint16_t entries = table[1];
+  // Chained cmp-and-branch.
+  const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
+  int32_t current_key = as_int32[0];
+  const int32_t* targets = &as_int32[1];
+  rl_src = LoadValue(rl_src, kCoreReg);
+  int i = 0;
+  for (; i < entries; i++, current_key++) {
+    if (!InexpensiveConstantInt(current_key, Instruction::Code::IF_EQ)) {
+      // Switch to using a temp and add.
+      break;
+    }
+    BasicBlock* case_block =
+        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
+    OpCmpImmBranch(kCondEq, rl_src.reg, current_key, &block_label_list_[case_block->id]);
+  }
+  if (i < entries) {
+    // The rest do not seem to be inexpensive. Try to allocate a temp and use add.
+    RegStorage key_temp = AllocTypedTemp(false, kCoreReg, false);
+    if (key_temp.Valid()) {
+      LoadConstantNoClobber(key_temp, current_key);
+      for (; i < entries - 1; i++, current_key++) {
+        BasicBlock* case_block =
+            mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
+        OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]);
+        OpRegImm(kOpAdd, key_temp, 1);  // Increment key.
+      }
+      BasicBlock* case_block =
+          mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
+      OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]);
+    } else {
+      // No free temp, just finish the old loop.
+      for (; i < entries; i++, current_key++) {
+        BasicBlock* case_block =
+            mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
+        OpCmpImmBranch(kCondEq, rl_src.reg, current_key, &block_label_list_[case_block->id]);
+      }
+    }
+  }
+}
+
+void Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpSparseSwitchTable(table);
+  }
+
+  const uint16_t entries = table[1];
+  if (entries <= kSmallSwitchThreshold) {
+    GenSmallPackedSwitch(mir, table_offset, rl_src);
+  } else {
+    // Use the backend-specific implementation.
+    GenLargePackedSwitch(mir, table_offset, rl_src);
+  }
+}
+
+void Mir2Lir::GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  const uint16_t entries = table[1];
+  // Chained cmp-and-branch.
+  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
+  const int32_t* targets = &keys[entries];
+  rl_src = LoadValue(rl_src, kCoreReg);
+  for (int i = 0; i < entries; i++) {
+    int key = keys[i];
+    BasicBlock* case_block =
+        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
+    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block->id]);
+  }
+}
+
+void Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
+  if (cu_->verbose) {
+    DumpSparseSwitchTable(table);
+  }
+
+  const uint16_t entries = table[1];
+  if (entries <= kSmallSwitchThreshold) {
+    GenSmallSparseSwitch(mir, table_offset, rl_src);
+  } else {
+    // Use the backend-specific implementation.
+    GenLargeSparseSwitch(mir, table_offset, rl_src);
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index e70b0c5..3cfc9a6 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -25,10 +25,8 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
-#include "mirror/reference-inl.h"
 #include "mirror/string.h"
 #include "mir_to_lir-inl.h"
-#include "scoped_thread_state_change.h"
 #include "x86/codegen_x86.h"
 
 namespace art {
@@ -1129,63 +1127,32 @@
   return res;
 }
 
-bool Mir2Lir::GenInlinedGet(CallInfo* info) {
+bool Mir2Lir::GenInlinedReferenceGet(CallInfo* info) {
   if (cu_->instruction_set == kMips) {
     // TODO - add Mips implementation
     return false;
   }
 
-  // the refrence class is stored in the image dex file which might not be the same as the cu's
-  // dex file. Query the reference class for the image dex file then reset to starting dex file
-  // in after loading class type.
-  uint16_t type_idx = 0;
-  const DexFile* ref_dex_file = nullptr;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    type_idx = mirror::Reference::GetJavaLangRefReference()->GetDexTypeIndex();
-    ref_dex_file = mirror::Reference::GetJavaLangRefReference()->GetDexCache()->GetDexFile();
-  }
-  CHECK(LIKELY(ref_dex_file != nullptr));
-
-  // address is either static within the image file, or needs to be patched up after compilation.
-  bool unused_type_initialized;
   bool use_direct_type_ptr;
   uintptr_t direct_type_ptr;
-  bool is_finalizable;
-  const DexFile* old_dex = cu_->dex_file;
-  cu_->dex_file = ref_dex_file;
+  ClassReference ref;
+  if (!cu_->compiler_driver->CanEmbedReferenceTypeInCode(&ref,
+        &use_direct_type_ptr, &direct_type_ptr)) {
+    return false;
+  }
+
   RegStorage reg_class = TargetReg(kArg1, kRef);
   Clobber(reg_class);
   LockTemp(reg_class);
-  if (!cu_->compiler_driver->CanEmbedTypeInCode(*ref_dex_file, type_idx, &unused_type_initialized,
-                                                &use_direct_type_ptr, &direct_type_ptr,
-                                                &is_finalizable) || is_finalizable) {
-    cu_->dex_file = old_dex;
-    // address is not known and post-compile patch is not possible, cannot insert intrinsic.
-    return false;
-  }
   if (use_direct_type_ptr) {
     LoadConstant(reg_class, direct_type_ptr);
-  } else if (cu_->dex_file == old_dex) {
-    // TODO: Bug 16656190 If cu_->dex_file != old_dex the patching could retrieve the wrong class
-    // since the load class is indexed only by the type_idx. We should include which dex file a
-    // class is from in the LoadClassType LIR.
-    LoadClassType(type_idx, kArg1);
   } else {
-    cu_->dex_file = old_dex;
-    return false;
+    uint16_t type_idx = ref.first->GetClassDef(ref.second).class_idx_;
+    LoadClassType(*ref.first, type_idx, kArg1);
   }
-  cu_->dex_file = old_dex;
 
-  // get the offset for flags in reference class.
-  uint32_t slow_path_flag_offset = 0;
-  uint32_t disable_flag_offset = 0;
-  {
-    ScopedObjectAccess soa(Thread::Current());
-    mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference();
-    slow_path_flag_offset = reference_class->GetSlowPathFlagOffset().Uint32Value();
-    disable_flag_offset = reference_class->GetDisableIntrinsicFlagOffset().Uint32Value();
-  }
+  uint32_t slow_path_flag_offset = cu_->compiler_driver->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = cu_->compiler_driver->GetReferenceDisableFlagOffset();
   CHECK(slow_path_flag_offset && disable_flag_offset &&
         (slow_path_flag_offset != disable_flag_offset));
 
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index eec2b32..e0f4691 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -200,7 +200,7 @@
     /* Initialize alias list */
     alias_list.clear();
     ResourceMask alias_reg_list_mask = kEncodeNone;
-    if (!this_mem_mask.Intersects(kEncodeLiteral)) {
+    if (!this_mem_mask.Intersects(kEncodeMem) && !this_mem_mask.Intersects(kEncodeLiteral)) {
       alias_list.push_back(dest_reg_id);
       SetupRegMask(&alias_reg_list_mask, dest_reg_id);
     }
@@ -248,7 +248,7 @@
         bool is_check_lir_load = check_flags & IS_LOAD;
         bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id);
 
-        if (alias_mem_mask.Equals(kEncodeLiteral)) {
+        if (!alias_mem_mask.Intersects(kEncodeMem) && alias_mem_mask.Equals(kEncodeLiteral)) {
           DCHECK(check_flags & IS_LOAD);
           /* Same value && same register type */
           if (reg_compatible && (this_lir->target == check_lir->target)) {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 9adddf0..4577a4c 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -61,8 +61,7 @@
  * done:
  *
  */
-void MipsMir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset,
-                                  RegLocation rl_src) {
+void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpSparseSwitchTable(table);
@@ -139,8 +138,7 @@
  *   jr    rRA
  * done:
  */
-void MipsMir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset,
-                                  RegLocation rl_src) {
+void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index bd0c020..43cbde7 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -128,8 +128,8 @@
                                        int first_bit, int second_bit);
     void GenNegDouble(RegLocation rl_dest, RegLocation rl_src);
     void GenNegFloat(RegLocation rl_dest, RegLocation rl_src);
-    void GenPackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
-    void GenSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
+    void GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
+    void GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src);
     bool GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
 
     // Required for target - single operation generators.
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4ed9929..2221bb5 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -229,6 +229,9 @@
     static constexpr bool kFailOnSizeError = true && kIsDebugBuild;
     static constexpr bool kReportSizeError = true && kIsDebugBuild;
 
+    // TODO: If necessary, this could be made target-dependent.
+    static constexpr uint16_t kSmallSwitchThreshold = 5;
+
     /*
      * Auxiliary information describing the location of data embedded in the Dalvik
      * byte code stream.
@@ -682,6 +685,7 @@
     LIR* ScanLiteralPool(LIR* data_target, int value, unsigned int delta);
     LIR* ScanLiteralPoolWide(LIR* data_target, int val_lo, int val_hi);
     LIR* ScanLiteralPoolMethod(LIR* data_target, const MethodReference& method);
+    LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx);
     LIR* AddWordData(LIR* *constant_list_p, int value);
     LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi);
     void ProcessSwitchTables();
@@ -955,7 +959,7 @@
      */
     RegLocation InlineTargetWide(CallInfo* info);
 
-    bool GenInlinedGet(CallInfo* info);
+    bool GenInlinedReferenceGet(CallInfo* info);
     virtual bool GenInlinedCharAt(CallInfo* info);
     bool GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty);
     virtual bool GenInlinedReverseBits(CallInfo* info, OpSize size);
@@ -1110,11 +1114,13 @@
 
     /*
      * @brief Load the Class* of a Dex Class type into the register.
+     * @param dex DexFile that contains the class type.
      * @param type How the method will be invoked.
      * @param register that will contain the code address.
      * @note register will be passed to TargetReg to get physical register.
      */
-    virtual void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+    virtual void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                               SpecialTargetRegister symbolic_reg);
 
     // Routines that work for the generic case, but may be overriden by target.
     /*
@@ -1355,8 +1361,19 @@
                                                int first_bit, int second_bit) = 0;
     virtual void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) = 0;
     virtual void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) = 0;
-    virtual void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0;
-    virtual void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0;
+
+    // Create code for switch statements. Will decide between short and long versions below.
+    void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+
+    // Potentially backend-specific versions of switch instructions for shorter switch statements.
+    // The default implementation will create a chained compare-and-branch.
+    virtual void GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    virtual void GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
+    // Backend-specific versions of switch instructions for longer switch statements.
+    virtual void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0;
+    virtual void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) = 0;
+
     virtual void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
     virtual void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 15aae9e..f5f8671 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -27,8 +27,7 @@
  * The sparse table in the literal pool is an array of <key,displacement>
  * pairs.
  */
-void X86Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset,
-                                 RegLocation rl_src) {
+void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpSparseSwitchTable(table);
@@ -61,8 +60,7 @@
  * jmp  r_start_of_method
  * done:
  */
-void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset,
-                                 RegLocation rl_src) {
+void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
   const uint16_t* table = cu_->insns + current_dalvik_offset_ + table_offset;
   if (cu_->verbose) {
     DumpPackedSwitchTable(table);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 266191a..d3ed48d 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -246,8 +246,8 @@
                                      int first_bit, int second_bit) OVERRIDE;
   void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
-  void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
-  void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
 
   /**
    * @brief Implement instanceof a final class with x86 specific code.
@@ -319,11 +319,13 @@
 
   /*
    * @brief Load the Class* of a Dex Class type into the register.
+   * @param dex DexFile that contains the class type.
    * @param type How the method will be invoked.
    * @param register that will contain the code address.
    * @note register will be passed to TargetReg to get physical register.
    */
-  void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) OVERRIDE;
+  void LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                     SpecialTargetRegister symbolic_reg) OVERRIDE;
 
   void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
 
@@ -355,12 +357,6 @@
   void InstallLiteralPools() OVERRIDE;
 
   /*
-   * @brief Generate the debug_frame CFI information.
-   * @returns pointer to vector containing CFE information
-   */
-  static std::vector<uint8_t>* ReturnCommonCallFrameInformation(bool is_x86_64);
-
-  /*
    * @brief Generate the debug_frame FDE information.
    * @returns pointer to vector containing CFE information
    */
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index bd2e0f3..69f3e67 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -971,19 +971,21 @@
   method_address_insns_.Insert(move);
 }
 
-void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
+void X86Mir2Lir::LoadClassType(const DexFile& dex_file, uint32_t type_idx,
+                               SpecialTargetRegister symbolic_reg) {
   /*
    * For x86, just generate a 32 bit move immediate instruction, that will be filled
    * in at 'link time'.  For now, put a unique value based on target to ensure that
    * code deduplication works.
    */
-  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
+  const DexFile::TypeId& id = dex_file.GetTypeId(type_idx);
   uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);
 
   // Generate the move instruction with the unique pointer and save index and type.
   LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI,
                      TargetReg(symbolic_reg, kNotWide).GetReg(),
-                     static_cast<int>(ptr), type_idx);
+                     static_cast<int>(ptr), type_idx,
+                     WrapPointer(const_cast<DexFile*>(&dex_file)));
   AppendLIR(move);
   class_type_address_insns_.Insert(move);
 }
@@ -1068,12 +1070,16 @@
   for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
       LIR* p = class_type_address_insns_.Get(i);
       DCHECK_EQ(p->opcode, kX86Mov32RI);
+
+      const DexFile* class_dex_file =
+        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));
       uint32_t target_method_idx = p->operands[2];
 
       // The offset to patch is the last 4 bytes of the instruction.
       int patch_offset = p->offset + p->flags.size - 4;
       cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
-                                          cu_->method_idx, target_method_idx, patch_offset);
+                                          cu_->method_idx, target_method_idx, class_dex_file,
+                                          patch_offset);
   }
 
   // And now the PC-relative calls to methods.
@@ -1437,11 +1443,6 @@
   }
 }
 
-
-std::vector<uint8_t>* X86CFIInitialization(bool is_x86_64) {
-  return X86Mir2Lir::ReturnCommonCallFrameInformation(is_x86_64);
-}
-
 static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
   uint8_t buffer[12];
   uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
@@ -1458,84 +1459,6 @@
   }
 }
 
-std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation(bool is_x86_64) {
-  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
-
-  // Length (will be filled in later in this routine).
-  PushWord(*cfi_info, 0);
-
-  // CIE id: always 0.
-  PushWord(*cfi_info, 0);
-
-  // Version: always 1.
-  cfi_info->push_back(0x01);
-
-  // Augmentation: 'zR\0'
-  cfi_info->push_back(0x7a);
-  cfi_info->push_back(0x52);
-  cfi_info->push_back(0x0);
-
-  // Code alignment: 1.
-  EncodeUnsignedLeb128(*cfi_info, 1);
-
-  // Data alignment.
-  if (is_x86_64) {
-    EncodeSignedLeb128(*cfi_info, -8);
-  } else {
-    EncodeSignedLeb128(*cfi_info, -4);
-  }
-
-  // Return address register.
-  if (is_x86_64) {
-    // R16(RIP)
-    cfi_info->push_back(0x10);
-  } else {
-    // R8(EIP)
-    cfi_info->push_back(0x08);
-  }
-
-  // Augmentation length: 1.
-  cfi_info->push_back(1);
-
-  // Augmentation data: 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
-  cfi_info->push_back(0x03);
-
-  // Initial instructions.
-  if (is_x86_64) {
-    // DW_CFA_def_cfa R7(RSP) 8.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x07);
-    cfi_info->push_back(0x08);
-
-    // DW_CFA_offset R16(RIP) 1 (* -8).
-    cfi_info->push_back(0x90);
-    cfi_info->push_back(0x01);
-  } else {
-    // DW_CFA_def_cfa R4(ESP) 4.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x04);
-    cfi_info->push_back(0x04);
-
-    // DW_CFA_offset R8(EIP) 1 (* -4).
-    cfi_info->push_back(0x88);
-    cfi_info->push_back(0x01);
-  }
-
-  // Padding to a multiple of 4
-  while ((cfi_info->size() & 3) != 0) {
-    // DW_CFA_nop is encoded as 0.
-    cfi_info->push_back(0);
-  }
-
-  // Set the length of the CIE inside the generated bytes.
-  uint32_t length = cfi_info->size() - 4;
-  (*cfi_info)[0] = length;
-  (*cfi_info)[1] = length >> 8;
-  (*cfi_info)[2] = length >> 16;
-  (*cfi_info)[3] = length >> 24;
-  return cfi_info;
-}
-
 static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) {
   if (is_x86_64) {
     switch (art_reg_id) {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index ed126ad..1233a0d 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -353,7 +353,6 @@
       compiler_enable_auto_elf_loading_(NULL),
       compiler_get_method_code_addr_(NULL),
       support_boot_image_fixup_(instruction_set != kMips),
-      cfi_info_(nullptr),
       dedupe_code_("dedupe code"),
       dedupe_mapping_table_("dedupe mapping table"),
       dedupe_vmap_table_("dedupe vmap table"),
@@ -376,11 +375,6 @@
     CHECK(image_classes_.get() == nullptr);
   }
 
-  // Are we generating CFI information?
-  if (compiler_options->GetGenerateGDBInformation()) {
-    cfi_info_.reset(compiler_->GetCallFrameInformationInitialization(*this));
-  }
-
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
     profile_present_ = profile_file_.LoadFile(profile_file);
@@ -597,7 +591,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, thread_pool, timings);
+    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -811,11 +805,15 @@
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
   if (IsImage() &&
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
-    if (kIsDebugBuild) {
+    {
       ScopedObjectAccess soa(Thread::Current());
       mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file);
       mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-      CHECK(resolved_class != NULL);
+      if (resolved_class == nullptr) {
+        // Erroneous class.
+        stats_->TypeNotInDexCache();
+        return false;
+      }
     }
     stats_->TypeInDexCache();
     return true;
@@ -969,6 +967,43 @@
   }
 }
 
+bool CompilerDriver::CanEmbedReferenceTypeInCode(ClassReference* ref,
+                                                 bool* use_direct_ptr,
+                                                 uintptr_t* direct_type_ptr) {
+  CHECK(ref != nullptr);
+  CHECK(use_direct_ptr != nullptr);
+  CHECK(direct_type_ptr != nullptr);
+
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference();
+  bool is_initialized;
+  bool unused_finalizable;
+  // Make sure we have a finished Reference class object before attempting to use it.
+  if (!CanEmbedTypeInCode(*reference_class->GetDexCache()->GetDexFile(),
+                          reference_class->GetDexTypeIndex(), &is_initialized,
+                          use_direct_ptr, direct_type_ptr, &unused_finalizable) ||
+      !is_initialized) {
+    return false;
+  }
+  ref->first = &reference_class->GetDexFile();
+  ref->second = reference_class->GetDexClassDefIndex();
+  return true;
+}
+
+uint32_t CompilerDriver::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
 void CompilerDriver::ProcessedInstanceField(bool resolved) {
   if (!resolved) {
     stats_->UnresolvedInstanceField();
@@ -1340,12 +1375,14 @@
                                     uint16_t referrer_class_def_idx,
                                     uint32_t referrer_method_idx,
                                     uint32_t target_type_idx,
+                                    const DexFile* target_type_dex_file,
                                     size_t literal_offset) {
   MutexLock mu(Thread::Current(), compiled_methods_lock_);
   classes_to_patch_.push_back(new TypePatchInformation(dex_file,
                                                        referrer_class_def_idx,
                                                        referrer_method_idx,
                                                        target_type_idx,
+                                                       target_type_dex_file,
                                                        literal_offset));
 }
 
@@ -1357,12 +1394,14 @@
                              jobject class_loader,
                              CompilerDriver* compiler,
                              const DexFile* dex_file,
+                             const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool)
     : index_(0),
       class_linker_(class_linker),
       class_loader_(class_loader),
       compiler_(compiler),
       dex_file_(dex_file),
+      dex_files_(dex_files),
       thread_pool_(thread_pool) {}
 
   ClassLinker* GetClassLinker() const {
@@ -1384,6 +1423,10 @@
     return dex_file_;
   }
 
+  const std::vector<const DexFile*>& GetDexFiles() const {
+    return dex_files_;
+  }
+
   void ForAll(size_t begin, size_t end, Callback callback, size_t work_units) {
     Thread* self = Thread::Current();
     self->AssertNoPendingException();
@@ -1441,11 +1484,24 @@
   const jobject class_loader_;
   CompilerDriver* const compiler_;
   const DexFile* const dex_file_;
+  const std::vector<const DexFile*>& dex_files_;
   ThreadPool* const thread_pool_;
 
   DISALLOW_COPY_AND_ASSIGN(ParallelCompilationManager);
 };
 
+static bool SkipClassCheckClassPath(const char* descriptor, const DexFile& dex_file,
+                                    const std::vector<const DexFile*>& classpath) {
+  DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, classpath);
+  CHECK(pair.second != NULL);
+  if (pair.first != &dex_file) {
+    LOG(WARNING) << "Skipping class " << descriptor << " from " << dex_file.GetLocation()
+                 << " previously found in " << pair.first->GetLocation();
+    return true;
+  }
+  return false;
+}
+
 // Return true if the class should be skipped during compilation.
 //
 // The first case where we skip is for redundant class definitions in
@@ -1454,20 +1510,23 @@
 // The second case where we skip is when an app bundles classes found
 // in the boot classpath. Since at runtime we will select the class from
 // the boot classpath, we ignore the one from the app.
+//
+// The third case is if the app itself has the class defined in multiple dex files. Then we skip
+// it if it is not the first occurrence.
 static bool SkipClass(ClassLinker* class_linker, jobject class_loader, const DexFile& dex_file,
+                      const std::vector<const DexFile*>& dex_files,
                       const DexFile::ClassDef& class_def) {
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
+
   if (class_loader == NULL) {
-    DexFile::ClassPathEntry pair = DexFile::FindInClassPath(descriptor, class_linker->GetBootClassPath());
-    CHECK(pair.second != NULL);
-    if (pair.first != &dex_file) {
-      LOG(WARNING) << "Skipping class " << descriptor << " from " << dex_file.GetLocation()
-                   << " previously found in " << pair.first->GetLocation();
-      return true;
-    }
-    return false;
+    return SkipClassCheckClassPath(descriptor, dex_file, class_linker->GetBootClassPath());
   }
-  return class_linker->IsInBootClassPath(descriptor);
+
+  if (class_linker->IsInBootClassPath(descriptor)) {
+    return true;
+  }
+
+  return SkipClassCheckClassPath(descriptor, dex_file, dex_files);
 }
 
 // A fast version of SkipClass above if the class pointer is available
@@ -1525,7 +1584,7 @@
   // definitions, since many of them many never be referenced by
   // generated code.
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
-  if (!SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
+  if (!SkipClass(class_linker, jclass_loader, dex_file, manager->GetDexFiles(), class_def)) {
     ScopedObjectAccess soa(self);
     StackHandleScope<2> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader(
@@ -1632,13 +1691,15 @@
 }
 
 void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+                                    const std::vector<const DexFile*>& dex_files,
                                     ThreadPool* thread_pool, TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
   // TODO: we could resolve strings here, although the string table is largely filled with class
   //       and method names.
 
-  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
+  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
+                                     thread_pool);
   if (IsImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
@@ -1655,7 +1716,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
-    VerifyDexFile(class_loader, *dex_file, thread_pool, timings);
+    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -1707,10 +1768,12 @@
 }
 
 void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+                                   const std::vector<const DexFile*>& dex_files,
                                    ThreadPool* thread_pool, TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, thread_pool);
+  ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
+                                     thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), VerifyClass, thread_count_);
 }
 
@@ -1800,10 +1863,12 @@
 }
 
 void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+                                       const std::vector<const DexFile*>& dex_files,
                                        ThreadPool* thread_pool, TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, thread_pool);
+  ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
+                                     thread_pool);
   size_t thread_count;
   if (IsImage()) {
     // TODO: remove this when transactional mode supports multithreading.
@@ -1824,7 +1889,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
-    InitializeClasses(class_loader, *dex_file, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -1833,7 +1898,7 @@
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != NULL);
-    CompileDexFile(class_loader, *dex_file, thread_pool, timings);
+    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
 }
 
@@ -1843,7 +1908,7 @@
   const DexFile& dex_file = *manager->GetDexFile();
   const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index);
   ClassLinker* class_linker = manager->GetClassLinker();
-  if (SkipClass(class_linker, jclass_loader, dex_file, class_def)) {
+  if (SkipClass(class_linker, jclass_loader, dex_file, manager->GetDexFiles(), class_def)) {
     return;
   }
   ClassReference ref(&dex_file, class_def_index);
@@ -1912,10 +1977,11 @@
 }
 
 void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+                                    const std::vector<const DexFile*>& dex_files,
                                     ThreadPool* thread_pool, TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
-                                     &dex_file, thread_pool);
+                                     &dex_file, dex_files, thread_pool);
   context.ForAll(0, dex_file.NumClassDefs(), CompilerDriver::CompileClass, thread_count_);
 }
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 6dae398..2a5cdb9 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -213,6 +213,12 @@
                           bool* is_type_initialized, bool* use_direct_type_ptr,
                           uintptr_t* direct_type_ptr, bool* out_is_finalizable);
 
+  // Query methods for the java.lang.ref.Reference class.
+  bool CanEmbedReferenceTypeInCode(ClassReference* ref,
+                                   bool* use_direct_type_ptr, uintptr_t* direct_type_ptr);
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
   // Get the DexCache for the
   mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -356,6 +362,7 @@
                      uint16_t referrer_class_def_idx,
                      uint32_t referrer_method_idx,
                      uint32_t target_method_idx,
+                     const DexFile* target_dex_file,
                      size_t literal_offset)
       LOCKS_EXCLUDED(compiled_methods_lock_);
 
@@ -402,10 +409,6 @@
     return dump_passes_;
   }
 
-  bool DidIncludeDebugSymbols() const {
-    return compiler_options_->GetIncludeDebugSymbols();
-  }
-
   CumulativeLogger* GetTimingsLogger() const {
     return timings_logger_;
   }
@@ -549,6 +552,10 @@
 
   class TypePatchInformation : public PatchInformation {
    public:
+    const DexFile& GetTargetTypeDexFile() const {
+      return *target_type_dex_file_;
+    }
+
     uint32_t GetTargetTypeIdx() const {
       return target_type_idx_;
     }
@@ -565,13 +572,15 @@
                          uint16_t referrer_class_def_idx,
                          uint32_t referrer_method_idx,
                          uint32_t target_type_idx,
+                         const DexFile* target_type_dex_file,
                          size_t literal_offset)
         : PatchInformation(dex_file, referrer_class_def_idx,
                            referrer_method_idx, literal_offset),
-          target_type_idx_(target_type_idx) {
+          target_type_idx_(target_type_idx), target_type_dex_file_(target_type_dex_file) {
     }
 
     const uint32_t target_type_idx_;
+    const DexFile* target_type_dex_file_;
 
     friend class CompilerDriver;
     DISALLOW_COPY_AND_ASSIGN(TypePatchInformation);
@@ -599,14 +608,6 @@
   std::vector<uint8_t>* DeduplicateGCMap(const std::vector<uint8_t>& code);
   std::vector<uint8_t>* DeduplicateCFIInfo(const std::vector<uint8_t>* cfi_info);
 
-  /*
-   * @brief return the pointer to the Call Frame Information.
-   * @return pointer to call frame information for this compilation.
-   */
-  std::vector<uint8_t>* GetCallFrameInformation() const {
-    return cfi_info_.get();
-  }
-
   ProfileFile profile_file_;
   bool profile_present_;
 
@@ -658,12 +659,14 @@
                ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+                      const std::vector<const DexFile*>& dex_files,
                       ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
   void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
               ThreadPool* thread_pool, TimingLogger* timings);
   void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+                     const std::vector<const DexFile*>& dex_files,
                      ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
@@ -671,6 +674,7 @@
                          ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+                         const std::vector<const DexFile*>& dex_files,
                          ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_, compiled_classes_lock_);
 
@@ -681,6 +685,7 @@
   void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                ThreadPool* thread_pool, TimingLogger* timings);
   void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+                      const std::vector<const DexFile*>& dex_files,
                       ThreadPool* thread_pool, TimingLogger* timings)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
@@ -766,9 +771,6 @@
 
   bool support_boot_image_fixup_;
 
-  // Call Frame Information, which might be generated to help stack tracebacks.
-  std::unique_ptr<std::vector<uint8_t>> cfi_info_;
-
   // DeDuplication data structures, these own the corresponding byte arrays.
   class DedupeHashFunc {
    public:
diff --git a/compiler/elf_patcher.cc b/compiler/elf_patcher.cc
index 137110f..9ae755d 100644
--- a/compiler/elf_patcher.cc
+++ b/compiler/elf_patcher.cc
@@ -99,11 +99,13 @@
 mirror::Class* ElfPatcher::GetTargetType(const CompilerDriver::TypePatchInformation* patch) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(patch->GetDexFile())));
-  mirror::Class* klass = class_linker->ResolveType(patch->GetDexFile(), patch->GetTargetTypeIdx(),
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
+          patch->GetTargetTypeDexFile())));
+  mirror::Class* klass = class_linker->ResolveType(patch->GetTargetTypeDexFile(),
+                                                   patch->GetTargetTypeIdx(),
                                                    dex_cache, NullHandle<mirror::ClassLoader>());
   CHECK(klass != NULL)
-    << patch->GetDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
+    << patch->GetTargetTypeDexFile().GetLocation() << " " << patch->GetTargetTypeIdx();
   CHECK(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx()) == klass)
     << patch->GetDexFile().GetLocation() << " " << patch->GetReferrerMethodIdx() << " "
     << PrettyClass(dex_cache->GetResolvedTypes()->Get(patch->GetTargetTypeIdx())) << " "
@@ -152,7 +154,7 @@
     }
     if (patch->IsType()) {
       const CompilerDriver::TypePatchInformation* tpatch = patch->AsType();
-      const DexFile::TypeId& id = tpatch->GetDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
+      const DexFile::TypeId& id = tpatch->GetTargetTypeDexFile().GetTypeId(tpatch->GetTargetTypeIdx());
       uint32_t expected = reinterpret_cast<uintptr_t>(&id) & 0xFFFFFFFF;
       uint32_t actual = *patch_location;
       CHECK(actual == expected || actual == value) << "Patching type failed: " << std::hex
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 1fde12e..12e9401 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -24,6 +24,7 @@
 #include "elf_utils.h"
 #include "file_output_stream.h"
 #include "globals.h"
+#include "leb128.h"
 #include "oat.h"
 #include "oat_writer.h"
 #include "utils.h"
@@ -38,6 +39,25 @@
   return ((binding) << 4) + ((type) & 0xf);
 }
 
+static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) {
+  (*buf)[offset+0] = data;
+  (*buf)[offset+1] = data >> 8;
+  (*buf)[offset+2] = data >> 16;
+  (*buf)[offset+3] = data >> 24;
+}
+
+static void PushWord(std::vector<uint8_t>* buf, int data) {
+  buf->push_back(data & 0xff);
+  buf->push_back((data >> 8) & 0xff);
+  buf->push_back((data >> 16) & 0xff);
+  buf->push_back((data >> 24) & 0xff);
+}
+
+static void PushHalf(std::vector<uint8_t>* buf, int data) {
+  buf->push_back(data & 0xff);
+  buf->push_back((data >> 8) & 0xff);
+}
+
 bool ElfWriterQuick::ElfBuilder::Write() {
   // The basic layout of the elf file. Order may be different in final output.
   // +-------------------------+
@@ -822,42 +842,136 @@
   }
 }
 
+static void EncodeUnsignedLeb128(uint32_t data, std::vector<uint8_t>* dst) {
+  size_t encoded_size = UnsignedLeb128Size(data);
+  size_t cur_index = dst->size();
+  dst->resize(dst->size() + encoded_size);
+  uint8_t* write_pos = &((*dst)[cur_index]);
+  uint8_t* write_pos_after = EncodeUnsignedLeb128(write_pos, data);
+  DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size);
+}
+
+static void EncodeSignedLeb128(int32_t data, std::vector<uint8_t>* dst) {
+  size_t encoded_size = SignedLeb128Size(data);
+  size_t cur_index = dst->size();
+  dst->resize(dst->size() + encoded_size);
+  uint8_t* write_pos = &((*dst)[cur_index]);
+  uint8_t* write_pos_after = EncodeSignedLeb128(write_pos, data);
+  DCHECK_EQ(static_cast<size_t>(write_pos_after - write_pos), encoded_size);
+}
+
+std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
+  std::vector<uint8_t>*cfi_info = new std::vector<uint8_t>;
+
+  // Length (will be filled in later in this routine).
+  PushWord(cfi_info, 0);
+
+  // CIE id: always 0.
+  PushWord(cfi_info, 0);
+
+  // Version: always 1.
+  cfi_info->push_back(0x01);
+
+  // Augmentation: 'zR\0'
+  cfi_info->push_back(0x7a);
+  cfi_info->push_back(0x52);
+  cfi_info->push_back(0x0);
+
+  // Code alignment: 1.
+  EncodeUnsignedLeb128(1, cfi_info);
+
+  // Data alignment.
+  if (is_x86_64) {
+    EncodeSignedLeb128(-8, cfi_info);
+  } else {
+    EncodeSignedLeb128(-4, cfi_info);
+  }
+
+  // Return address register.
+  if (is_x86_64) {
+    // R16(RIP)
+    cfi_info->push_back(0x10);
+  } else {
+    // R8(EIP)
+    cfi_info->push_back(0x08);
+  }
+
+  // Augmentation length: 1.
+  cfi_info->push_back(1);
+
+  // Augmentation data: 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
+  cfi_info->push_back(0x03);
+
+  // Initial instructions.
+  if (is_x86_64) {
+    // DW_CFA_def_cfa R7(RSP) 8.
+    cfi_info->push_back(0x0c);
+    cfi_info->push_back(0x07);
+    cfi_info->push_back(0x08);
+
+    // DW_CFA_offset R16(RIP) 1 (* -8).
+    cfi_info->push_back(0x90);
+    cfi_info->push_back(0x01);
+  } else {
+    // DW_CFA_def_cfa R4(ESP) 4.
+    cfi_info->push_back(0x0c);
+    cfi_info->push_back(0x04);
+    cfi_info->push_back(0x04);
+
+    // DW_CFA_offset R8(EIP) 1 (* -4).
+    cfi_info->push_back(0x88);
+    cfi_info->push_back(0x01);
+  }
+
+  // Padding to a multiple of 4
+  while ((cfi_info->size() & 3) != 0) {
+    // DW_CFA_nop is encoded as 0.
+    cfi_info->push_back(0);
+  }
+
+  // Set the length of the CIE inside the generated bytes.
+  uint32_t length = cfi_info->size() - 4;
+  (*cfi_info)[0] = length;
+  (*cfi_info)[1] = length >> 8;
+  (*cfi_info)[2] = length >> 16;
+  (*cfi_info)[3] = length >> 24;
+  return cfi_info;
+}
+
+std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) {
+  switch (isa) {
+    case kX86:
+      return ConstructCIEFrameX86(false);
+    case kX86_64:
+      return ConstructCIEFrameX86(true);
+
+    default:
+      // Not implemented.
+      return nullptr;
+  }
+}
+
 bool ElfWriterQuick::Write(OatWriter* oat_writer,
                            const std::vector<const DexFile*>& dex_files_unused,
                            const std::string& android_root_unused,
                            bool is_host_unused) {
-  const bool debug = false;
-  const bool add_symbols = oat_writer->DidAddSymbols();
+  constexpr bool debug = false;
   const OatHeader& oat_header = oat_writer->GetOatHeader();
   Elf32_Word oat_data_size = oat_header.GetExecutableOffset();
   uint32_t oat_exec_size = oat_writer->GetSize() - oat_data_size;
 
   ElfBuilder builder(oat_writer, elf_file_, compiler_driver_->GetInstructionSet(), 0,
-                     oat_data_size, oat_data_size, oat_exec_size, add_symbols, debug);
+                     oat_data_size, oat_data_size, oat_exec_size,
+                     compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols(),
+                     debug);
 
-  if (add_symbols) {
-    AddDebugSymbols(builder, oat_writer, debug);
-  }
-
-  bool generateDebugInformation = compiler_driver_->GetCallFrameInformation() != nullptr;
-  if (generateDebugInformation) {
-    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder eh_frame(".eh_frame",  SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
-    eh_frame.SetBuffer(*compiler_driver_->GetCallFrameInformation());
-
-    FillInCFIInformation(oat_writer, debug_info.GetBuffer(),
-                         debug_abbrev.GetBuffer(), debug_str.GetBuffer());
-    builder.RegisterRawSection(debug_info);
-    builder.RegisterRawSection(debug_abbrev);
-    builder.RegisterRawSection(eh_frame);
-    builder.RegisterRawSection(debug_str);
+  if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+    WriteDebugSymbols(builder, oat_writer);
   }
 
   if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) {
     ElfRawSectionBuilder oat_patches(".oat_patches", SHT_OAT_PATCH, 0, NULL, 0,
-                                     sizeof(size_t), sizeof(size_t));
+                                     sizeof(uintptr_t), sizeof(uintptr_t));
     ReservePatchSpace(oat_patches.GetBuffer(), debug);
     builder.RegisterRawSection(oat_patches);
   }
@@ -865,32 +979,62 @@
   return builder.Write();
 }
 
-void ElfWriterQuick::AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug) {
+void ElfWriterQuick::WriteDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer) {
+  std::unique_ptr<std::vector<uint8_t>> cfi_info(
+      ConstructCIEFrame(compiler_driver_->GetInstructionSet()));
+
+  // Iterate over the compiled methods.
   const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
   ElfSymtabBuilder* symtab = &builder.symtab_builder_;
   for (auto it = method_info.begin(); it != method_info.end(); ++it) {
     symtab->AddSymbol(it->method_name_, &builder.text_builder_, it->low_pc_, true,
                       it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+
+    // Include CFI for compiled method, if possible.
+    if (cfi_info.get() != nullptr) {
+      DCHECK(it->compiled_method_ != nullptr);
+
+      // Copy in the FDE, if present
+      const std::vector<uint8_t>* fde = it->compiled_method_->GetCFIInfo();
+      if (fde != nullptr) {
+        // Copy the information into cfi_info and then fix the address in the new copy.
+        int cur_offset = cfi_info->size();
+        cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
+
+        // Set the 'CIE_pointer' field to cur_offset+4.
+        uint32_t CIE_pointer = cur_offset + 4;
+        uint32_t offset_to_update = cur_offset + sizeof(uint32_t);
+        (*cfi_info)[offset_to_update+0] = CIE_pointer;
+        (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
+        (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
+        (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
+
+        // Set the 'initial_location' field to address the start of the method.
+        offset_to_update = cur_offset + 2*sizeof(uint32_t);
+        const uint32_t quick_code_start = it->low_pc_;
+        (*cfi_info)[offset_to_update+0] = quick_code_start;
+        (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
+        (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
+        (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
+      }
+    }
   }
-}
 
-static void UpdateWord(std::vector<uint8_t>*buf, int offset, int data) {
-  (*buf)[offset+0] = data;
-  (*buf)[offset+1] = data >> 8;
-  (*buf)[offset+2] = data >> 16;
-  (*buf)[offset+3] = data >> 24;
-}
+  if (cfi_info.get() != nullptr) {
+    // Now lay down the Elf sections.
+    ElfRawSectionBuilder debug_info(".debug_info",   SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder debug_str(".debug_str",    SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+    ElfRawSectionBuilder eh_frame(".eh_frame",  SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+    eh_frame.SetBuffer(std::move(*cfi_info.get()));
 
-static void PushWord(std::vector<uint8_t>*buf, int data) {
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
-  buf->push_back((data >> 16) & 0xff);
-  buf->push_back((data >> 24) & 0xff);
-}
-
-static void PushHalf(std::vector<uint8_t>*buf, int data) {
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
+    FillInCFIInformation(oat_writer, debug_info.GetBuffer(), debug_abbrev.GetBuffer(),
+                         debug_str.GetBuffer());
+    builder.RegisterRawSection(debug_info);
+    builder.RegisterRawSection(debug_abbrev);
+    builder.RegisterRawSection(eh_frame);
+    builder.RegisterRawSection(debug_str);
+  }
 }
 
 void ElfWriterQuick::FillInCFIInformation(OatWriter* oat_writer,
diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h
index a0d36df..8cfe550 100644
--- a/compiler/elf_writer_quick.h
+++ b/compiler/elf_writer_quick.h
@@ -48,9 +48,7 @@
   ~ElfWriterQuick() {}
 
   class ElfBuilder;
-  void AddDebugSymbols(ElfBuilder& builder,
-                       OatWriter* oat_writer,
-                       bool debug);
+  void WriteDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer);
   void ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug);
 
   class ElfSectionBuilder {
@@ -126,7 +124,7 @@
         : ElfSectionBuilder(sec_name, type, flags, link, info, align, entsize) {}
     ~ElfRawSectionBuilder() {}
     std::vector<uint8_t>* GetBuffer() { return &buf_; }
-    void SetBuffer(std::vector<uint8_t> buf) { buf_ = buf; }
+    void SetBuffer(std::vector<uint8_t>&& buf) { buf_ = buf; }
 
    protected:
     std::vector<uint8_t> buf_;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 9da59ab..1ba5d32 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -357,7 +357,6 @@
         uint32_t thumb_offset = compiled_method->CodeDelta();
         quick_code_offset = offset_ + sizeof(OatQuickMethodHeader) + thumb_offset;
 
-        bool force_debug_capture = false;
         bool deduped = false;
 
         // Deduplicate code arrays.
@@ -400,47 +399,22 @@
           offset_ += code_size;
         }
 
-        uint32_t quick_code_start = quick_code_offset - writer_->oat_header_->GetExecutableOffset();
-        std::vector<uint8_t>* cfi_info = writer_->compiler_driver_->GetCallFrameInformation();
-        if (cfi_info != nullptr) {
-          // Copy in the FDE, if present
-          const std::vector<uint8_t>* fde = compiled_method->GetCFIInfo();
-          if (fde != nullptr) {
-            // Copy the information into cfi_info and then fix the address in the new copy.
-            int cur_offset = cfi_info->size();
-            cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
+        if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+          // Record debug information for this function if we are doing that.
 
-            // Set the 'CIE_pointer' field to cur_offset+4.
-            uint32_t CIE_pointer = cur_offset + 4;
-            uint32_t offset_to_update = cur_offset + sizeof(uint32_t);
-            (*cfi_info)[offset_to_update+0] = CIE_pointer;
-            (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
-            (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
-            (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
-
-            // Set the 'initial_location' field to address the start of the method.
-            offset_to_update = cur_offset + 2*sizeof(uint32_t);
-            (*cfi_info)[offset_to_update+0] = quick_code_start;
-            (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
-            (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
-            (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
-            force_debug_capture = true;
-          }
-        }
-
-
-        if (writer_->compiler_driver_->DidIncludeDebugSymbols() || force_debug_capture) {
-          // Record debug information for this function if we are doing that or
-          // we have CFI and so need it.
           std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
           if (deduped) {
-            // TODO We should place the DEDUPED tag on the first instance of a
-            // deduplicated symbol so that it will show up in a debuggerd crash
-            // report.
+            // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol
+            // so that it will show up in a debuggerd crash report.
             name += " [ DEDUPED ]";
           }
-          writer_->method_info_.push_back(DebugInfo(name, quick_code_start,
-                                                    quick_code_start + code_size));
+
+          const uint32_t quick_code_start = quick_code_offset -
+              writer_->oat_header_->GetExecutableOffset();
+          writer_->method_info_.push_back(DebugInfo(name,
+                                                    quick_code_start,
+                                                    quick_code_start + code_size,
+                                                    compiled_method));
         }
       }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 945048e..ef5fd6b 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -30,6 +30,7 @@
 namespace art {
 
 class BitVector;
+class CompiledMethod;
 class OutputStream;
 
 // OatHeader         variable length with count of D OatDexFiles
@@ -97,22 +98,21 @@
   ~OatWriter();
 
   struct DebugInfo {
-    DebugInfo(const std::string& method_name, uint32_t low_pc, uint32_t high_pc)
-      : method_name_(method_name), low_pc_(low_pc), high_pc_(high_pc) {
+    DebugInfo(const std::string& method_name, uint32_t low_pc, uint32_t high_pc,
+              CompiledMethod* compiled_method)
+      : method_name_(method_name), low_pc_(low_pc), high_pc_(high_pc),
+        compiled_method_(compiled_method) {
     }
-    std::string method_name_;
+    std::string method_name_;  // Note: this name is a pretty-printed name.
     uint32_t    low_pc_;
     uint32_t    high_pc_;
+    CompiledMethod* compiled_method_;
   };
 
   const std::vector<DebugInfo>& GetCFIMethodInfo() const {
     return method_info_;
   }
 
-  bool DidAddSymbols() const {
-    return compiler_driver_->DidIncludeDebugSymbols();
-  }
-
  private:
   // The DataAccess classes are helper classes that provide access to members related to
   // a given map, i.e. GC map, mapping table or vmap table. By abstracting these away
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index cb3dd0f..bb699e4 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -501,6 +501,7 @@
   void SetBlock(HBasicBlock* block) { block_ = block; }
   bool IsInBlock() const { return block_ != nullptr; }
   bool IsInLoop() const { return block_->IsInLoop(); }
+  bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
 
   virtual size_t InputCount() const  = 0;
   virtual HInstruction* InputAt(size_t i) const = 0;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index a7283ab..bafe577 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -22,6 +22,7 @@
 #include "optimizing_unit_test.h"
 #include "register_allocator.h"
 #include "ssa_liveness_analysis.h"
+#include "ssa_phi_elimination.h"
 #include "utils/arena_allocator.h"
 
 #include "gtest/gtest.h"
@@ -356,4 +357,38 @@
   ASSERT_EQ(new_interval->FirstRegisterUse(), last_add->GetLifetimePosition() + 1);
 }
 
+TEST(RegisterAllocatorTest, DeadPhi) {
+  /* Test for a dead loop phi taking as back-edge input a phi that also has
+   * this loop phi as input. Walking backwards in SsaDeadPhiElimination
+   * does not solve the problem because the loop phi will be visited last.
+   *
+   * Test the following snippet:
+   *  int a = 0
+   *  do {
+   *    if (true) {
+   *      a = 2;
+   *    }
+   *  } while (true);
+   */
+
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
+    Instruction::CONST_4 | 0 | 0,
+    Instruction::CONST_4 | 1 << 8 | 0,
+    Instruction::IF_NE | 1 << 8 | 1 << 12, 3,
+    Instruction::CONST_4 | 2 << 12 | 0 << 8,
+    Instruction::GOTO | 0xFD00,
+    Instruction::RETURN_VOID);
+
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = BuildSSAGraph(data, &allocator);
+  SsaDeadPhiElimination(graph).Run();
+  CodeGenerator* codegen = CodeGenerator::Create(&allocator, graph, kX86);
+  SsaLivenessAnalysis liveness(*graph, codegen);
+  liveness.Analyze();
+  RegisterAllocator register_allocator(&allocator, codegen, liveness);
+  register_allocator.AllocateRegisters();
+  ASSERT_TRUE(register_allocator.Validate(false));
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 13fa03f..a079954 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -53,8 +53,9 @@
     }
   }
 
-  // Remove phis that are not live. Visit in post order to ensure
-  // we only remove phis with no users (dead phis might use dead phis).
+  // Remove phis that are not live. Visit in post order so that phis
+  // that are not inputs of loop phis can be removed when they have
+  // no users left (dead phis might use dead phis).
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     HInstruction* current = block->GetFirstPhi();
@@ -62,6 +63,17 @@
     while (current != nullptr) {
       next = current->GetNext();
       if (current->AsPhi()->IsDead()) {
+        if (current->HasUses()) {
+          for (HUseIterator<HInstruction> it(current->GetUses()); !it.Done(); it.Advance()) {
+            HUseListNode<HInstruction>* user_node = it.Current();
+            HInstruction* user = user_node->GetUser();
+            DCHECK(user->IsLoopHeaderPhi());
+            DCHECK(user->AsPhi()->IsDead());
+            // Just put itself as an input. The phi will be removed in this loop anyway.
+            user->SetRawInputAt(user_node->GetIndex(), user);
+            current->RemoveUser(user, user_node->GetIndex());
+          }
+        }
         block->RemovePhi(current->AsPhi());
       }
       current = next;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 754496b..05ac70e 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -208,12 +208,17 @@
     assembler_.reset(new Ass());
 
     // Fake a runtime test for ScratchFile
-    std::string android_data;
-    CommonRuntimeTest::SetEnvironmentVariables(android_data);
+    CommonRuntimeTest::SetUpAndroidData(android_data_);
 
     SetUpHelpers();
   }
 
+  void TearDown() OVERRIDE {
+    // We leave temporaries in case this failed so we can debug issues.
+    CommonRuntimeTest::TearDownAndroidData(android_data_, false);
+    tmpnam_ = "";
+  }
+
   // Override this to set up any architecture-specific things, e.g., register vectors.
   virtual void SetUpHelpers() {}
 
@@ -687,6 +692,8 @@
   std::string resolved_objdump_cmd_;
   std::string resolved_disassemble_cmd_;
 
+  std::string android_data_;
+
   static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
 };
 
diff --git a/dex2oat/Android.mk b/dex2oat/Android.mk
index 28db711..2ef826b 100644
--- a/dex2oat/Android.mk
+++ b/dex2oat/Android.mk
@@ -37,9 +37,9 @@
 endif
 
 # We always build dex2oat and dependencies, even if the host build is otherwise disabled, since they are used to cross compile for the target.
-ifeq ($(ART_BUILD_NDEBUG),true)
+ifeq ($(ART_BUILD_HOST_NDEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libart-compiler,art/compiler,host,ndebug))
 endif
-ifeq ($(ART_BUILD_DEBUG),true)
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
   $(eval $(call build-art-executable,dex2oat,$(DEX2OAT_SRC_FILES),libartd-compiler,art/compiler,host,debug))
 endif
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 0437f30..ac3eb39 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -1167,6 +1167,7 @@
   switch (instruction_set) {
     case kArm:
     case kThumb2:
+    case kArm64:
     case kX86:
       implicit_null_checks = true;
       implicit_so_checks = true;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 72189c3..eed20da 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -466,16 +466,13 @@
   return true;
 }
 
-bool PatchOat::CheckOatFile() {
-  Elf32_Shdr* patches_sec = oat_file_->FindSectionByName(".oat_patches");
-  if (patches_sec == nullptr) {
+template <typename ptr_t>
+bool PatchOat::CheckOatFile(const Elf32_Shdr& patches_sec) {
+  if (patches_sec.sh_type != SHT_OAT_PATCH) {
     return false;
   }
-  if (patches_sec->sh_type != SHT_OAT_PATCH) {
-    return false;
-  }
-  uintptr_t* patches = reinterpret_cast<uintptr_t*>(oat_file_->Begin() + patches_sec->sh_offset);
-  uintptr_t* patches_end = patches + (patches_sec->sh_size/sizeof(uintptr_t));
+  ptr_t* patches = reinterpret_cast<ptr_t*>(oat_file_->Begin() + patches_sec.sh_offset);
+  ptr_t* patches_end = patches + (patches_sec.sh_size / sizeof(ptr_t));
   Elf32_Shdr* oat_data_sec = oat_file_->FindSectionByName(".rodata");
   Elf32_Shdr* oat_text_sec = oat_file_->FindSectionByName(".text");
   if (oat_data_sec == nullptr) {
@@ -599,10 +596,28 @@
     LOG(ERROR) << ".oat_patches section not found. Aborting patch";
     return false;
   }
-  DCHECK(CheckOatFile()) << "Oat file invalid";
-  CHECK_EQ(patches_sec->sh_type, SHT_OAT_PATCH) << "Unexpected type of .oat_patches";
-  uintptr_t* patches = reinterpret_cast<uintptr_t*>(oat_file_->Begin() + patches_sec->sh_offset);
-  uintptr_t* patches_end = patches + (patches_sec->sh_size/sizeof(uintptr_t));
+  if (patches_sec->sh_type != SHT_OAT_PATCH) {
+    LOG(ERROR) << "Unexpected type of .oat_patches";
+    return false;
+  }
+
+  switch (patches_sec->sh_entsize) {
+    case sizeof(uint32_t):
+      return PatchTextSection<uint32_t>(*patches_sec);
+    case sizeof(uint64_t):
+      return PatchTextSection<uint64_t>(*patches_sec);
+    default:
+      LOG(ERROR) << ".oat_patches Entsize of " << patches_sec->sh_entsize << "bits "
+                 << "is not valid";
+      return false;
+  }
+}
+
+template <typename ptr_t>
+bool PatchOat::PatchTextSection(const Elf32_Shdr& patches_sec) {
+  DCHECK(CheckOatFile<ptr_t>(patches_sec)) << "Oat file invalid";
+  ptr_t* patches = reinterpret_cast<ptr_t*>(oat_file_->Begin() + patches_sec.sh_offset);
+  ptr_t* patches_end = patches + (patches_sec.sh_size / sizeof(ptr_t));
   Elf32_Shdr* oat_text_sec = oat_file_->FindSectionByName(".text");
   CHECK(oat_text_sec != nullptr);
   byte* to_patch = oat_file_->Begin() + oat_text_sec->sh_offset;
@@ -614,7 +629,6 @@
     CHECK_LT(reinterpret_cast<uintptr_t>(patch_loc), to_patch_end);
     *patch_loc += delta_;
   }
-
   return true;
 }
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 6960d3b..326333e 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -74,11 +74,12 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool InHeap(mirror::Object*);
 
-  bool CheckOatFile();
-
   // Patches oat in place, modifying the oat_file given to the constructor.
   bool PatchElf();
   bool PatchTextSection();
+  // Templatized version to actually do the patching with the right sized offsets.
+  template <typename ptr_t> bool PatchTextSection(const Elf32_Shdr& patches_sec);
+  template <typename ptr_t> bool CheckOatFile(const Elf32_Shdr& patches_sec);
   bool PatchOatHeader();
   bool PatchSymbols(Elf32_Shdr* section);
 
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 34eede6..dc82cc2 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -21,7 +21,15 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "registers_arm64.h"
+#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
+#include "thread.h"
+#include "thread-inl.h"
 
+extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_throw_null_pointer_exception();
+extern "C" void art_quick_implicit_suspend();
 
 //
 // ARM64 specific fault handler functions.
@@ -32,17 +40,160 @@
 void FaultManager::GetMethodAndReturnPCAndSP(siginfo_t* siginfo, void* context,
                                              mirror::ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
+  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  *out_sp = static_cast<uintptr_t>(sc->sp);
+  VLOG(signals) << "sp: " << *out_sp;
+  if (*out_sp == 0) {
+    return;
+  }
+
+  // In the case of a stack overflow, the stack is not valid and we can't
+  // get the method from the top of the stack.  However it's in x0.
+  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
+  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
+      reinterpret_cast<uint8_t*>(*out_sp) - GetStackOverflowReservedBytes(kArm64));
+  if (overflow_addr == fault_addr) {
+    *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->regs[0]);
+  } else {
+    // The method is at the top of the stack.
+    *out_method = (reinterpret_cast<StackReference<mirror::ArtMethod>* >(*out_sp)[0]).AsMirrorPtr();
+  }
+
+  // Work out the return PC.  This will be the address of the instruction
+  // following the faulting ldr/str instruction.
+  VLOG(signals) << "pc: " << std::hex
+      << static_cast<void*>(reinterpret_cast<uint8_t*>(sc->pc));
+
+  *out_return_pc = sc->pc + 4;
 }
 
 bool NullPointerHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  // The code that looks for the catch location needs to know the value of the
+  // PC at the point of call.  For Null checks we insert a GC map that is immediately after
+  // the load/store instruction that might cause the fault.
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+
+  sc->regs[30] = sc->pc + 4;      // LR needs to point to gc map location
+
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
+  VLOG(signals) << "Generating null pointer exception";
+  return true;
 }
 
+// A suspend check is done using the following instruction sequence:
+//      0xf7223228: f9405640  ldr x0, [x18, #168]
+// .. some intervening instructions
+//      0xf7223230: f9400000  ldr x0, [x0]
+
+// The offset from x18 is Thread::ThreadSuspendTriggerOffset().
+// To check for a suspend check, we examine the instructions that caused
+// the fault (at PC-4 and PC).
 bool SuspensionHandler::Action(int sig, siginfo_t* info, void* context) {
+  // These are the instructions to check for.  The first one is the ldr x0,[x18,#xxx]
+  // where xxx is the offset of the suspend trigger.
+  uint32_t checkinst1 = 0xf9400240 | (Thread::ThreadSuspendTriggerOffset<8>().Int32Value() << 7);
+  uint32_t checkinst2 = 0xf9400000;
+
+  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->pc);
+  uint8_t* ptr1 = ptr2 - 4;
+  VLOG(signals) << "checking suspend";
+
+  uint32_t inst2 = *reinterpret_cast<uint32_t*>(ptr2);
+  VLOG(signals) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
+  if (inst2 != checkinst2) {
+    // Second instruction is not good, not ours.
+    return false;
+  }
+
+  // The first instruction can be a little bit up the stream due to load hoisting
+  // in the compiler.
+  uint8_t* limit = ptr1 - 80;   // Compiler will hoist to a max of 20 instructions.
+  bool found = false;
+  while (ptr1 > limit) {
+    uint32_t inst1 = *reinterpret_cast<uint32_t*>(ptr1);
+    VLOG(signals) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
+    if (inst1 == checkinst1) {
+      found = true;
+      break;
+    }
+    ptr1 -= 4;
+  }
+  if (found) {
+    VLOG(signals) << "suspend check match";
+    // This is a suspend check.  Arrange for the signal handler to return to
+    // art_quick_implicit_suspend.  Also set LR so that after the suspend check it
+    // will resume the instruction (current PC + 4).  PC points to the
+    // ldr x0,[x0,#0] instruction (x0 will be 0, set by the trigger).
+
+    sc->regs[30] = sc->pc + 4;
+    sc->pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend);
+
+    // Now remove the suspend trigger that caused this fault.
+    Thread::Current()->RemoveSuspendTrigger();
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
+    return true;
+  }
   return false;
 }
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
-  return false;
+  struct ucontext *uc = reinterpret_cast<struct ucontext *>(context);
+  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
+
+  uintptr_t sp = sc->sp;
+  VLOG(signals) << "sp: " << std::hex << sp;
+
+  uintptr_t fault_addr = sc->fault_address;
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
+      ", fault_addr: " << fault_addr;
+
+  uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm64);
+
+  Thread* self = reinterpret_cast<Thread*>(sc->regs[art::arm64::TR]);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
+
+  // Check that the fault address is the value expected for a stack overflow.
+  if (fault_addr != overflow_addr) {
+    VLOG(signals) << "Not a stack overflow";
+    return false;
+  }
+
+  // We know this is a stack overflow.  We need to move the sp to the overflow region
+  // that exists below the protected region.  Determine the address of the next
+  // available valid address below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
+
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
+
+  // Now establish the stack pointer for the signal return.
+  sc->sp = prevsp;
+
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->regs[art::arm64::IP0] = sp;      // aka x16
+
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // The value of LR must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  // The kernel will now return to the address in sc->pc.
+  return true;
 }
 }       // namespace art
+
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index ba85d32..04be4a2 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -435,6 +435,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Invoke stack overflow exception from signal handler.
+   * On entry:
+   * xSELF: thread
+   * SP: address of last known frame
+   * IP0: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in x16/IP0.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov x0, xSELF                   // pass Thread::Current
+    mov x1, sp                      // pass SP
+    mov sp, xIP0                    // move SP down to below protected region.
+    b   artThrowStackOverflowFromCode                  // artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
@@ -1323,6 +1348,14 @@
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
 END art_quick_test_suspend
 
+ENTRY art_quick_implicit_suspend
+    mov    x0, xSELF
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME          // save callee saves for stack crawl
+    mov    x1, sp
+    bl     artTestSuspendFromCode             // (Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
+END art_quick_implicit_suspend
+
      /*
      * Called by managed code that is attempting to call a method on a proxy class. On entry
      * x0 holds the proxy method and x1 holds the receiver; The frame size of the invoked proxy
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 33b3d3e..3481f2f 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -59,6 +59,9 @@
   EXPECT_TRUE(file.Open(good_path,  O_RDONLY));
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+
+  file.Close();
+  ASSERT_EQ(unlink(good_path.c_str()), 0);
 }
 
 TEST_F(FdFileTest, ReadFullyEmptyFile) {
diff --git a/runtime/base/unix_file/mapped_file_test.cc b/runtime/base/unix_file/mapped_file_test.cc
index 7e45321..59334d4 100644
--- a/runtime/base/unix_file/mapped_file_test.cc
+++ b/runtime/base/unix_file/mapped_file_test.cc
@@ -30,7 +30,7 @@
   }
 
   void SetUp() {
-    art::CommonRuntimeTest::SetEnvironmentVariables(android_data_);
+    RandomAccessFileTest::SetUp();
 
     good_path_ = GetTmpPath("some-file.txt");
     int fd = TEMP_FAILURE_RETRY(open(good_path_.c_str(), O_CREAT|O_RDWR, 0666));
@@ -42,6 +42,12 @@
     ASSERT_TRUE(CopyFile(src, &dst));
   }
 
+  void TearDown() {
+    ASSERT_EQ(unlink(good_path_.c_str()), 0);
+
+    RandomAccessFileTest::TearDown();
+  }
+
   virtual RandomAccessFile* MakeTestFile() {
     TEMP_FAILURE_RETRY(truncate(good_path_.c_str(), 0));
     MappedFile* f = new MappedFile;
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 1d0b866..0002433 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -35,7 +35,11 @@
   virtual RandomAccessFile* MakeTestFile() = 0;
 
   virtual void SetUp() {
-    art::CommonRuntimeTest::SetEnvironmentVariables(android_data_);
+    art::CommonRuntimeTest::SetUpAndroidData(android_data_);
+  }
+
+  virtual void TearDown() {
+    art::CommonRuntimeTest::TearDownAndroidData(android_data_, true);
   }
 
   std::string GetTmpPath(const std::string& name) {
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index cf25810..9921bdd 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -77,7 +77,7 @@
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx,
                                                mirror::ArtMethod* referrer) {
-  mirror::Class* resolved_type = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
+  mirror::Class* resolved_type = referrer->GetDexCacheResolvedType(type_idx);
   if (UNLIKELY(resolved_type == nullptr)) {
     mirror::Class* declaring_class = referrer->GetDeclaringClass();
     StackHandleScope<2> hs(Thread::Current());
@@ -85,9 +85,8 @@
     Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
-    if (resolved_type != nullptr) {
-      DCHECK_EQ(dex_cache->GetResolvedType(type_idx), resolved_type);
-    }
+    // Note: We cannot check here to see whether we added the type to the cache. The type
+    //       might be an erroneous class, which results in it being hidden from us.
   }
   return resolved_type;
 }
@@ -102,9 +101,8 @@
     Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_type = ResolveType(dex_file, type_idx, dex_cache, class_loader);
-    if (resolved_type != nullptr) {
-      DCHECK_EQ(dex_cache->GetResolvedType(type_idx), resolved_type);
-    }
+    // Note: We cannot check here to see whether we added the type to the cache. The type
+    //       might be an erroneous class, which results in it being hidden from us.
   }
   return resolved_type;
 }
@@ -112,8 +110,7 @@
 inline mirror::ArtMethod* ClassLinker::GetResolvedMethod(uint32_t method_idx,
                                                          mirror::ArtMethod* referrer,
                                                          InvokeType type) {
-  mirror::ArtMethod* resolved_method =
-      referrer->GetDexCacheResolvedMethods()->Get(method_idx);
+  mirror::ArtMethod* resolved_method = referrer->GetDexCacheResolvedMethod(method_idx);
   if (resolved_method == nullptr || resolved_method->IsRuntimeMethod()) {
     return nullptr;
   }
@@ -135,9 +132,8 @@
   const DexFile* dex_file = h_dex_cache->GetDexFile();
   resolved_method = ResolveMethod(*dex_file, method_idx, h_dex_cache, h_class_loader, h_referrer,
                                   type);
-  if (resolved_method != nullptr) {
-    DCHECK_EQ(h_dex_cache->GetResolvedMethod(method_idx), resolved_method);
-  }
+  // Note: We cannot check here to see whether we added the method to the cache. The
+  //       method's class might be erroneous, which results in it being hidden from us.
   return resolved_method;
 }
 
@@ -156,9 +152,8 @@
     Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
     const DexFile& dex_file = *dex_cache->GetDexFile();
     resolved_field = ResolveField(dex_file, field_idx, dex_cache, class_loader, is_static);
-    if (resolved_field != nullptr) {
-      DCHECK_EQ(dex_cache->GetResolvedField(field_idx), resolved_field);
-    }
+    // Note: We cannot check here to see whether we added the field to the cache. The field's
+    //       class might be erroneous, which results in it being hidden from us.
   }
   return resolved_field;
 }
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index c1e3b25..72572a05 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3488,22 +3488,21 @@
   DCHECK(proxy_class->IsProxyClass());
   DCHECK(proxy_method->IsProxyMethod());
   // Locate the dex cache of the original interface/Object
-  mirror::DexCache* dex_cache = NULL;
+  mirror::DexCache* dex_cache = nullptr;
   {
-    mirror::ObjectArray<mirror::Class>* resolved_types = proxy_method->GetDexCacheResolvedTypes();
     ReaderMutexLock mu(Thread::Current(), dex_lock_);
     for (size_t i = 0; i != dex_caches_.size(); ++i) {
       mirror::DexCache* a_dex_cache = GetDexCache(i);
-      if (a_dex_cache->GetResolvedTypes() == resolved_types) {
+      if (proxy_method->HasSameDexCacheResolvedTypes(a_dex_cache->GetResolvedTypes())) {
         dex_cache = a_dex_cache;
         break;
       }
     }
   }
-  CHECK(dex_cache != NULL);
+  CHECK(dex_cache != nullptr);
   uint32_t method_idx = proxy_method->GetDexMethodIndex();
   mirror::ArtMethod* resolved_method = dex_cache->GetResolvedMethod(method_idx);
-  CHECK(resolved_method != NULL);
+  CHECK(resolved_method != nullptr);
   return resolved_method;
 }
 
@@ -3516,14 +3515,19 @@
       proxy_class->GetDirectMethods();
   CHECK_EQ(proxy_direct_methods->GetLength(), 16);
   mirror::ArtMethod* proxy_constructor = proxy_direct_methods->Get(2);
-  // Clone the existing constructor of Proxy (our constructor would just invoke it so steal its
-  // code_ too)
-  mirror::ArtMethod* constructor =
-      down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self));
-  if (constructor == NULL) {
+  mirror::ArtMethod* constructor = down_cast<mirror::ArtMethod*>(proxy_constructor->Clone(self));
+  if (constructor == nullptr) {
     CHECK(self->IsExceptionPending());  // OOME.
-    return NULL;
+    return nullptr;
   }
+  // Make the proxy constructor's code always point to the uninstrumented code. This avoids
+  // getting a method enter event for the proxy constructor as the proxy constructor doesn't
+  // have an activation.
+  bool have_portable_code;
+  constructor->SetEntryPointFromQuickCompiledCode(GetQuickOatCodeFor(proxy_constructor));
+  constructor->SetEntryPointFromPortableCompiledCode(GetPortableOatCodeFor(proxy_constructor,
+                                                                           &have_portable_code));
+
   // Make this constructor public and fix the class to be our Proxy version
   constructor->SetAccessFlags((constructor->GetAccessFlags() & ~kAccProtected) | kAccPublic);
   constructor->SetDeclaringClass(klass.Get());
@@ -3578,8 +3582,8 @@
   // The proxy method doesn't have its own dex cache or dex file and so it steals those of its
   // interface prototype. The exception to this are Constructors and the Class of the Proxy itself.
   CHECK_EQ(prototype->GetDexCacheStrings(), method->GetDexCacheStrings());
-  CHECK_EQ(prototype->GetDexCacheResolvedMethods(), method->GetDexCacheResolvedMethods());
-  CHECK_EQ(prototype->GetDexCacheResolvedTypes(), method->GetDexCacheResolvedTypes());
+  CHECK(prototype->HasSameDexCacheResolvedMethods(method.Get()));
+  CHECK(prototype->HasSameDexCacheResolvedTypes(method.Get()));
   CHECK_EQ(prototype->GetDexMethodIndex(), method->GetDexMethodIndex());
 
   MethodHelper mh(method);
@@ -4947,7 +4951,7 @@
                                             bool is_static) {
   DCHECK(dex_cache.Get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
-  if (resolved != NULL) {
+  if (resolved != nullptr) {
     return resolved;
   }
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
@@ -4955,9 +4959,9 @@
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> klass(
       hs.NewHandle(ResolveType(dex_file, field_id.class_idx_, dex_cache, class_loader)));
-  if (klass.Get() == NULL) {
+  if (klass.Get() == nullptr) {
     DCHECK(Thread::Current()->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
 
   if (is_static) {
@@ -4966,7 +4970,7 @@
     resolved = klass->FindInstanceField(dex_cache.Get(), field_idx);
   }
 
-  if (resolved == NULL) {
+  if (resolved == nullptr) {
     const char* name = dex_file.GetFieldName(field_id);
     const char* type = dex_file.GetFieldTypeDescriptor(field_id);
     if (is_static) {
@@ -4974,7 +4978,7 @@
     } else {
       resolved = klass->FindInstanceField(name, type);
     }
-    if (resolved == NULL) {
+    if (resolved == nullptr) {
       ThrowNoSuchFieldError(is_static ? "static " : "instance ", klass.Get(), type, name);
       return NULL;
     }
@@ -4989,7 +4993,7 @@
                                                Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
   mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
-  if (resolved != NULL) {
+  if (resolved != nullptr) {
     return resolved;
   }
   const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 8e16d9b..8d93265 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -156,20 +156,20 @@
   }
 
   void AssertMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    EXPECT_TRUE(method != NULL);
-    EXPECT_TRUE(method->GetClass() != NULL);
-    EXPECT_TRUE(method->GetName() != NULL);
+    EXPECT_TRUE(method != nullptr);
+    EXPECT_TRUE(method->GetClass() != nullptr);
+    EXPECT_TRUE(method->GetName() != nullptr);
     EXPECT_TRUE(method->GetSignature() != Signature::NoSignature());
 
-    EXPECT_TRUE(method->GetDexCacheStrings() != NULL);
-    EXPECT_TRUE(method->GetDexCacheResolvedMethods() != NULL);
-    EXPECT_TRUE(method->GetDexCacheResolvedTypes() != NULL);
+    EXPECT_TRUE(method->GetDexCacheStrings() != nullptr);
+    EXPECT_TRUE(method->HasDexCacheResolvedMethods());
+    EXPECT_TRUE(method->HasDexCacheResolvedTypes());
     EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetStrings(),
               method->GetDexCacheStrings());
-    EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetResolvedMethods(),
-              method->GetDexCacheResolvedMethods());
-    EXPECT_EQ(method->GetDeclaringClass()->GetDexCache()->GetResolvedTypes(),
-              method->GetDexCacheResolvedTypes());
+    EXPECT_TRUE(method->HasSameDexCacheResolvedMethods(
+        method->GetDeclaringClass()->GetDexCache()->GetResolvedMethods()));
+    EXPECT_TRUE(method->HasSameDexCacheResolvedTypes(
+        method->GetDeclaringClass()->GetDexCache()->GetResolvedTypes()));
   }
 
   void AssertField(mirror::Class* klass, mirror::ArtField* field)
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 8e363c4..6f3b3a3 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -95,7 +95,7 @@
 CommonRuntimeTest::CommonRuntimeTest() {}
 CommonRuntimeTest::~CommonRuntimeTest() {}
 
-void CommonRuntimeTest::SetEnvironmentVariables(std::string& android_data) {
+void CommonRuntimeTest::SetUpAndroidRoot() {
   if (IsHost()) {
     // $ANDROID_ROOT is set on the device, but not necessarily on the host.
     // But it needs to be set so that icu4c can find its locale data.
@@ -135,15 +135,36 @@
       setenv("ANDROID_HOST_OUT", getenv("ANDROID_ROOT"), 1);
     }
   }
+}
 
+void CommonRuntimeTest::SetUpAndroidData(std::string& android_data) {
   // On target, Cannot use /mnt/sdcard because it is mounted noexec, so use subdir of dalvik-cache
-  android_data = (IsHost() ? "/tmp/art-data-XXXXXX" : "/data/dalvik-cache/art-data-XXXXXX");
+  if (IsHost()) {
+    const char* tmpdir = getenv("TMPDIR");
+    if (tmpdir != nullptr && tmpdir[0] != 0) {
+      android_data = tmpdir;
+    } else {
+      android_data = "/tmp";
+    }
+  } else {
+    android_data = "/data/dalvik-cache";
+  }
+  android_data += "/art-data-XXXXXX";
   if (mkdtemp(&android_data[0]) == nullptr) {
     PLOG(FATAL) << "mkdtemp(\"" << &android_data[0] << "\") failed";
   }
   setenv("ANDROID_DATA", android_data.c_str(), 1);
 }
 
+void CommonRuntimeTest::TearDownAndroidData(const std::string& android_data, bool fail_on_error) {
+  if (fail_on_error) {
+    ASSERT_EQ(rmdir(android_data.c_str()), 0);
+  } else {
+    rmdir(android_data.c_str());
+  }
+}
+
+
 const DexFile* CommonRuntimeTest::LoadExpectSingleDexFile(const char* location) {
   std::vector<const DexFile*> dex_files;
   std::string error_msg;
@@ -157,7 +178,8 @@
 }
 
 void CommonRuntimeTest::SetUp() {
-  SetEnvironmentVariables(android_data_);
+  SetUpAndroidRoot();
+  SetUpAndroidData(android_data_);
   dalvik_cache_.append(android_data_.c_str());
   dalvik_cache_.append("/dalvik-cache");
   int mkdir_result = mkdir(dalvik_cache_.c_str(), 0700);
@@ -201,7 +223,6 @@
   runtime_->GetHeap()->VerifyHeap();  // Check for heap corruption before the test
 }
 
-
 void CommonRuntimeTest::ClearDirectory(const char* dirpath) {
   ASSERT_TRUE(dirpath != nullptr);
   DIR* dir = opendir(dirpath);
@@ -212,7 +233,7 @@
     if ((strcmp(e->d_name, ".") == 0) || (strcmp(e->d_name, "..") == 0)) {
       continue;
     }
-    std::string filename(dalvik_cache_);
+    std::string filename(dirpath);
     filename.push_back('/');
     filename.append(e->d_name);
     int stat_result = lstat(filename.c_str(), &s);
@@ -235,8 +256,7 @@
   ClearDirectory(dalvik_cache_.c_str());
   int rmdir_cache_result = rmdir(dalvik_cache_.c_str());
   ASSERT_EQ(0, rmdir_cache_result);
-  int rmdir_data_result = rmdir(android_data_.c_str());
-  ASSERT_EQ(0, rmdir_data_result);
+  TearDownAndroidData(android_data_, true);
 
   // icu4c has a fixed 10-element array "gCommonICUDataArray".
   // If we run > 10 tests, we fill that array and u_setCommonData fails.
@@ -265,6 +285,19 @@
   return StringPrintf("%s/framework/%s.jar", GetAndroidRoot(), jar_prefix.c_str());
 }
 
+std::string CommonRuntimeTest::GetLibCoreOatFileName() {
+  return GetOatFileName("core");
+}
+
+std::string CommonRuntimeTest::GetOatFileName(const std::string& oat_prefix) {
+  if (IsHost()) {
+    const char* host_dir = getenv("ANDROID_HOST_OUT");
+    CHECK(host_dir != nullptr);
+    return StringPrintf("%s/framework/%s.art", host_dir, oat_prefix.c_str());
+  }
+  return StringPrintf("%s/framework/%s.art", GetAndroidRoot(), oat_prefix.c_str());
+}
+
 std::string CommonRuntimeTest::GetTestAndroidRoot() {
   if (IsHost()) {
     const char* host_dir = getenv("ANDROID_HOST_OUT");
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index eb96352..0fefc21 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -64,7 +64,13 @@
 
 class CommonRuntimeTest : public testing::Test {
  public:
-  static void SetEnvironmentVariables(std::string& android_data);
+  static void SetUpAndroidRoot();
+
+  // Note: setting up ANDROID_DATA may create a temporary directory. If this is used in a
+  // non-derived class, be sure to also call the corresponding tear-down below.
+  static void SetUpAndroidData(std::string& android_data);
+
+  static void TearDownAndroidData(const std::string& android_data, bool fail_on_error);
 
   CommonRuntimeTest();
   ~CommonRuntimeTest();
@@ -85,10 +91,18 @@
 
   virtual void TearDown();
 
+  // Gets the path of the libcore dex file.
   std::string GetLibCoreDexFileName();
 
+  // Gets the path of the specified dex file for host or target.
   std::string GetDexFileName(const std::string& jar_prefix);
 
+  // Gets the path of the libcore oat file.
+  std::string GetLibCoreOatFileName();
+
+  // Gets the path of the specified oat file for host or target.
+  std::string GetOatFileName(const std::string& oat_prefix);
+
   std::string GetTestAndroidRoot();
 
   std::vector<const DexFile*> OpenTestDexFiles(const char* name)
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index cb0be04..b874a74 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -37,10 +37,11 @@
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <const bool kAccessCheck>
+ALWAYS_INLINE
 static inline mirror::Class* CheckObjectAlloc(uint32_t type_idx,
                                               mirror::ArtMethod* method,
                                               Thread* self, bool* slow_path) {
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx);
   if (UNLIKELY(klass == NULL)) {
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     *slow_path = true;
@@ -86,6 +87,7 @@
 }
 
 // TODO: Fix no thread safety analysis when annotalysis is smarter.
+ALWAYS_INLINE
 static inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
                                                                  Thread* self,
                                                                  bool* slow_path) {
@@ -116,6 +118,7 @@
 // check.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCode(uint32_t type_idx,
                                                   mirror::ArtMethod* method,
                                                   Thread* self,
@@ -135,6 +138,7 @@
 // Given the context of a calling Method and a resolved class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCodeResolved(mirror::Class* klass,
                                                           mirror::ArtMethod* method,
                                                           Thread* self,
@@ -157,6 +161,7 @@
 // Given the context of a calling Method and an initialized class, create an instance.
 // TODO: Fix NO_THREAD_SAFETY_ANALYSIS when GCC is smarter.
 template <bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Object* AllocObjectFromCodeInitialized(mirror::Class* klass,
                                                              mirror::ArtMethod* method,
                                                              Thread* self,
@@ -169,6 +174,7 @@
 
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck>
+ALWAYS_INLINE
 static inline mirror::Class* CheckArrayAlloc(uint32_t type_idx,
                                              mirror::ArtMethod* method,
                                              int32_t component_count,
@@ -178,7 +184,7 @@
     *slow_path = true;
     return nullptr;  // Failure
   }
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx);
   if (UNLIKELY(klass == nullptr)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     *slow_path = true;
@@ -205,6 +211,7 @@
 // check.
 // TODO: Fix no thread safety analysis when GCC can handle template specialization.
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Array* AllocArrayFromCode(uint32_t type_idx,
                                                 mirror::ArtMethod* method,
                                                 int32_t component_count,
@@ -227,6 +234,7 @@
 }
 
 template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE
 static inline mirror::Array* AllocArrayFromCodeResolved(mirror::Class* klass,
                                                         mirror::ArtMethod* method,
                                                         int32_t component_count,
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index c1c7631..be3895a 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -41,7 +41,7 @@
     ThrowNegativeArraySizeException(component_count);
     return nullptr;  // Failure
   }
-  mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  mirror::Class* klass = referrer->GetDexCacheResolvedType<false>(type_idx);
   if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, referrer);
     if (klass == NULL) {  // Error
diff --git a/runtime/entrypoints/portable/portable_throw_entrypoints.cc b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
index be6231c..4317358 100644
--- a/runtime/entrypoints/portable/portable_throw_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_throw_entrypoints.cc
@@ -98,7 +98,7 @@
     }
     // Does this catch exception type apply?
     mirror::Class* iter_exception_type =
-        current_method->GetDexCacheResolvedTypes()->Get(iter_type_idx);
+        current_method->GetDexCacheResolvedType(iter_type_idx);
     if (UNLIKELY(iter_exception_type == NULL)) {
       // TODO: check, the verifier (class linker?) should take care of resolving all exception
       //       classes early.
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 1f2713a..9d850c5 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -25,11 +25,34 @@
 
 namespace art {
 
+static constexpr bool kUseTlabFastPath = true;
+
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
 extern "C" mirror::Object* artAllocObjectFromCode ##suffix##suffix2( \
     uint32_t type_idx, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx); \
+    if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerOrBrooksReadBarrier) { \
+          if (kUseBrooksReadBarrier) { \
+            obj->SetReadBarrierPointer(obj); \
+          } \
+          obj->AssertReadBarrierPointer(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCode<false, instrumented_bool>(type_idx, method, self, allocator_type); \
 } \
@@ -37,6 +60,26 @@
     mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    if (LIKELY(klass->IsInitialized())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerOrBrooksReadBarrier) { \
+          if (kUseBrooksReadBarrier) { \
+            obj->SetReadBarrierPointer(obj); \
+          } \
+          obj->AssertReadBarrierPointer(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeResolved<instrumented_bool>(klass, method, self, allocator_type); \
 } \
@@ -44,6 +87,24 @@
     mirror::Class* klass, mirror::ArtMethod* method, Thread* self, \
     StackReference<mirror::ArtMethod>* sp) \
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { \
+  if (kUseTlabFastPath && !instrumented_bool && allocator_type == gc::kAllocatorTypeTLAB) { \
+    size_t byte_count = klass->GetObjectSize(); \
+    byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+    mirror::Object* obj; \
+    if (LIKELY(byte_count < self->TlabSize())) { \
+      obj = self->AllocTlab(byte_count); \
+      DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+      obj->SetClass(klass); \
+      if (kUseBakerOrBrooksReadBarrier) { \
+        if (kUseBrooksReadBarrier) { \
+          obj->SetReadBarrierPointer(obj); \
+        } \
+        obj->AssertReadBarrierPointer(); \
+      } \
+      QuasiAtomic::ThreadFenceForConstructor(); \
+      return obj; \
+    } \
+  } \
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly); \
   return AllocObjectFromCodeInitialized<instrumented_bool>(klass, method, self, allocator_type); \
 } \
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 49bb65f..4730701 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -592,8 +592,7 @@
   const char* old_cause =
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
-  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method)
-  << PrettyMethod(proxy_method);
+  DCHECK_EQ(sp->AsMirrorPtr(), proxy_method) << PrettyMethod(proxy_method);
   self->SetTopOfStack(sp, 0);
   DCHECK_EQ(proxy_method->GetFrameSizeInBytes(),
             Runtime::Current()->GetCalleeSaveMethod(Runtime::kRefsAndArgs)->GetFrameSizeInBytes())
@@ -791,8 +790,8 @@
 
       // We came here because of sharpening. Ensure the dex cache is up-to-date on the method index
       // of the sharpened method.
-      if (called->GetDexCacheResolvedMethods() == caller->GetDexCacheResolvedMethods()) {
-        caller->GetDexCacheResolvedMethods()->Set<false>(called->GetDexMethodIndex(), called);
+      if (called->HasSameDexCacheResolvedMethods(caller)) {
+        caller->SetDexCacheResolvedMethod(called->GetDexMethodIndex(), called);
       } else {
         // Calling from one dex file to another, need to compute the method index appropriate to
         // the caller's dex file. Since we get here only if the original called was a runtime
@@ -802,7 +801,7 @@
         MethodHelper mh(hs.NewHandle(called));
         uint32_t method_index = mh.FindDexMethodIndexInOtherDexFile(*dex_file, dex_method_idx);
         if (method_index != DexFile::kDexNoIndex) {
-          caller->GetDexCacheResolvedMethods()->Set<false>(method_index, called);
+          caller->SetDexCacheResolvedMethod(method_index, called);
         }
       }
     }
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index ceb42e5..0498550 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -28,6 +28,11 @@
 namespace gc {
 namespace accounting {
 
+constexpr size_t CardTable::kCardShift;
+constexpr size_t CardTable::kCardSize;
+constexpr uint8_t CardTable::kCardClean;
+constexpr uint8_t CardTable::kCardDirty;
+
 /*
  * Maintain a card table from the write barrier. All writes of
  * non-NULL values to heap addresses should go through an entry in
@@ -55,9 +60,9 @@
   size_t capacity = heap_capacity / kCardSize;
   /* Allocate an extra 256 bytes to allow fixed low-byte of base */
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous("card table", NULL,
-                                                 capacity + 256, PROT_READ | PROT_WRITE,
-                                                 false, &error_msg));
+  std::unique_ptr<MemMap> mem_map(
+      MemMap::MapAnonymous("card table", nullptr, capacity + 256, PROT_READ | PROT_WRITE,
+                           false, &error_msg));
   CHECK(mem_map.get() != NULL) << "couldn't allocate card table: " << error_msg;
   // All zeros is the correct initial value; all clean. Anonymous mmaps are initialized to zero, we
   // don't clear the card table to avoid unnecessary pages being allocated
@@ -67,17 +72,17 @@
   CHECK(cardtable_begin != NULL);
 
   // We allocated up to a bytes worth of extra space to allow biased_begin's byte value to equal
-  // GC_CARD_DIRTY, compute a offset value to make this the case
+  // kCardDirty, compute an offset value to make this the case
   size_t offset = 0;
   byte* biased_begin = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(cardtable_begin) -
       (reinterpret_cast<uintptr_t>(heap_begin) >> kCardShift));
-  if (((uintptr_t)biased_begin & 0xff) != kCardDirty) {
-    int delta = kCardDirty - (reinterpret_cast<uintptr_t>(biased_begin) & 0xff);
+  uintptr_t biased_byte = reinterpret_cast<uintptr_t>(biased_begin) & 0xff;
+  if (biased_byte != kCardDirty) {
+    int delta = kCardDirty - biased_byte;
     offset = delta + (delta < 0 ? 0x100 : 0);
     biased_begin += offset;
   }
   CHECK_EQ(reinterpret_cast<uintptr_t>(biased_begin) & 0xff, kCardDirty);
-
   return new CardTable(mem_map.release(), biased_begin, offset);
 }
 
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 7934974..fbeea85 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -46,10 +46,10 @@
 // WriteBarrier, and from there to here.
 class CardTable {
  public:
-  static const size_t kCardShift = 7;
-  static const size_t kCardSize = (1 << kCardShift);
-  static const uint8_t kCardClean = 0x0;
-  static const uint8_t kCardDirty = 0x70;
+  static constexpr size_t kCardShift = 7;
+  static constexpr size_t kCardSize = 1 << kCardShift;
+  static constexpr uint8_t kCardClean = 0x0;
+  static constexpr uint8_t kCardDirty = 0x70;
 
   static CardTable* Create(const byte* heap_begin, size_t heap_capacity);
 
diff --git a/runtime/gc/accounting/card_table_test.cc b/runtime/gc/accounting/card_table_test.cc
new file mode 100644
index 0000000..a88b2c9
--- /dev/null
+++ b/runtime/gc/accounting/card_table_test.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "card_table-inl.h"
+
+#include <string>
+
+#include "atomic.h"
+#include "common_runtime_test.h"
+#include "handle_scope-inl.h"
+#include "mirror/class-inl.h"
+#include "mirror/string-inl.h"  // Strings are easiest to allocate
+#include "scoped_thread_state_change.h"
+#include "thread_pool.h"
+#include "utils.h"
+
+namespace art {
+
+namespace mirror {
+  class Object;
+}  // namespace mirror
+
+class CardTableTest : public CommonRuntimeTest {
+ public:
+  std::unique_ptr<gc::accounting::CardTable> card_table_;
+  static constexpr size_t kCardSize = gc::accounting::CardTable::kCardSize;
+
+  void CommonSetup() {
+    if (card_table_.get() == nullptr) {
+      card_table_.reset(gc::accounting::CardTable::Create(heap_begin_, heap_size_));
+      EXPECT_TRUE(card_table_.get() != nullptr);
+    } else {
+      ClearCardTable();
+    }
+  }
+  // Default values for the test, not random to avoid non-deterministic behaviour.
+  CardTableTest() : heap_begin_(reinterpret_cast<byte*>(0x2000000)), heap_size_(2 * MB) {
+  }
+  void ClearCardTable() {
+    card_table_->ClearCardTable();
+  }
+  byte* HeapBegin() const {
+    return heap_begin_;
+  }
+  byte* HeapLimit() const {
+    return HeapBegin() + heap_size_;
+  }
+  byte PRandCard(const byte* addr) const {
+    size_t offset = RoundDown(addr - heap_begin_, kCardSize);
+    return 1 + offset % 254;
+  }
+  void FillRandom() {
+    for (const byte* addr = HeapBegin(); addr != HeapLimit(); addr += kCardSize) {
+      EXPECT_TRUE(card_table_->AddrIsInCardTable(addr));
+      byte* card = card_table_->CardFromAddr(addr);
+      *card = PRandCard(addr);
+    }
+  }
+
+ private:
+  byte* const heap_begin_;
+  const size_t heap_size_;
+};
+
+TEST_F(CardTableTest, TestMarkCard) {
+  CommonSetup();
+  for (const byte* addr = HeapBegin(); addr < HeapLimit(); addr += kObjectAlignment) {
+    auto obj = reinterpret_cast<const mirror::Object*>(addr);
+    EXPECT_EQ(card_table_->GetCard(obj), gc::accounting::CardTable::kCardClean);
+    EXPECT_TRUE(!card_table_->IsDirty(obj));
+    card_table_->MarkCard(addr);
+    EXPECT_TRUE(card_table_->IsDirty(obj));
+    EXPECT_EQ(card_table_->GetCard(obj), gc::accounting::CardTable::kCardDirty);
+    byte* card_addr = card_table_->CardFromAddr(addr);
+    EXPECT_EQ(*card_addr, gc::accounting::CardTable::kCardDirty);
+    *card_addr = gc::accounting::CardTable::kCardClean;
+    EXPECT_EQ(*card_addr, gc::accounting::CardTable::kCardClean);
+  }
+}
+
+class UpdateVisitor {
+ public:
+  byte operator()(byte c) const {
+    return c * 93 + 123;
+  }
+  void operator()(byte* /*card*/, byte /*expected_value*/, byte /*new_value*/) const {
+  }
+};
+
+TEST_F(CardTableTest, TestModifyCardsAtomic) {
+  CommonSetup();
+  FillRandom();
+  const size_t delta = std::min(static_cast<size_t>(HeapLimit() - HeapBegin()), 8U * kCardSize);
+  UpdateVisitor visitor;
+  size_t start_offset = 0;
+  for (byte* cstart = HeapBegin(); cstart < HeapBegin() + delta; cstart += kCardSize) {
+    start_offset = (start_offset + kObjectAlignment) % kCardSize;
+    size_t end_offset = 0;
+    for (byte* cend = HeapLimit() - delta; cend < HeapLimit(); cend += kCardSize) {
+      // Don't always start at a card boundary.
+      byte* start = cstart + start_offset;
+      byte* end = cend - end_offset;
+      end_offset = (end_offset + kObjectAlignment) % kCardSize;
+      // Modify cards.
+      card_table_->ModifyCardsAtomic(start, end, visitor, visitor);
+      // Check adjacent cards not modified.
+      for (byte* cur = start - kCardSize; cur >= HeapBegin(); cur -= kCardSize) {
+        EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)), PRandCard(cur));
+      }
+      for (byte* cur = end + kCardSize; cur < HeapLimit(); cur += kCardSize) {
+        EXPECT_EQ(card_table_->GetCard(reinterpret_cast<mirror::Object*>(cur)), PRandCard(cur));
+      }
+      // Verify Range.
+      for (byte* cur = start; cur < AlignUp(end, kCardSize); cur += kCardSize) {
+        byte* card = card_table_->CardFromAddr(cur);
+        byte value = PRandCard(cur);
+        if (visitor(value) != *card) {
+          LOG(ERROR) << reinterpret_cast<void*>(start) << " " << reinterpret_cast<void*>(cur) << " " << reinterpret_cast<void*>(end);
+        }
+        EXPECT_EQ(visitor(value), *card);
+        // Restore for next iteration.
+        *card = value;
+      }
+    }
+  }
+}
+
+// TODO: Add test for CardTable::Scan.
+
+}  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 8ffadd5..a82392a 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -662,7 +662,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kGrow>
-  bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
+  ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
 
   // Returns true if the address passed in is within the address range of a continuous space.
   bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const
diff --git a/runtime/method_helper-inl.h b/runtime/method_helper-inl.h
index 4f95a28..3a5056a 100644
--- a/runtime/method_helper-inl.h
+++ b/runtime/method_helper-inl.h
@@ -28,7 +28,7 @@
 
 inline mirror::Class* MethodHelper::GetClassFromTypeIdx(uint16_t type_idx, bool resolve) {
   mirror::ArtMethod* method = GetMethod();
-  mirror::Class* type = method->GetDexCacheResolvedTypes()->Get(type_idx);
+  mirror::Class* type = method->GetDexCacheResolvedType(type_idx);
   if (type == nullptr && resolve) {
     type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
     CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 73de683..0dd1588 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -19,6 +19,8 @@
 
 #include "art_method.h"
 
+#include "art_field.h"
+#include "class.h"
 #include "class_linker.h"
 #include "dex_cache.h"
 #include "dex_file.h"
@@ -87,11 +89,60 @@
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_methods_));
 }
 
+inline ArtMethod* ArtMethod::GetDexCacheResolvedMethod(uint16_t method_index) {
+  ArtMethod* method = GetDexCacheResolvedMethods()->Get(method_index);
+  if (method != nullptr && !method->GetDeclaringClass()->IsErroneous()) {
+    return method;
+  } else {
+    return nullptr;
+  }
+}
+
+inline void ArtMethod::SetDexCacheResolvedMethod(uint16_t method_idx, ArtMethod* new_method) {
+  GetDexCacheResolvedMethods()->Set<false>(method_idx, new_method);
+}
+
+inline bool ArtMethod::HasDexCacheResolvedMethods() {
+  return GetDexCacheResolvedMethods() != nullptr;
+}
+
+inline bool ArtMethod::HasSameDexCacheResolvedMethods(ObjectArray<ArtMethod>* other_cache) {
+  return GetDexCacheResolvedMethods() == other_cache;
+}
+
+inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod* other) {
+  return GetDexCacheResolvedMethods() == other->GetDexCacheResolvedMethods();
+}
+
+
 inline ObjectArray<Class>* ArtMethod::GetDexCacheResolvedTypes() {
   return GetFieldObject<ObjectArray<Class>>(
       OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_));
 }
 
+template <bool kWithCheck>
+inline Class* ArtMethod::GetDexCacheResolvedType(uint32_t type_index) {
+  Class* klass;
+  if (kWithCheck) {
+    klass = GetDexCacheResolvedTypes()->Get(type_index);
+  } else {
+    klass = GetDexCacheResolvedTypes()->GetWithoutChecks(type_index);
+  }
+  return (klass != nullptr && !klass->IsErroneous()) ? klass : nullptr;
+}
+
+inline bool ArtMethod::HasDexCacheResolvedTypes() {
+  return GetDexCacheResolvedTypes() != nullptr;
+}
+
+inline bool ArtMethod::HasSameDexCacheResolvedTypes(ObjectArray<Class>* other_cache) {
+  return GetDexCacheResolvedTypes() == other_cache;
+}
+
+inline bool ArtMethod::HasSameDexCacheResolvedTypes(ArtMethod* other) {
+  return GetDexCacheResolvedTypes() == other->GetDexCacheResolvedTypes();
+}
+
 inline uint32_t ArtMethod::GetCodeSize() {
   DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
   const void* code = EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode());
@@ -396,7 +447,7 @@
 
 inline bool ArtMethod::IsResolvedTypeIdx(uint16_t type_idx) {
   mirror::ArtMethod* method = GetInterfaceMethodIfProxy();
-  return method->GetDexCacheResolvedTypes()->Get(type_idx) != nullptr;
+  return method->GetDexCacheResolvedType(type_idx) != nullptr;
 }
 
 inline int32_t ArtMethod::GetLineNumFromDexPC(uint32_t dex_pc) {
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 4882728..8eacb1c 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -157,12 +157,12 @@
       }
     }
   }
-#ifndef NDEBUG
-  StackHandleScope<2> hs(Thread::Current());
-  MethodHelper result_mh(hs.NewHandle(result));
-  MethodHelper this_mh(hs.NewHandle(this));
-  DCHECK(result == NULL || this_mh.HasSameNameAndSignature(&result_mh));
-#endif
+  if (kIsDebugBuild) {
+    StackHandleScope<2> hs(Thread::Current());
+    MethodHelper result_mh(hs.NewHandle(result));
+    MethodHelper this_mh(hs.NewHandle(this));
+    DCHECK(result == nullptr || this_mh.HasSameNameAndSignature(&result_mh));
+  }
   return result;
 }
 
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 01e6149..4ebceff 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -216,13 +216,25 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
   }
 
-  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtMethod* GetDexCacheResolvedMethod(uint16_t method_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetDexCacheResolvedMethod(uint16_t method_idx, ArtMethod* new_method)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheResolvedMethods(ObjectArray<ArtMethod>* new_dex_cache_methods)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool HasDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedMethods(ArtMethod* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedMethods(ObjectArray<ArtMethod>* other_cache)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ObjectArray<Class>* GetDexCacheResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  template <bool kWithCheck = true>
+  Class* GetDexCacheResolvedType(uint32_t type_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void SetDexCacheResolvedTypes(ObjectArray<Class>* new_dex_cache_types)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool HasDexCacheResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedTypes(ArtMethod* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool HasSameDexCacheResolvedTypes(ObjectArray<Class>* other_cache)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find the method that this method overrides
   ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -518,6 +530,10 @@
   static GcRoot<Class> java_lang_reflect_ArtMethod_;
 
  private:
+  ObjectArray<ArtMethod>* GetDexCacheResolvedMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  ObjectArray<Class>* GetDexCacheResolvedTypes() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   friend struct art::ArtMethodOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(ArtMethod);
 };
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 2c5fbcd..3c947ab 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -17,7 +17,9 @@
 #ifndef ART_RUNTIME_MIRROR_DEX_CACHE_H_
 #define ART_RUNTIME_MIRROR_DEX_CACHE_H_
 
+#include "art_field.h"
 #include "art_method.h"
+#include "class.h"
 #include "object.h"
 #include "object_array.h"
 
@@ -30,9 +32,6 @@
 
 namespace mirror {
 
-class ArtField;
-class ArtMethod;
-class Class;
 class String;
 
 // C++ mirror of java.lang.DexCache.
@@ -115,7 +114,12 @@
 
   ArtField* GetResolvedField(uint32_t field_idx) ALWAYS_INLINE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetResolvedFields()->Get(field_idx);
+    ArtField* field = GetResolvedFields()->Get(field_idx);
+    if (UNLIKELY(field == nullptr || field->GetDeclaringClass()->IsErroneous())) {
+      return nullptr;
+    } else {
+      return field;
+    }
   }
 
   void SetResolvedField(uint32_t field_idx, ArtField* resolved) ALWAYS_INLINE
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 440a6be..4964aa0 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -52,7 +52,7 @@
       monitor_chunks_.StoreRelaxed(new_backing);
       capacity_ = new_capacity;
       old_chunk_arrays_.push_back(old_backing);
-      LOG(INFO) << "Resizing to capacity " << capacity_;
+      VLOG(monitor) << "Resizing to capacity " << capacity_;
     }
   }
 
@@ -64,7 +64,7 @@
   CHECK_EQ(0U, reinterpret_cast<uintptr_t>(chunk) % kMonitorAlignment);
 
   // Add the chunk.
-  *(monitor_chunks_.LoadRelaxed()+num_chunks_) = reinterpret_cast<uintptr_t>(chunk);
+  *(monitor_chunks_.LoadRelaxed() + num_chunks_) = reinterpret_cast<uintptr_t>(chunk);
   num_chunks_++;
 
   // Set up the free list
@@ -96,7 +96,7 @@
 
   // Enough space, or need to resize?
   if (first_free_ == nullptr) {
-    LOG(INFO) << "Allocating a new chunk.";
+    VLOG(monitor) << "Allocating a new chunk.";
     AllocateChunk();
   }
 
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index bd6656d..3081421 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -17,14 +17,14 @@
 #include <jni.h>
 #include <vector>
 
-#include "common_compiler_test.h"
+#include "common_runtime_test.h"
 #include "field_helper.h"
 #include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
 
-class ProxyTest : public CommonCompilerTest {
+class ProxyTest : public CommonRuntimeTest {
  public:
   // Generate a proxy class with the given name and interfaces. This is a simplification from what
   // libcore does to fit to our test needs. We do not check for duplicated interfaces or methods and
@@ -103,6 +103,12 @@
     soa.Self()->AssertNoPendingException();
     return proxyClass;
   }
+
+ protected:
+  void SetUpRuntimeOptions(RuntimeOptions *options) OVERRIDE {
+    options->push_back(std::make_pair(StringPrintf("-Ximage:%s", GetLibCoreOatFileName().c_str()),
+                                      nullptr));
+  }
 };
 
 // Creates a proxy class and check ClassHelper works correctly.
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 23b9aed..c4d51cb 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -53,7 +53,7 @@
   kIntrinsicRint,
   kIntrinsicRoundFloat,
   kIntrinsicRoundDouble,
-  kIntrinsicGet,
+  kIntrinsicReferenceGet,
   kIntrinsicCharAt,
   kIntrinsicCompareTo,
   kIntrinsicIsEmptyOrLength,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2c25c2c..fe877d5 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -620,6 +620,7 @@
     case kArm:
     case kThumb2:
     case kX86:
+    case kArm64:
       implicit_null_checks_ = true;
       implicit_so_checks_ = true;
       break;
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index 255b506..c0e4351 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -209,7 +209,7 @@
 }
 
 RegType& RegTypeCache::FromClass(const char* descriptor, mirror::Class* klass, bool precise) {
-  DCHECK(klass != nullptr && !klass->IsErroneous());
+  DCHECK(klass != nullptr);
   if (klass->IsPrimitive()) {
     // Note: precise isn't used for primitive classes. A char is assignable to an int. All
     // primitive classes are final.
diff --git a/test/015-switch/expected.txt b/test/015-switch/expected.txt
index ca3b518..91b4714 100644
--- a/test/015-switch/expected.txt
+++ b/test/015-switch/expected.txt
@@ -8,3 +8,9 @@
 CORRECT (default only)
 CORRECT big sparse / first
 CORRECT big sparse / last
+default
+254
+255
+256
+257
+default
diff --git a/test/015-switch/src/Main.java b/test/015-switch/src/Main.java
index 7198e2b..dd97a8c 100644
--- a/test/015-switch/src/Main.java
+++ b/test/015-switch/src/Main.java
@@ -101,5 +101,15 @@
             case 100: System.out.print("CORRECT big sparse / last\n"); break;
             default: System.out.print("blah!\n"); break;
         }
+
+        for (a = 253; a <= 258; a++) {
+          switch (a) {
+            case 254: System.out.println("254"); break;
+            case 255: System.out.println("255"); break;
+            case 256: System.out.println("256"); break;
+            case 257: System.out.println("257"); break;
+            default: System.out.println("default"); break;
+          }
+        }
     }
 }
diff --git a/test/046-reflect/expected.txt b/test/046-reflect/expected.txt
index ecb3599..fa053fb 100644
--- a/test/046-reflect/expected.txt
+++ b/test/046-reflect/expected.txt
@@ -123,3 +123,17 @@
 fields are .equals
 methods are unique
 methods are .equals
+type1 is a ParameterizedType
+type2 is a ParameterizedType
+type3 is a ParameterizedType
+type1(java.util.Set<java.lang.String>) equals type2(java.util.Set<java.lang.String>)
+type1(java.util.Set<java.lang.String>) equals type3(java.util.Set<java.lang.String>)
+type1(java.util.Set<java.lang.String>) hashCode equals type2(java.util.Set<java.lang.String>) hashCode
+type1(java.util.Set<java.lang.String>) hashCode equals type3(java.util.Set<java.lang.String>) hashCode
+type1 is a GenericArrayType
+type2 is a GenericArrayType
+type3 is a GenericArrayType
+type1(T[]) equals type2(T[])
+type1(T[]) equals type3(T[])
+type1(T[]) hashCode equals type2(T[]) hashCode
+type1(T[]) hashCode equals type3(T[]) hashCode
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 3e6d700..11eb773 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -18,8 +18,10 @@
 import java.io.IOException;
 import java.util.Collections;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 /**
  * Reflection test.
@@ -579,6 +581,118 @@
         }
     }
 
+    public static void checkParametrizedTypeEqualsAndHashCode() {
+        Method method1;
+        Method method2;
+        Method method3;
+        try {
+            method1 = ParametrizedTypeTest.class.getDeclaredMethod("aMethod", Set.class);
+            method2 = ParametrizedTypeTest.class.getDeclaredMethod("aMethod", Set.class);
+            method3 = ParametrizedTypeTest.class.getDeclaredMethod("aMethodIdentical", Set.class);
+        } catch (NoSuchMethodException nsme) {
+            throw new RuntimeException(nsme);
+        }
+
+        List<Type> types1 = Arrays.asList(method1.getGenericParameterTypes());
+        List<Type> types2 = Arrays.asList(method2.getGenericParameterTypes());
+        List<Type> types3 = Arrays.asList(method3.getGenericParameterTypes());
+
+        Type type1 = types1.get(0);
+        Type type2 = types2.get(0);
+        Type type3 = types3.get(0);
+
+        if (type1 instanceof ParameterizedType) {
+            System.out.println("type1 is a ParameterizedType");
+        }
+        if (type2 instanceof ParameterizedType) {
+            System.out.println("type2 is a ParameterizedType");
+        }
+        if (type3 instanceof ParameterizedType) {
+            System.out.println("type3 is a ParameterizedType");
+        }
+
+        if (type1.equals(type2)) {
+            System.out.println("type1("+type1+") equals type2("+type2+")");
+        } else {
+            System.out.println("type1("+type1+") does not equal type2("+type2+")");
+        }
+
+        if (type1.equals(type3)) {
+            System.out.println("type1("+type1+") equals type3("+type3+")");
+        } else {
+            System.out.println("type1("+type1+") does not equal type3("+type3+")");
+        }
+        if (type1.hashCode() == type2.hashCode()) {
+            System.out.println("type1("+type1+") hashCode equals type2("+type2+") hashCode");
+        } else {
+            System.out.println(
+                   "type1("+type1+") hashCode does not equal type2("+type2+") hashCode");
+        }
+
+        if (type1.hashCode() == type3.hashCode()) {
+            System.out.println("type1("+type1+") hashCode equals type3("+type3+") hashCode");
+        } else {
+            System.out.println(
+                    "type1("+type1+") hashCode does not equal type3("+type3+") hashCode");
+        }
+    }
+
+    public static void checkGenericArrayTypeEqualsAndHashCode() {
+        Method method1;
+        Method method2;
+        Method method3;
+        try {
+            method1 = GenericArrayTypeTest.class.getDeclaredMethod("aMethod", Object[].class);
+            method2 = GenericArrayTypeTest.class.getDeclaredMethod("aMethod", Object[].class);
+            method3 = GenericArrayTypeTest.class.getDeclaredMethod("aMethodIdentical", Object[].class);
+        } catch (NoSuchMethodException nsme) {
+            throw new RuntimeException(nsme);
+        }
+
+        List<Type> types1 = Arrays.asList(method1.getGenericParameterTypes());
+        List<Type> types2 = Arrays.asList(method2.getGenericParameterTypes());
+        List<Type> types3 = Arrays.asList(method3.getGenericParameterTypes());
+
+        Type type1 = types1.get(0);
+        Type type2 = types2.get(0);
+        Type type3 = types3.get(0);
+
+        if (type1 instanceof GenericArrayType) {
+            System.out.println("type1 is a GenericArrayType");
+        }
+        if (type2 instanceof GenericArrayType) {
+            System.out.println("type2 is a GenericArrayType");
+        }
+        if (type3 instanceof GenericArrayType) {
+            System.out.println("type3 is a GenericArrayType");
+        }
+
+        if (type1.equals(type2)) {
+            System.out.println("type1("+type1+") equals type2("+type2+")");
+        } else {
+            System.out.println("type1("+type1+") does not equal type2("+type2+")");
+        }
+
+        if (type1.equals(type3)) {
+            System.out.println("type1("+type1+") equals type3("+type3+")");
+        } else {
+            System.out.println("type1("+type1+") does not equal type3("+type3+")");
+        }
+        if (type1.hashCode() == type2.hashCode()) {
+            System.out.println("type1("+type1+") hashCode equals type2("+type2+") hashCode");
+        } else {
+            System.out.println(
+                   "type1("+type1+") hashCode does not equal type2("+type2+") hashCode");
+        }
+
+        if (type1.hashCode() == type3.hashCode()) {
+            System.out.println("type1("+type1+") hashCode equals type3("+type3+") hashCode");
+        } else {
+            System.out.println(
+                    "type1("+type1+") hashCode does not equal type3("+type3+") hashCode");
+        }
+    }
+
     public static void main(String[] args) throws Exception {
         Main test = new Main();
         test.run();
@@ -589,6 +703,8 @@
         checkClinitForMethods();
         checkGeneric();
         checkUnique();
+        checkParametrizedTypeEqualsAndHashCode();
+        checkGenericArrayTypeEqualsAndHashCode();
     }
 }
 
@@ -696,3 +812,13 @@
     throw new UnsupportedOperationException();
   }
 }
+
+class ParametrizedTypeTest {
+    public void aMethod(Set<String> names) {}
+    public void aMethodIdentical(Set<String> names) {}
+}
+
+class GenericArrayTypeTest<T> {
+    public void aMethod(T[] names) {}
+    public void aMethodIdentical(T[] names) {}
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 5c1bc03..d7ee383 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -81,38 +81,10 @@
 
 # Tests that are broken in --trace mode.
 TEST_ART_BROKEN_TRACE_RUN_TESTS := \
-  003-omnibus-opcodes \
-  004-InterfaceTest \
   004-SignalTest \
-  004-ThreadStress \
-  005-annotations \
-  012-math \
   018-stack-overflow \
-  023-many-interfaces \
-  027-arithmetic \
-  031-class-attributes \
-  037-inherit \
-  044-proxy \
-  046-reflect \
-  051-thread \
-  055-enum-performance \
-  062-character-encodings \
-  064-field-access \
-  074-gc-thrash \
-  078-polymorphic-virtual \
-  080-oom-throw \
-  082-inline-execute \
-  083-compiler-regressions \
-  093-serialization \
   097-duplicate-method \
-  100-reflect2 \
-  102-concurrent-gc \
-  103-string-append \
-  107-int-math2 \
-  112-double-math \
-  114-ParallelGC \
-  700-LoadArgRegs \
-  701-easy-div-rem
+  107-int-math2
 
 ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-relocate))
 ART_TEST_KNOWN_BROKEN += $(foreach test, $(TEST_ART_BROKEN_TRACE_RUN_TESTS), $(call all-run-test-names,$(test),-trace,-no-prebuild))
diff --git a/test/run-test b/test/run-test
index aef7c52..ca7e68c 100755
--- a/test/run-test
+++ b/test/run-test
@@ -33,7 +33,11 @@
 progdir=`pwd`
 prog="${progdir}"/`basename "${prog}"`
 test_dir="test-$$"
-tmp_dir="/tmp/$USER/${test_dir}"
+if [ -z "$TMPDIR" ]; then
+  tmp_dir="/tmp/$USER/${test_dir}"
+else
+  tmp_dir="${TMPDIR}/$USER/${test_dir}"
+fi
 
 export JAVA="java"
 export JAVAC="javac -g"
diff --git a/tools/art b/tools/art
old mode 100755
new mode 100644