Merge "JDWP: fix thread_list deadlock"
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 0e2dad9..d9d09bc 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -188,10 +188,12 @@
   compiler/dex/local_value_numbering_test.cc \
   compiler/dex/mir_graph_test.cc \
   compiler/dex/mir_optimization_test.cc \
+  compiler/dex/quick/quick_cfi_test.cc \
   compiler/dwarf/dwarf_test.cc \
   compiler/driver/compiler_driver_test.cc \
   compiler/elf_writer_test.cc \
   compiler/image_test.cc \
+  compiler/jni/jni_cfi_test.cc \
   compiler/jni/jni_compiler_test.cc \
   compiler/linker/arm64/relative_patcher_arm64_test.cc \
   compiler/linker/arm/relative_patcher_thumb2_test.cc \
@@ -212,6 +214,7 @@
   compiler/optimizing/live_interval_test.cc \
   compiler/optimizing/live_ranges_test.cc \
   compiler/optimizing/nodes_test.cc \
+  compiler/optimizing/optimizing_cfi_test.cc \
   compiler/optimizing/parallel_move_test.cc \
   compiler/optimizing/pretty_printer_test.cc \
   compiler/optimizing/register_allocator_test.cc \
@@ -405,7 +408,7 @@
   LOCAL_CPP_EXTENSION := $$(ART_CPP_EXTENSION)
   LOCAL_SRC_FILES := $$(art_gtest_filename)
   LOCAL_C_INCLUDES += $$(ART_C_INCLUDES) art/runtime $$(art_gtest_extra_c_includes)
-  LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest
+  LOCAL_SHARED_LIBRARIES += libartd $$(art_gtest_extra_shared_libraries) libart-gtest libart-disassembler
   LOCAL_WHOLE_STATIC_LIBRARIES += libsigchain
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
diff --git a/compiler/Android.mk b/compiler/Android.mk
index eaea031..ac95abd 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -41,6 +41,7 @@
 	dex/quick/gen_common.cc \
 	dex/quick/gen_invoke.cc \
 	dex/quick/gen_loadstore.cc \
+	dex/quick/lazy_debug_frame_opcode_writer.cc \
 	dex/quick/local_optimizations.cc \
 	dex/quick/mips/assemble_mips.cc \
 	dex/quick/mips/call_mips.cc \
@@ -103,6 +104,7 @@
 	optimizing/code_generator_arm64.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/code_generator_x86_64.cc \
+	optimizing/code_generator_utils.cc \
 	optimizing/constant_folding.cc \
 	optimizing/dead_code_elimination.cc \
 	optimizing/graph_checker.cc \
@@ -138,7 +140,6 @@
 	utils/arm64/assembler_arm64.cc \
 	utils/arm64/managed_register_arm64.cc \
 	utils/assembler.cc \
-	utils/dwarf_cfi.cc \
 	utils/mips/assembler_mips.cc \
 	utils/mips/managed_register_mips.cc \
 	utils/mips64/assembler_mips64.cc \
@@ -151,6 +152,7 @@
 	buffered_output_stream.cc \
 	compiler.cc \
 	elf_writer.cc \
+	elf_writer_debug.cc \
 	elf_writer_quick.cc \
 	file_output_stream.cc \
 	image_writer.cc \
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
new file mode 100644
index 0000000..9181792
--- /dev/null
+++ b/compiler/cfi_test.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_CFI_TEST_H_
+#define ART_COMPILER_CFI_TEST_H_
+
+#include <algorithm>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "dwarf/dwarf_test.h"
+#include "dwarf/headers.h"
+#include "disassembler/disassembler.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class CFITest : public dwarf::DwarfTest {
+ public:
+  void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str,
+                        const std::vector<uint8_t>& actual_asm,
+                        const std::vector<uint8_t>& actual_cfi) {
+    std::vector<std::string> lines;
+    // Print the raw bytes.
+    fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str);
+    HexDump(f, actual_asm);
+    fprintf(f, "\n};\n");
+    fprintf(f, "static constexpr uint8_t expected_cfi_%s[] = {", isa_str);
+    HexDump(f, actual_cfi);
+    fprintf(f, "\n};\n");
+    // Pretty-print CFI opcodes.
+    constexpr bool is64bit = false;
+    dwarf::DebugFrameOpCodeWriter<> initial_opcodes;
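+    // Note: dwarf::Reg(8) below is the CIE's return-address column (x86 numbering,
+    // matching the host objdump that parses the output later).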
+    dwarf::WriteEhFrameCIE(is64bit, dwarf::Reg(8), initial_opcodes, &eh_frame_data_);
+    dwarf::WriteEhFrameFDE(is64bit, 0, 0, actual_asm.size(), &actual_cfi, &eh_frame_data_);
+    ReformatCfi(Objdump(false, "-W"), &lines);
+    // Pretty-print assembly.
+    auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
+    std::unique_ptr<Disassembler> disasm(Disassembler::Create(isa, opts));
+    std::stringstream stream;
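+    // For Thumb2, the low bit of the code address is set (the Thumb bit).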
+    const uint8_t* base = actual_asm.data() + (isa == kThumb2 ? 1 : 0);
+    disasm->Dump(stream, base, base + actual_asm.size());
+    ReformatAsm(&stream, &lines);
+    // Print CFI and assembly interleaved.
+    std::stable_sort(lines.begin(), lines.end(), CompareByAddress);
+    for (const std::string& line : lines) {
+      fprintf(f, "// %s\n", line.c_str());
+    }
+    fprintf(f, "\n");
+  }
+
+ private:
+  // Helper - get offset just past the end of given string.
+  static size_t FindEndOf(const std::string& str, const char* substr) {
+    size_t pos = str.find(substr);
+    CHECK_NE(std::string::npos, pos);
+    return pos + strlen(substr);
+  }
+
+  // Split into lines and remove raw instruction bytes.
+  static void ReformatAsm(std::stringstream* stream,
+                          std::vector<std::string>* output) {
+    std::string line;
+    while (std::getline(*stream, line)) {
+      line = line.substr(0, FindEndOf(line, ": ")) +
+             line.substr(FindEndOf(line, "\t"));
+      size_t pos;
+      while ((pos = line.find("  ")) != std::string::npos) {
+        line = line.replace(pos, 2, " ");
+      }
+      while (!line.empty() && line.back() == ' ') {
+        line.pop_back();
+      }
+      output->push_back(line);
+    }
+  }
+
+  // Find the interesting parts of objdump output and prefix the lines with their address.
+  static void ReformatCfi(const std::vector<std::string>& lines,
+                          std::vector<std::string>* output) {
+    std::string address;
+    for (const std::string& line : lines) {
+      if (line.find("DW_CFA_nop") != std::string::npos) {
+        // Ignore.
+      } else if (line.find("DW_CFA_advance_loc") != std::string::npos) {
+        // The last 8 characters are the address.
+        address = "0x" + line.substr(line.size() - 8);
+      } else if (line.find("DW_CFA_") != std::string::npos) {
+        std::string new_line(line);
+        // "bad register" warning is caused by always using host (x86) objdump.
+        const char* bad_reg = "bad register: ";
+        size_t pos;
+        if ((pos = new_line.find(bad_reg)) != std::string::npos) {
+          new_line = new_line.replace(pos, strlen(bad_reg), "");
+        }
+        // Remove register names in parentheses since they have x86 names.
+        if ((pos = new_line.find(" (")) != std::string::npos) {
+          new_line = new_line.replace(pos, FindEndOf(new_line, ")") - pos, "");
+        }
+        // Use the .cfi_ prefix.
+        new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_"));
+        output->push_back(address + ": " + new_line);
+      }
+    }
+  }
+
+  // Compare strings by the address prefix.
+  static bool CompareByAddress(const std::string& lhs, const std::string& rhs) {
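+    // Addresses are formatted as "0x" plus eight hex digits, so ':' is at index 10.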
+    EXPECT_EQ(lhs[10], ':');
+    EXPECT_EQ(rhs[10], ':');
+    return strncmp(lhs.c_str(), rhs.c_str(), 10) < 0;
+  }
+
+  // Pretty-print byte array.  12 bytes per line.
+  static void HexDump(FILE* f, const std::vector<uint8_t>& data) {
+    for (size_t i = 0; i < data.size(); i++) {
+      fprintf(f, i % 12 == 0 ? "\n    " : " ");  // Whitespace.
+      fprintf(f, "0x%02X,", data[i]);
+    }
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_CFI_TEST_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index eeed877..4f7a970 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -188,39 +188,6 @@
   return ret;
 }
 
-CompiledMethod* CompiledMethod::SwapAllocCompiledMethodStackMap(
-    CompilerDriver* driver,
-    InstructionSet instruction_set,
-    const ArrayRef<const uint8_t>& quick_code,
-    const size_t frame_size_in_bytes,
-    const uint32_t core_spill_mask,
-    const uint32_t fp_spill_mask,
-    const ArrayRef<const uint8_t>& stack_map) {
-  SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
-  CompiledMethod* ret = alloc.allocate(1);
-  alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
-                  fp_spill_mask, nullptr, ArrayRef<const uint8_t>(), stack_map,
-                  ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
-                  ArrayRef<const LinkerPatch>());
-  return ret;
-}
-
-CompiledMethod* CompiledMethod::SwapAllocCompiledMethodCFI(
-    CompilerDriver* driver,
-    InstructionSet instruction_set,
-    const ArrayRef<const uint8_t>& quick_code,
-    const size_t frame_size_in_bytes,
-    const uint32_t core_spill_mask,
-    const uint32_t fp_spill_mask,
-    const ArrayRef<const uint8_t>& cfi_info) {
-  SwapAllocator<CompiledMethod> alloc(driver->GetSwapSpaceAllocator());
-  CompiledMethod* ret = alloc.allocate(1);
-  alloc.construct(ret, driver, instruction_set, quick_code, frame_size_in_bytes, core_spill_mask,
-                  fp_spill_mask, nullptr, ArrayRef<const uint8_t>(),
-                  ArrayRef<const uint8_t>(), ArrayRef<const uint8_t>(),
-                  cfi_info, ArrayRef<const LinkerPatch>());
-  return ret;
-}
 
 
 void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) {
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 506b47b..480d021 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -320,7 +320,7 @@
                  const ArrayRef<const uint8_t>& vmap_table,
                  const ArrayRef<const uint8_t>& native_gc_map,
                  const ArrayRef<const uint8_t>& cfi_info,
-                 const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>());
+                 const ArrayRef<const LinkerPatch>& patches);
 
   virtual ~CompiledMethod();
 
@@ -336,24 +336,7 @@
       const ArrayRef<const uint8_t>& vmap_table,
       const ArrayRef<const uint8_t>& native_gc_map,
       const ArrayRef<const uint8_t>& cfi_info,
-      const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>());
-
-  static CompiledMethod* SwapAllocCompiledMethodStackMap(
-      CompilerDriver* driver,
-      InstructionSet instruction_set,
-      const ArrayRef<const uint8_t>& quick_code,
-      const size_t frame_size_in_bytes,
-      const uint32_t core_spill_mask,
-      const uint32_t fp_spill_mask,
-      const ArrayRef<const uint8_t>& stack_map);
-
-  static CompiledMethod* SwapAllocCompiledMethodCFI(CompilerDriver* driver,
-                                                    InstructionSet instruction_set,
-                                                    const ArrayRef<const uint8_t>& quick_code,
-                                                    const size_t frame_size_in_bytes,
-                                                    const uint32_t core_spill_mask,
-                                                    const uint32_t fp_spill_mask,
-                                                    const ArrayRef<const uint8_t>& cfi_info);
+      const ArrayRef<const LinkerPatch>& patches);
 
   static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m);
 
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 6ec39f9..94b0fe3 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -63,13 +63,6 @@
   virtual uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const
      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
 
-  virtual bool WriteElf(art::File* file,
-                        OatWriter* oat_writer,
-                        const std::vector<const art::DexFile*>& dex_files,
-                        const std::string& android_root,
-                        bool is_host) const
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0;
-
   uint64_t GetMaximumCompilationTimeBeforeWarning() const {
     return maximum_compilation_time_before_warning_;
   }
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 93d83c6..0850f42 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -403,13 +403,6 @@
     DCHECK(bb != nullptr);
     return c_unit->mir_graph->EliminateSuspendChecks(bb);
   }
-
-  void End(PassDataHolder* data) const {
-    DCHECK(data != nullptr);
-    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
-    DCHECK(c_unit != nullptr);
-    c_unit->mir_graph->EliminateSuspendChecksEnd();
-  }
 };
 
 }  // namespace art
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 548b6f8..ef94d8b 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index f638b0b..2a920a4 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1396,6 +1396,13 @@
   InitializeBasicBlockDataFlow();
 }
 
+uint32_t MIRGraph::GetUseCountWeight(BasicBlock* bb) const {
+  // Each level of nesting adds 100 to the weight, up to 3 levels deep.
+  uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
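+  // E.g. depth 0 gives weight 1, depth 1 gives 100, depth 3 or deeper gives 300.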
+  uint32_t weight = std::max(1U, depth * 100);
+  return weight;
+}
+
 /*
  * Count uses, weighting by loop nesting depth.  This code only
  * counts explicitly used s_regs.  A later phase will add implicit
@@ -1405,9 +1412,7 @@
   if (bb->block_type != kDalvikByteCode) {
     return;
   }
-  // Each level of nesting adds *100 to count, up to 3 levels deep.
-  uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth));
-  uint32_t weight = std::max(1U, depth * 100);
+  uint32_t weight = GetUseCountWeight(bb);
   for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) {
     if (mir->ssa_rep == NULL) {
       continue;
@@ -1417,23 +1422,6 @@
       raw_use_counts_[s_reg] += 1u;
       use_counts_[s_reg] += weight;
     }
-    if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) {
-      uint64_t df_attributes = GetDataFlowAttributes(mir);
-      // Implicit use of Method* ? */
-      if (df_attributes & DF_UMS) {
-        /*
-         * Some invokes will not use Method* - need to perform test similar
-         * to that found in GenInvoke() to decide whether to count refs
-         * for Method* on invoke-class opcodes.  This is a relatively expensive
-         * operation, so should only be done once.
-         * TODO: refactor InvokeUsesMethodStar() to perform check at parse time,
-         * and save results for both here and GenInvoke.  For now, go ahead
-         * and assume all invokes use method*.
-         */
-        raw_use_counts_[method_sreg_] += 1u;
-        use_counts_[method_sreg_] += weight;
-      }
-    }
   }
 }
 
diff --git a/compiler/dex/mir_field_info.cc b/compiler/dex/mir_field_info.cc
index a9ab3bb..4dfec17 100644
--- a/compiler/dex/mir_field_info.cc
+++ b/compiler/dex/mir_field_info.cc
@@ -56,7 +56,7 @@
   // definition) we still want to resolve fields and record all available info.
   for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
     uint32_t field_idx;
-    mirror::ArtField* resolved_field;
+    ArtField* resolved_field;
     if (!it->IsQuickened()) {
       field_idx = it->field_idx_;
       resolved_field = compiler_driver->ResolveField(soa, dex_cache, class_loader, mUnit,
@@ -121,7 +121,7 @@
 
   for (auto it = field_infos, end = field_infos + count; it != end; ++it) {
     uint32_t field_idx = it->field_idx_;
-    mirror::ArtField* resolved_field =
+    ArtField* resolved_field =
         compiler_driver->ResolveField(soa, dex_cache, class_loader, mUnit, field_idx, true);
     if (UNLIKELY(resolved_field == nullptr)) {
       continue;
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 58f12c9..4d34038 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -1609,8 +1609,8 @@
 }
 
 std::string MIRGraph::GetSSAName(int ssa_reg) {
-  // TODO: This value is needed for LLVM and debugging. Currently, we compute this and then copy to
-  //       the arena. We should be smarter and just place straight into the arena, or compute the
+  // TODO: This value is needed for debugging. Currently, we compute this and then copy to the
+  //       arena. We should be smarter and just place straight into the arena, or compute the
   //       value more lazily.
   int vreg = SRegToVReg(ssa_reg);
   if (vreg >= static_cast<int>(GetFirstTempVR())) {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 3298af1..85b1344 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -960,6 +960,12 @@
    */
   CompilerTemp* GetNewCompilerTemp(CompilerTempType ct_type, bool wide);
 
+  /**
+   * @brief Used to remove the last created compiler temporary when it's not needed.
+   * @param ct_type type of the requested temporary (backend or VR).
+   * @param wide whether the temporary is wide.
+   * @param temp the temporary to remove.
+   */
+  void RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp);
+
   bool MethodIsLeaf() {
     return attributes_ & METHOD_IS_LEAF;
   }
@@ -1079,7 +1085,6 @@
   void EliminateDeadCodeEnd();
   bool EliminateSuspendChecksGate();
   bool EliminateSuspendChecks(BasicBlock* bb);
-  void EliminateSuspendChecksEnd();
 
   uint16_t GetGvnIFieldId(MIR* mir) const {
     DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
@@ -1185,6 +1190,12 @@
   void DoConstantPropagation(BasicBlock* bb);
 
   /**
+   * @brief Get use count weight for a given block.
+   * @param bb the BasicBlock.
+   */
+  uint32_t GetUseCountWeight(BasicBlock* bb) const;
+
+  /**
    * @brief Count the uses in the BasicBlock
    * @param bb the BasicBlock
    */
@@ -1396,10 +1407,6 @@
       uint16_t* sfield_ids;  // Ditto.
       GvnDeadCodeElimination* dce;
     } gvn;
-    // Suspend check elimination.
-    struct {
-      DexFileMethodInliner* inliner;
-    } sce;
   } temp_;
   static const int kInvalidEntry = -1;
   ArenaVector<BasicBlock*> block_list_;
@@ -1451,6 +1458,7 @@
   friend class GvnDeadCodeEliminationTest;
   friend class LocalValueNumberingTest;
   friend class TopologicalSortOrderTest;
+  friend class QuickCFITest;
 };
 
 }  // namespace art
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 831ad42..0c84b82 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -16,6 +16,8 @@
 
 # include "mir_method_info.h"
 
+#include "dex/quick/dex_file_method_inliner.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
@@ -64,6 +66,9 @@
   const DexFile* const dex_file = mUnit->GetDexFile();
   const bool use_jit = runtime->UseJit();
   const VerifiedMethod* const verified_method = mUnit->GetVerifiedMethod();
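+  // Look up the inliner for the current dex file once; invokes into other dex
+  // files fetch their own inliner in the loop below.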
+  DexFileToMethodInlinerMap* inliner_map = compiler_driver->GetMethodInlinerMap();
+  DexFileMethodInliner* default_inliner =
+      (inliner_map != nullptr) ? inliner_map->GetMethodInliner(dex_file) : nullptr;
 
   for (auto it = method_infos, end = method_infos + count; it != end; ++it) {
     // For quickened invokes, the dex method idx is actually the mir offset.
@@ -122,6 +127,7 @@
     if (UNLIKELY(resolved_method == nullptr)) {
       continue;
     }
+
     compiler_driver->GetResolvedMethodDexFileLocation(resolved_method,
         &it->declaring_dex_file_, &it->declaring_class_idx_, &it->declaring_method_idx_);
     if (!it->IsQuickened()) {
@@ -133,6 +139,7 @@
       it->vtable_idx_ =
           compiler_driver->GetResolvedMethodVTableIndex(resolved_method, invoke_type);
     }
+
     MethodReference target_method(it->target_dex_file_, it->target_method_idx_);
     int fast_path_flags = compiler_driver->IsFastInvoke(
         soa, current_dex_cache, class_loader, mUnit, referrer_class.Get(), resolved_method,
@@ -140,10 +147,23 @@
     const bool is_referrers_class = referrer_class.Get() == resolved_method->GetDeclaringClass();
     const bool is_class_initialized =
         compiler_driver->IsMethodsClassInitialized(referrer_class.Get(), resolved_method);
+
+    // Check if the target method is intrinsic or special.
+    InlineMethodFlags is_intrinsic_or_special = kNoInlineMethodFlags;
+    if (inliner_map != nullptr) {
+      auto* inliner = (target_method.dex_file == dex_file)
+          ? default_inliner
+          : inliner_map->GetMethodInliner(target_method.dex_file);
+      is_intrinsic_or_special = inliner->IsIntrinsicOrSpecial(target_method.dex_method_index);
+    }
+
     uint16_t other_flags = it->flags_ &
-        ~(kFlagFastPath | kFlagClassIsInitialized | (kInvokeTypeMask << kBitSharpTypeBegin));
+        ~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
+            (kInvokeTypeMask << kBitSharpTypeBegin));
     it->flags_ = other_flags |
         (fast_path_flags != 0 ? kFlagFastPath : 0u) |
+        ((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
+        ((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
         (static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
         (is_referrers_class ? kFlagIsReferrersClass : 0u) |
         (is_class_initialized ? kFlagClassIsInitialized : 0u);
diff --git a/compiler/dex/mir_method_info.h b/compiler/dex/mir_method_info.h
index e131c96..7230c46 100644
--- a/compiler/dex/mir_method_info.h
+++ b/compiler/dex/mir_method_info.h
@@ -127,6 +127,14 @@
     return (flags_ & kFlagFastPath) != 0u;
   }
 
+  bool IsIntrinsic() const {
+    return (flags_ & kFlagIsIntrinsic) != 0u;
+  }
+
+  bool IsSpecial() const {
+    return (flags_ & kFlagIsSpecial) != 0u;
+  }
+
   bool IsReferrersClass() const {
     return (flags_ & kFlagIsReferrersClass) != 0;
   }
@@ -188,9 +196,11 @@
  private:
   enum {
     kBitFastPath = kMethodInfoBitEnd,
+    kBitIsIntrinsic,
+    kBitIsSpecial,
     kBitInvokeTypeBegin,
     kBitInvokeTypeEnd = kBitInvokeTypeBegin + 3,  // 3 bits for invoke type.
-    kBitSharpTypeBegin,
+    kBitSharpTypeBegin = kBitInvokeTypeEnd,
     kBitSharpTypeEnd = kBitSharpTypeBegin + 3,  // 3 bits for sharp type.
     kBitIsReferrersClass = kBitSharpTypeEnd,
     kBitClassIsInitialized,
@@ -199,6 +209,8 @@
   };
   static_assert(kMethodLoweringInfoBitEnd <= 16, "Too many flags");
   static constexpr uint16_t kFlagFastPath = 1u << kBitFastPath;
+  static constexpr uint16_t kFlagIsIntrinsic = 1u << kBitIsIntrinsic;
+  static constexpr uint16_t kFlagIsSpecial = 1u << kBitIsSpecial;
   static constexpr uint16_t kFlagIsReferrersClass = 1u << kBitIsReferrersClass;
   static constexpr uint16_t kFlagClassIsInitialized = 1u << kBitClassIsInitialized;
   static constexpr uint16_t kFlagQuickened = 1u << kBitQuickened;
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index c85c3b6..9d7b4b4 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -318,9 +318,11 @@
     // Since VR temps cannot be requested once the BE temps are requested, we
     // allow reservation of VR temps as well for BE.
     size_t available_temps = reserved_temps_for_backend_ + GetNumAvailableVRTemps();
-    if (available_temps <= 0 || (available_temps <= 1 && wide)) {
+    size_t needed_temps = wide ? 2u : 1u;
+    if (available_temps < needed_temps) {
       if (verbose) {
-        LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str << " are available.";
+        LOG(INFO) << "CompilerTemps: Not enough temp(s) of type " << ct_type_str
+            << " are available.";
       }
       return nullptr;
     }
@@ -328,12 +330,8 @@
     // Update the remaining reserved temps since we have now used them.
     // Note that the code below is actually subtracting to remove them from reserve
     // once they have been claimed. It is careful to not go below zero.
-    if (reserved_temps_for_backend_ >= 1) {
-      reserved_temps_for_backend_--;
-    }
-    if (wide && reserved_temps_for_backend_ >= 1) {
-      reserved_temps_for_backend_--;
-    }
+    reserved_temps_for_backend_ =
+        std::max(reserved_temps_for_backend_, needed_temps) - needed_temps;
 
     // The new non-special compiler temp must receive a unique v_reg.
     compiler_temp->v_reg = GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_;
@@ -407,6 +405,36 @@
   return compiler_temp;
 }
 
+void MIRGraph::RemoveLastCompilerTemp(CompilerTempType ct_type, bool wide, CompilerTemp* temp) {
+  // Once the compiler temps have been committed, it's too late for any modifications.
+  DCHECK_EQ(compiler_temps_committed_, false);
+
+  size_t used_temps = wide ? 2u : 1u;
+
+  if (ct_type == kCompilerTempBackend) {
+    DCHECK(requested_backend_temp_);
+
+    // Make the temps available to backend again.
+    reserved_temps_for_backend_ += used_temps;
+  } else if (ct_type == kCompilerTempVR) {
+    DCHECK(!requested_backend_temp_);
+  } else {
+    UNIMPLEMENTED(FATAL) << "No handling for compiler temp type " << static_cast<int>(ct_type);
+  }
+
+  // Reduce the number of non-special compiler temps.
+  DCHECK_LE(used_temps, num_non_special_compiler_temps_);
+  num_non_special_compiler_temps_ -= used_temps;
+
+  // Check that this was really the last temp.
+  DCHECK_EQ(static_cast<size_t>(temp->v_reg),
+            GetFirstNonSpecialTempVR() + num_non_special_compiler_temps_);
+
+  if (cu_->verbose) {
+    LOG(INFO) << "Last temporary has been removed.";
+  }
+}
+
 static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
   bool is_taken;
   switch (opcode) {
@@ -1489,7 +1517,7 @@
       continue;
     }
     const MirMethodLoweringInfo& method_info = GetMethodLoweringInfo(mir);
-    if (!method_info.FastPath()) {
+    if (!method_info.FastPath() || !method_info.IsSpecial()) {
       continue;
     }
 
@@ -1631,10 +1659,6 @@
       !HasInvokes()) {               // No invokes to actually eliminate any suspend checks.
     return false;
   }
-  if (cu_->compiler_driver != nullptr && cu_->compiler_driver->GetMethodInlinerMap() != nullptr) {
-    temp_.sce.inliner =
-        cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
-  }
   suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc);
   return true;
 }
@@ -1652,9 +1676,9 @@
   uint32_t suspend_checks_in_loops = (1u << bb->nesting_depth) - 1u;  // Start with all loop heads.
   bool found_invoke = false;
   for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
-    if (IsInstructionInvoke(mir->dalvikInsn.opcode) &&
-        (temp_.sce.inliner == nullptr ||
-         !temp_.sce.inliner->IsIntrinsic(mir->dalvikInsn.vB, nullptr))) {
+    if ((IsInstructionInvoke(mir->dalvikInsn.opcode) ||
+        IsInstructionQuickInvoke(mir->dalvikInsn.opcode)) &&
+        !GetMethodLoweringInfo(mir).IsIntrinsic()) {
       // Non-intrinsic invoke, rely on a suspend point in the invoked method.
       found_invoke = true;
       break;
@@ -1717,10 +1741,6 @@
   return true;
 }
 
-void MIRGraph::EliminateSuspendChecksEnd() {
-  temp_.sce.inliner = nullptr;
-}
-
 bool MIRGraph::CanThrow(MIR* mir) const {
   if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) {
     return false;
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 9ce5ebb..10a4337 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -474,7 +474,6 @@
     for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
       change = cu_.mir_graph->EliminateSuspendChecks(bb);
     }
-    cu_.mir_graph->EliminateSuspendChecksEnd();
   }
 
   SuspendCheckEliminationTest()
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index e6158c3..3d18af6 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -29,6 +29,7 @@
 #include "mirror/object_array-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "utils.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
@@ -354,7 +355,16 @@
   FreeTemp(reg_card_no);
 }
 
+static dwarf::Reg DwarfCoreReg(int num) {
+  return dwarf::Reg::ArmCore(num);
+}
+
+static dwarf::Reg DwarfFpReg(int num) {
+  return dwarf::Reg::ArmFp(num);
+}
+
 void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
   int spill_count = num_core_spills_ + num_fp_spills_;
   /*
    * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
@@ -402,28 +412,32 @@
     }
   }
   /* Spill core callee saves */
-  if (core_spill_mask_ == 0u) {
-    // Nothing to spill.
-  } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
-    // Spilling only low regs and/or LR, use 16-bit PUSH.
-    constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
-    NewLIR1(kThumbPush,
-            (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
-            ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
-  } else if (IsPowerOfTwo(core_spill_mask_)) {
-    // kThumb2Push cannot be used to spill a single register.
-    NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
-  } else {
-    NewLIR1(kThumb2Push, core_spill_mask_);
+  if (core_spill_mask_ != 0u) {
+    if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
+      // Spilling only low regs and/or LR, use 16-bit PUSH.
+      constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
+      NewLIR1(kThumbPush,
+              (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
+              ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
+    } else if (IsPowerOfTwo(core_spill_mask_)) {
+      // kThumb2Push cannot be used to spill a single register.
+      NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
+    } else {
+      NewLIR1(kThumb2Push, core_spill_mask_);
+    }
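+    // Record the CFA change and the stack slot of each pushed core register.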
+    cfi_.AdjustCFAOffset(num_core_spills_ * kArmPointerSize);
+    cfi_.RelOffsetForMany(DwarfCoreReg(0), 0, core_spill_mask_, kArmPointerSize);
   }
   /* Need to spill any FP regs? */
-  if (num_fp_spills_) {
+  if (num_fp_spills_ != 0u) {
     /*
      * NOTE: fp spills are a little different from core spills in that
      * they are pushed as a contiguous block.  When promoting from
      * the fp set, we must allocate all singles from s16..highest-promoted
      */
     NewLIR1(kThumb2VPushCS, num_fp_spills_);
+    cfi_.AdjustCFAOffset(num_fp_spills_ * kArmPointerSize);
+    cfi_.RelOffsetForMany(DwarfFpReg(0), 0, fp_spill_mask_, kArmPointerSize);
   }
 
   const int spill_size = spill_count * 4;
@@ -444,12 +458,14 @@
             m2l_->LoadWordDisp(rs_rARM_SP, sp_displace_ - 4, rs_rARM_LR);
           }
           m2l_->OpRegImm(kOpAdd, rs_rARM_SP, sp_displace_);
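+          // The slow path pops the frame before tail-calling the throw entrypoint;
+          // track the CFA here and restore it below for code emitted afterwards.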
+          m2l_->cfi().AdjustCFAOffset(-sp_displace_);
           m2l_->ClobberCallerSave();
           ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow);
           // Load the entrypoint directly into the pc instead of doing a load + branch. Assumes
           // codegen and target are in thumb2 mode.
           // NOTE: native pointer.
           m2l_->LoadWordDisp(rs_rARM_SELF, func_offset.Int32Value(), rs_rARM_PC);
+          m2l_->cfi().AdjustCFAOffset(sp_displace_);
         }
 
        private:
@@ -464,6 +480,7 @@
         // Need to restore LR since we used it as a temp.
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, true, spill_size));
         OpRegCopy(rs_rARM_SP, rs_rARM_LR);     // Establish stack
+        cfi_.AdjustCFAOffset(frame_size_without_spills);
       } else {
         /*
          * If the frame is small enough we are guaranteed to have enough space that remains to
@@ -474,6 +491,7 @@
         MarkTemp(rs_rARM_LR);
         FreeTemp(rs_rARM_LR);
         OpRegRegImm(kOpSub, rs_rARM_SP, rs_rARM_SP, frame_size_without_spills);
+        cfi_.AdjustCFAOffset(frame_size_without_spills);
         Clobber(rs_rARM_LR);
         UnmarkTemp(rs_rARM_LR);
         LIR* branch = OpCmpBranch(kCondUlt, rs_rARM_SP, rs_r12, nullptr);
@@ -483,13 +501,23 @@
       // Implicit stack overflow check has already been done.  Just make room on the
       // stack for the frame now.
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+      cfi_.AdjustCFAOffset(frame_size_without_spills);
     }
   } else {
     OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
+    cfi_.AdjustCFAOffset(frame_size_without_spills);
   }
 
   FlushIns(ArgLocs, rl_method);
 
+  // We can promote a PC-relative reference to dex cache arrays to a register
+  // if it's used at least twice. Without investigating where we should lazily
+  // load the reference, we conveniently load it after flushing inputs.
+  if (dex_cache_arrays_base_reg_.Valid()) {
+    OpPcRelDexCacheArrayAddr(cu_->dex_file, dex_cache_arrays_min_offset_,
+                             dex_cache_arrays_base_reg_);
+  }
+
   FreeTemp(rs_r0);
   FreeTemp(rs_r1);
   FreeTemp(rs_r2);
@@ -498,7 +526,9 @@
 }
 
 void ArmMir2Lir::GenExitSequence() {
+  cfi_.RememberState();
   int spill_count = num_core_spills_ + num_fp_spills_;
+
   /*
    * In the exit path, r0/r1 are live - make sure they aren't
    * allocated by the register utilities as temps.
@@ -506,34 +536,47 @@
   LockTemp(rs_r0);
   LockTemp(rs_r1);
 
-  OpRegImm(kOpAdd, rs_rARM_SP, frame_size_ - (spill_count * 4));
+  int adjust = frame_size_ - (spill_count * kArmPointerSize);
+  OpRegImm(kOpAdd, rs_rARM_SP, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
   /* Need to restore any FP callee saves? */
   if (num_fp_spills_) {
     NewLIR1(kThumb2VPopCS, num_fp_spills_);
+    cfi_.AdjustCFAOffset(-num_fp_spills_ * kArmPointerSize);
+    cfi_.RestoreMany(DwarfFpReg(0), fp_spill_mask_);
   }
-  if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) {
-    /* Unspill rARM_LR to rARM_PC */
+  bool unspill_LR_to_PC = (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0;
+  if (unspill_LR_to_PC) {
     core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
     core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
   }
-  if (core_spill_mask_ == 0u) {
-    // Nothing to unspill.
-  } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
-    // Unspilling only low regs and/or PC, use 16-bit POP.
-    constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
-    NewLIR1(kThumbPop,
-            (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
-            ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
-  } else if (IsPowerOfTwo(core_spill_mask_)) {
-    // kThumb2Pop cannot be used to unspill a single register.
-    NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
-  } else {
-    NewLIR1(kThumb2Pop, core_spill_mask_);
+  if (core_spill_mask_ != 0u) {
+    if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
+      // Unspilling only low regs and/or PC, use 16-bit POP.
+      constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
+      NewLIR1(kThumbPop,
+              (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
+              ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
+    } else if (IsPowerOfTwo(core_spill_mask_)) {
+      // kThumb2Pop cannot be used to unspill a single register.
+      NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
+    } else {
+      NewLIR1(kThumb2Pop, core_spill_mask_);
+    }
+    // If we pop to PC, there is no further epilogue code.
+    if (!unspill_LR_to_PC) {
+      cfi_.AdjustCFAOffset(-num_core_spills_ * kArmPointerSize);
+      cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_);
+      DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
+    }
   }
-  if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) {
+  if (!unspill_LR_to_PC) {
     /* We didn't pop to rARM_PC, so must do a bx rARM_LR */
     NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
   }
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size_);
 }
 
 void ArmMir2Lir::GenSpecialExitSequence() {
@@ -555,11 +598,16 @@
   NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) |                 // ArtMethod*
           (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |  // Spills other than LR.
           (1u << 8));                                             // LR encoded for 16-bit push.
+  cfi_.AdjustCFAOffset(frame_size_);
+  // Do not generate CFI for scratch register r0.
+  cfi_.RelOffsetForMany(DwarfCoreReg(0), 4, core_spill_mask_, kArmPointerSize);
 }
 
 void ArmMir2Lir::GenSpecialExitForSuspend() {
   // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
   NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_);  // 32-bit because of LR.
+  cfi_.AdjustCFAOffset(-frame_size_);
+  cfi_.RestoreMany(DwarfCoreReg(0), core_spill_mask_);
 }
 
 static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
@@ -571,12 +619,12 @@
  * Bit of a hack here - in the absence of a real scheduling pass,
  * emit the next instruction in static & direct invoke sequences.
  */
-static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
-                             int state, const MethodReference& target_method,
-                             uint32_t unused_idx ATTRIBUTE_UNUSED,
-                             uintptr_t direct_code, uintptr_t direct_method,
-                             InvokeType type) {
-  Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+int ArmMir2Lir::ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
+                                  int state, const MethodReference& target_method,
+                                  uint32_t unused_idx ATTRIBUTE_UNUSED,
+                                  uintptr_t direct_code, uintptr_t direct_method,
+                                  InvokeType type) {
+  ArmMir2Lir* cg = static_cast<ArmMir2Lir*>(cu->cg.get());
   if (direct_code != 0 && direct_method != 0) {
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
@@ -597,17 +645,24 @@
       return -1;
     }
   } else {
+    bool use_pc_rel = cg->CanUseOpPcRelDexCacheArrayLoad();
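+    // With PC-relative dex cache array loads, loading the current Method* and its
+    // dex_cache_resolved_methods_ is unnecessary; state 2 loads the target method
+    // directly from the dex cache arrays.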
     RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
     switch (state) {
     case 0:  // Get the current Method* [sets kArg0]
       // TUNING: we can save a reg copy if Method* has been promoted.
-      cg->LoadCurrMethodDirect(arg0_ref);
-      break;
+      if (!use_pc_rel) {
+        cg->LoadCurrMethodDirect(arg0_ref);
+        break;
+      }
+      ++state;
+      FALLTHROUGH_INTENDED;
     case 1:  // Get method->dex_cache_resolved_methods_
-      cg->LoadRefDisp(arg0_ref,
-                      mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
-                      arg0_ref,
-                      kNotVolatile);
+      if (!use_pc_rel) {
+        cg->LoadRefDisp(arg0_ref,
+                        mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+                        arg0_ref,
+                        kNotVolatile);
+      }
       // Set up direct code if known.
       if (direct_code != 0) {
         if (direct_code != static_cast<uintptr_t>(-1)) {
@@ -619,14 +674,23 @@
           cg->LoadCodeAddress(target_method, type, kInvokeTgt);
         }
       }
-      break;
+      if (!use_pc_rel || direct_code != 0) {
+        break;
+      }
+      ++state;
+      FALLTHROUGH_INTENDED;
     case 2:  // Grab target method*
       CHECK_EQ(cu->dex_file, target_method.dex_file);
-      cg->LoadRefDisp(arg0_ref,
-                      mirror::ObjectArray<mirror::Object>::OffsetOfElement(
-                          target_method.dex_method_index).Int32Value(),
-                      arg0_ref,
-                      kNotVolatile);
+      if (!use_pc_rel) {
+        cg->LoadRefDisp(arg0_ref,
+                        mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+                            target_method.dex_method_index).Int32Value(),
+                        arg0_ref,
+                        kNotVolatile);
+      } else {
+        size_t offset = cg->dex_cache_arrays_layout_.MethodOffset(target_method.dex_method_index);
+        cg->OpPcRelDexCacheArrayLoad(cu->dex_file, offset, arg0_ref);
+      }
       break;
     case 3:  // Grab the code from the method*
       if (direct_code == 0) {
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 4141bcf..83b27df 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -82,6 +82,9 @@
     /// @copydoc Mir2Lir::UnconditionallyMarkGCCard(RegStorage)
     void UnconditionallyMarkGCCard(RegStorage tgt_addr_reg) OVERRIDE;
 
+    bool CanUseOpPcRelDexCacheArrayLoad() const OVERRIDE;
+    void OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) OVERRIDE;
+
     // Required for target - register utilities.
     RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
     RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE {
@@ -257,6 +260,9 @@
      */
     LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
 
+    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
+    void DoPromotion() OVERRIDE;
+
     /*
      * @brief Handle ARM specific literals.
      */
@@ -300,6 +306,13 @@
 
     ArenaVector<LIR*> call_method_insns_;
 
+    // Instructions needing patching with PC relative code addresses.
+    ArenaVector<LIR*> dex_cache_access_insns_;
+
+    // Register with a reference to the dex cache arrays at dex_cache_arrays_min_offset_,
+    // if promoted.
+    RegStorage dex_cache_arrays_base_reg_;
+
     /**
      * @brief Given float register pair, returns Solo64 float register.
      * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3).
@@ -329,6 +342,14 @@
     }
 
     int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE;
+
+    static int ArmNextSDCallInsn(CompilationUnit* cu, CallInfo* info ATTRIBUTE_UNUSED,
+                                 int state, const MethodReference& target_method,
+                                 uint32_t unused_idx ATTRIBUTE_UNUSED,
+                                 uintptr_t direct_code, uintptr_t direct_method,
+                                 InvokeType type);
+
+    void OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 9193e1b..47669db 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -1087,6 +1087,36 @@
   lir->target = target;
 }
 
+bool ArmMir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
+  return dex_cache_arrays_layout_.Valid();
+}
+
+void ArmMir2Lir::OpPcRelDexCacheArrayAddr(const DexFile* dex_file, int offset, RegStorage r_dest) {
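+  // Emit a movw/movt pair with placeholder immediates plus an add-pc. The dex
+  // file, element offset and the add-pc label are recorded in the operands so
+  // the immediates can be patched once the dex cache arrays' address is known.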
+  LIR* movw = NewLIR2(kThumb2MovImm16, r_dest.GetReg(), 0);
+  LIR* movt = NewLIR2(kThumb2MovImm16H, r_dest.GetReg(), 0);
+  ArmOpcode add_pc_opcode = (r_dest.GetRegNum() < 8) ? kThumbAddRRLH : kThumbAddRRHH;
+  LIR* add_pc = NewLIR2(add_pc_opcode, r_dest.GetReg(), rs_rARM_PC.GetReg());
+  add_pc->flags.fixup = kFixupLabel;
+  movw->operands[2] = WrapPointer(dex_file);
+  movw->operands[3] = offset;
+  movw->operands[4] = WrapPointer(add_pc);
+  movt->operands[2] = movw->operands[2];
+  movt->operands[3] = movw->operands[3];
+  movt->operands[4] = movw->operands[4];
+  dex_cache_access_insns_.push_back(movw);
+  dex_cache_access_insns_.push_back(movt);
+}
+
+void ArmMir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset, RegStorage r_dest) {
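+  // Use the promoted base register if available; otherwise materialize the
+  // address into r_dest first and load through it.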
+  if (dex_cache_arrays_base_reg_.Valid()) {
+    LoadRefDisp(dex_cache_arrays_base_reg_, offset - dex_cache_arrays_min_offset_,
+                r_dest, kNotVolatile);
+  } else {
+    OpPcRelDexCacheArrayAddr(dex_file, offset, r_dest);
+    LoadRefDisp(r_dest, 0, r_dest, kNotVolatile);
+  }
+}
+
 LIR* ArmMir2Lir::OpVldm(RegStorage r_base, int count) {
   return NewLIR3(kThumb2Vldms, r_base.GetReg(), rs_fr0.GetReg(), count);
 }
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 9812d9f..5f27338 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -575,7 +575,9 @@
 
 ArmMir2Lir::ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena),
-      call_method_insns_(arena->Adapter()) {
+      call_method_insns_(arena->Adapter()),
+      dex_cache_access_insns_(arena->Adapter()),
+      dex_cache_arrays_base_reg_(RegStorage::InvalidReg()) {
   call_method_insns_.reserve(100);
   // Sanity check - make sure encoding map lines up.
   for (int i = 0; i < kArmLast; i++) {
@@ -901,14 +903,28 @@
 }
 
 void ArmMir2Lir::InstallLiteralPools() {
+  patches_.reserve(call_method_insns_.size() + dex_cache_access_insns_.size());
+
   // PC-relative calls to methods.
-  patches_.reserve(call_method_insns_.size());
   for (LIR* p : call_method_insns_) {
-      DCHECK_EQ(p->opcode, kThumb2Bl);
-      uint32_t target_method_idx = p->operands[1];
-      const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
-      patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
-                                                        target_dex_file, target_method_idx));
+    DCHECK_EQ(p->opcode, kThumb2Bl);
+    uint32_t target_method_idx = p->operands[1];
+    const DexFile* target_dex_file = UnwrapPointer<DexFile>(p->operands[2]);
+    patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
+                                                      target_dex_file, target_method_idx));
+  }
+
+  // PC-relative dex cache array accesses.
+  for (LIR* p : dex_cache_access_insns_) {
+    DCHECK(p->opcode == kThumb2MovImm16 || p->opcode == kThumb2MovImm16H);
+    const LIR* add_pc = UnwrapPointer<LIR>(p->operands[4]);
+    DCHECK(add_pc->opcode == kThumbAddRRLH || add_pc->opcode == kThumbAddRRHH);
+    const DexFile* dex_file = UnwrapPointer<DexFile>(p->operands[2]);
+    uint32_t offset = p->operands[3];
+    DCHECK(!p->flags.is_nop);
+    DCHECK(!add_pc->flags.is_nop);
+    patches_.push_back(LinkerPatch::DexCacheArrayPatch(p->offset,
+                                                       dex_file, add_pc->offset, offset));
   }
 
   // And do the normal processing.
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index e4bd2a3..25ea694 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -19,6 +19,7 @@
 #include "arch/arm/instruction_set_features_arm.h"
 #include "arm_lir.h"
 #include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "driver/compiler_driver.h"
@@ -1266,4 +1267,39 @@
   return offset;
 }
 
+void ArmMir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
+  // Start with the default counts.
+  Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
+
+  if (pc_rel_temp_ != nullptr) {
+    // Now, if the dex cache array base temp is used only once outside any loops (weight = 1),
+    // avoid the promotion, otherwise boost the weight by factor 3 because the full PC-relative
+    // load sequence is 4 instructions long and by promoting the PC base we save up to 3
+    // instructions per use.
+    int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
+    if (core_counts[p_map_idx].count == 1) {
+      core_counts[p_map_idx].count = 0;
+    } else {
+      core_counts[p_map_idx].count *= 3;
+    }
+  }
+}
+
+void ArmMir2Lir::DoPromotion() {
+  if (CanUseOpPcRelDexCacheArrayLoad()) {
+    pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
+  }
+
+  Mir2Lir::DoPromotion();
+
+  if (pc_rel_temp_ != nullptr) {
+    // Now, if the dex cache array base temp is promoted, remember the register but
+    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+    dex_cache_arrays_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
+    DCHECK(!dex_cache_arrays_base_reg_.Valid() || !dex_cache_arrays_base_reg_.IsFloat());
+    mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
+    pc_rel_temp_ = nullptr;
+  }
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 6b47bba..4abbd77 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -282,7 +282,13 @@
   FreeTemp(reg_card_no);
 }
 
+static dwarf::Reg DwarfCoreReg(int num) {
+  return dwarf::Reg::Arm64Core(num);
+}
+
 void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);  // empty stack.
+
   /*
    * On entry, x0 to x7 are live.  Let the register allocation
    * mechanism know so it doesn't try to use any of them when
@@ -345,6 +351,7 @@
 
   if (spilled_already != frame_size_) {
     OpRegImm(kOpSub, rs_sp, frame_size_without_spills);
+    cfi_.AdjustCFAOffset(frame_size_without_spills);
   }
 
   if (!skip_overflow_check) {
@@ -361,12 +368,14 @@
           GenerateTargetLabel(kPseudoThrowTarget);
           // Unwinds stack.
           m2l_->OpRegImm(kOpAdd, rs_sp, sp_displace_);
+          m2l_->cfi().AdjustCFAOffset(-sp_displace_);
           m2l_->ClobberCallerSave();
           ThreadOffset<8> func_offset = QUICK_ENTRYPOINT_OFFSET(8, pThrowStackOverflow);
           m2l_->LockTemp(rs_xIP0);
           m2l_->LoadWordDisp(rs_xSELF, func_offset.Int32Value(), rs_xIP0);
           m2l_->NewLIR1(kA64Br1x, rs_xIP0.GetReg());
           m2l_->FreeTemp(rs_xIP0);
+          m2l_->cfi().AdjustCFAOffset(sp_displace_);
         }
 
       private:
@@ -393,6 +402,7 @@
 }
 
 void Arm64Mir2Lir::GenExitSequence() {
+  cfi_.RememberState();
   /*
    * In the exit path, r0/r1 are live - make sure they aren't
    * allocated by the register utilities as temps.
@@ -403,6 +413,9 @@
 
   // Finally return.
   NewLIR0(kA64Ret);
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size_);
 }
 
 void Arm64Mir2Lir::GenSpecialExitSequence() {
@@ -419,11 +432,16 @@
   core_vmap_table_.clear();
   fp_vmap_table_.clear();
   NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
+  cfi_.AdjustCFAOffset(frame_size_);
+  // Do not generate CFI for scratch register x0.
+  cfi_.RelOffset(DwarfCoreReg(rxLR), 8);
 }
 
 void Arm64Mir2Lir::GenSpecialExitForSuspend() {
   // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
   NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
+  cfi_.AdjustCFAOffset(-frame_size_);
+  cfi_.Restore(DwarfCoreReg(rxLR));
 }
 
 static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index a9d9f3d..20f61f2 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -1458,6 +1458,14 @@
   return reg_mask;
 }
 
+static dwarf::Reg DwarfCoreReg(int num) {
+  return dwarf::Reg::Arm64Core(num);
+}
+
+static dwarf::Reg DwarfFpReg(int num) {
+  return dwarf::Reg::Arm64Fp(num);
+}
+
 static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
   int reg1 = -1, reg2 = -1;
   const int reg_log2_size = 3;
@@ -1466,9 +1474,12 @@
     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
     if (UNLIKELY(reg2 < 0)) {
       m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
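+      // The store offset is in 8-byte slots; shift to get the CFI byte offset.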
+      m2l->cfi().RelOffset(DwarfCoreReg(reg1), offset << reg_log2_size);
     } else {
       m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+      m2l->cfi().RelOffset(DwarfCoreReg(reg2), offset << reg_log2_size);
+      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (offset + 1) << reg_log2_size);
     }
   }
 }
@@ -1483,9 +1494,12 @@
     if (UNLIKELY(reg2 < 0)) {
       m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                    offset);
+      m2l->cfi().RelOffset(DwarfFpReg(reg1), offset << reg_log2_size);
     } else {
       m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                    RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+      m2l->cfi().RelOffset(DwarfFpReg(reg2), offset << reg_log2_size);
+      m2l->cfi().RelOffset(DwarfFpReg(reg1), (offset + 1) << reg_log2_size);
     }
   }
 }
@@ -1493,6 +1507,7 @@
 static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                            int frame_size) {
   m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);
+  m2l->cfi().AdjustCFAOffset(frame_size);
 
   int core_count = POPCOUNT(core_reg_mask);
 
@@ -1552,11 +1567,15 @@
                      RegStorage::FloatSolo64(reg1).GetReg(),
                      RegStorage::FloatSolo64(reg1).GetReg(),
                      base.GetReg(), -all_offset);
+        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
       } else {
         m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                      RegStorage::FloatSolo64(reg1).GetReg(),
                      RegStorage::FloatSolo64(reg1).GetReg(),
                      base.GetReg(), -all_offset);
+        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+        m2l->cfi().RelOffset(DwarfFpReg(reg1), 0);
         cur_offset = 0;  // That core reg needs to go into the upper half.
       }
     } else {
@@ -1564,10 +1583,15 @@
         fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
         m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                      RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
+        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+        m2l->cfi().RelOffset(DwarfFpReg(reg2), 0);
+        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
       } else {
         fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
         m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
                      base.GetReg(), -all_offset);
+        m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+        m2l->cfi().RelOffset(DwarfFpReg(reg1), kArm64PointerSize);
       }
     }
   } else {
@@ -1580,12 +1604,19 @@
       core_reg_mask = ExtractReg(core_reg_mask, &reg1);
       m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
+      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
     } else {
       core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
       m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
+      m2l->cfi().AdjustCFAOffset(all_offset * kArm64PointerSize);
+      m2l->cfi().RelOffset(DwarfCoreReg(reg2), 0);
+      m2l->cfi().RelOffset(DwarfCoreReg(reg1), kArm64PointerSize);
     }
   }
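+  // Every path above performed the pre-indexed store that allocates the full frame.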
+  DCHECK_EQ(m2l->cfi().GetCurrentCFAOffset(),
+            static_cast<int>(all_offset * kArm64PointerSize));
 
   if (fp_count != 0) {
     for (; fp_reg_mask != 0;) {
@@ -1594,10 +1625,13 @@
       if (UNLIKELY(reg2 < 0)) {
         m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                      cur_offset);
+        m2l->cfi().RelOffset(DwarfFpReg(reg1), cur_offset * kArm64PointerSize);
         // Do not increment offset here, as the second half will be filled by a core reg.
       } else {
         m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                      RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
+        m2l->cfi().RelOffset(DwarfFpReg(reg2), cur_offset * kArm64PointerSize);
+        m2l->cfi().RelOffset(DwarfFpReg(reg1), (cur_offset + 1) * kArm64PointerSize);
         cur_offset += 2;
       }
     }
@@ -1610,6 +1644,7 @@
       core_reg_mask = ExtractReg(core_reg_mask, &reg1);
       m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
                    cur_offset + 1);
+      m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
       cur_offset += 2;  // Half-slot filled now.
     }
   }
@@ -1620,6 +1655,8 @@
     core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
     m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                  RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
+    m2l->cfi().RelOffset(DwarfCoreReg(reg2), cur_offset * kArm64PointerSize);
+    m2l->cfi().RelOffset(DwarfCoreReg(reg1), (cur_offset + 1) * kArm64PointerSize);
   }
 
   DCHECK_EQ(cur_offset, all_offset);
@@ -1650,10 +1687,13 @@
     reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
     if (UNLIKELY(reg2 < 0)) {
       m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+      m2l->cfi().Restore(DwarfCoreReg(reg1));
     } else {
       DCHECK_LE(offset, 63);
       m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                    RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
+      m2l->cfi().Restore(DwarfCoreReg(reg2));
+      m2l->cfi().Restore(DwarfCoreReg(reg1));
     }
   }
 }
@@ -1667,9 +1707,12 @@
     if (UNLIKELY(reg2 < 0)) {
       m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                    offset);
+      m2l->cfi().Restore(DwarfFpReg(reg1));
     } else {
       m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                    RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
+      m2l->cfi().Restore(DwarfFpReg(reg2));
+      m2l->cfi().Restore(DwarfFpReg(reg1));
     }
   }
 }
@@ -1711,6 +1754,7 @@
     early_drop = RoundDown(early_drop, 16);
 
     OpRegImm64(kOpAdd, rs_sp, early_drop);
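+    // Part of the frame is dropped early; shrink the tracked CFA to match.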
+    cfi_.AdjustCFAOffset(-early_drop);
   }
 
   // Unspill.
@@ -1724,7 +1768,9 @@
   }
 
   // Drop the (rest of) the frame.
-  OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop);
+  int adjust = frame_size - early_drop;
+  OpRegImm64(kOpAdd, rs_sp, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index f944c11..5ea36c2 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -541,13 +541,11 @@
         DCHECK(tab_rec->anchor->flags.fixup != kFixupNone);
         bx_offset = tab_rec->anchor->offset + 4;
         break;
-      case kX86:
-        bx_offset = 0;
-        break;
       case kX86_64:
         // RIP relative to switch table.
         bx_offset = tab_rec->offset;
         break;
+      case kX86:
       case kArm64:
       case kMips:
       case kMips64:
@@ -1070,6 +1068,11 @@
       mask_cache_(arena),
       safepoints_(arena->Adapter()),
       dex_cache_arrays_layout_(cu->compiler_driver->GetDexCacheArraysLayout(cu->dex_file)),
+      pc_rel_temp_(nullptr),
+      dex_cache_arrays_min_offset_(std::numeric_limits<uint32_t>::max()),
+      cfi_(&last_lir_insn_,
+           cu->compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(),
+           arena),
       in_to_reg_storage_mapping_(arena) {
   switch_tables_.reserve(4);
   fill_array_data_.reserve(4);
@@ -1154,14 +1157,6 @@
     return lhs.LiteralOffset() < rhs.LiteralOffset();
   });
 
-  std::unique_ptr<std::vector<uint8_t>> cfi_info(
-      cu_->compiler_driver->GetCompilerOptions().GetGenerateGDBInformation() ?
-          ReturnFrameDescriptionEntry() :
-          nullptr);
-  ArrayRef<const uint8_t> cfi_ref;
-  if (cfi_info.get() != nullptr) {
-    cfi_ref = ArrayRef<const uint8_t>(*cfi_info);
-  }
   return CompiledMethod::SwapAllocCompiledMethod(
       cu_->compiler_driver, cu_->instruction_set,
       ArrayRef<const uint8_t>(code_buffer_),
@@ -1170,7 +1165,7 @@
       ArrayRef<const uint8_t>(encoded_mapping_table_),
       ArrayRef<const uint8_t>(vmap_encoder.GetData()),
       ArrayRef<const uint8_t>(native_gc_map_),
-      cfi_ref,
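+      // Patch the lazily-recorded CFI with the final code offsets before emitting it.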
+      ArrayRef<const uint8_t>(*cfi_.Patch(code_buffer_.size())),
       ArrayRef<const LinkerPatch>(patches_));
 }
 
@@ -1332,11 +1327,6 @@
   UNREACHABLE();
 }
 
-std::vector<uint8_t>* Mir2Lir::ReturnFrameDescriptionEntry() {
-  // Default case is to do nothing.
-  return nullptr;
-}
-
 RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) {
   if (loc.location == kLocPhysReg) {
     DCHECK(!loc.reg.Is32Bit());
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 8e3f4ef..4ac6c0c 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -413,6 +413,17 @@
   return success && AddInlineMethod(verifier->GetMethodReference().dex_method_index, method);
 }
 
+InlineMethodFlags DexFileMethodInliner::IsIntrinsicOrSpecial(uint32_t method_index) {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  auto it = inline_methods_.find(method_index);
+  if (it != inline_methods_.end()) {
+    DCHECK_NE(it->second.flags & (kInlineIntrinsic | kInlineSpecial), 0);
+    return it->second.flags;
+  } else {
+    return kNoInlineMethodFlags;
+  }
+}
+
 bool DexFileMethodInliner::IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   auto it = inline_methods_.find(method_index);
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index cb521da..d1e5621 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -65,6 +65,11 @@
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
 
     /**
+     * Check whether a particular method index corresponds to an intrinsic or special function.
+     */
+    InlineMethodFlags IsIntrinsicOrSpecial(uint32_t method_index) LOCKS_EXCLUDED(lock_);
+
+    /**
      * Check whether a particular method index corresponds to an intrinsic function.
      */
     bool IsIntrinsic(uint32_t method_index, InlineMethod* intrinsic) LOCKS_EXCLUDED(lock_);
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 1813e09..b132c4c 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -94,6 +94,97 @@
                                                        r_method, r_result));
 }
 
+RegStorage Mir2Lir::GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info,
+                                               int opt_flags) {
+  DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
+  // May do runtime call so everything to home locations.
+  FlushAllRegs();
+  RegStorage r_base = TargetReg(kArg0, kRef);
+  LockTemp(r_base);
+  RegStorage r_method = RegStorage::InvalidReg();  // Loaded lazily, maybe in the slow-path.
+  if (CanUseOpPcRelDexCacheArrayLoad()) {
+    uint32_t offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
+    OpPcRelDexCacheArrayLoad(cu_->dex_file, offset, r_base);
+  } else {
+    // Using fixed register to sync with possible call to runtime support.
+    r_method = LoadCurrMethodWithHint(TargetReg(kArg1, kRef));
+    LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
+                kNotVolatile);
+    int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
+    LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile);
+  }
+  // r_base now points at static storage (Class*) or nullptr if the type is not yet resolved.
+  LIR* unresolved_branch = nullptr;
+  if (!field_info.IsClassInDexCache() && (opt_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
+    // Check if r_base is nullptr.
+    unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, nullptr);
+  }
+  LIR* uninit_branch = nullptr;
+  if (!field_info.IsClassInitialized() && (opt_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
+    // Check if r_base is not yet initialized class.
+    RegStorage r_tmp = TargetReg(kArg2, kNotWide);
+    LockTemp(r_tmp);
+    uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
+                                      mirror::Class::StatusOffset().Int32Value(),
+                                      mirror::Class::kStatusInitialized, nullptr, nullptr);
+    FreeTemp(r_tmp);
+  }
+  if (unresolved_branch != nullptr || uninit_branch != nullptr) {
+    //
+    // Slow path to ensure a class is initialized for sget/sput.
+    //
+    class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
+     public:
+      // There are up to two branches to the static field slow path, the "unresolved" when the type
+      // entry in the dex cache is nullptr, and the "uninit" when the class is not yet initialized.
+      // At least one will be non-nullptr here, otherwise we wouldn't generate the slow path.
+      StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
+                          RegStorage r_base_in, RegStorage r_method_in)
+          : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
+            second_branch_(unresolved != nullptr ? uninit : nullptr),
+            storage_index_(storage_index), r_base_(r_base_in), r_method_(r_method_in) {
+      }
+
+      void Compile() {
+        LIR* target = GenerateTargetLabel();
+        if (second_branch_ != nullptr) {
+          second_branch_->target = target;
+        }
+        if (r_method_.Valid()) {
+          // ArtMethod* was loaded in normal path - use it.
+          m2l_->CallRuntimeHelperImmReg(kQuickInitializeStaticStorage, storage_index_, r_method_,
+                                        true);
+        } else {
+          // ArtMethod* wasn't loaded in normal path - use a helper that loads it.
+          m2l_->CallRuntimeHelperImmMethod(kQuickInitializeStaticStorage, storage_index_, true);
+        }
+        // Copy helper's result into r_base, a no-op on all but MIPS.
+        m2l_->OpRegCopy(r_base_, m2l_->TargetReg(kRet0, kRef));
+
+        m2l_->OpUnconditionalBranch(cont_);
+      }
+
+     private:
+      // Second branch to the slow path, or nullptr if there's only one branch.
+      LIR* const second_branch_;
+
+      const int storage_index_;
+      const RegStorage r_base_;
+      RegStorage r_method_;
+    };
+
+    // The slow path is invoked if r_base is nullptr or the class it points
+    // to is not yet initialized.
+    LIR* cont = NewLIR0(kPseudoTargetLabel);
+    AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
+                                                 field_info.StorageIndex(), r_base, r_method));
+  }
+  if (IsTemp(r_method)) {
+    FreeTemp(r_method);
+  }
+  return r_base;
+}
+
 /*
  * Generate a kPseudoBarrier marker to indicate the boundary of special
  * blocks.
@@ -609,41 +700,6 @@
   CallRuntimeHelperImmRegLocation(kQuickHandleFillArrayData, table_offset_from_start, rl_src, true);
 }
 
-//
-// Slow path to ensure a class is initialized for sget/sput.
-//
-class StaticFieldSlowPath : public Mir2Lir::LIRSlowPath {
- public:
-  // There are up to two branches to the static field slow path, the "unresolved" when the type
-  // entry in the dex cache is null, and the "uninit" when the class is not yet initialized.
-  // At least one will be non-null here, otherwise we wouldn't generate the slow path.
-  StaticFieldSlowPath(Mir2Lir* m2l, LIR* unresolved, LIR* uninit, LIR* cont, int storage_index,
-                      RegStorage r_base)
-      : LIRSlowPath(m2l, unresolved != nullptr ? unresolved : uninit, cont),
-        second_branch_(unresolved != nullptr ? uninit : nullptr),
-        storage_index_(storage_index), r_base_(r_base) {
-  }
-
-  void Compile() {
-    LIR* target = GenerateTargetLabel();
-    if (second_branch_ != nullptr) {
-      second_branch_->target = target;
-    }
-    m2l_->CallRuntimeHelperImm(kQuickInitializeStaticStorage, storage_index_, true);
-    // Copy helper's result into r_base, a no-op on all but MIPS.
-    m2l_->OpRegCopy(r_base_,  m2l_->TargetReg(kRet0, kRef));
-
-    m2l_->OpUnconditionalBranch(cont_);
-  }
-
- private:
-  // Second branch to the slow path, or null if there's only one branch.
-  LIR* const second_branch_;
-
-  const int storage_index_;
-  const RegStorage r_base_;
-};
-
 void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, OpSize size) {
   const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
   DCHECK_EQ(SPutMemAccessType(mir->dalvikInsn.opcode), field_info.MemAccessType());
@@ -653,65 +709,23 @@
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
-      RegLocation rl_method = LoadCurrMethod();
       r_base = AllocTempRef();
-      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
+      RegStorage r_method = LoadCurrMethodWithHint(r_base);
+      LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
                   kNotVolatile);
-      if (IsTemp(rl_method.reg)) {
-        FreeTemp(rl_method.reg);
-      }
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
       // class is initialized.
-      // TODO: remove initialized check now that we are initializing classes in the compiler driver.
-      DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
-      // May do runtime call so everything to home locations.
-      FlushAllRegs();
-      // Using fixed register to sync with possible call to runtime support.
-      RegStorage r_method = TargetReg(kArg1, kRef);
-      LockTemp(r_method);
-      LoadCurrMethodDirect(r_method);
-      r_base = TargetReg(kArg0, kRef);
-      LockTemp(r_base);
-      LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
-                  kNotVolatile);
-      int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
-      LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile);
-      // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
-      LIR* unresolved_branch = nullptr;
-      if (!field_info.IsClassInDexCache() &&
-          (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
-        // Check if r_base is NULL.
-        unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
-      }
-      LIR* uninit_branch = nullptr;
+      r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags);
       if (!field_info.IsClassInitialized() &&
           (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
-        // Check if r_base is not yet initialized class.
-        RegStorage r_tmp = TargetReg(kArg2, kNotWide);
-        LockTemp(r_tmp);
-        uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
-                                          mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
-        FreeTemp(r_tmp);
+        // Ensure load of status and store of value don't re-order.
+        // TODO: Presumably the actual value store is control-dependent on the status load,
+        // and will thus not be reordered in any case, since stores are never speculated.
+        // Does later code "know" that the class is now initialized?  If so, we still
+        // need the barrier to guard later static loads.
+        GenMemBarrier(kLoadAny);
       }
-      if (unresolved_branch != nullptr || uninit_branch != nullptr) {
-        // The slow path is invoked if the r_base is NULL or the class pointed
-        // to by it is not initialized.
-        LIR* cont = NewLIR0(kPseudoTargetLabel);
-        AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
-                                                     field_info.StorageIndex(), r_base));
-
-        if (uninit_branch != nullptr) {
-          // Ensure load of status and store of value don't re-order.
-          // TODO: Presumably the actual value store is control-dependent on the status load,
-          // and will thus not be reordered in any case, since stores are never speculated.
-          // Does later code "know" that the class is now initialized?  If so, we still
-          // need the barrier to guard later static loads.
-          GenMemBarrier(kLoadAny);
-        }
-      }
-      FreeTemp(r_method);
     }
     // rBase now holds static storage base
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
@@ -773,57 +787,19 @@
     RegStorage r_base;
     if (field_info.IsReferrersClass()) {
       // Fast path, static storage base is this method's class
-      RegLocation rl_method  = LoadCurrMethod();
       r_base = AllocTempRef();
-      LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
+      RegStorage r_method = LoadCurrMethodWithHint(r_base);
+      LoadRefDisp(r_method, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base,
                   kNotVolatile);
     } else {
       // Medium path, static storage base in a different class which requires checks that the other
       // class is initialized
-      DCHECK_NE(field_info.StorageIndex(), DexFile::kDexNoIndex);
-      // May do runtime call so everything to home locations.
-      FlushAllRegs();
-      // Using fixed register to sync with possible call to runtime support.
-      RegStorage r_method = TargetReg(kArg1, kRef);
-      LockTemp(r_method);
-      LoadCurrMethodDirect(r_method);
-      r_base = TargetReg(kArg0, kRef);
-      LockTemp(r_base);
-      LoadRefDisp(r_method, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), r_base,
-                  kNotVolatile);
-      int32_t offset_of_field = ObjArray::OffsetOfElement(field_info.StorageIndex()).Int32Value();
-      LoadRefDisp(r_base, offset_of_field, r_base, kNotVolatile);
-      // r_base now points at static storage (Class*) or NULL if the type is not yet resolved.
-      LIR* unresolved_branch = nullptr;
-      if (!field_info.IsClassInDexCache() &&
-          (mir->optimization_flags & MIR_CLASS_IS_IN_DEX_CACHE) == 0) {
-        // Check if r_base is NULL.
-        unresolved_branch = OpCmpImmBranch(kCondEq, r_base, 0, NULL);
-      }
-      LIR* uninit_branch = nullptr;
+      r_base = GenGetOtherTypeForSgetSput(field_info, mir->optimization_flags);
       if (!field_info.IsClassInitialized() &&
           (mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0) {
-        // Check if r_base is not yet initialized class.
-        RegStorage r_tmp = TargetReg(kArg2, kNotWide);
-        LockTemp(r_tmp);
-        uninit_branch = OpCmpMemImmBranch(kCondLt, r_tmp, r_base,
-                                          mirror::Class::StatusOffset().Int32Value(),
-                                          mirror::Class::kStatusInitialized, nullptr, nullptr);
-        FreeTemp(r_tmp);
+        // Ensure load of status and load of value don't re-order.
+        GenMemBarrier(kLoadAny);
       }
-      if (unresolved_branch != nullptr || uninit_branch != nullptr) {
-        // The slow path is invoked if the r_base is NULL or the class pointed
-        // to by it is not initialized.
-        LIR* cont = NewLIR0(kPseudoTargetLabel);
-        AddSlowPath(new (arena_) StaticFieldSlowPath(this, unresolved_branch, uninit_branch, cont,
-                                                     field_info.StorageIndex(), r_base));
-
-        if (uninit_branch != nullptr) {
-          // Ensure load of status and load of value don't re-order.
-          GenMemBarrier(kLoadAny);
-        }
-      }
-      FreeTemp(r_method);
     }
     // r_base now holds static storage base
     RegisterClass reg_class = RegClassForFieldLoadStore(size, field_info.IsVolatile());
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index e747239..db7095d 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1435,10 +1435,12 @@
 
 void Mir2Lir::GenInvoke(CallInfo* info) {
   DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  const DexFile* dex_file = info->method_ref.dex_file;
-  if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file)
-      ->GenIntrinsic(this, info)) {
-    return;
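+  // Consult the inliner only when the MIR is already marked as an intrinsic.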
+  if (mir_graph_->GetMethodLoweringInfo(info->mir).IsIntrinsic()) {
+    const DexFile* dex_file = info->method_ref.dex_file;
+    auto* inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(dex_file);
+    if (inliner->GenIntrinsic(this, info)) {
+      return;
+    }
   }
   GenInvokeNoInline(info);
 }
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc
new file mode 100644
index 0000000..5cfb0ff
--- /dev/null
+++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lazy_debug_frame_opcode_writer.h"
+#include "mir_to_lir.h"
+
+namespace art {
+namespace dwarf {
+
+const ArenaVector<uint8_t>* LazyDebugFrameOpCodeWriter::Patch(size_t code_size) {
+  if (!this->enabled_) {
+    DCHECK(this->data()->empty());
+    return this->data();
+  }
+  if (!patched_) {
+    patched_ = true;
+    // Move our data buffer to a temporary variable.
+    ArenaVector<uint8_t> old_opcodes(this->opcodes_.get_allocator());
+    old_opcodes.swap(this->opcodes_);
+    // Refill our data buffer with patched opcodes.
+    this->opcodes_.reserve(old_opcodes.size() + advances_.size() + 4);
+    size_t pos = 0;
+    for (auto advance : advances_) {
+      DCHECK_GE(advance.pos, pos);
+      // Copy old data up to the point when advance was issued.
+      this->opcodes_.insert(this->opcodes_.end(),
+                            old_opcodes.begin() + pos,
+                            old_opcodes.begin() + advance.pos);
+      pos = advance.pos;
+      // This may be null if there is no slow-path code after return.
+      LIR* next_lir = NEXT_LIR(advance.last_lir_insn);
+      // Insert the advance command with its final offset.
+      Base::AdvancePC(next_lir != nullptr ? next_lir->offset : code_size);
+    }
+    // Copy the final segment.
+    this->opcodes_.insert(this->opcodes_.end(),
+                          old_opcodes.begin() + pos,
+                          old_opcodes.end());
+    Base::AdvancePC(code_size);
+  }
+  return this->data();
+}
+
+}  // namespace dwarf
+}  // namespace art
diff --git a/compiler/dex/quick/lazy_debug_frame_opcode_writer.h b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
new file mode 100644
index 0000000..94ffd7f
--- /dev/null
+++ b/compiler/dex/quick/lazy_debug_frame_opcode_writer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
+#define ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
+
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "dwarf/debug_frame_opcode_writer.h"
+
+namespace art {
+struct LIR;
+namespace dwarf {
+
+// When we are generating the CFI, we do not yet know the instruction offsets,
+// so this class records the LIR references and later patches the opcode stream
+// with the final offsets.
+class LazyDebugFrameOpCodeWriter FINAL
+    : public DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> {
+  typedef DebugFrameOpCodeWriter<ArenaAllocatorAdapter<uint8_t>> Base;
+ public:
+  // This method is implicitly called by the opcode writers.
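+  // It records the current opcode-stream position together with the last LIR,
+  // so that Patch() can later emit the advance with the real code offset.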
+  virtual void ImplicitlyAdvancePC() OVERRIDE {
+    DCHECK_EQ(patched_, false);
+    DCHECK_EQ(this->current_pc_, 0);
+    advances_.push_back({this->data()->size(), *last_lir_insn_});
+  }
+
+  const ArenaVector<uint8_t>* Patch(size_t code_size);
+
+  explicit LazyDebugFrameOpCodeWriter(LIR** last_lir_insn, bool enable_writes,
+                                      ArenaAllocator* allocator)
+    : Base(enable_writes, allocator->Adapter()),
+      last_lir_insn_(last_lir_insn),
+      advances_(allocator->Adapter()),
+      patched_(false) {
+  }
+
+ private:
+  struct Advance {
+    size_t pos;
+    LIR* last_lir_insn;
+  };
+
+  using Base::data;  // Hidden. Use Patch method instead.
+
+  LIR** last_lir_insn_;
+  ArenaVector<Advance> advances_;
+  bool patched_;
+
+  DISALLOW_COPY_AND_ASSIGN(LazyDebugFrameOpCodeWriter);
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DEX_QUICK_LAZY_DEBUG_FRAME_OPCODE_WRITER_H_
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c932df6..05570e4 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -238,7 +238,12 @@
   FreeTemp(reg_card_no);
 }
 
+static dwarf::Reg DwarfCoreReg(int num) {
+  return dwarf::Reg::MipsCore(num);
+}
+
 void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
+  DCHECK_EQ(cfi_.GetCurrentCFAOffset(), 0);
   int spill_count = num_core_spills_ + num_fp_spills_;
   /*
    * On entry, A0, A1, A2 & A3 are live. On Mips64, A4, A5, A6 & A7 are also live.
@@ -304,10 +309,12 @@
         // RA is offset 0 since we push in reverse order.
         m2l_->LoadWordDisp(m2l_->TargetPtrReg(kSp), 0, m2l_->TargetPtrReg(kLr));
         m2l_->OpRegImm(kOpAdd, m2l_->TargetPtrReg(kSp), sp_displace_);
+        m2l_->cfi().AdjustCFAOffset(-sp_displace_);
         m2l_->ClobberCallerSave();
         RegStorage r_tgt = m2l_->CallHelperSetup(kQuickThrowStackOverflow);  // Doesn't clobber LR.
         m2l_->CallHelper(r_tgt, kQuickThrowStackOverflow, false /* MarkSafepointPC */,
                          false /* UseLink */);
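+        // Restore the tracked CFA: this slow path never returns, and the CFI
+        // stream for the code that follows still assumes the full frame.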
+        m2l_->cfi().AdjustCFAOffset(sp_displace_);
       }
 
      private:
@@ -318,8 +325,10 @@
     AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_count * ptr_size));
     // TODO: avoid copy for small frame sizes.
     OpRegCopy(rs_sp, new_sp);  // Establish stack.
+    cfi_.AdjustCFAOffset(frame_sub);
   } else {
     OpRegImm(kOpSub, rs_sp, frame_sub);
+    cfi_.AdjustCFAOffset(frame_sub);
   }
 
   FlushIns(ArgLocs, rl_method);
@@ -337,6 +346,7 @@
 }
 
 void MipsMir2Lir::GenExitSequence() {
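+  // Snapshot the unwind state; it is restored after the return (see below).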
+  cfi_.RememberState();
   /*
    * In the exit path, rMIPS_RET0/rMIPS_RET1 are live - make sure they aren't
    * allocated by the register utilities as temps.
@@ -346,6 +356,9 @@
 
   UnSpillCoreRegs();
   OpReg(kOpBx, TargetPtrReg(kLr));
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size_);
 }
 
 void MipsMir2Lir::GenSpecialExitSequence() {
@@ -364,15 +377,20 @@
   fp_vmap_table_.clear();
   const RegStorage rs_sp = TargetPtrReg(kSp);
   OpRegImm(kOpSub, rs_sp, frame_size_);
+  cfi_.AdjustCFAOffset(frame_size_);
   StoreWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr));
+  cfi_.RelOffset(DwarfCoreReg(rRA), frame_size_ - (cu_->target64 ? 8 : 4));
   StoreWordDisp(rs_sp, 0, TargetPtrReg(kArg0));
+  // Do not generate CFI for scratch register A0.
 }
 
 void MipsMir2Lir::GenSpecialExitForSuspend() {
   // Pop the frame. Don't pop ArtMethod*, it's no longer needed.
   const RegStorage rs_sp = TargetPtrReg(kSp);
   LoadWordDisp(rs_sp, frame_size_ - (cu_->target64 ? 8 : 4), TargetPtrReg(kLr));
+  cfi_.Restore(DwarfCoreReg(rRA));
   OpRegImm(kOpAdd, rs_sp, frame_size_);
+  cfi_.AdjustCFAOffset(-frame_size_);
 }
 
 /*
@@ -385,43 +403,7 @@
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
   if (direct_code != 0 && direct_method != 0) {
     switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      if (direct_code != static_cast<uintptr_t>(-1)) {
-        if (cu->target64) {
-          cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
-        } else {
-          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
-        }
-      } else {
-        cg->LoadCodeAddress(target_method, type, kInvokeTgt);
-      }
-      if (direct_method != static_cast<uintptr_t>(-1)) {
-        if (cu->target64) {
-          cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
-        } else {
-          cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
-        }
-      } else {
-        cg->LoadMethodAddress(target_method, type, kArg0);
-      }
-      break;
-    default:
-      return -1;
-    }
-  } else {
-    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
-    switch (state) {
-    case 0:  // Get the current Method* [sets kArg0]
-      // TUNING: we can save a reg copy if Method* has been promoted.
-      cg->LoadCurrMethodDirect(arg0_ref);
-      break;
-    case 1:  // Get method->dex_cache_resolved_methods_
-      cg->LoadRefDisp(arg0_ref,
-                      mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
-                      arg0_ref,
-                      kNotVolatile);
-      // Set up direct code if known.
-      if (direct_code != 0) {
+      case 0:  // Get the current Method* [sets kArg0]
         if (direct_code != static_cast<uintptr_t>(-1)) {
           if (cu->target64) {
             cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
@@ -429,29 +411,65 @@
             cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
           }
         } else {
-          CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
           cg->LoadCodeAddress(target_method, type, kInvokeTgt);
         }
-      }
-      break;
-    case 2:  // Grab target method*
-      CHECK_EQ(cu->dex_file, target_method.dex_file);
-      cg->LoadRefDisp(arg0_ref,
-                      mirror::ObjectArray<mirror::Object>::
-                          OffsetOfElement(target_method.dex_method_index).Int32Value(),
-                      arg0_ref,
-                      kNotVolatile);
-      break;
-    case 3:  // Grab the code from the method*
-      if (direct_code == 0) {
-        int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-            InstructionSetPointerSize(cu->instruction_set)).Int32Value();
-        // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
-        cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
-      }
-      break;
-    default:
-      return -1;
+        if (direct_method != static_cast<uintptr_t>(-1)) {
+          if (cu->target64) {
+            cg->LoadConstantWide(cg->TargetReg(kArg0, kRef), direct_method);
+          } else {
+            cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+          }
+        } else {
+          cg->LoadMethodAddress(target_method, type, kArg0);
+        }
+        break;
+      default:
+        return -1;
+    }
+  } else {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+      case 0:  // Get the current Method* [sets kArg0]
+        // TUNING: we can save a reg copy if Method* has been promoted.
+        cg->LoadCurrMethodDirect(arg0_ref);
+        break;
+      case 1:  // Get method->dex_cache_resolved_methods_
+        cg->LoadRefDisp(arg0_ref,
+                        mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+                        arg0_ref,
+                        kNotVolatile);
+        // Set up direct code if known.
+        if (direct_code != 0) {
+          if (direct_code != static_cast<uintptr_t>(-1)) {
+            if (cu->target64) {
+              cg->LoadConstantWide(cg->TargetPtrReg(kInvokeTgt), direct_code);
+            } else {
+              cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+            }
+          } else {
+            CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
+            cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+          }
+        }
+        break;
+      case 2:  // Grab target method*
+        CHECK_EQ(cu->dex_file, target_method.dex_file);
+        cg->LoadRefDisp(arg0_ref,
+                        mirror::ObjectArray<mirror::Object>::
+                        OffsetOfElement(target_method.dex_method_index).Int32Value(),
+                        arg0_ref,
+                        kNotVolatile);
+        break;
+      case 3:  // Grab the code from the method*
+        if (direct_code == 0) {
+          int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+              InstructionSetPointerSize(cu->instruction_set)).Int32Value();
+          // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
+          cg->LoadWordDisp(arg0_ref, offset, cg->TargetPtrReg(kInvokeTgt));
+        }
+        break;
+      default:
+        return -1;
     }
   }
   return state + 1;
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 626b36e..1ca8bb6 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -237,12 +237,12 @@
         // note the operands are swapped for the mtc1 and mthc1 instr.
         // Here if dest is fp reg and src is core reg.
         if (fpuIs32Bit_) {
-            NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
-            NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
+          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetLowReg());
+          NewLIR2(kMipsMtc1, r_src.GetHighReg(), r_dest.GetHighReg());
         } else {
-            r_dest = Fp64ToSolo32(r_dest);
-            NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
-            NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
+          r_dest = Fp64ToSolo32(r_dest);
+          NewLIR2(kMipsMtc1, r_src.GetLowReg(), r_dest.GetReg());
+          NewLIR2(kMipsMthc1, r_src.GetHighReg(), r_dest.GetReg());
         }
       }
     } else {
@@ -309,7 +309,13 @@
 
 RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
   RegStorage t_reg = AllocTemp();
-  NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
+  // lit is guaranteed to be a 16-bit constant.
+  if (IsUint<16>(lit)) {
+    NewLIR3(kMipsOri, t_reg.GetReg(), rZERO, lit);
+  } else {
+    // Addiu will sign-extend the entire width (32 or 64 bits) of the register.
+    NewLIR3(kMipsAddiu, t_reg.GetReg(), rZERO, lit);
+  }
   RegLocation rl_result = GenDivRem(rl_dest, reg1, t_reg, is_div);
   FreeTemp(t_reg);
   return rl_result;
@@ -815,20 +821,20 @@
   }
   OpKind op = kOpBkpt;
   switch (opcode) {
-  case Instruction::SHL_LONG:
-  case Instruction::SHL_LONG_2ADDR:
-    op = kOpLsl;
-    break;
-  case Instruction::SHR_LONG:
-  case Instruction::SHR_LONG_2ADDR:
-    op = kOpAsr;
-    break;
-  case Instruction::USHR_LONG:
-  case Instruction::USHR_LONG_2ADDR:
-    op = kOpLsr;
-    break;
-  default:
-    LOG(FATAL) << "Unexpected case: " << opcode;
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      op = kOpLsl;
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      op = kOpAsr;
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      op = kOpLsr;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case: " << opcode;
   }
   rl_shift = LoadValue(rl_shift, kCoreReg);
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index a94fad7..4c0bd83 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -830,6 +830,10 @@
   return OpReg(kOpBlx, r_tgt);
 }
 
+static dwarf::Reg DwarfCoreReg(int num) {
+  return dwarf::Reg::MipsCore(num);
+}
+
 void MipsMir2Lir::SpillCoreRegs() {
   if (num_core_spills_ == 0) {
     return;
@@ -839,11 +843,13 @@
   int offset = num_core_spills_ * ptr_size;
   const RegStorage rs_sp = TargetPtrReg(kSp);
   OpRegImm(kOpSub, rs_sp, offset);
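+  // The spill area has just been carved out; keep the unwinder's CFA in sync.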
+  cfi_.AdjustCFAOffset(offset);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       offset -= ptr_size;
       StoreWordDisp(rs_sp, offset,
                     cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg));
+      cfi_.RelOffset(DwarfCoreReg(reg), offset);
     }
   }
 }
@@ -861,9 +867,11 @@
       offset -= ptr_size;
       LoadWordDisp(rs_sp, offset,
                    cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg));
+      cfi_.Restore(DwarfCoreReg(reg));
     }
   }
   OpRegImm(kOpAdd, rs_sp, frame_size_);
+  cfi_.AdjustCFAOffset(-frame_size_);
 }
 
 bool MipsMir2Lir::IsUnconditionalBranch(LIR* lir) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index ed8e21e..961cd4f 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1253,11 +1253,14 @@
     AppendLIR(NewLIR0(kPseudoPrologueBegin));
     GenEntrySequence(&mir_graph_->reg_location_[start_vreg], mir_graph_->GetMethodLoc());
     AppendLIR(NewLIR0(kPseudoPrologueEnd));
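+    // The entry sequence must leave the tracked CFA at exactly the frame size.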
+    DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
   } else if (bb->block_type == kExitBlock) {
     ResetRegPool();
+    DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
     AppendLIR(NewLIR0(kPseudoEpilogueBegin));
     GenExitSequence();
     AppendLIR(NewLIR0(kPseudoEpilogueEnd));
+    DCHECK_EQ(cfi_.GetCurrentCFAOffset(), frame_size_);
   }
 
   for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index bb8fbae..db59714 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -29,6 +29,7 @@
 #include "dex/quick/resource_mask.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "invoke_type.h"
+#include "lazy_debug_frame_opcode_writer.h"
 #include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
@@ -135,6 +136,7 @@
 class BitVector;
 struct CallInfo;
 struct CompilationUnit;
+struct CompilerTemp;
 struct InlineMethod;
 class MIR;
 struct LIR;
@@ -142,6 +144,7 @@
 class DexFileMethodInliner;
 class MIRGraph;
 class MirMethodLoweringInfo;
+class MirSFieldLoweringInfo;
 
 typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int,
                             const MethodReference& target_method,
@@ -633,7 +636,7 @@
     RegisterClass ShortyToRegClass(char shorty_type);
     RegisterClass LocToRegClass(RegLocation loc);
     int ComputeFrameSize();
-    virtual void Materialize();
+    void Materialize();
     virtual CompiledMethod* GetCompiledMethod();
     void MarkSafepointPC(LIR* inst);
     void MarkSafepointPCAfter(LIR* after);
@@ -774,9 +777,10 @@
      */
     virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
 
-    void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
+    virtual void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight);
+    virtual void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
     void DumpCounts(const RefCounts* arr, int size, const char* msg);
-    void DoPromotion();
+    virtual void DoPromotion();
     int VRegOffset(int v_reg);
     int SRegOffset(int s_reg);
     RegLocation GetReturnWide(RegisterClass reg_class);
@@ -1505,6 +1509,12 @@
       return 0;
     }
 
+    /**
+     * @brief Buffer of DWARF's Call Frame Information opcodes.
+     * @details It is used by debuggers and other tools to unwind the call stack.
+     */
+    dwarf::LazyDebugFrameOpCodeWriter& cfi() { return cfi_; }
+
   protected:
     Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -1570,11 +1580,6 @@
                                     bool can_assume_type_is_in_dex_cache,
                                     uint32_t type_idx, RegLocation rl_dest,
                                     RegLocation rl_src);
-    /*
-     * @brief Generate the eh_frame FDE information if possible.
-     * @returns pointer to vector containg FDE information, or NULL.
-     */
-    virtual std::vector<uint8_t>* ReturnFrameDescriptionEntry();
 
     /**
      * @brief Used to insert marker that can be used to associate MIR with LIR.
@@ -1692,6 +1697,13 @@
     void GenIfNullUseHelperImmMethod(
         RegStorage r_result, QuickEntrypointEnum trampoline, int imm, RegStorage r_method);
 
+    /**
+     * @brief Generate code to retrieve Class* for another type to be used by SGET/SPUT.
+     * @param field_info information about the field to be accessed.
+     * @param opt_flags the optimization flags of the MIR.
+     */
+    RegStorage GenGetOtherTypeForSgetSput(const MirSFieldLoweringInfo& field_info, int opt_flags);
+
     void AddDivZeroCheckSlowPath(LIR* branch);
 
     // Copy arg0 and arg1 to kArg0 and kArg1 safely, possibly using
@@ -1765,6 +1777,13 @@
     // Update references from prev_mir to mir.
     void UpdateReferenceVRegs(MIR* mir, MIR* prev_mir, BitVector* references);
 
+    /**
+     * Returns true if the frame spills the given core register.
+     */
+    bool CoreSpillMaskContains(int reg) {
+      return (core_spill_mask_ & (1u << reg)) != 0;
+    }
+
   public:
     // TODO: add accessors for these.
     LIR* literal_list_;                        // Constants.
@@ -1841,6 +1860,20 @@
     // The layout of the cu_->dex_file's dex cache arrays for PC-relative addressing.
     const DexCacheArraysLayout dex_cache_arrays_layout_;
 
+    // For architectures that don't have true PC-relative addressing, we can promote
+    // the PC of an instruction (or another PC-relative address such as a pointer to
+    // the dex cache arrays, if supported) to a register. This is indicated to the
+    // register promotion pass by allocating a backend temp.
+    CompilerTemp* pc_rel_temp_;
+
+    // For architectures that don't have true PC-relative addressing (see pc_rel_temp_
+    // above) and also have a limited range of offsets for loads, it's useful to
+    // know the minimum offset into the dex cache arrays, so we calculate that as well
+    // if pc_rel_temp_ isn't nullptr.
+    uint32_t dex_cache_arrays_min_offset_;
+
+    dwarf::LazyDebugFrameOpCodeWriter cfi_;
+
     // ABI support
     class ShortyArg {
       public:
@@ -1900,6 +1933,8 @@
 
   private:
     static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type);
+
+    friend class QuickCFITest;
 };  // Class Mir2Lir
 
 }  // namespace art
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
new file mode 100644
index 0000000..2e62166
--- /dev/null
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+#include <memory>
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "cfi_test.h"
+#include "dex/compiler_ir.h"
+#include "dex/mir_graph.h"
+#include "dex/pass_manager.h"
+#include "dex/quick/dex_file_to_method_inliner_map.h"
+#include "dex/quick/quick_compiler.h"
+#include "dex/quick/mir_to_lir.h"
+#include "dex/verification_results.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
+#include "gtest/gtest.h"
+
+#include "dex/quick/quick_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class QuickCFITest : public CFITest {
+ public:
+  // Enable this flag to generate the expected outputs.
+  static constexpr bool kGenerateExpected = false;
+
+  void TestImpl(InstructionSet isa, const char* isa_str,
+                const std::vector<uint8_t>& expected_asm,
+                const std::vector<uint8_t>& expected_cfi) {
+    // Set up a simple compiler context.
+    ArenaPool pool;
+    ArenaAllocator arena(&pool);
+    CompilerOptions compiler_options(
+      CompilerOptions::kDefaultCompilerFilter,
+      CompilerOptions::kDefaultHugeMethodThreshold,
+      CompilerOptions::kDefaultLargeMethodThreshold,
+      CompilerOptions::kDefaultSmallMethodThreshold,
+      CompilerOptions::kDefaultTinyMethodThreshold,
+      CompilerOptions::kDefaultNumDexMethodsThreshold,
+      true,  // generate_gdb_information.
+      false,
+      CompilerOptions::kDefaultTopKProfileThreshold,
+      false,
+      true,  // include_debug_symbols.
+      false,
+      false,
+      false,
+      false,
+      nullptr,
+      new PassManagerOptions(),
+      nullptr,
+      false);
+    VerificationResults verification_results(&compiler_options);
+    DexFileToMethodInlinerMap method_inliner_map;
+    std::unique_ptr<const InstructionSetFeatures> isa_features;
+    std::string error;
+    isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+    CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map,
+                          Compiler::kQuick, isa, isa_features.get(),
+                          false, 0, 0, 0, false, false, "", 0, -1, "");
+    ClassLinker* linker = nullptr;
+    CompilationUnit cu(&pool, isa, &driver, linker);
+    DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } };  // NOLINT
+    cu.mir_graph.reset(new MIRGraph(&cu, &arena));
+    cu.mir_graph->current_code_item_ = &code_item;
+
+    // Generate empty method with some spills.
+    std::unique_ptr<Mir2Lir> m2l(QuickCompiler::GetCodeGenerator(&cu, nullptr));
+    m2l->frame_size_ = 64u;
+    m2l->CompilerInitializeRegAlloc();
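+    // Mark two core and two FP registers as spilled so that both kinds of
+    // save/restore CFI are exercised.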
+    for (const auto& info : m2l->reg_pool_->core_regs_) {
+      if (m2l->num_core_spills_ < 2 && !info->IsTemp() && !info->InUse()) {
+        m2l->core_spill_mask_ |= 1 << info->GetReg().GetReg();
+        m2l->num_core_spills_++;
+      }
+    }
+    for (const auto& info : m2l->reg_pool_->sp_regs_) {
+      if (m2l->num_fp_spills_ < 2 && !info->IsTemp() && !info->InUse()) {
+        m2l->fp_spill_mask_ |= 1 << info->GetReg().GetReg();
+        m2l->num_fp_spills_++;
+      }
+    }
+    m2l->AdjustSpillMask();
+    m2l->GenEntrySequence(nullptr, m2l->LocCReturnRef());
+    m2l->GenExitSequence();
+    m2l->HandleSlowPaths();
+    m2l->AssembleLIR();
+    std::vector<uint8_t> actual_asm(m2l->code_buffer_.begin(), m2l->code_buffer_.end());
+    auto const& cfi_data = m2l->cfi().Patch(actual_asm.size());
+    std::vector<uint8_t> actual_cfi(cfi_data->begin(), cfi_data->end());
+    EXPECT_EQ(m2l->cfi().GetCurrentPC(), static_cast<int>(actual_asm.size()));
+
+    if (kGenerateExpected) {
+      GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+    } else {
+      EXPECT_EQ(expected_asm, actual_asm);
+      EXPECT_EQ(expected_cfi, actual_cfi);
+    }
+  }
+};
+
+#define TEST_ISA(isa) \
+  TEST_F(QuickCFITest, isa) { \
+    std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+        expected_asm_##isa + arraysize(expected_asm_##isa)); \
+    std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+        expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+    TestImpl(isa, #isa, expected_asm, expected_cfi); \
+  }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+TEST_ISA(kMips)
+TEST_ISA(kMips64)
+
+#endif  // HAVE_ANDROID_OS
+
+}  // namespace art
diff --git a/compiler/dex/quick/quick_cfi_test_expected.inc b/compiler/dex/quick/quick_cfi_test_expected.inc
new file mode 100644
index 0000000..634fdee
--- /dev/null
+++ b/compiler/dex/quick/quick_cfi_test_expected.inc
@@ -0,0 +1,217 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+    0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+    0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+    0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x44, 0x0B, 0x0E,
+    0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: .cfi_remember_state
+// 0x0000000a: add sp, sp, #44
+// 0x0000000c: .cfi_def_cfa_offset: 20
+// 0x0000000c: vpop.f32 {s16-s17}
+// 0x00000010: .cfi_def_cfa_offset: 12
+// 0x00000010: .cfi_restore_extended: r80
+// 0x00000010: .cfi_restore_extended: r81
+// 0x00000010: pop {r5, r6, pc}
+// 0x00000012: lsls r0, r0, #0
+// 0x00000014: .cfi_restore_state
+// 0x00000014: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+    0xFF, 0x03, 0x01, 0xD1, 0xE8, 0xA7, 0x01, 0x6D, 0xF4, 0xD7, 0x02, 0xA9,
+    0xFE, 0x1F, 0x00, 0xF9, 0xE0, 0x03, 0x00, 0xB9, 0xE8, 0xA7, 0x41, 0x6D,
+    0xF4, 0xD7, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91,
+    0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+    0x44, 0x0E, 0x40, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x44, 0x94,
+    0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, 0x0A, 0x44, 0x06, 0x48, 0x06,
+    0x49, 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E,
+    0x40,
+};
+// 0x00000000: sub sp, sp, #0x40 (64)
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: stp d8, d9, [sp, #24]
+// 0x00000008: .cfi_offset_extended: r72 at cfa-40
+// 0x00000008: .cfi_offset_extended: r73 at cfa-32
+// 0x00000008: stp x20, x21, [sp, #40]
+// 0x0000000c: .cfi_offset: r20 at cfa-24
+// 0x0000000c: .cfi_offset: r21 at cfa-16
+// 0x0000000c: str lr, [sp, #56]
+// 0x00000010: .cfi_offset: r30 at cfa-8
+// 0x00000010: str w0, [sp]
+// 0x00000014: .cfi_remember_state
+// 0x00000014: ldp d8, d9, [sp, #24]
+// 0x00000018: .cfi_restore_extended: r72
+// 0x00000018: .cfi_restore_extended: r73
+// 0x00000018: ldp x20, x21, [sp, #40]
+// 0x0000001c: .cfi_restore: r20
+// 0x0000001c: .cfi_restore: r21
+// 0x0000001c: ldr lr, [sp, #56]
+// 0x00000020: .cfi_restore: r30
+// 0x00000020: add sp, sp, #0x40 (64)
+// 0x00000024: .cfi_def_cfa_offset: 0
+// 0x00000024: ret
+// 0x00000028: .cfi_restore_state
+// 0x00000028: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86[] = {
+    0x83, 0xEC, 0x3C, 0x89, 0x6C, 0x24, 0x34, 0x89, 0x74, 0x24, 0x38, 0x89,
+    0x04, 0x24, 0x8B, 0x6C, 0x24, 0x34, 0x8B, 0x74, 0x24, 0x38, 0x83, 0xC4,
+    0x3C, 0xC3, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+    0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44,
+    0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: sub esp, 60
+// 0x00000003: .cfi_def_cfa_offset: 64
+// 0x00000003: mov [esp + 52], ebp
+// 0x00000007: .cfi_offset: r5 at cfa-12
+// 0x00000007: mov [esp + 56], esi
+// 0x0000000b: .cfi_offset: r6 at cfa-8
+// 0x0000000b: mov [esp], eax
+// 0x0000000e: .cfi_remember_state
+// 0x0000000e: mov ebp, [esp + 52]
+// 0x00000012: .cfi_restore: r5
+// 0x00000012: mov esi, [esp + 56]
+// 0x00000016: .cfi_restore: r6
+// 0x00000016: add esp, 60
+// 0x00000019: .cfi_def_cfa_offset: 4
+// 0x00000019: ret
+// 0x0000001a: addb [eax], al
+// 0x0000001c: .cfi_restore_state
+// 0x0000001c: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+    0x48, 0x83, 0xEC, 0x38, 0x48, 0x89, 0x5C, 0x24, 0x28, 0x48, 0x89, 0x6C,
+    0x24, 0x30, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F,
+    0x11, 0x6C, 0x24, 0x20, 0x48, 0x8B, 0xC7, 0x89, 0x3C, 0x24, 0x48, 0x8B,
+    0x5C, 0x24, 0x28, 0x48, 0x8B, 0x6C, 0x24, 0x30, 0xF2, 0x44, 0x0F, 0x10,
+    0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, 0x20, 0x48, 0x83,
+    0xC4, 0x38, 0xC3, 0x00,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+    0x44, 0x0E, 0x40, 0x45, 0x83, 0x06, 0x45, 0x86, 0x04, 0x47, 0x9D, 0x0A,
+    0x47, 0x9E, 0x08, 0x46, 0x0A, 0x45, 0xC3, 0x45, 0xC6, 0x47, 0xDD, 0x47,
+    0xDE, 0x44, 0x0E, 0x08, 0x42, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: subq rsp, 56
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: movq [rsp + 40], rbx
+// 0x00000009: .cfi_offset: r3 at cfa-24
+// 0x00000009: movq [rsp + 48], rbp
+// 0x0000000e: .cfi_offset: r6 at cfa-16
+// 0x0000000e: movsd [rsp + 24], xmm12
+// 0x00000015: .cfi_offset: r29 at cfa-40
+// 0x00000015: movsd [rsp + 32], xmm13
+// 0x0000001c: .cfi_offset: r30 at cfa-32
+// 0x0000001c: movq rax, rdi
+// 0x0000001f: mov [rsp], edi
+// 0x00000022: .cfi_remember_state
+// 0x00000022: movq rbx, [rsp + 40]
+// 0x00000027: .cfi_restore: r3
+// 0x00000027: movq rbp, [rsp + 48]
+// 0x0000002c: .cfi_restore: r6
+// 0x0000002c: movsd xmm12, [rsp + 24]
+// 0x00000033: .cfi_restore: r29
+// 0x00000033: movsd xmm13, [rsp + 32]
+// 0x0000003a: .cfi_restore: r30
+// 0x0000003a: addq rsp, 56
+// 0x0000003e: .cfi_def_cfa_offset: 8
+// 0x0000003e: ret
+// 0x0000003f: addb al, al
+// 0x00000040: .cfi_restore_state
+// 0x00000040: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips[] = {
+    0xF4, 0xFF, 0xBD, 0x27, 0x08, 0x00, 0xB2, 0xAF, 0x04, 0x00, 0xB3, 0xAF,
+    0x00, 0x00, 0xBF, 0xAF, 0xCC, 0xFF, 0xBD, 0x27, 0x25, 0x10, 0x80, 0x00,
+    0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xB2, 0x8F, 0x38, 0x00, 0xB3, 0x8F,
+    0x34, 0x00, 0xBF, 0x8F, 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03,
+    0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+    0x44, 0x0E, 0x0C, 0x44, 0x92, 0x01, 0x44, 0x93, 0x02, 0x44, 0x9F, 0x03,
+    0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44,
+    0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -12
+// 0x00000004: .cfi_def_cfa_offset: 12
+// 0x00000004: sw r18, +8(r29)
+// 0x00000008: .cfi_offset: r18 at cfa-4
+// 0x00000008: sw r19, +4(r29)
+// 0x0000000c: .cfi_offset: r19 at cfa-8
+// 0x0000000c: sw r31, +0(r29)
+// 0x00000010: .cfi_offset: r31 at cfa-12
+// 0x00000010: addiu r29, r29, -52
+// 0x00000014: .cfi_def_cfa_offset: 64
+// 0x00000014: or r2, r4, r0
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: lw r18, +60(r29)
+// 0x00000020: .cfi_restore: r18
+// 0x00000020: lw r19, +56(r29)
+// 0x00000024: .cfi_restore: r19
+// 0x00000024: lw r31, +52(r29)
+// 0x00000028: .cfi_restore: r31
+// 0x00000028: addiu r29, r29, 64
+// 0x0000002c: .cfi_def_cfa_offset: 0
+// 0x0000002c: jalr r0, r31
+// 0x00000030: nop
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+    0xE8, 0xFF, 0xBD, 0x67, 0x10, 0x00, 0xB2, 0xFF, 0x08, 0x00, 0xB3, 0xFF,
+    0x00, 0x00, 0xBF, 0xFF, 0xD8, 0xFF, 0xBD, 0x67, 0x25, 0x10, 0x80, 0x00,
+    0x00, 0x00, 0xA4, 0xAF, 0x38, 0x00, 0xB2, 0xDF, 0x30, 0x00, 0xB3, 0xDF,
+    0x28, 0x00, 0xBF, 0xDF, 0x40, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03,
+    0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+    0x44, 0x0E, 0x18, 0x44, 0x92, 0x02, 0x44, 0x93, 0x04, 0x44, 0x9F, 0x06,
+    0x44, 0x0E, 0x40, 0x48, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xDF, 0x44,
+    0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: daddiu r29, r29, -24
+// 0x00000004: .cfi_def_cfa_offset: 24
+// 0x00000004: sd r18, +16(r29)
+// 0x00000008: .cfi_offset: r18 at cfa-8
+// 0x00000008: sd r19, +8(r29)
+// 0x0000000c: .cfi_offset: r19 at cfa-16
+// 0x0000000c: sd r31, +0(r29)
+// 0x00000010: .cfi_offset: r31 at cfa-24
+// 0x00000010: daddiu r29, r29, -40
+// 0x00000014: .cfi_def_cfa_offset: 64
+// 0x00000014: or r2, r4, r0
+// 0x00000018: sw r4, +0(r29)
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: ld r18, +56(r29)
+// 0x00000020: .cfi_restore: r18
+// 0x00000020: ld r19, +48(r29)
+// 0x00000024: .cfi_restore: r19
+// 0x00000024: ld r31, +40(r29)
+// 0x00000028: .cfi_restore: r31
+// 0x00000028: daddiu r29, r29, 64
+// 0x0000002c: .cfi_def_cfa_offset: 0
+// 0x0000002c: jr r31
+// 0x00000030: nop
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
+
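The expected_cfi_* arrays above are raw DWARF call-frame instructions, which is why each byte run lines up with a .cfi_* line in the disassembly comments. The following stand-alone decoder is a minimal sketch (not part of the patch) covering only the opcodes these tests emit; the opcode values come from the DWARF spec, and the factored-offset scale of 4 assumes the 32-bit data alignment factor of -4 (the 64-bit arrays would use 8). Running it over expected_cfi_kX86 reproduces the annotations above.

#include <cstdint>
#include <cstdio>

// Reads one unsigned LEB128 value and advances *p past it.
static uint32_t ReadUleb128(const uint8_t** p) {
  uint32_t result = 0;
  int shift = 0;
  uint8_t byte;
  do {
    byte = *(*p)++;
    result |= static_cast<uint32_t>(byte & 0x7f) << shift;
    shift += 7;
  } while ((byte & 0x80) != 0);
  return result;
}

int main() {
  static const uint8_t cfi[] = {  // expected_cfi_kX86 from above.
      0x43, 0x0E, 0x40, 0x44, 0x85, 0x03, 0x44, 0x86, 0x02, 0x43, 0x0A, 0x44,
      0xC5, 0x44, 0xC6, 0x43, 0x0E, 0x04, 0x43, 0x0B, 0x0E, 0x40,
  };
  for (const uint8_t* p = cfi; p != cfi + sizeof(cfi);) {
    uint8_t op = *p++;
    switch (op >> 6) {  // The high two bits select the "primary" opcodes.
      case 1: printf(".cfi_advance_loc %d\n", op & 0x3f); break;
      case 2: printf(".cfi_offset: r%d at cfa-%u\n", op & 0x3f, 4 * ReadUleb128(&p)); break;
      case 3: printf(".cfi_restore: r%d\n", op & 0x3f); break;
      default:  // Extended opcodes use the whole byte.
        if (op == 0x0A) printf(".cfi_remember_state\n");
        else if (op == 0x0B) printf(".cfi_restore_state\n");
        else if (op == 0x0E) printf(".cfi_def_cfa_offset: %u\n", ReadUleb128(&p));
        else printf("(unhandled opcode 0x%02X)\n", op);
    }
  }
  return 0;
}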
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 8baafc7..fc3e687 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -793,16 +793,7 @@
       InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
 }
 
-bool QuickCompiler::WriteElf(art::File* file,
-                             OatWriter* oat_writer,
-                             const std::vector<const art::DexFile*>& dex_files,
-                             const std::string& android_root,
-                             bool is_host) const {
-  return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
-                                       *GetCompilerDriver());
-}
-
-Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const {
+Mir2Lir* QuickCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) {
   UNUSED(compilation_unit);
   Mir2Lir* mir_to_lir = nullptr;
   switch (cu->instruction_set) {
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index 5153a9e..8d2c324 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -52,15 +52,7 @@
   uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool WriteElf(art::File* file,
-                OatWriter* oat_writer,
-                const std::vector<const art::DexFile*>& dex_files,
-                const std::string& android_root,
-                bool is_host) const
-    OVERRIDE
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const;
+  static Mir2Lir* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit);
 
   void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
 
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 741657b..e779479 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -19,9 +19,11 @@
 #include "mir_to_lir-inl.h"
 
 #include "dex/compiler_ir.h"
+#include "dex/dataflow_iterator-inl.h"
 #include "dex/mir_graph.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
 
 namespace art {
 
@@ -1128,6 +1130,152 @@
   return loc;
 }
 
+void Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
+  // NOTE: This should be in sync with functions that actually generate code for
+  // the opcodes below. However, if we get this wrong, the generated code will
+  // still be correct even if it may be sub-optimal.
+  int opcode = mir->dalvikInsn.opcode;
+  bool uses_method = false;
+  bool uses_pc_rel_load = false;
+  uint32_t dex_cache_array_offset = std::numeric_limits<uint32_t>::max();
+  switch (opcode) {
+    case Instruction::CHECK_CAST:
+    case Instruction::INSTANCE_OF: {
+      if ((opcode == Instruction::CHECK_CAST) &&
+          (mir->optimization_flags & MIR_IGNORE_CHECK_CAST) != 0) {
+        break;  // No code generated.
+      }
+      uint32_t type_idx =
+          (opcode == Instruction::CHECK_CAST) ? mir->dalvikInsn.vB : mir->dalvikInsn.vC;
+      bool type_known_final, type_known_abstract, use_declaring_class;
+      bool needs_access_check = !cu_->compiler_driver->CanAccessTypeWithoutChecks(
+          cu_->method_idx, *cu_->dex_file, type_idx,
+          &type_known_final, &type_known_abstract, &use_declaring_class);
+      if (opcode == Instruction::CHECK_CAST && !needs_access_check &&
+          cu_->compiler_driver->IsSafeCast(
+              mir_graph_->GetCurrentDexCompilationUnit(), mir->offset)) {
+        break;  // No code generated.
+      }
+      if (!needs_access_check && !use_declaring_class && CanUseOpPcRelDexCacheArrayLoad()) {
+        uses_pc_rel_load = true;  // And ignore method use in slow path.
+        dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(type_idx);
+      } else {
+        uses_method = true;
+      }
+      break;
+    }
+
+    case Instruction::CONST_CLASS:
+      if (CanUseOpPcRelDexCacheArrayLoad() &&
+          cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file,
+                                                           mir->dalvikInsn.vB)) {
+        uses_pc_rel_load = true;  // And ignore method use in slow path.
+        dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(mir->dalvikInsn.vB);
+      } else {
+        uses_method = true;
+      }
+      break;
+
+    case Instruction::CONST_STRING:
+    case Instruction::CONST_STRING_JUMBO:
+      if (CanUseOpPcRelDexCacheArrayLoad()) {
+        uses_pc_rel_load = true;  // And ignore method use in slow path.
+        dex_cache_array_offset = dex_cache_arrays_layout_.StringOffset(mir->dalvikInsn.vB);
+      } else {
+        uses_method = true;
+      }
+      break;
+
+    case Instruction::INVOKE_VIRTUAL:
+    case Instruction::INVOKE_SUPER:
+    case Instruction::INVOKE_DIRECT:
+    case Instruction::INVOKE_STATIC:
+    case Instruction::INVOKE_INTERFACE:
+    case Instruction::INVOKE_VIRTUAL_RANGE:
+    case Instruction::INVOKE_SUPER_RANGE:
+    case Instruction::INVOKE_DIRECT_RANGE:
+    case Instruction::INVOKE_STATIC_RANGE:
+    case Instruction::INVOKE_INTERFACE_RANGE:
+    case Instruction::INVOKE_VIRTUAL_QUICK:
+    case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: {
+      const MirMethodLoweringInfo& info = mir_graph_->GetMethodLoweringInfo(mir);
+      InvokeType sharp_type = info.GetSharpType();
+      if (info.IsIntrinsic()) {
+        // Nothing to do; if an intrinsic uses ArtMethod*, it's in the slow path - don't count it.
+      } else if (!info.FastPath() || (sharp_type != kStatic && sharp_type != kDirect)) {
+        // Nothing to do, the generated code or entrypoint uses method from the stack.
+      } else if (info.DirectCode() != 0 && info.DirectMethod() != 0) {
+        // Nothing to do, the generated code uses method from the stack.
+      } else if (CanUseOpPcRelDexCacheArrayLoad()) {
+        uses_pc_rel_load = true;
+        dex_cache_array_offset = dex_cache_arrays_layout_.MethodOffset(mir->dalvikInsn.vB);
+      } else {
+        uses_method = true;
+      }
+      break;
+    }
+
+    case Instruction::NEW_INSTANCE:
+    case Instruction::NEW_ARRAY:
+    case Instruction::FILLED_NEW_ARRAY:
+    case Instruction::FILLED_NEW_ARRAY_RANGE:
+      uses_method = true;
+      break;
+    case Instruction::FILL_ARRAY_DATA:
+      // Nothing to do, the entrypoint uses method from the stack.
+      break;
+    case Instruction::THROW:
+      // Nothing to do, the entrypoint uses method from the stack.
+      break;
+
+    case Instruction::SGET:
+    case Instruction::SGET_WIDE:
+    case Instruction::SGET_OBJECT:
+    case Instruction::SGET_BOOLEAN:
+    case Instruction::SGET_BYTE:
+    case Instruction::SGET_CHAR:
+    case Instruction::SGET_SHORT:
+    case Instruction::SPUT:
+    case Instruction::SPUT_WIDE:
+    case Instruction::SPUT_OBJECT:
+    case Instruction::SPUT_BOOLEAN:
+    case Instruction::SPUT_BYTE:
+    case Instruction::SPUT_CHAR:
+    case Instruction::SPUT_SHORT: {
+      const MirSFieldLoweringInfo& field_info = mir_graph_->GetSFieldLoweringInfo(mir);
+      bool fast = IsInstructionSGet(static_cast<Instruction::Code>(opcode))
+          ? field_info.FastGet()
+          : field_info.FastPut();
+      if (fast && (cu_->enable_debug & (1 << kDebugSlowFieldPath)) == 0) {
+        if (!field_info.IsReferrersClass() && CanUseOpPcRelDexCacheArrayLoad()) {
+          uses_pc_rel_load = true;  // And ignore method use in slow path.
+          dex_cache_array_offset = dex_cache_arrays_layout_.TypeOffset(field_info.StorageIndex());
+        } else {
+          uses_method = true;
+        }
+      } else {
+        // Nothing to do, the entrypoint uses method from the stack.
+      }
+      break;
+    }
+
+    default:
+      break;
+  }
+  if (uses_method) {
+    core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count += weight;
+  }
+  if (uses_pc_rel_load) {
+    if (pc_rel_temp_ != nullptr) {
+      core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
+      DCHECK_NE(dex_cache_array_offset, std::numeric_limits<uint32_t>::max());
+      dex_cache_arrays_min_offset_ = std::min(dex_cache_arrays_min_offset_, dex_cache_array_offset);
+    } else {
+      // Nothing to do, using PC-relative addressing without promoting base PC to register.
+    }
+  }
+}
+
 /* USE SSA names to count references of base Dalvik v_regs. */
 void Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
   for (int i = 0; i < mir_graph_->GetNumSSARegs(); i++) {
@@ -1157,6 +1305,22 @@
       }
     }
   }
+
+  // Now analyze the ArtMethod* and pc_rel_temp_ uses.
+  DCHECK_EQ(core_counts[SRegToPMap(mir_graph_->GetMethodLoc().s_reg_low)].count, 0);
+  if (pc_rel_temp_ != nullptr) {
+    DCHECK_EQ(core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count, 0);
+  }
+  PreOrderDfsIterator iter(mir_graph_);
+  for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
+    if (bb->block_type == kDead) {
+      continue;
+    }
+    uint32_t weight = mir_graph_->GetUseCountWeight(bb);
+    for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+      AnalyzeMIR(core_counts, mir, weight);
+    }
+  }
 }
 
 /* qsort callback function, sort descending */
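The new tail of CountRefs() folds ArtMethod* and pc_rel_temp_ uses into the same weighted reference counts already gathered for Dalvik v_regs, so promoting the PC-relative base competes fairly with ordinary registers. A hedged sketch of the weighting idea follows; every name here except RefCounts is invented for illustration.

#include <cstdint>
#include <cstdio>

// Each use adds the owning basic block's weight (GetUseCountWeight above),
// so one use inside a hot loop can out-rank several straight-line uses
// when promotion candidates are later sorted by count.
struct RefCounts { int count; int s_reg; };

static void CountUse(RefCounts* counts, int p_map_idx, uint32_t weight) {
  counts[p_map_idx].count += weight;
}

int main() {
  RefCounts counts[2] = {{0, 100}, {0, 101}};  // [0]=ArtMethod*, [1]=PC base.
  CountUse(counts, 0, 1);  // An invoke needing ArtMethod*, straight-line code.
  CountUse(counts, 1, 4);  // A dex cache array load inside a loop.
  CountUse(counts, 1, 4);  // A second load in the same loop.
  printf("ArtMethod*=%d pc_rel_temp=%d\n", counts[0].count, counts[1].count);
  return 0;
}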
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 118ab1d..af19f5e 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -544,7 +544,6 @@
   { kX86CallI, kCall, IS_UNARY_OP  | IS_BRANCH,                             { 0,             0, 0xE8, 0,    0, 0, 0, 4, false }, "CallI", "!0d" },
   { kX86Ret,   kNullary, NO_OPERAND | IS_BRANCH,                            { 0,             0, 0xC3, 0,    0, 0, 0, 0, false }, "Ret", "" },
 
-  { kX86StartOfMethod, kMacro,  IS_UNARY_OP | REG_DEF0 | SETS_CCODES,  { 0, 0, 0,    0, 0, 0, 0, 0, false }, "StartOfMethod", "!0r" },
   { kX86PcRelLoadRA,   kPcRel,  IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "PcRelLoadRA",   "!0r,[!1r+!2r<<!3d+!4p]" },
   { kX86PcRelAdr,      kPcRel,  IS_LOAD | IS_BINARY_OP | REG_DEF0,     { 0, 0, 0xB8, 0, 0, 0, 0, 4, false }, "PcRelAdr",      "!0r,!1p" },
   { kX86RepneScasw,    kNullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0, false }, "RepNE ScasW", "" },
@@ -865,13 +864,6 @@
         DCHECK_EQ(entry->opcode, kX86PcRelAdr);
         return 5;  // opcode with reg + 4 byte immediate
       }
-    case kMacro:  // lir operands - 0: reg
-      DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod));
-      return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ +
-          ComputeSize(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI],
-                      lir->operands[0], NO_REG, NO_REG, 0) -
-              // Shorter ax encoding.
-              (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum()  ? 1 : 0);
     case kUnimplemented:
       break;
   }
@@ -1586,8 +1578,8 @@
                            int32_t raw_index, int scale, int32_t table_or_disp) {
   int disp;
   if (entry->opcode == kX86PcRelLoadRA) {
-    const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(table_or_disp);
-    disp = tab_rec->offset;
+    const SwitchTable* tab_rec = UnwrapPointer<SwitchTable>(table_or_disp);
+    disp = tab_rec->offset - tab_rec->anchor->offset;
   } else {
     DCHECK(entry->opcode == kX86PcRelAdr);
     const EmbeddedData* tab_rec = UnwrapPointer<EmbeddedData>(raw_base_or_table);
@@ -1621,23 +1613,6 @@
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
 }
 
-void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) {
-  DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name;
-  DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, raw_reg, NO_REG, NO_REG);
-  code_buffer_.push_back(0xE8);  // call +0
-  code_buffer_.push_back(0);
-  code_buffer_.push_back(0);
-  code_buffer_.push_back(0);
-  code_buffer_.push_back(0);
-
-  uint8_t low_reg = LowRegisterBits(raw_reg);
-  code_buffer_.push_back(0x58 + low_reg);  // pop reg
-
-  EmitRegImm(&X86Mir2Lir::EncodingMap[cu_->target64 ? kX86Sub64RI : kX86Sub32RI],
-             raw_reg, offset + 5 /* size of call +0 */);
-}
-
 void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) {
   UNIMPLEMENTED(WARNING) << "encoding kind for " << entry->name << " "
                          << BuildInsnString(entry->fmt, lir, 0);
@@ -1780,7 +1755,8 @@
               // Offset is relative to next instruction.
               lir->operands[2] = target - (lir->offset + lir->flags.size);
             } else {
-              lir->operands[2] = target;
+              const LIR* anchor = UnwrapPointer<LIR>(lir->operands[4]);
+              lir->operands[2] = target - anchor->offset;
               int newSize = GetInsnSize(lir);
               if (newSize != lir->flags.size) {
                 lir->flags.size = newSize;
@@ -1951,9 +1927,6 @@
         EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                   lir->operands[3], lir->operands[4]);
         break;
-      case kMacro:  // lir operands - 0: reg
-        EmitMacro(entry, lir->operands[0], lir->offset);
-        break;
       case kNop:  // TODO: these instruction kinds are missing implementations.
       case kThreadReg:
       case kRegArrayImm:
@@ -2044,9 +2017,13 @@
   cu_->NewTimingSplit("Assemble");
 
   // We will remove the PC-relative base register setup if we never ended up using it.
-  if (store_method_addr_ && !store_method_addr_used_) {
-    setup_method_address_[0]->flags.is_nop = true;
-    setup_method_address_[1]->flags.is_nop = true;
+  if (pc_rel_base_reg_.Valid() && !pc_rel_base_reg_used_) {
+    if (kIsDebugBuild) {
+      LOG(WARNING) << "PC-relative addressing base promoted but unused in "
+          << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+    }
+    setup_pc_rel_base_reg_->flags.is_nop = true;
+    NEXT_LIR(setup_pc_rel_base_reg_)->flags.is_nop = true;
   }
 
   AssignOffsets();
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index fd23692..d7a5eb0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -97,29 +97,23 @@
 
     // Add the offset from the table to the table base.
     OpRegReg(kOpAdd, addr_for_jump, table_base);
+    tab_rec->anchor = nullptr;  // Unused for x86-64.
   } else {
-    // Materialize a pointer to the switch table.
-    RegStorage start_of_method_reg;
-    if (base_of_code_ != nullptr) {
-      // We can use the saved value.
-      RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-      rl_method = LoadValue(rl_method, kCoreReg);
-      start_of_method_reg = rl_method.reg;
-      store_method_addr_used_ = true;
-    } else {
-      start_of_method_reg = AllocTempRef();
-      NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
-    }
+    // Get the PC to a register and get the anchor.
+    LIR* anchor;
+    RegStorage r_pc = GetPcAndAnchor(&anchor);
+
     // Load the displacement from the switch table.
     addr_for_jump = AllocTemp();
-    NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+    NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), r_pc.GetReg(), keyReg.GetReg(),
             2, WrapPointer(tab_rec));
-    // Add displacement to start of method.
-    OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg);
+    // Add displacement and r_pc to get the address.
+    OpRegReg(kOpAdd, addr_for_jump, r_pc);
+    tab_rec->anchor = anchor;
   }
 
   // ..and go!
-  tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg());
+  NewLIR1(kX86JmpR, addr_for_jump.GetReg());
 
   /* branch_over target here */
   LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -150,6 +144,10 @@
   FreeTemp(reg_card_no);
 }
 
+static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
+  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
+}
+
 void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
   /*
    * On entry, rX86_ARG0, rX86_ARG1, rX86_ARG2 are live.  Let the register
@@ -184,8 +182,9 @@
   }
 
   /* Build frame, return address already on stack */
-  stack_decrement_ = OpRegImm(kOpSub, rs_rSP, frame_size_ -
-                              GetInstructionSetPointerSize(cu_->instruction_set));
+  cfi_.SetCurrentCFAOffset(GetInstructionSetPointerSize(cu_->instruction_set));
+  OpRegImm(kOpSub, rs_rSP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
+  cfi_.DefCFAOffset(frame_size_);
 
   /* Spill core callee saves */
   SpillCoreRegs();
@@ -202,10 +201,12 @@
         GenerateTargetLabel(kPseudoThrowTarget);
         const RegStorage local_rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
         m2l_->OpRegImm(kOpAdd, local_rs_rSP, sp_displace_);
+        m2l_->cfi().AdjustCFAOffset(-sp_displace_);
         m2l_->ClobberCallerSave();
         // Assumes codegen and target are in thumb2 mode.
         m2l_->CallHelper(RegStorage::InvalidReg(), kQuickThrowStackOverflow,
                          false /* MarkSafepointPC */, false /* UseLink */);
+        m2l_->cfi().AdjustCFAOffset(sp_displace_);
       }
 
      private:
@@ -236,14 +237,12 @@
 
   FlushIns(ArgLocs, rl_method);
 
-  if (base_of_code_ != nullptr) {
-    RegStorage method_start = TargetPtrReg(kArg0);
-    // We have been asked to save the address of the method start for later use.
-    setup_method_address_[0] = NewLIR1(kX86StartOfMethod, method_start.GetReg());
-    int displacement = SRegOffset(base_of_code_->s_reg_low);
-    // Native pointer - must be natural word size.
-    setup_method_address_[1] = StoreBaseDisp(rs_rSP, displacement, method_start,
-                                             cu_->target64 ? k64 : k32, kNotVolatile);
+  // We can promote the PC of an anchor for PC-relative addressing to a register
+  // if it's used at least twice. Without investigating where we should lazily
+  // load the reference, we conveniently load it after flushing inputs.
+  if (pc_rel_base_reg_.Valid()) {
+    DCHECK(!cu_->target64);
+    setup_pc_rel_base_reg_ = OpLoadPc(pc_rel_base_reg_);
   }
 
   FreeTemp(arg0);
@@ -252,6 +251,7 @@
 }
 
 void X86Mir2Lir::GenExitSequence() {
+  cfi_.RememberState();
   /*
    * In the exit path, rX86_RET0/rX86_RET1 are live - make sure they aren't
    * allocated by the register utilities as temps.
@@ -263,9 +263,14 @@
   UnSpillFPRegs();
   /* Remove frame except for return address */
   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  stack_increment_ = OpRegImm(kOpAdd, rs_rSP,
-                              frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
+  int adjust = frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set);
+  OpRegImm(kOpAdd, rs_rSP, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
+  // There is only the return PC on the stack now.
   NewLIR0(kX86Ret);
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size_);
 }
 
 void X86Mir2Lir::GenSpecialExitSequence() {
@@ -276,6 +281,8 @@
   // Keep 16-byte stack alignment, there's already the return address, so
   //   - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI,
   //   - for 64-bit push RAX, i.e. ArtMethod*.
+  const int kRegSize = cu_->target64 ? 8 : 4;
+  cfi_.SetCurrentCFAOffset(kRegSize);  // Return address.
   if (!cu_->target64) {
     DCHECK(!IsTemp(rs_rSI));
     DCHECK(!IsTemp(rs_rDI));
@@ -293,17 +300,29 @@
   fp_vmap_table_.clear();
   if (!cu_->target64) {
     NewLIR1(kX86Push32R, rs_rDI.GetReg());
+    cfi_.AdjustCFAOffset(kRegSize);
+    cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0);
     NewLIR1(kX86Push32R, rs_rSI.GetReg());
+    cfi_.AdjustCFAOffset(kRegSize);
+    cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0);
   }
   NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg());  // ArtMethod*
+  cfi_.AdjustCFAOffset(kRegSize);
+  // Do not generate CFI for scratch register.
 }
 
 void X86Mir2Lir::GenSpecialExitForSuspend() {
+  const int kRegSize = cu_->target64 ? 8 : 4;
   // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
   NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg());  // ArtMethod*
+  cfi_.AdjustCFAOffset(-kRegSize);
   if (!cu_->target64) {
     NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+    cfi_.AdjustCFAOffset(-kRegSize);
+    cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
     NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+    cfi_.AdjustCFAOffset(-kRegSize);
+    cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
   }
 }
 
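GenExitSequence() brackets the epilogue with RememberState()/RestoreState() because out-of-line slow paths are emitted after `ret` and still execute with the full frame. Below is a minimal model of that CFA bookkeeping; it is an illustrative sketch, not ART's actual DebugFrameOpCodeWriter, whose tracked offset is what the explicit DefCFAOffset(frame_size_) call keeps in sync.

#include <cassert>
#include <vector>

// Tracks the current CFA offset plus a remembered-state stack, mirroring
// the cfi_ calls used in the prologue/epilogue above.
class CfaTracker {
 public:
  void SetCurrentCFAOffset(int off) { offset_ = off; }
  void AdjustCFAOffset(int delta) { offset_ += delta; }
  void DefCFAOffset(int off) { offset_ = off; }        // DW_CFA_def_cfa_offset
  void RememberState() { saved_.push_back(offset_); }  // DW_CFA_remember_state
  void RestoreState() {                                // DW_CFA_restore_state
    assert(!saved_.empty());
    offset_ = saved_.back();
    saved_.pop_back();
  }
  int offset() const { return offset_; }

 private:
  int offset_ = 0;
  std::vector<int> saved_;
};

int main() {
  CfaTracker cfi;
  cfi.SetCurrentCFAOffset(4);  // Return address is already on the stack.
  cfi.DefCFAOffset(64);        // After "sub esp, 60" in the prologue.
  cfi.RememberState();         // Epilogue begins.
  cfi.AdjustCFAOffset(-60);    // After "add esp, 60".
  assert(cfi.offset() == 4);   // Only the return PC remains before "ret".
  cfi.RestoreState();          // Slow paths after "ret" use the full frame.
  cfi.DefCFAOffset(64);
  assert(cfi.offset() == 64);
  return 0;
}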
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 758684e..72580a3 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -28,7 +28,7 @@
 
 namespace art {
 
-class X86Mir2Lir : public Mir2Lir {
+class X86Mir2Lir FINAL : public Mir2Lir {
  protected:
   class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
    public:
@@ -375,17 +375,15 @@
    */
   LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
 
+  void AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) OVERRIDE;
+  void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) OVERRIDE;
+  void DoPromotion() OVERRIDE;
+
   /*
    * @brief Handle x86 specific literals
    */
   void InstallLiteralPools() OVERRIDE;
 
-  /*
-   * @brief Generate the debug_frame FDE information.
-   * @returns pointer to vector containing CFE information
-   */
-  std::vector<uint8_t>* ReturnFrameDescriptionEntry() OVERRIDE;
-
   LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
  protected:
@@ -494,7 +492,6 @@
   void EmitCallThread(const X86EncodingMap* entry, int32_t disp);
   void EmitPcRel(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base_or_table,
                  int32_t raw_index, int scale, int32_t table_or_disp);
-  void EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset);
   void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
   void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                 int64_t val, ConditionCode ccode);
@@ -865,12 +862,6 @@
   void SpillFPRegs();
 
   /*
-   * @brief Perform MIR analysis before compiling method.
-   * @note Invokes Mir2LiR::Materialize after analysis.
-   */
-  void Materialize();
-
-  /*
    * Mir2Lir's UpdateLoc() looks to see if the Dalvik value is currently live in any temp register
    * without regard to data type.  In practice, this can result in UpdateLoc returning a
    * location record for a Dalvik float value in a core register, and vice versa.  For targets
@@ -884,67 +875,39 @@
   RegLocation UpdateLocWideTyped(RegLocation loc);
 
   /*
-   * @brief Analyze MIR before generating code, to prepare for the code generation.
-   */
-  void AnalyzeMIR();
-
-  /*
-   * @brief Analyze one basic block.
-   * @param bb Basic block to analyze.
-   */
-  void AnalyzeBB(BasicBlock* bb);
-
-  /*
-   * @brief Analyze one extended MIR instruction
-   * @param opcode MIR instruction opcode.
-   * @param bb Basic block containing instruction.
-   * @param mir Extended instruction to analyze.
-   */
-  void AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir);
-
-  /*
-   * @brief Analyze one MIR instruction
-   * @param opcode MIR instruction opcode.
-   * @param bb Basic block containing instruction.
-   * @param mir Instruction to analyze.
-   */
-  virtual void AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir);
-
-  /*
    * @brief Analyze one MIR float/double instruction
    * @param opcode MIR instruction opcode.
-   * @param bb Basic block containing instruction.
    * @param mir Instruction to analyze.
+   * @return true iff the instruction needs to load a literal using PC-relative addressing.
    */
-  virtual void AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir);
+  bool AnalyzeFPInstruction(int opcode, MIR* mir);
 
   /*
    * @brief Analyze one use of a double operand.
    * @param rl_use Double RegLocation for the operand.
+   * @return true iff the instruction needs to load a literal using PC-relative addressing.
    */
-  void AnalyzeDoubleUse(RegLocation rl_use);
+  bool AnalyzeDoubleUse(RegLocation rl_use);
 
   /*
    * @brief Analyze one invoke-static MIR instruction
-   * @param opcode MIR instruction opcode.
-   * @param bb Basic block containing instruction.
    * @param mir Instruction to analyze.
+   * @return true iff the instruction needs to load a literal using PC-relative addressing.
    */
-  void AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir);
+  bool AnalyzeInvokeStaticIntrinsic(MIR* mir);
 
   // Information derived from analysis of MIR
 
-  // The compiler temporary for the code address of the method.
-  CompilerTemp *base_of_code_;
+  // The base register for PC-relative addressing if promoted (32-bit only).
+  RegStorage pc_rel_base_reg_;
 
-  // Have we decided to compute a ptr to code and store in temporary VR?
-  bool store_method_addr_;
+  // Have we actually used the pc_rel_base_reg_?
+  bool pc_rel_base_reg_used_;
 
-  // Have we used the stored method address?
-  bool store_method_addr_used_;
-
-  // Instructions to remove if we didn't use the stored method address.
-  LIR* setup_method_address_[2];
+  // Pointer to the "call +0" insn that sets up the promoted register for PC-relative addressing.
+  // The anchor "pop" insn is NEXT_LIR(setup_pc_rel_base_reg_). The whole "call +0; pop <reg>"
+  // sequence will be removed in AssembleLIR() if we do not actually use PC-relative addressing.
+  LIR* setup_pc_rel_base_reg_;  // There are 2 chained insns (no reordering allowed).
 
   // Instructions needing patching with Method* values.
   ArenaVector<LIR*> method_address_insns_;
@@ -958,12 +921,6 @@
   // Instructions needing patching with PC relative code addresses.
   ArenaVector<LIR*> dex_cache_access_insns_;
 
-  // Prologue decrement of stack pointer.
-  LIR* stack_decrement_;
-
-  // Epilogue increment of stack pointer.
-  LIR* stack_increment_;
-
   // The list of const vector literals.
   LIR* const_vectors_;
 
@@ -1004,6 +961,14 @@
                                uintptr_t direct_code, uintptr_t direct_method,
                                InvokeType type);
 
+  LIR* OpLoadPc(RegStorage r_dest);
+  RegStorage GetPcAndAnchor(LIR** anchor, RegStorage r_tmp = RegStorage::InvalidReg());
+
+  // When we don't know the proper offset for the value, pick one that will force
+  // a 4-byte offset.  We will fix this up in the assembler or linker later to have
+  // the right value.
+  static constexpr int kDummy32BitOffset = 256;
+
   static const X86EncodingMap EncodingMap[kX86Last];
 
   friend std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index d8616a7..cfe0480 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -756,24 +756,6 @@
     branch_nan->target = NewLIR0(kPseudoTargetLabel);
     LoadConstantWide(rl_result.reg, INT64_C(0x7ff8000000000000));
 
-    // The base_of_code_ compiler temp is non-null when it is reserved
-    // for being able to do data accesses relative to method start.
-    if (base_of_code_ != nullptr) {
-      // Loading from the constant pool may have used base of code register.
-      // However, the code here generates logic in diamond shape and not all
-      // paths load base of code register. Therefore, we ensure it is clobbered so
-      // that the temp caching system does not believe it is live at merge point.
-      RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-      if (rl_method.wide) {
-        rl_method = UpdateLocWide(rl_method);
-      } else {
-        rl_method = UpdateLoc(rl_method);
-      }
-      if (rl_method.location == kLocPhysReg) {
-        Clobber(rl_method.reg);
-      }
-    }
-
     LIR* branch_exit_nan = NewLIR1(kX86Jmp8, 0);
     // Handle Min/Max. Copy greater/lesser value from src2.
     branch_cond1->target = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 5def5c8..1043815 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -830,6 +830,10 @@
   return rl_result;
 }
 
+static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
+  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
+}
+
 bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
   DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
 
@@ -928,6 +932,7 @@
 
     // Do we have a free register for intermediate calculations?
     RegStorage tmp = AllocTemp(false);
+    const int kRegSize = cu_->target64 ? 8 : 4;
     if (tmp == RegStorage::InvalidReg()) {
        /*
         * No, will use 'edi'.
@@ -946,6 +951,11 @@
               IsTemp(rl_result.reg.GetHigh()));
        tmp = rs_rDI;
        NewLIR1(kX86Push32R, tmp.GetReg());
+       cfi_.AdjustCFAOffset(kRegSize);
+       // Record cfi only if it is not already spilled.
+       if (!CoreSpillMaskContains(tmp.GetReg())) {
+         cfi_.RelOffset(DwarfCoreReg(cu_->target64, tmp.GetRegNum()), 0);
+       }
     }
 
     // Now we are ready to do calculations.
@@ -957,6 +967,10 @@
     // Let's put pop 'edi' here to break a bit the dependency chain.
     if (tmp == rs_rDI) {
       NewLIR1(kX86Pop32R, tmp.GetReg());
+      cfi_.AdjustCFAOffset(-kRegSize);
+      if (!CoreSpillMaskContains(tmp.GetReg())) {
+        cfi_.Restore(DwarfCoreReg(cu_->target64, tmp.GetRegNum()));
+      }
     } else {
       FreeTemp(tmp);
     }
@@ -1104,6 +1118,7 @@
   // If is_long, high half is in info->args[5]
   RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
   // If is_long, high half is in info->args[7]
+  const int kRegSize = cu_->target64 ? 8 : 4;
 
   if (is_long && cu_->target64) {
     // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
@@ -1125,7 +1140,6 @@
     FreeTemp(rs_r0q);
   } else if (is_long) {
     // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
-    // TODO: CFI support.
     FlushAllRegs();
     LockCallTemps();
     RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX);
@@ -1148,11 +1162,21 @@
       NewLIR1(kX86Push32R, rs_rDI.GetReg());
       MarkTemp(rs_rDI);
       LockTemp(rs_rDI);
+      cfi_.AdjustCFAOffset(kRegSize);
+      // Record cfi only if it is not already spilled.
+      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0);
+      }
     }
     if (push_si) {
       NewLIR1(kX86Push32R, rs_rSI.GetReg());
       MarkTemp(rs_rSI);
       LockTemp(rs_rSI);
+      cfi_.AdjustCFAOffset(kRegSize);
+      // Record cfi only if it is not already spilled.
+      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
+        cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()), 0);
+      }
     }
     ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
     const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u);
@@ -1183,11 +1207,19 @@
       FreeTemp(rs_rSI);
       UnmarkTemp(rs_rSI);
       NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+      cfi_.AdjustCFAOffset(-kRegSize);
+      if (!CoreSpillMaskContains(rs_rSI.GetReg())) {
+        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rSI.GetRegNum()));
+      }
     }
     if (push_di) {
       FreeTemp(rs_rDI);
       UnmarkTemp(rs_rDI);
       NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+      cfi_.AdjustCFAOffset(-kRegSize);
+      if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+        cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
+      }
     }
     FreeCallTemps();
   } else {
@@ -1324,11 +1356,6 @@
   return true;
 }
 
-// When we don't know the proper offset for the value, pick one that will force
-// 4 byte offset.  We will fix this up in the assembler or linker later to have
-// the right value.
-static constexpr int kDummy32BitOffset = 256;
-
 void X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   if (cu_->target64) {
     // We can do this directly using RIP addressing.
@@ -1339,27 +1366,48 @@
     return;
   }
 
-  CHECK(base_of_code_ != nullptr);
-
-  // Address the start of the method
-  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  if (rl_method.wide) {
-    LoadValueDirectWideFixed(rl_method, reg);
-  } else {
-    LoadValueDirectFixed(rl_method, reg);
-  }
-  store_method_addr_used_ = true;
+  // Get the PC to a register and get the anchor.
+  LIR* anchor;
+  RegStorage r_pc = GetPcAndAnchor(&anchor);
 
   // Load the proper value from the literal area.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), reg.GetReg(), kDummy32BitOffset);
+  LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
+  res->operands[4] = WrapPointer(anchor);
   res->target = target;
   res->flags.fixup = kFixupLoad;
 }
 
 bool X86Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
-  // TODO: Implement for 32-bit.
-  return cu_->target64 && dex_cache_arrays_layout_.Valid();
+  return dex_cache_arrays_layout_.Valid();
+}
+
+LIR* X86Mir2Lir::OpLoadPc(RegStorage r_dest) {
+  DCHECK(!cu_->target64);
+  LIR* call = NewLIR1(kX86CallI, 0);
+  call->flags.fixup = kFixupLabel;
+  LIR* pop = NewLIR1(kX86Pop32R, r_dest.GetReg());
+  pop->flags.fixup = kFixupLabel;
+  DCHECK(NEXT_LIR(call) == pop);
+  return call;
+}
+
+RegStorage X86Mir2Lir::GetPcAndAnchor(LIR** anchor, RegStorage r_tmp) {
+  if (pc_rel_base_reg_.Valid()) {
+    DCHECK(setup_pc_rel_base_reg_ != nullptr);
+    *anchor = NEXT_LIR(setup_pc_rel_base_reg_);
+    DCHECK(*anchor != nullptr);
+    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
+    pc_rel_base_reg_used_ = true;
+    return pc_rel_base_reg_;
+  } else {
+    RegStorage r_pc = r_tmp.Valid() ? r_tmp : AllocTempRef();
+    LIR* load_pc = OpLoadPc(r_pc);
+    *anchor = NEXT_LIR(load_pc);
+    DCHECK(*anchor != nullptr);
+    DCHECK_EQ((*anchor)->opcode, kX86Pop32R);
+    return r_pc;
+  }
 }
 
 void X86Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset,
@@ -1369,11 +1417,18 @@
     mov->flags.fixup = kFixupLabel;
     mov->operands[3] = WrapPointer(dex_file);
     mov->operands[4] = offset;
+    mov->target = mov;  // Used for pc_insn_offset (not used by x86-64 relative patcher).
     dex_cache_access_insns_.push_back(mov);
   } else {
-    // TODO: Implement for 32-bit.
-    LOG(FATAL) << "Unimplemented.";
-    UNREACHABLE();
+    // Get the PC to a register and get the anchor. Use r_dest for the temp if needed.
+    LIR* anchor;
+    RegStorage r_pc = GetPcAndAnchor(&anchor, r_dest);
+    LIR* mov = NewLIR3(kX86Mov32RM, r_dest.GetReg(), r_pc.GetReg(), kDummy32BitOffset);
+    mov->flags.fixup = kFixupLabel;
+    mov->operands[3] = WrapPointer(dex_file);
+    mov->operands[4] = offset;
+    mov->target = anchor;  // Used for pc_insn_offset.
+    dex_cache_access_insns_.push_back(mov);
   }
 }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index cad82a1..a16e242 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -32,7 +32,6 @@
 #include "mirror/string.h"
 #include "oat.h"
 #include "x86_lir.h"
-#include "utils/dwarf_cfi.h"
 
 namespace art {
 
@@ -725,6 +724,14 @@
   return long_or_fp ? num_vector_temps - 2 : num_vector_temps - 1;
 }
 
+static dwarf::Reg DwarfCoreReg(bool is_x86_64, int num) {
+  return is_x86_64 ? dwarf::Reg::X86_64Core(num) : dwarf::Reg::X86Core(num);
+}
+
+static dwarf::Reg DwarfFpReg(bool is_x86_64, int num) {
+  return is_x86_64 ? dwarf::Reg::X86_64Fp(num) : dwarf::Reg::X86Fp(num);
+}
+
 void X86Mir2Lir::SpillCoreRegs() {
   if (num_core_spills_ == 0) {
     return;
@@ -735,11 +742,11 @@
       frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   OpSize size = cu_->target64 ? k64 : k32;
   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask; mask >>= 1, reg++) {
-    if (mask & 0x1) {
-      StoreBaseDisp(rs_rSP, offset,
-                    cu_->target64 ? RegStorage::Solo64(reg) :  RegStorage::Solo32(reg),
-                   size, kNotVolatile);
+  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+    if ((mask & 0x1) != 0u) {
+      RegStorage r_src = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg);
+      StoreBaseDisp(rs_rSP, offset, r_src, size, kNotVolatile);
+      cfi_.RelOffset(DwarfCoreReg(cu_->target64, reg), offset);
       offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
@@ -754,10 +761,11 @@
   int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   OpSize size = cu_->target64 ? k64 : k32;
   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask; mask >>= 1, reg++) {
-    if (mask & 0x1) {
-      LoadBaseDisp(rs_rSP, offset, cu_->target64 ? RegStorage::Solo64(reg) :  RegStorage::Solo32(reg),
-                   size, kNotVolatile);
+  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+    if ((mask & 0x1) != 0u) {
+      RegStorage r_dest = cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg);
+      LoadBaseDisp(rs_rSP, offset, r_dest, size, kNotVolatile);
+      cfi_.Restore(DwarfCoreReg(cu_->target64, reg));
       offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
@@ -771,9 +779,10 @@
   int offset = frame_size_ -
       (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask; mask >>= 1, reg++) {
-    if (mask & 0x1) {
+  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+    if ((mask & 0x1) != 0u) {
       StoreBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg), k64, kNotVolatile);
+      cfi_.RelOffset(DwarfFpReg(cu_->target64, reg), offset);
       offset += sizeof(double);
     }
   }
@@ -786,10 +795,11 @@
   int offset = frame_size_ -
       (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
   const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
-  for (int reg = 0; mask; mask >>= 1, reg++) {
-    if (mask & 0x1) {
+  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
+    if ((mask & 0x1) != 0u) {
       LoadBaseDisp(rs_rSP, offset, RegStorage::FloatSolo64(reg),
                    k64, kNotVolatile);
+      cfi_.Restore(DwarfFpReg(cu_->target64, reg));
       offset += sizeof(double);
     }
   }
@@ -825,22 +835,22 @@
 X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
     : Mir2Lir(cu, mir_graph, arena),
       in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this),
-      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
+      pc_rel_base_reg_(RegStorage::InvalidReg()),
+      pc_rel_base_reg_used_(false),
+      setup_pc_rel_base_reg_(nullptr),
       method_address_insns_(arena->Adapter()),
       class_type_address_insns_(arena->Adapter()),
       call_method_insns_(arena->Adapter()),
       dex_cache_access_insns_(arena->Adapter()),
-      stack_decrement_(nullptr), stack_increment_(nullptr),
       const_vectors_(nullptr) {
   method_address_insns_.reserve(100);
   class_type_address_insns_.reserve(100);
   call_method_insns_.reserve(100);
-  store_method_addr_used_ = false;
-    for (int i = 0; i < kX86Last; i++) {
-      DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i)
-          << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
-          << " is wrong: expecting " << i << ", seeing "
-          << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
+  for (int i = 0; i < kX86Last; i++) {
+    DCHECK_EQ(X86Mir2Lir::EncodingMap[i].opcode, i)
+        << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
+        << " is wrong: expecting " << i << ", seeing "
+        << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
   }
 }
 
@@ -925,14 +935,6 @@
              << ", orig: " << loc.orig_sreg;
 }
 
-void X86Mir2Lir::Materialize() {
-  // A good place to put the analysis before starting.
-  AnalyzeMIR();
-
-  // Now continue with regular code generation.
-  Mir2Lir::Materialize();
-}
-
 void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                    SpecialTargetRegister symbolic_reg) {
   /*
@@ -1107,7 +1109,8 @@
     // The offset to patch is the last 4 bytes of the instruction.
     int patch_offset = p->offset + p->flags.size - 4;
     DCHECK(!p->flags.is_nop);
-    patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file, p->offset, offset));
+    patches_.push_back(LinkerPatch::DexCacheArrayPatch(patch_offset, dex_file,
+                                                       p->target->offset, offset));
   }
 
   // And do the normal processing.
@@ -1317,6 +1320,11 @@
   if (!cu_->target64) {
     // EDI is promotable in 32-bit mode.
     NewLIR1(kX86Push32R, rs_rDI.GetReg());
+    cfi_.AdjustCFAOffset(4);
+    // Record cfi only if it is not already spilled.
+    if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+      cfi_.RelOffset(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()), 0);
+    }
   }
 
   if (zero_based) {
@@ -1412,8 +1420,13 @@
   // And join up at the end.
   all_done->target = NewLIR0(kPseudoTargetLabel);
 
-  if (!cu_->target64)
+  if (!cu_->target64) {
     NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+    cfi_.AdjustCFAOffset(-4);
+    if (!CoreSpillMaskContains(rs_rDI.GetReg())) {
+      cfi_.Restore(DwarfCoreReg(cu_->target64, rs_rDI.GetRegNum()));
+    }
+  }
 
   // Out of line code returns here.
   if (slowpath_branch != nullptr) {
@@ -1426,100 +1439,6 @@
   return true;
 }
 
-static bool ARTRegIDToDWARFRegID(bool is_x86_64, int art_reg_id, int* dwarf_reg_id) {
-  if (is_x86_64) {
-    switch (art_reg_id) {
-    case 3 : *dwarf_reg_id =  3; return true;  // %rbx
-    // This is the only discrepancy between ART & DWARF register numbering.
-    case 5 : *dwarf_reg_id =  6; return true;  // %rbp
-    case 12: *dwarf_reg_id = 12; return true;  // %r12
-    case 13: *dwarf_reg_id = 13; return true;  // %r13
-    case 14: *dwarf_reg_id = 14; return true;  // %r14
-    case 15: *dwarf_reg_id = 15; return true;  // %r15
-    default: return false;  // Should not get here
-    }
-  } else {
-    switch (art_reg_id) {
-    case 5: *dwarf_reg_id = 5; return true;  // %ebp
-    case 6: *dwarf_reg_id = 6; return true;  // %esi
-    case 7: *dwarf_reg_id = 7; return true;  // %edi
-    default: return false;  // Should not get here
-    }
-  }
-}
-
-std::vector<uint8_t>* X86Mir2Lir::ReturnFrameDescriptionEntry() {
-  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
-
-  // Generate the FDE for the method.
-  DCHECK_NE(data_offset_, 0U);
-
-  WriteFDEHeader(cfi_info, cu_->target64);
-  WriteFDEAddressRange(cfi_info, data_offset_, cu_->target64);
-
-  // The instructions in the FDE.
-  if (stack_decrement_ != nullptr) {
-    // Advance LOC to just past the stack decrement.
-    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
-    DW_CFA_advance_loc(cfi_info, pc);
-
-    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
-    DW_CFA_def_cfa_offset(cfi_info, frame_size_);
-
-    // Handle register spills
-    const uint32_t kSpillInstLen = (cu_->target64) ? 5 : 4;
-    const int kDataAlignmentFactor = (cu_->target64) ? -8 : -4;
-    uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-    int offset = -(GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
-    for (int reg = 0; mask; mask >>= 1, reg++) {
-      if (mask & 0x1) {
-        pc += kSpillInstLen;
-
-        // Advance LOC to pass this instruction
-        DW_CFA_advance_loc(cfi_info, kSpillInstLen);
-
-        int dwarf_reg_id;
-        if (ARTRegIDToDWARFRegID(cu_->target64, reg, &dwarf_reg_id)) {
-          // DW_CFA_offset_extended_sf reg offset
-          DW_CFA_offset_extended_sf(cfi_info, dwarf_reg_id, offset / kDataAlignmentFactor);
-        }
-
-        offset += GetInstructionSetPointerSize(cu_->instruction_set);
-      }
-    }
-
-    // We continue with that stack until the epilogue.
-    if (stack_increment_ != nullptr) {
-      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
-      DW_CFA_advance_loc(cfi_info, new_pc - pc);
-
-      // We probably have code snippets after the epilogue, so save the
-      // current state: DW_CFA_remember_state.
-      DW_CFA_remember_state(cfi_info);
-
-      // We have now popped the stack: DW_CFA_def_cfa_offset 4/8.
-      // There is only the return PC on the stack now.
-      DW_CFA_def_cfa_offset(cfi_info, GetInstructionSetPointerSize(cu_->instruction_set));
-
-      // Everything after that is the same as before the epilogue.
-      // Stack bump was followed by RET instruction.
-      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
-      if (post_ret_insn != nullptr) {
-        pc = new_pc;
-        new_pc = post_ret_insn->offset;
-        DW_CFA_advance_loc(cfi_info, new_pc - pc);
-        // Restore the state: DW_CFA_restore_state.
-        DW_CFA_restore_state(cfi_info);
-      }
-    }
-  }
-
-  PadCFI(cfi_info);
-  WriteCFILength(cfi_info, cu_->target64);
-
-  return cfi_info;
-}
-
 void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
   switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
     case kMirOpReserveVectorRegisters:
@@ -1656,20 +1575,17 @@
   LIR* load;
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
   if (cu_->target64) {
-    load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */);
+    load = NewLIR3(opcode, reg, kRIPReg, kDummy32BitOffset);
   } else {
-    // Address the start of the method.
-    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    if (rl_method.wide) {
-      rl_method = LoadValueWide(rl_method, kCoreReg);
-    } else {
-      rl_method = LoadValue(rl_method, kCoreReg);
+    // Get the PC to a register and get the anchor.
+    LIR* anchor;
+    RegStorage r_pc = GetPcAndAnchor(&anchor);
+
+    load = NewLIR3(opcode, reg, r_pc.GetReg(), kDummy32BitOffset);
+    load->operands[4] = WrapPointer(anchor);
+    if (IsTemp(r_pc)) {
+      FreeTemp(r_pc);
     }
-
-    load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
-
-    // The literal pool needs position independent logic.
-    store_method_addr_used_ = true;
   }
   load->flags.fixup = kFixupLoad;
   load->target = data_target;
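SpillCoreRegs()/UnSpillCoreRegs() now describe each save slot to cfi_ as they walk the spill mask: RelOffset(reg, offset) records the register at cfa-(frame_size_ - offset). A small sketch of that walk with an invented mask; the expected x86 dump near the top starts at esp+52 rather than esp+56 presumably because the return-address bit also participates in num_core_spills_.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t mask = (1u << 5) | (1u << 6);  // Say ebp and esi are callee-saves.
  const int frame_size = 64, ptr_size = 4;
  int spills = 0;
  for (uint32_t m = mask; m != 0u; m >>= 1) spills += static_cast<int>(m & 1u);
  int offset = frame_size - ptr_size * spills;
  for (int reg = 0; mask != 0u; mask >>= 1, reg++) {
    if ((mask & 0x1) != 0u) {
      // StoreBaseDisp(rs_rSP, offset, ...) followed by cfi_.RelOffset(...):
      printf("store r%d at [sp + %d]  -> .cfi_offset: r%d at cfa-%d\n",
             reg, offset, reg, frame_size - offset);
      offset += ptr_size;
    }
  }
  return 0;
}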
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 893b98a..efcb9ee 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -17,6 +17,7 @@
 #include "codegen_x86.h"
 
 #include "base/logging.h"
+#include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/dataflow_iterator-inl.h"
 #include "dex/quick/dex_file_method_inliner.h"
@@ -574,7 +575,7 @@
       DCHECK(r_dest.IsDouble());
       if (value == 0) {
         return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val);
-      } else if (base_of_code_ != nullptr || cu_->target64) {
+      } else if (pc_rel_base_reg_.Valid() || cu_->target64) {
         // We will load the value from the literal area.
         LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
         if (data_target == NULL) {
@@ -589,17 +590,16 @@
         if (cu_->target64) {
           res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */);
         } else {
-          // Address the start of the method.
-          RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-          if (rl_method.wide) {
-            rl_method = LoadValueWide(rl_method, kCoreReg);
-          } else {
-            rl_method = LoadValue(rl_method, kCoreReg);
-          }
+          // Get the PC to a register and get the anchor.
+          LIR* anchor;
+          RegStorage r_pc = GetPcAndAnchor(&anchor);
 
-          res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
+          res = LoadBaseDisp(r_pc, kDummy32BitOffset, RegStorage::FloatSolo64(low_reg_val),
                              kDouble, kNotVolatile);
-          store_method_addr_used_ = true;
+          res->operands[4] = WrapPointer(anchor);
+          if (IsTemp(r_pc)) {
+            FreeTemp(r_pc);
+          }
         }
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
@@ -954,82 +954,14 @@
   return branch;
 }
 
-void X86Mir2Lir::AnalyzeMIR() {
-  // Assume we don't need a pointer to the base of the code.
-  cu_->NewTimingSplit("X86 MIR Analysis");
-  store_method_addr_ = false;
-
-  // Walk the MIR looking for interesting items.
-  PreOrderDfsIterator iter(mir_graph_);
-  BasicBlock* curr_bb = iter.Next();
-  while (curr_bb != NULL) {
-    AnalyzeBB(curr_bb);
-    curr_bb = iter.Next();
-  }
-
-  // Did we need a pointer to the method code?  Not in 64 bit mode.
-  base_of_code_ = nullptr;
-
-  // store_method_addr_ must be false for x86_64, since RIP addressing is used.
-  CHECK(!(cu_->target64 && store_method_addr_));
-  if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
-    DCHECK(base_of_code_ != nullptr);
-  }
-}
-
-void X86Mir2Lir::AnalyzeBB(BasicBlock* bb) {
-  if (bb->block_type == kDead) {
-    // Ignore dead blocks
+void X86Mir2Lir::AnalyzeMIR(RefCounts* core_counts, MIR* mir, uint32_t weight) {
+  if (cu_->target64) {
+    Mir2Lir::AnalyzeMIR(core_counts, mir, weight);
     return;
   }
 
-  for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
-    int opcode = mir->dalvikInsn.opcode;
-    if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
-      AnalyzeExtendedMIR(opcode, bb, mir);
-    } else {
-      AnalyzeMIR(opcode, bb, mir);
-    }
-  }
-}
-
-
-void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
-  switch (opcode) {
-    // Instructions referencing doubles.
-    case kMirOpFusedCmplDouble:
-    case kMirOpFusedCmpgDouble:
-      AnalyzeFPInstruction(opcode, bb, mir);
-      break;
-    case kMirOpConstVector:
-      if (!cu_->target64) {
-        store_method_addr_ = true;
-      }
-      break;
-    case kMirOpPackedMultiply:
-    case kMirOpPackedShiftLeft:
-    case kMirOpPackedSignedShiftRight:
-    case kMirOpPackedUnsignedShiftRight:
-      if (!cu_->target64) {
-        // Byte emulation requires constants from the literal pool.
-        OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-        if (opsize == kSignedByte || opsize == kUnsignedByte) {
-          store_method_addr_ = true;
-        }
-      }
-      break;
-    default:
-      // Ignore the rest.
-      break;
-  }
-}
-
-void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
-  // Looking for
-  // - Do we need a pointer to the code (used for packed switches and double lits)?
-  // 64 bit uses RIP addressing instead.
-
+  int opcode = mir->dalvikInsn.opcode;
+  bool uses_pc_rel_load = false;
   switch (opcode) {
     // Instructions referencing doubles.
     case Instruction::CMPL_DOUBLE:
@@ -1045,34 +977,62 @@
     case Instruction::MUL_DOUBLE_2ADDR:
     case Instruction::DIV_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE_2ADDR:
-      AnalyzeFPInstruction(opcode, bb, mir);
+    case kMirOpFusedCmplDouble:
+    case kMirOpFusedCmpgDouble:
+      uses_pc_rel_load = AnalyzeFPInstruction(opcode, mir);
       break;
 
-    // Packed switches and array fills need a pointer to the base of the method.
-    case Instruction::FILL_ARRAY_DATA:
+    // Packed switch needs the PC-relative pointer if it's large.
     case Instruction::PACKED_SWITCH:
-      if (!cu_->target64) {
-        store_method_addr_ = true;
+      if (mir_graph_->GetTable(mir, mir->dalvikInsn.vB)[1] > kSmallSwitchThreshold) {
+        uses_pc_rel_load = true;
       }
       break;
+
+    case kMirOpConstVector:
+      uses_pc_rel_load = true;
+      break;
+    case kMirOpPackedMultiply:
+    case kMirOpPackedShiftLeft:
+    case kMirOpPackedSignedShiftRight:
+    case kMirOpPackedUnsignedShiftRight:
+      {
+        // Byte emulation requires constants from the literal pool.
+        OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+        if (opsize == kSignedByte || opsize == kUnsignedByte) {
+          uses_pc_rel_load = true;
+        }
+      }
+      break;
+
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE:
-      AnalyzeInvokeStatic(opcode, bb, mir);
-      break;
+      if (mir_graph_->GetMethodLoweringInfo(mir).IsIntrinsic()) {
+        uses_pc_rel_load = AnalyzeInvokeStaticIntrinsic(mir);
+        break;
+      }
+      FALLTHROUGH_INTENDED;
     default:
-      // Other instructions are not interesting yet.
+      Mir2Lir::AnalyzeMIR(core_counts, mir, weight);
       break;
   }
+
+  if (uses_pc_rel_load) {
+    DCHECK(pc_rel_temp_ != nullptr);
+    core_counts[SRegToPMap(pc_rel_temp_->s_reg_low)].count += weight;
+  }
 }
 
-void X86Mir2Lir::AnalyzeFPInstruction(int opcode, BasicBlock* bb, MIR* mir) {
-  UNUSED(bb);
+bool X86Mir2Lir::AnalyzeFPInstruction(int opcode, MIR* mir) {
+  DCHECK(!cu_->target64);
   // Look at all the uses, and see if they are double constants.
   uint64_t attrs = MIRGraph::GetDataFlowAttributes(static_cast<Instruction::Code>(opcode));
   int next_sreg = 0;
   if (attrs & DF_UA) {
     if (attrs & DF_A_WIDE) {
-      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
+        return true;
+      }
       next_sreg += 2;
     } else {
       next_sreg++;
@@ -1080,7 +1040,9 @@
   }
   if (attrs & DF_UB) {
     if (attrs & DF_B_WIDE) {
-      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
+        return true;
+      }
       next_sreg += 2;
     } else {
       next_sreg++;
@@ -1088,15 +1050,39 @@
   }
   if (attrs & DF_UC) {
     if (attrs & DF_C_WIDE) {
-      AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg));
+      if (AnalyzeDoubleUse(mir_graph_->GetSrcWide(mir, next_sreg))) {
+        return true;
+      }
     }
   }
+  return false;
 }
 
-void X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
+inline bool X86Mir2Lir::AnalyzeDoubleUse(RegLocation use) {
   // If this is a double literal, we will want it in the literal pool on 32b platforms.
-  if (use.is_const && !cu_->target64) {
-    store_method_addr_ = true;
+  DCHECK(!cu_->target64);
+  return use.is_const;
+}
+
+bool X86Mir2Lir::AnalyzeInvokeStaticIntrinsic(MIR* mir) {
+  // 64 bit RIP addressing doesn't need this analysis.
+  DCHECK(!cu_->target64);
+
+  // Retrieve the type of the intrinsic.
+  MethodReference method_ref = mir_graph_->GetMethodLoweringInfo(mir).GetTargetMethod();
+  DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
+  DexFileMethodInliner* method_inliner =
+    cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(method_ref.dex_file);
+  InlineMethod method;
+  bool is_intrinsic = method_inliner->IsIntrinsic(method_ref.dex_method_index, &method);
+  DCHECK(is_intrinsic);
+
+  switch (method.opcode) {
+    case kIntrinsicAbsDouble:
+    case kIntrinsicMinMaxDouble:
+      return true;
+    default:
+      return false;
   }
 }
 
@@ -1128,31 +1114,6 @@
   return loc;
 }
 
-void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) {
-  UNUSED(opcode, bb);
-
-  // 64 bit RIP addressing doesn't need store_method_addr_ set.
-  if (cu_->target64) {
-    return;
-  }
-
-  uint32_t index = mir->dalvikInsn.vB;
-  DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-  DexFileMethodInliner* method_inliner =
-    cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
-  InlineMethod method;
-  if (method_inliner->IsIntrinsic(index, &method)) {
-    switch (method.opcode) {
-      case kIntrinsicAbsDouble:
-      case kIntrinsicMinMaxDouble:
-        store_method_addr_ = true;
-        break;
-      default:
-        break;
-    }
-  }
-}
-
 LIR* X86Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) {
   UNUSED(r_tgt);  // Call to absolute memory location doesn't need a temporary target register.
   if (cu_->target64) {
@@ -1162,4 +1123,39 @@
   }
 }
 
+void X86Mir2Lir::CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs) {
+  // Start with the default counts.
+  Mir2Lir::CountRefs(core_counts, fp_counts, num_regs);
+
+  if (pc_rel_temp_ != nullptr) {
+    // Now, if the PC-relative base temp is used only once outside any loops (weight = 1),
+    // avoid the promotion; otherwise boost the weight by a factor of 2, because the full
+    // PC-relative load sequence is 3 instructions long and promoting the PC base saves
+    // 2 instructions per use.
+    int p_map_idx = SRegToPMap(pc_rel_temp_->s_reg_low);
+    if (core_counts[p_map_idx].count == 1) {
+      core_counts[p_map_idx].count = 0;
+    } else {
+      core_counts[p_map_idx].count *= 2;
+    }
+  }
+}
+
+void X86Mir2Lir::DoPromotion() {
+  if (!cu_->target64) {
+    pc_rel_temp_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
+  }
+
+  Mir2Lir::DoPromotion();
+
+  if (pc_rel_temp_ != nullptr) {
+    // Now, if the dex cache array base temp is promoted, remember the register but
+    // always remove the temp's stack location to avoid unnecessarily bloating the stack.
+    pc_rel_base_reg_ = mir_graph_->reg_location_[pc_rel_temp_->s_reg_low].reg;
+    DCHECK(!pc_rel_base_reg_.Valid() || !pc_rel_base_reg_.IsFloat());
+    mir_graph_->RemoveLastCompilerTemp(kCompilerTempBackend, false, pc_rel_temp_);
+    pc_rel_temp_ = nullptr;
+  }
+}
+
 }  // namespace art
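
A minimal standalone sketch of the weighting heuristic that CountRefs() applies above; RefCount and AdjustPcRelBaseWeight are simplified stand-ins for illustration, not actual Mir2Lir API. A single non-loop use (weight 1) cannot amortize a promoted base register, so the temp is excluded from promotion; any heavier use is double-weighted, since each promoted use saves 2 of the 3 instructions in the PC-relative load sequence.

struct RefCount { int count; };

// Weight adjustment for the PC-relative base temp, mirroring CountRefs() above.
inline void AdjustPcRelBaseWeight(RefCount* pc_rel) {
  if (pc_rel->count == 1) {
    pc_rel->count = 0;   // Used once outside loops: promotion does not pay off.
  } else {
    pc_rel->count *= 2;  // Each promoted use saves 2 of 3 instructions.
  }
}
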
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 7dea09a..57db015 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -635,8 +635,6 @@
   kX86CallT,            // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
   kX86CallI,            // call <relative> - 0: disp; Used for core.oat linking only
   kX86Ret,              // ret; no lir operands
-  kX86StartOfMethod,    // call 0; pop reg; sub reg, # - generate start of method into reg
-                        // lir operands - 0: reg
   kX86PcRelLoadRA,      // mov reg, [base + index * scale + PC relative displacement]
                         // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
   kX86PcRelAdr,         // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
@@ -670,7 +668,6 @@
   kRegMemCond,                             // RM instruction kind followed by a condition.
   kJmp, kJcc, kCall,                       // Branch instruction kinds.
   kPcRel,                                  // Operation with displacement that is PC relative
-  kMacro,                                  // An instruction composing multiple others
   kUnimplemented                           // Encoding used when an instruction isn't yet implemented.
 };
 
diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc
index 5b90ba9..ae814b4 100644
--- a/compiler/dex/verified_method.cc
+++ b/compiler/dex/verified_method.cc
@@ -222,7 +222,7 @@
     } else if (IsInstructionIGetQuickOrIPutQuick(inst->Opcode())) {
       uint32_t dex_pc = inst->GetDexPc(insns);
       verifier::RegisterLine* line = method_verifier->GetRegLine(dex_pc);
-      mirror::ArtField* field = method_verifier->GetQuickFieldAccess(inst, line);
+      ArtField* field = method_verifier->GetQuickFieldAccess(inst, line);
       if (field == nullptr) {
         // It can be null if the line wasn't verified since it was unreachable.
         return false;
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 8babc28..b4d4695 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -19,12 +19,11 @@
 
 #include "compiler_driver.h"
 
+#include "art_field-inl.h"
 #include "dex_compilation_unit.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
-#include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 
@@ -65,12 +64,12 @@
   return ResolveClass(soa, dex_cache, class_loader, referrer_method_id.class_idx_, mUnit);
 }
 
-inline mirror::ArtField* CompilerDriver::ResolveFieldWithDexFile(
+inline ArtField* CompilerDriver::ResolveFieldWithDexFile(
     const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexFile* dex_file,
     uint32_t field_idx, bool is_static) {
   DCHECK_EQ(dex_cache->GetDexFile(), dex_file);
-  mirror::ArtField* resolved_field = Runtime::Current()->GetClassLinker()->ResolveField(
+  ArtField* resolved_field = Runtime::Current()->GetClassLinker()->ResolveField(
       *dex_file, field_idx, dex_cache, class_loader, is_static);
   DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending());
   if (UNLIKELY(resolved_field == nullptr)) {
@@ -90,7 +89,7 @@
   return Runtime::Current()->GetClassLinker()->FindDexCache(*dex_file);
 }
 
-inline mirror::ArtField* CompilerDriver::ResolveField(
+inline ArtField* CompilerDriver::ResolveField(
     const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
     uint32_t field_idx, bool is_static) {
@@ -100,7 +99,7 @@
 }
 
 inline void CompilerDriver::GetResolvedFieldDexFileLocation(
-    mirror::ArtField* resolved_field, const DexFile** declaring_dex_file,
+    ArtField* resolved_field, const DexFile** declaring_dex_file,
     uint16_t* declaring_class_idx, uint16_t* declaring_field_idx) {
   mirror::Class* declaring_class = resolved_field->GetDeclaringClass();
   *declaring_dex_file = declaring_class->GetDexCache()->GetDexFile();
@@ -108,17 +107,17 @@
   *declaring_field_idx = resolved_field->GetDexFieldIndex();
 }
 
-inline bool CompilerDriver::IsFieldVolatile(mirror::ArtField* field) {
+inline bool CompilerDriver::IsFieldVolatile(ArtField* field) {
   return field->IsVolatile();
 }
 
-inline MemberOffset CompilerDriver::GetFieldOffset(mirror::ArtField* field) {
+inline MemberOffset CompilerDriver::GetFieldOffset(ArtField* field) {
   return field->GetOffset();
 }
 
 inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    mirror::ArtField* resolved_field, uint16_t field_idx) {
+    ArtField* resolved_field, uint16_t field_idx) {
   DCHECK(!resolved_field->IsStatic());
   mirror::Class* fields_class = resolved_field->GetDeclaringClass();
   bool fast_get = referrer_class != nullptr &&
@@ -130,7 +129,7 @@
 
 inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-    mirror::ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
+    ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index) {
   DCHECK(resolved_field->IsStatic());
   if (LIKELY(referrer_class != nullptr)) {
     mirror::Class* fields_class = resolved_field->GetDeclaringClass();
@@ -177,14 +176,14 @@
 }
 
 inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer_class,
-                                                         mirror::ArtField* resolved_field) {
+                                                         ArtField* resolved_field) {
   DCHECK(resolved_field->IsStatic());
   mirror::Class* fields_class = resolved_field->GetDeclaringClass();
   return referrer_class == fields_class;
 }
 
 inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class,
-                                                           mirror::ArtField* resolved_field) {
+                                                           ArtField* resolved_field) {
   DCHECK(resolved_field->IsStatic());
   mirror::Class* fields_class = resolved_field->GetDeclaringClass();
   return fields_class == referrer_class || fields_class->IsInitialized();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f6b217a..6d79248 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -27,6 +27,7 @@
 #include <malloc.h>  // For mallinfo
 #endif
 
+#include "art_field-inl.h"
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
 #include "class_linker.h"
@@ -40,6 +41,7 @@
 #include "dex/verified_method.h"
 #include "dex/quick/dex_file_method_inliner.h"
 #include "driver/compiler_options.h"
+#include "elf_writer_quick.h"
 #include "jni_internal.h"
 #include "object_lock.h"
 #include "profiler.h"
@@ -47,7 +49,6 @@
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/space/space.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/class-inl.h"
@@ -72,6 +73,9 @@
 
 static constexpr bool kTimeCompileMethod = !kIsDebugBuild;
 
+// Whether to produce 64-bit ELF files for 64-bit targets. Leave this off for now.
+static constexpr bool kProduce64BitELFFiles = false;
+
 static double Percentage(size_t x, size_t y) {
   return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y));
 }
@@ -1179,7 +1183,7 @@
 DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
   // Currently only image dex caches have fixed array layout.
   return IsImage() && GetSupportBootImageFixup()
-      ? DexCacheArraysLayout(dex_file)
+      ? DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file)
       : DexCacheArraysLayout();
 }
 
@@ -1205,12 +1209,11 @@
   stats_->ProcessedInvoke(invoke_type, flags);
 }
 
-mirror::ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
-                                                           const DexCompilationUnit* mUnit,
-                                                           bool is_put,
-                                                           const ScopedObjectAccess& soa) {
+ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
+                                                   const DexCompilationUnit* mUnit, bool is_put,
+                                                   const ScopedObjectAccess& soa) {
   // Try to resolve the field and compiling method's class.
-  mirror::ArtField* resolved_field;
+  ArtField* resolved_field;
   mirror::Class* referrer_class;
   mirror::DexCache* dex_cache;
   {
@@ -1219,11 +1222,10 @@
         hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
-    Handle<mirror::ArtField> resolved_field_handle(hs.NewHandle(
-        ResolveField(soa, dex_cache_handle, class_loader_handle, mUnit, field_idx, false)));
-    referrer_class = (resolved_field_handle.Get() != nullptr)
+    resolved_field =
+        ResolveField(soa, dex_cache_handle, class_loader_handle, mUnit, field_idx, false);
+    referrer_class = resolved_field != nullptr
         ? ResolveCompilingMethodsClass(soa, dex_cache_handle, class_loader_handle, mUnit) : nullptr;
-    resolved_field = resolved_field_handle.Get();
     dex_cache = dex_cache_handle.Get();
   }
   bool can_link = false;
@@ -1240,11 +1242,9 @@
                                               bool is_put, MemberOffset* field_offset,
                                               bool* is_volatile) {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::ArtField> resolved_field =
-      hs.NewHandle(ComputeInstanceFieldInfo(field_idx, mUnit, is_put, soa));
+  ArtField* resolved_field = ComputeInstanceFieldInfo(field_idx, mUnit, is_put, soa);
 
-  if (resolved_field.Get() == nullptr) {
+  if (resolved_field == nullptr) {
     // Conservative defaults.
     *is_volatile = true;
     *field_offset = MemberOffset(static_cast<size_t>(-1));
@@ -1263,20 +1263,19 @@
                                             Primitive::Type* type) {
   ScopedObjectAccess soa(Thread::Current());
   // Try to resolve the field and compiling method's class.
-  mirror::ArtField* resolved_field;
+  ArtField* resolved_field;
   mirror::Class* referrer_class;
   mirror::DexCache* dex_cache;
   {
-    StackHandleScope<3> hs(soa.Self());
+    StackHandleScope<2> hs(soa.Self());
     Handle<mirror::DexCache> dex_cache_handle(
         hs.NewHandle(mUnit->GetClassLinker()->FindDexCache(*mUnit->GetDexFile())));
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader*>(mUnit->GetClassLoader())));
-    Handle<mirror::ArtField> resolved_field_handle(hs.NewHandle(
-        ResolveField(soa, dex_cache_handle, class_loader_handle, mUnit, field_idx, true)));
-    referrer_class = (resolved_field_handle.Get() != nullptr)
+    resolved_field =
+        ResolveField(soa, dex_cache_handle, class_loader_handle, mUnit, field_idx, true);
+    referrer_class = resolved_field != nullptr
         ? ResolveCompilingMethodsClass(soa, dex_cache_handle, class_loader_handle, mUnit) : nullptr;
-    resolved_field = resolved_field_handle.Get();
     dex_cache = dex_cache_handle.Get();
   }
   bool result = false;
@@ -1724,7 +1723,7 @@
     ClassDataItemIterator it(dex_file, class_data);
     while (it.HasNextStaticField()) {
       if (resolve_fields_and_methods) {
-        mirror::ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
+        ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
                                                              dex_cache, class_loader, true);
         if (field == nullptr) {
           CheckAndClearResolveException(soa.Self());
@@ -1739,7 +1738,7 @@
         requires_constructor_barrier = true;
       }
       if (resolve_fields_and_methods) {
-        mirror::ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
+        ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(),
                                                              dex_cache, class_loader, false);
         if (field == nullptr) {
           CheckAndClearResolveException(soa.Self());
@@ -2368,46 +2367,12 @@
                               OatWriter* oat_writer,
                               art::File* file)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return compiler_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
-}
-void CompilerDriver::InstructionSetToLLVMTarget(InstructionSet instruction_set,
-                                                std::string* target_triple,
-                                                std::string* target_cpu,
-                                                std::string* target_attr) {
-  switch (instruction_set) {
-    case kThumb2:
-      *target_triple = "thumb-none-linux-gnueabi";
-      *target_cpu = "cortex-a9";
-      *target_attr = "+thumb2,+neon,+neonfp,+vfp3,+db";
-      break;
-
-    case kArm:
-      *target_triple = "armv7-none-linux-gnueabi";
-      // TODO: Fix for Nexus S.
-      *target_cpu = "cortex-a9";
-      // TODO: Fix for Xoom.
-      *target_attr = "+v7,+neon,+neonfp,+vfp3,+db";
-      break;
-
-    case kX86:
-      *target_triple = "i386-pc-linux-gnu";
-      *target_attr = "";
-      break;
-
-    case kX86_64:
-      *target_triple = "x86_64-pc-linux-gnu";
-      *target_attr = "";
-      break;
-
-    case kMips:
-      *target_triple = "mipsel-unknown-linux";
-      *target_attr = "mips32r2";
-      break;
-
-    default:
-      LOG(FATAL) << "Unknown instruction set: " << instruction_set;
-    }
+  if (kProduce64BitELFFiles && Is64BitInstructionSet(GetInstructionSet())) {
+    return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host, *this);
+  } else {
+    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host, *this);
   }
+}
 
 bool CompilerDriver::SkipCompilation(const std::string& method_name) {
   if (!profile_present_) {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index edd1bd2..f1066a5 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -239,14 +239,14 @@
 
   // Resolve a field. Returns nullptr on failure, including incompatible class change.
   // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static.
-  mirror::ArtField* ResolveField(
+  ArtField* ResolveField(
       const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
       Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
       uint32_t field_idx, bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a field with a given dex file.
-  mirror::ArtField* ResolveFieldWithDexFile(
+  ArtField* ResolveFieldWithDexFile(
       const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
       Handle<mirror::ClassLoader> class_loader, const DexFile* dex_file,
       uint32_t field_idx, bool is_static)
@@ -254,12 +254,12 @@
 
   // Get declaration location of a resolved field.
   void GetResolvedFieldDexFileLocation(
-      mirror::ArtField* resolved_field, const DexFile** declaring_dex_file,
+      ArtField* resolved_field, const DexFile** declaring_dex_file,
       uint16_t* declaring_class_idx, uint16_t* declaring_field_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  bool IsFieldVolatile(mirror::ArtField* field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  MemberOffset GetFieldOffset(mirror::ArtField* field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool IsFieldVolatile(ArtField* field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  MemberOffset GetFieldOffset(ArtField* field) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Find a dex cache for a dex file.
   inline mirror::DexCache* FindDexCache(const DexFile* dex_file)
@@ -268,23 +268,23 @@
   // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset.
   std::pair<bool, bool> IsFastInstanceField(
       mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-      mirror::ArtField* resolved_field, uint16_t field_idx)
+      ArtField* resolved_field, uint16_t field_idx)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Can we fast-path an SGET/SPUT access to a static field? If yes, compute the type index
   // of the declaring class in the referrer's dex file.
   std::pair<bool, bool> IsFastStaticField(
       mirror::DexCache* dex_cache, mirror::Class* referrer_class,
-      mirror::ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index)
+      ArtField* resolved_field, uint16_t field_idx, uint32_t* storage_index)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Is static field's in referrer's class?
-  bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, mirror::ArtField* resolved_field)
+  bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, ArtField* resolved_field)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Is static field's class initialized?
   bool IsStaticFieldsClassInitialized(mirror::Class* referrer_class,
-                                      mirror::ArtField* resolved_field)
+                                      ArtField* resolved_field)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a method. Returns nullptr on failure, including incompatible class change.
@@ -331,7 +331,7 @@
 
   void ComputeFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
                         const ScopedObjectAccess& soa, bool is_static,
-                        mirror::ArtField** resolved_field,
+                        ArtField** resolved_field,
                         mirror::Class** referrer_class,
                         mirror::DexCache** dex_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -341,7 +341,7 @@
                                 MemberOffset* field_offset, bool* is_volatile)
       LOCKS_EXCLUDED(Locks::mutator_lock_);
 
-  mirror::ArtField* ComputeInstanceFieldInfo(uint32_t field_idx,
+  ArtField* ComputeInstanceFieldInfo(uint32_t field_idx,
                                              const DexCompilationUnit* mUnit,
                                              bool is_put,
                                              const ScopedObjectAccess& soa)
@@ -385,12 +385,6 @@
                 OatWriter* oat_writer,
                 File* file);
 
-  // TODO: move to a common home for llvm helpers once quick/portable are merged.
-  static void InstructionSetToLLVMTarget(InstructionSet instruction_set,
-                                         std::string* target_triple,
-                                         std::string* target_cpu,
-                                         std::string* target_attr);
-
   void SetCompilerContext(void* compiler_context) {
     compiler_context_ = compiler_context;
   }
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 5ebc029..7200cda 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -132,7 +132,7 @@
   }
   EXPECT_EQ(dex.NumFieldIds(), dex_cache->NumResolvedFields());
   for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-    mirror::ArtField* field = dex_cache->GetResolvedField(i);
+    ArtField* field = Runtime::Current()->GetClassLinker()->GetResolvedField(i, dex_cache);
     EXPECT_TRUE(field != NULL) << "field_idx=" << i
                                << " " << dex.GetFieldDeclaringClassDescriptor(dex.GetFieldId(i))
                                << " " << dex.GetFieldName(dex.GetFieldId(i));
diff --git a/compiler/dwarf/debug_frame_opcode_writer.h b/compiler/dwarf/debug_frame_opcode_writer.h
index cc4ef8f..d0d1821 100644
--- a/compiler/dwarf/debug_frame_opcode_writer.h
+++ b/compiler/dwarf/debug_frame_opcode_writer.h
@@ -20,6 +20,7 @@
 #include "dwarf.h"
 #include "register.h"
 #include "writer.h"
+#include "utils.h"
 
 namespace art {
 namespace dwarf {
@@ -41,45 +42,51 @@
   static constexpr int kCodeAlignmentFactor = 1;
 
   // Explicitly advance the program counter to the given location.
-  void AdvancePC(int absolute_pc) {
+  void ALWAYS_INLINE AdvancePC(int absolute_pc) {
     DCHECK_GE(absolute_pc, current_pc_);
-    int delta = FactorCodeOffset(absolute_pc - current_pc_);
-    if (delta != 0) {
-      if (delta <= 0x3F) {
-        this->PushUint8(DW_CFA_advance_loc | delta);
-      } else if (delta <= UINT8_MAX) {
-        this->PushUint8(DW_CFA_advance_loc1);
-        this->PushUint8(delta);
-      } else if (delta <= UINT16_MAX) {
-        this->PushUint8(DW_CFA_advance_loc2);
-        this->PushUint16(delta);
-      } else {
-        this->PushUint8(DW_CFA_advance_loc4);
-        this->PushUint32(delta);
+    if (UNLIKELY(enabled_)) {
+      int delta = FactorCodeOffset(absolute_pc - current_pc_);
+      if (delta != 0) {
+        if (delta <= 0x3F) {
+          this->PushUint8(DW_CFA_advance_loc | delta);
+        } else if (delta <= UINT8_MAX) {
+          this->PushUint8(DW_CFA_advance_loc1);
+          this->PushUint8(delta);
+        } else if (delta <= UINT16_MAX) {
+          this->PushUint8(DW_CFA_advance_loc2);
+          this->PushUint16(delta);
+        } else {
+          this->PushUint8(DW_CFA_advance_loc4);
+          this->PushUint32(delta);
+        }
       }
+      current_pc_ = absolute_pc;
     }
-    current_pc_ = absolute_pc;
   }
 
   // Override this method to automatically advance the PC before each opcode.
   virtual void ImplicitlyAdvancePC() { }
 
   // Common alias in assemblers - spill relative to current stack pointer.
-  void RelOffset(Reg reg, int offset) {
+  void ALWAYS_INLINE RelOffset(Reg reg, int offset) {
     Offset(reg, offset - current_cfa_offset_);
   }
 
   // Common alias in assemblers - increase stack frame size.
-  void AdjustCFAOffset(int delta) {
+  void ALWAYS_INLINE AdjustCFAOffset(int delta) {
     DefCFAOffset(current_cfa_offset_ + delta);
   }
 
   // Custom alias - spill many registers based on bitmask.
-  void RelOffsetForMany(Reg reg_base, int offset, uint32_t reg_mask,
-                        int reg_size) {
+  void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset,
+                                      uint32_t reg_mask, int reg_size) {
     DCHECK(reg_size == 4 || reg_size == 8);
-    for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
-      if ((reg_mask & 1) != 0u) {
+    if (UNLIKELY(enabled_)) {
+      for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
+        // Skip zero bits and go to the set bit.
+        int num_zeros = CTZ(reg_mask);
+        i += num_zeros;
+        reg_mask >>= num_zeros;
         RelOffset(Reg(reg_base.num() + i), offset);
         offset += reg_size;
       }
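
The loop above is subtle because the for-header still shifts by one bit and increments i after each visited bit; a self-contained sketch of the same set-bit walk, with __builtin_ctz standing in for ART's CTZ() from utils.h (both are undefined for a zero argument, which the loop condition rules out):

#include <cstdint>

// Visit the index of every set bit in 'mask', lowest bit first.
template <typename Fn>
void ForEachSetBit(uint32_t mask, Fn&& visit) {
  for (int i = 0; mask != 0u; mask >>= 1, i++) {
    int num_zeros = __builtin_ctz(mask);  // Skip the run of zero bits.
    i += num_zeros;
    mask >>= num_zeros;
    visit(i);  // Bit 0 of 'mask' is now set; i is its original index.
  }
}
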
@@ -87,171 +94,214 @@
   }
 
   // Custom alias - unspill many registers based on bitmask.
-  void RestoreMany(Reg reg_base, uint32_t reg_mask) {
-    for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
-      if ((reg_mask & 1) != 0u) {
+  void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) {
+    if (UNLIKELY(enabled_)) {
+      for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) {
+        // Skip zero bits and go to the set bit.
+        int num_zeros = CTZ(reg_mask);
+        i += num_zeros;
+        reg_mask >>= num_zeros;
         Restore(Reg(reg_base.num() + i));
       }
     }
   }
 
-  void Nop() {
-    this->PushUint8(DW_CFA_nop);
+  void ALWAYS_INLINE Nop() {
+    if (UNLIKELY(enabled_)) {
+      this->PushUint8(DW_CFA_nop);
+    }
   }
 
-  void Offset(Reg reg, int offset) {
-    ImplicitlyAdvancePC();
-    int factored_offset = FactorDataOffset(offset);  // May change sign.
-    if (factored_offset >= 0) {
-      if (0 <= reg.num() && reg.num() <= 0x3F) {
-        this->PushUint8(DW_CFA_offset | reg.num());
-        this->PushUleb128(factored_offset);
+  void ALWAYS_INLINE Offset(Reg reg, int offset) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      int factored_offset = FactorDataOffset(offset);  // May change sign.
+      if (factored_offset >= 0) {
+        if (0 <= reg.num() && reg.num() <= 0x3F) {
+          this->PushUint8(DW_CFA_offset | reg.num());
+          this->PushUleb128(factored_offset);
+        } else {
+          this->PushUint8(DW_CFA_offset_extended);
+          this->PushUleb128(reg.num());
+          this->PushUleb128(factored_offset);
+        }
       } else {
-        this->PushUint8(DW_CFA_offset_extended);
+        uses_dwarf3_features_ = true;
+        this->PushUint8(DW_CFA_offset_extended_sf);
         this->PushUleb128(reg.num());
-        this->PushUleb128(factored_offset);
+        this->PushSleb128(factored_offset);
       }
-    } else {
-      uses_dwarf3_features_ = true;
-      this->PushUint8(DW_CFA_offset_extended_sf);
-      this->PushUleb128(reg.num());
-      this->PushSleb128(factored_offset);
     }
   }
 
-  void Restore(Reg reg) {
-    ImplicitlyAdvancePC();
-    if (0 <= reg.num() && reg.num() <= 0x3F) {
-      this->PushUint8(DW_CFA_restore | reg.num());
-    } else {
-      this->PushUint8(DW_CFA_restore_extended);
+  void ALWAYS_INLINE Restore(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      if (0 <= reg.num() && reg.num() <= 0x3F) {
+        this->PushUint8(DW_CFA_restore | reg.num());
+      } else {
+        this->PushUint8(DW_CFA_restore_extended);
+        this->PushUleb128(reg.num());
+      }
+    }
+  }
+
+  void ALWAYS_INLINE Undefined(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_undefined);
       this->PushUleb128(reg.num());
     }
   }
 
-  void Undefined(Reg reg) {
-    ImplicitlyAdvancePC();
-    this->PushUint8(DW_CFA_undefined);
-    this->PushUleb128(reg.num());
-  }
-
-  void SameValue(Reg reg) {
-    ImplicitlyAdvancePC();
-    this->PushUint8(DW_CFA_same_value);
-    this->PushUleb128(reg.num());
+  void ALWAYS_INLINE SameValue(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_same_value);
+      this->PushUleb128(reg.num());
+    }
   }
 
   // The previous value of "reg" is stored in register "new_reg".
-  void Register(Reg reg, Reg new_reg) {
-    ImplicitlyAdvancePC();
-    this->PushUint8(DW_CFA_register);
-    this->PushUleb128(reg.num());
-    this->PushUleb128(new_reg.num());
-  }
-
-  void RememberState() {
-    // Note that we do not need to advance the PC.
-    this->PushUint8(DW_CFA_remember_state);
-  }
-
-  void RestoreState() {
-    ImplicitlyAdvancePC();
-    this->PushUint8(DW_CFA_restore_state);
-  }
-
-  void DefCFA(Reg reg, int offset) {
-    ImplicitlyAdvancePC();
-    if (offset >= 0) {
-      this->PushUint8(DW_CFA_def_cfa);
+  void ALWAYS_INLINE Register(Reg reg, Reg new_reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_register);
       this->PushUleb128(reg.num());
-      this->PushUleb128(offset);  // Non-factored.
-    } else {
-      uses_dwarf3_features_ = true;
-      this->PushUint8(DW_CFA_def_cfa_sf);
-      this->PushUleb128(reg.num());
-      this->PushSleb128(FactorDataOffset(offset));
+      this->PushUleb128(new_reg.num());
     }
-    current_cfa_offset_ = offset;
   }
 
-  void DefCFARegister(Reg reg) {
-    ImplicitlyAdvancePC();
-    this->PushUint8(DW_CFA_def_cfa_register);
-    this->PushUleb128(reg.num());
+  void ALWAYS_INLINE RememberState() {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_remember_state);
+    }
   }
 
-  void DefCFAOffset(int offset) {
-    if (current_cfa_offset_ != offset) {
+  void ALWAYS_INLINE RestoreState() {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_restore_state);
+    }
+  }
+
+  void ALWAYS_INLINE DefCFA(Reg reg, int offset) {
+    if (UNLIKELY(enabled_)) {
       ImplicitlyAdvancePC();
       if (offset >= 0) {
-        this->PushUint8(DW_CFA_def_cfa_offset);
+        this->PushUint8(DW_CFA_def_cfa);
+        this->PushUleb128(reg.num());
         this->PushUleb128(offset);  // Non-factored.
       } else {
         uses_dwarf3_features_ = true;
-        this->PushUint8(DW_CFA_def_cfa_offset_sf);
+        this->PushUint8(DW_CFA_def_cfa_sf);
+        this->PushUleb128(reg.num());
         this->PushSleb128(FactorDataOffset(offset));
       }
-      current_cfa_offset_ = offset;
     }
-  }
-
-  void ValOffset(Reg reg, int offset) {
-    ImplicitlyAdvancePC();
-    uses_dwarf3_features_ = true;
-    int factored_offset = FactorDataOffset(offset);  // May change sign.
-    if (factored_offset >= 0) {
-      this->PushUint8(DW_CFA_val_offset);
-      this->PushUleb128(reg.num());
-      this->PushUleb128(factored_offset);
-    } else {
-      this->PushUint8(DW_CFA_val_offset_sf);
-      this->PushUleb128(reg.num());
-      this->PushSleb128(factored_offset);
-    }
-  }
-
-  void DefCFAExpression(void* expr, int expr_size) {
-    ImplicitlyAdvancePC();
-    uses_dwarf3_features_ = true;
-    this->PushUint8(DW_CFA_def_cfa_expression);
-    this->PushUleb128(expr_size);
-    this->PushData(expr, expr_size);
-  }
-
-  void Expression(Reg reg, void* expr, int expr_size) {
-    ImplicitlyAdvancePC();
-    uses_dwarf3_features_ = true;
-    this->PushUint8(DW_CFA_expression);
-    this->PushUleb128(reg.num());
-    this->PushUleb128(expr_size);
-    this->PushData(expr, expr_size);
-  }
-
-  void ValExpression(Reg reg, void* expr, int expr_size) {
-    ImplicitlyAdvancePC();
-    uses_dwarf3_features_ = true;
-    this->PushUint8(DW_CFA_val_expression);
-    this->PushUleb128(reg.num());
-    this->PushUleb128(expr_size);
-    this->PushData(expr, expr_size);
-  }
-
-  int GetCurrentCFAOffset() const {
-    return current_cfa_offset_;
-  }
-
-  void SetCurrentCFAOffset(int offset) {
     current_cfa_offset_ = offset;
   }
 
+  void ALWAYS_INLINE DefCFARegister(Reg reg) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      this->PushUint8(DW_CFA_def_cfa_register);
+      this->PushUleb128(reg.num());
+    }
+  }
+
+  void ALWAYS_INLINE DefCFAOffset(int offset) {
+    if (UNLIKELY(enabled_)) {
+      if (current_cfa_offset_ != offset) {
+        ImplicitlyAdvancePC();
+        if (offset >= 0) {
+          this->PushUint8(DW_CFA_def_cfa_offset);
+          this->PushUleb128(offset);  // Non-factored.
+        } else {
+          uses_dwarf3_features_ = true;
+          this->PushUint8(DW_CFA_def_cfa_offset_sf);
+          this->PushSleb128(FactorDataOffset(offset));
+        }
+      }
+    }
+    // Unconditional so that the user can still get and check the value.
+    current_cfa_offset_ = offset;
+  }
+
+  void ALWAYS_INLINE ValOffset(Reg reg, int offset) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      int factored_offset = FactorDataOffset(offset);  // May change sign.
+      if (factored_offset >= 0) {
+        this->PushUint8(DW_CFA_val_offset);
+        this->PushUleb128(reg.num());
+        this->PushUleb128(factored_offset);
+      } else {
+        this->PushUint8(DW_CFA_val_offset_sf);
+        this->PushUleb128(reg.num());
+        this->PushSleb128(factored_offset);
+      }
+    }
+  }
+
+  void ALWAYS_INLINE DefCFAExpression(void* expr, int expr_size) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      this->PushUint8(DW_CFA_def_cfa_expression);
+      this->PushUleb128(expr_size);
+      this->PushData(expr, expr_size);
+    }
+  }
+
+  void ALWAYS_INLINE Expression(Reg reg, void* expr, int expr_size) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      this->PushUint8(DW_CFA_expression);
+      this->PushUleb128(reg.num());
+      this->PushUleb128(expr_size);
+      this->PushData(expr, expr_size);
+    }
+  }
+
+  void ALWAYS_INLINE ValExpression(Reg reg, void* expr, int expr_size) {
+    if (UNLIKELY(enabled_)) {
+      ImplicitlyAdvancePC();
+      uses_dwarf3_features_ = true;
+      this->PushUint8(DW_CFA_val_expression);
+      this->PushUleb128(reg.num());
+      this->PushUleb128(expr_size);
+      this->PushData(expr, expr_size);
+    }
+  }
+
+  bool IsEnabled() const { return enabled_; }
+
+  void SetEnabled(bool value) { enabled_ = value; }
+
+  int GetCurrentPC() const { return current_pc_; }
+
+  int GetCurrentCFAOffset() const { return current_cfa_offset_; }
+
+  void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; }
+
   using Writer<Allocator>::data;
 
-  DebugFrameOpCodeWriter(const Allocator& alloc = Allocator())
+  DebugFrameOpCodeWriter(bool enabled = true,
+                         const Allocator& alloc = Allocator())
       : Writer<Allocator>(&opcodes_),
+        enabled_(enabled),
         opcodes_(alloc),
         current_cfa_offset_(0),
         current_pc_(0),
         uses_dwarf3_features_(false) {
+    if (enabled) {
+      // Best guess based on a couple of observed outputs.
+      opcodes_.reserve(16);
+    }
   }
 
   virtual ~DebugFrameOpCodeWriter() { }
@@ -267,6 +317,7 @@
     return offset / kCodeAlignmentFactor;
   }
 
+  bool enabled_;  // If disabled, all writes are no-ops.
   std::vector<uint8_t, Allocator> opcodes_;
   int current_cfa_offset_;
   int current_pc_;
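
For reference, AdvancePC() above always emits the shortest of the four DW_CFA_advance_loc encodings for the factored delta. A standalone sketch of just that size selection, with opcode byte values as defined by the DWARF specification and push_back standing in for the writer's Push* helpers:

#include <cstdint>
#include <vector>

// Emit the shortest DW_CFA_advance_loc form for a non-zero factored delta.
void PushAdvanceLoc(std::vector<uint8_t>* out, uint32_t delta) {
  if (delta <= 0x3F) {
    out->push_back(0x40 | delta);  // DW_CFA_advance_loc, delta in low 6 bits.
  } else if (delta <= UINT8_MAX) {
    out->push_back(0x02);          // DW_CFA_advance_loc1.
    out->push_back(delta);
  } else if (delta <= UINT16_MAX) {
    out->push_back(0x03);          // DW_CFA_advance_loc2, little-endian u16.
    out->push_back(delta & 0xFF);
    out->push_back(delta >> 8);
  } else {
    out->push_back(0x04);          // DW_CFA_advance_loc4, little-endian u32.
    for (int shift = 0; shift < 32; shift += 8) {
      out->push_back((delta >> shift) & 0xFF);
    }
  }
}
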
diff --git a/compiler/dwarf/debug_frame_writer.h b/compiler/dwarf/debug_frame_writer.h
deleted file mode 100644
index 6de45f5..0000000
--- a/compiler/dwarf/debug_frame_writer.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEBUG_FRAME_WRITER_H_
-#define ART_COMPILER_DWARF_DEBUG_FRAME_WRITER_H_
-
-#include "debug_frame_opcode_writer.h"
-#include "dwarf.h"
-#include "writer.h"
-
-namespace art {
-namespace dwarf {
-
-// Writer for the .eh_frame section (which extends .debug_frame specification).
-template<typename Allocator = std::allocator<uint8_t>>
-class DebugFrameWriter FINAL : private Writer<Allocator> {
- public:
-  void WriteCIE(Reg return_address_register,
-                const uint8_t* initial_opcodes,
-                int initial_opcodes_size) {
-    DCHECK(cie_header_start_ == ~0u);
-    cie_header_start_ = this->data()->size();
-    this->PushUint32(0);  // Length placeholder.
-    this->PushUint32(0);  // CIE id.
-    this->PushUint8(1);   // Version.
-    this->PushString("zR");
-    this->PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor);
-    this->PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor);
-    this->PushUleb128(return_address_register.num());  // ubyte in DWARF2.
-    this->PushUleb128(1);  // z: Augmentation data size.
-    if (use_64bit_address_) {
-      this->PushUint8(0x04);  // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8).
-    } else {
-      this->PushUint8(0x03);  // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
-    }
-    this->PushData(initial_opcodes, initial_opcodes_size);
-    this->Pad(use_64bit_address_ ? 8 : 4);
-    this->UpdateUint32(cie_header_start_, this->data()->size() - cie_header_start_ - 4);
-  }
-
-  void WriteCIE(Reg return_address_register,
-                const DebugFrameOpCodeWriter<Allocator>& opcodes) {
-    WriteCIE(return_address_register, opcodes.data()->data(), opcodes.data()->size());
-  }
-
-  void WriteFDE(uint64_t initial_address,
-                uint64_t address_range,
-                const uint8_t* unwind_opcodes,
-                int unwind_opcodes_size) {
-    DCHECK(cie_header_start_ != ~0u);
-    size_t fde_header_start = this->data()->size();
-    this->PushUint32(0);  // Length placeholder.
-    this->PushUint32(this->data()->size() - cie_header_start_);  // 'CIE_pointer'
-    if (use_64bit_address_) {
-      this->PushUint64(initial_address);
-      this->PushUint64(address_range);
-    } else {
-      this->PushUint32(initial_address);
-      this->PushUint32(address_range);
-    }
-    this->PushUleb128(0);  // Augmentation data size.
-    this->PushData(unwind_opcodes, unwind_opcodes_size);
-    this->Pad(use_64bit_address_ ? 8 : 4);
-    this->UpdateUint32(fde_header_start, this->data()->size() - fde_header_start - 4);
-  }
-
-  DebugFrameWriter(std::vector<uint8_t, Allocator>* buffer, bool use_64bit_address)
-      : Writer<Allocator>(buffer),
-        use_64bit_address_(use_64bit_address),
-        cie_header_start_(~0u) {
-  }
-
- private:
-  bool use_64bit_address_;
-  size_t cie_header_start_;
-
-  DISALLOW_COPY_AND_ASSIGN(DebugFrameWriter);
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEBUG_FRAME_WRITER_H_
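
The deleted DebugFrameWriter class is superseded by free functions in the new compiler/dwarf/headers.h. The call sites below are lifted from the updated dwarf_test.cc later in this diff (the second WriteEhFrameFDE argument appears to be the CIE offset within .eh_frame):

DebugFrameOpCodeWriter<> initial_opcodes;
WriteEhFrameCIE(is64bit, Reg(is64bit ? 16 : 8), initial_opcodes, &eh_frame_data_);
WriteEhFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(), &eh_frame_data_);
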
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
new file mode 100644
index 0000000..c0350b6
--- /dev/null
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
+#define ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
+
+#include <unordered_map>
+
+#include "dwarf.h"
+#include "leb128.h"
+#include "writer.h"
+
+namespace art {
+namespace dwarf {
+
+// 32-bit FNV-1a hash function which we use to find duplicate abbreviations.
+// See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+template<typename Allocator>
+struct FNVHash {
+  size_t operator()(const std::vector<uint8_t, Allocator>& v) const {
+    uint32_t hash = 2166136261u;
+    for (size_t i = 0; i < v.size(); i++) {
+      hash = (hash ^ v[i]) * 16777619u;
+    }
+    return hash;
+  }
+};
+
+/*
+ * Writer for debug information entries (DIEs).
+ * It also handles generation of abbreviations.
+ *
+ * Usage:
+ *   StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ *     WriteStrp(DW_AT_producer, "Compiler name", debug_str);
+ *     StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ *       WriteStrp(DW_AT_name, "Foo", debug_str);
+ *     EndTag();
+ *   EndTag();
+ */
+template<typename Allocator = std::allocator<uint8_t>>
+class DebugInfoEntryWriter FINAL : private Writer<Allocator> {
+ public:
+  // Start debugging information entry.
+  void StartTag(Tag tag, Children children) {
+    DCHECK(has_children_) << "This tag cannot have nested tags";
+    if (inside_entry_) {
+      // Write abbrev code for the previous entry.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      inside_entry_ = false;
+    }
+    StartAbbrev(tag, children);
+    // Abbrev code placeholder of sufficient size.
+    abbrev_code_offset_ = this->data()->size();
+    this->PushUleb128(NextAbbrevCode());
+    depth_++;
+    inside_entry_ = true;
+    has_children_ = (children == DW_CHILDREN_yes);
+  }
+
+  // End debugging information entry.
+  void EndTag() {
+    DCHECK_GT(depth_, 0);
+    if (inside_entry_) {
+      // Write abbrev code for this tag.
+      this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+      inside_entry_ = false;
+    }
+    if (has_children_) {
+      this->PushUint8(0);  // End of children.
+    }
+    depth_--;
+    has_children_ = true;  // Parent tag obviously has children.
+  }
+
+  void WriteAddr(Attribute attrib, uint64_t value) {
+    AddAbbrevAttribute(attrib, DW_FORM_addr);
+    if (is64bit_) {
+      this->PushUint64(value);
+    } else {
+      this->PushUint32(value);
+    }
+  }
+
+  void WriteBlock(Attribute attrib, const void* ptr, int size) {
+    AddAbbrevAttribute(attrib, DW_FORM_block);
+    this->PushUleb128(size);
+    this->PushData(ptr, size);
+  }
+
+  void WriteData1(Attribute attrib, uint8_t value) {
+    AddAbbrevAttribute(attrib, DW_FORM_data1);
+    this->PushUint8(value);
+  }
+
+  void WriteData2(Attribute attrib, uint16_t value) {
+    AddAbbrevAttribute(attrib, DW_FORM_data2);
+    this->PushUint16(value);
+  }
+
+  void WriteData4(Attribute attrib, uint32_t value) {
+    AddAbbrevAttribute(attrib, DW_FORM_data4);
+    this->PushUint32(value);
+  }
+
+  void WriteData8(Attribute attrib, uint64_t value) {
+    AddAbbrevAttribute(attrib, DW_FORM_data8);
+    this->PushUint64(value);
+  }
+
+  void WriteSdata(Attribute attrib, int value) {
+    AddAbbrevAttribute(attrib, DW_FORM_sdata);
+    this->PushSleb128(value);
+  }
+
+  void WriteUdata(Attribute attrib, int value) {
+    AddAbbrevAttribute(attrib, DW_FORM_udata);
+    this->PushUleb128(value);
+  }
+
+  void WriteUdata(Attribute attrib, uint32_t value) {
+    AddAbbrevAttribute(attrib, DW_FORM_udata);
+    this->PushUleb128(value);
+  }
+
+  void WriteFlag(Attribute attrib, bool value) {
+    AddAbbrevAttribute(attrib, DW_FORM_flag);
+    this->PushUint8(value ? 1 : 0);
+  }
+
+  void WriteRef4(Attribute attrib, int cu_offset) {
+    AddAbbrevAttribute(attrib, DW_FORM_ref4);
+    this->PushUint32(cu_offset);
+  }
+
+  void WriteRef(Attribute attrib, int cu_offset) {
+    AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
+    this->PushUleb128(cu_offset);
+  }
+
+  void WriteString(Attribute attrib, const char* value) {
+    AddAbbrevAttribute(attrib, DW_FORM_string);
+    this->PushString(value);
+  }
+
+  void WriteStrp(Attribute attrib, int address) {
+    AddAbbrevAttribute(attrib, DW_FORM_strp);
+    this->PushUint32(address);
+  }
+
+  void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
+    AddAbbrevAttribute(attrib, DW_FORM_strp);
+    int address = debug_str->size();
+    debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
+    this->PushUint32(address);
+  }
+
+  bool is64bit() const { return is64bit_; }
+
+  using Writer<Allocator>::data;
+
+  DebugInfoEntryWriter(bool is64bitArch,
+                       std::vector<uint8_t, Allocator>* debug_abbrev,
+                       const Allocator& alloc = Allocator())
+      : Writer<Allocator>(&entries_),
+        debug_abbrev_(debug_abbrev),
+        current_abbrev_(alloc),
+        abbrev_codes_(alloc),
+        entries_(alloc),
+        is64bit_(is64bitArch) {
+    debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
+  }
+
+  ~DebugInfoEntryWriter() {
+    DCHECK_EQ(depth_, 0);
+  }
+
+ private:
+  // Start abbreviation declaration.
+  void StartAbbrev(Tag tag, Children children) {
+    DCHECK(!inside_entry_);
+    current_abbrev_.clear();
+    EncodeUnsignedLeb128(&current_abbrev_, tag);
+    current_abbrev_.push_back(children);
+  }
+
+  // Add attribute specification.
+  void AddAbbrevAttribute(Attribute name, Form type) {
+    DCHECK(inside_entry_) << "Call StartTag before adding attributes.";
+    EncodeUnsignedLeb128(&current_abbrev_, name);
+    EncodeUnsignedLeb128(&current_abbrev_, type);
+  }
+
+  int NextAbbrevCode() {
+    return 1 + abbrev_codes_.size();
+  }
+
+  // End abbreviation declaration and return its code.
+  int EndAbbrev() {
+    DCHECK(inside_entry_);
+    auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
+                                                  NextAbbrevCode()));
+    int abbrev_code = it.first->second;
+    if (UNLIKELY(it.second)) {  // Inserted new entry.
+      const std::vector<uint8_t, Allocator>& abbrev = it.first->first;
+      debug_abbrev_.Pop();  // Remove abbrev table terminator.
+      debug_abbrev_.PushUleb128(abbrev_code);
+      debug_abbrev_.PushData(abbrev.data(), abbrev.size());
+      debug_abbrev_.PushUint8(0);  // Attribute list end.
+      debug_abbrev_.PushUint8(0);  // Attribute list end.
+      debug_abbrev_.PushUint8(0);  // Add abbrev table terminator.
+    }
+    return abbrev_code;
+  }
+
+ private:
+  // Fields for writing and deduplication of abbrevs.
+  Writer<Allocator> debug_abbrev_;
+  std::vector<uint8_t, Allocator> current_abbrev_;
+  std::unordered_map<std::vector<uint8_t, Allocator>, int,
+                     FNVHash<Allocator> > abbrev_codes_;
+
+  // Fields for writing of debugging information entries.
+  std::vector<uint8_t, Allocator> entries_;
+  bool is64bit_;
+  int depth_ = 0;
+  size_t abbrev_code_offset_ = 0;  // Location to patch once we know the code.
+  bool inside_entry_ = false;  // Entry ends at first child (if any).
+  bool has_children_ = true;
+};
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_DEBUG_INFO_ENTRY_WRITER_H_
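
FNVHash above hard-codes the 32-bit FNV-1a constants; an equivalent standalone form with the constants named (2166136261 is the FNV offset basis, 16777619 the FNV prime):

#include <cstdint>
#include <vector>

// 32-bit FNV-1a over a byte vector; equivalent to FNVHash above.
inline uint32_t Fnv1a32(const std::vector<uint8_t>& v) {
  constexpr uint32_t kFnvOffsetBasis = 2166136261u;
  constexpr uint32_t kFnvPrime = 16777619u;
  uint32_t hash = kFnvOffsetBasis;
  for (uint8_t byte : v) {
    hash = (hash ^ byte) * kFnvPrime;  // XOR first, then multiply: the "1a" order.
  }
  return hash;
}
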
diff --git a/compiler/dwarf/debug_line_writer.h b/compiler/dwarf/debug_line_writer.h
deleted file mode 100644
index 4b7d8d9..0000000
--- a/compiler/dwarf/debug_line_writer.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_DWARF_DEBUG_LINE_WRITER_H_
-#define ART_COMPILER_DWARF_DEBUG_LINE_WRITER_H_
-
-#include "debug_line_opcode_writer.h"
-#include "dwarf.h"
-#include "writer.h"
-#include <string>
-
-namespace art {
-namespace dwarf {
-
-// Writer for the .debug_line section (DWARF-3).
-template<typename Allocator = std::allocator<uint8_t>>
-class DebugLineWriter FINAL : private Writer<Allocator> {
- public:
-  struct FileEntry {
-    std::string file_name;
-    int directory_index;
-    int modification_time;
-    int file_size;
-  };
-
-  void WriteTable(const std::vector<std::string>& include_directories,
-                  const std::vector<FileEntry>& files,
-                  const DebugLineOpCodeWriter<Allocator>& opcodes) {
-    size_t header_start = this->data()->size();
-    this->PushUint32(0);  // Section-length placeholder.
-    // Claim DWARF-2 version even though we use some DWARF-3 features.
-    // DWARF-2 consumers will ignore the unknown opcodes.
-    // This is what clang currently does.
-    this->PushUint16(2);  // .debug_line version.
-    size_t header_length_pos = this->data()->size();
-    this->PushUint32(0);  // Header-length placeholder.
-    this->PushUint8(1 << opcodes.GetCodeFactorBits());
-    this->PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0);
-    this->PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase);
-    this->PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange);
-    this->PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase);
-    static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = {
-        0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
-    for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) {
-      this->PushUint8(opcode_lengths[i]);
-    }
-    for (const std::string& directory : include_directories) {
-      this->PushData(directory.data(), directory.size() + 1);
-    }
-    this->PushUint8(0);  // Terminate include_directories list.
-    for (const FileEntry& file : files) {
-      this->PushData(file.file_name.data(), file.file_name.size() + 1);
-      this->PushUleb128(file.directory_index);
-      this->PushUleb128(file.modification_time);
-      this->PushUleb128(file.file_size);
-    }
-    this->PushUint8(0);  // Terminate file list.
-    this->UpdateUint32(header_length_pos, this->data()->size() - header_length_pos - 4);
-    this->PushData(opcodes.data()->data(), opcodes.data()->size());
-    this->UpdateUint32(header_start, this->data()->size() - header_start - 4);
-  }
-
-  explicit DebugLineWriter(std::vector<uint8_t, Allocator>* buffer)
-    : Writer<Allocator>(buffer) {
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(DebugLineWriter);
-};
-
-}  // namespace dwarf
-}  // namespace art
-
-#endif  // ART_COMPILER_DWARF_DEBUG_LINE_WRITER_H_
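
As with the frame writer, the deleted DebugLineWriter is replaced by a free function in dwarf/headers.h (FileEntry is now a free-standing struct, presumably declared there); the updated tests later in this diff call it as:

std::vector<std::string> include_directories { "/path/to/source" };
std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } };
WriteDebugLineTable(include_directories, files, opcodes, &debug_line_data_);
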
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index f3553bc..ec18e96 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -17,9 +17,9 @@
 #include "dwarf_test.h"
 
 #include "dwarf/debug_frame_opcode_writer.h"
-#include "dwarf/debug_frame_writer.h"
+#include "dwarf/debug_info_entry_writer.h"
 #include "dwarf/debug_line_opcode_writer.h"
-#include "dwarf/debug_line_writer.h"
+#include "dwarf/headers.h"
 #include "gtest/gtest.h"
 
 namespace art {
@@ -118,22 +118,20 @@
   DW_CHECK_NEXT("DW_CFA_restore: r2 (edx)");
   DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
 
-  DebugFrameWriter<> eh_frame(&eh_frame_data_, is64bit);
   DebugFrameOpCodeWriter<> initial_opcodes;
-  eh_frame.WriteCIE(Reg(is64bit ? 16 : 8),  // Return address register.
-                    initial_opcodes);  // Initial opcodes.
-  eh_frame.WriteFDE(0x01000000, 0x01000000,
-                    opcodes.data()->data(), opcodes.data()->size());
+  WriteEhFrameCIE(is64bit, Reg(is64bit ? 16 : 8), initial_opcodes, &eh_frame_data_);
+  WriteEhFrameFDE(is64bit, 0, 0x01000000, 0x01000000, opcodes.data(), &eh_frame_data_);
   CheckObjdumpOutput(is64bit, "-W");
 }
 
-TEST_F(DwarfTest, DebugFrame64) {
-  const bool is64bit = true;
-  DebugFrameWriter<> eh_frame(&eh_frame_data_, is64bit);
-  DebugFrameOpCodeWriter<> no_opcodes;
-  eh_frame.WriteCIE(Reg(16), no_opcodes);
-  eh_frame.WriteFDE(0x0100000000000000, 0x0200000000000000,
-                    no_opcodes.data()->data(), no_opcodes.data()->size());
+// TODO: objdump seems to have trouble with the 64-bit CIE length.
+TEST_F(DwarfTest, DISABLED_DebugFrame64) {
+  constexpr bool is64bit = true;
+  DebugFrameOpCodeWriter<> initial_opcodes;
+  WriteEhFrameCIE(is64bit, Reg(16), initial_opcodes, &eh_frame_data_);
+  DebugFrameOpCodeWriter<> opcodes;
+  WriteEhFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
+                  opcodes.data(), &eh_frame_data_);
   DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
   CheckObjdumpOutput(is64bit, "-W");
 }
@@ -147,7 +145,7 @@
   include_directories.push_back("/path/to/source");
   DW_CHECK("/path/to/source");
 
-  std::vector<DebugLineWriter<>::FileEntry> files {
+  std::vector<FileEntry> files {
     { "file0.c", 0, 1000, 2000 },
     { "file1.c", 1, 1000, 2000 },
     { "file2.c", 1, 1000, 2000 },
@@ -186,8 +184,7 @@
   DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName");
   DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c");
 
-  DebugLineWriter<> debug_line(&debug_line_data_);
-  debug_line.WriteTable(include_directories, files, opcodes);
+  WriteDebugLineTable(include_directories, files, opcodes, &debug_line_data_);
   CheckObjdumpOutput(is64bit, "-W");
 }
 
@@ -221,14 +218,63 @@
   EXPECT_LT(opcodes.data()->size(), num_rows * 3);
 
   std::vector<std::string> directories;
-  std::vector<DebugLineWriter<>::FileEntry> files {
-    { "file.c", 0, 1000, 2000 },
-  };
-  DebugLineWriter<> debug_line(&debug_line_data_);
-  debug_line.WriteTable(directories, files, opcodes);
+  std::vector<FileEntry> files { { "file.c", 0, 1000, 2000 } };  // NOLINT
+  WriteDebugLineTable(directories, files, opcodes, &debug_line_data_);
   CheckObjdumpOutput(is64bit, "-W -WL");
 }
 
+TEST_F(DwarfTest, DebugInfo) {
+  constexpr bool is64bit = false;
+  DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
+  DW_CHECK("Contents of the .debug_info section:");
+  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
+  info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
+  DW_CHECK_NEXT("DW_AT_producer    : (indirect string, offset: 0x0): Compiler name");
+  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01000000);
+  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1000000");
+  info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
+  DW_CHECK_NEXT("DW_AT_high_pc     : 0x2000000");
+  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
+  info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
+  DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0xe): Foo");
+  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01010000);
+  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1010000");
+  info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
+  DW_CHECK_NEXT("DW_AT_high_pc     : 0x1020000");
+  info.EndTag();  // DW_TAG_subprogram
+  info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+  DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
+  info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
+  DW_CHECK_NEXT("DW_AT_name        : (indirect string, offset: 0x12): Bar");
+  info.WriteAddr(dwarf::DW_AT_low_pc, 0x01020000);
+  DW_CHECK_NEXT("DW_AT_low_pc      : 0x1020000");
+  info.WriteAddr(dwarf::DW_AT_high_pc, 0x01030000);
+  DW_CHECK_NEXT("DW_AT_high_pc     : 0x1030000");
+  info.EndTag();  // DW_TAG_subprogram
+  info.EndTag();  // DW_TAG_compile_unit
+  // Test that the previous list was properly terminated and that an empty children list works.
+  info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+  info.EndTag();  // DW_TAG_compile_unit
+
+  // The abbrev table is just a side product, but check it as well.
+  DW_CHECK("Abbrev Number: 3 (DW_TAG_compile_unit)");
+  DW_CHECK("Contents of the .debug_abbrev section:");
+  DW_CHECK("1      DW_TAG_compile_unit    [has children]");
+  DW_CHECK_NEXT("DW_AT_producer     DW_FORM_strp");
+  DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
+  DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
+  DW_CHECK("2      DW_TAG_subprogram    [no children]");
+  DW_CHECK_NEXT("DW_AT_name         DW_FORM_strp");
+  DW_CHECK_NEXT("DW_AT_low_pc       DW_FORM_addr");
+  DW_CHECK_NEXT("DW_AT_high_pc      DW_FORM_addr");
+  DW_CHECK("3      DW_TAG_compile_unit    [has children]");
+
+  dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info, &debug_info_data_);
+  CheckObjdumpOutput(is64bit, "-W");
+}
+
 #endif  // HAVE_ANDROID_OS
 
 }  // namespace dwarf
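For reference, the new DebugInfoEntryWriter / WriteDebugInfoCU API exercised by this test
composes as follows; a minimal sketch distilled from the test above, with illustrative
buffer names and addresses (not taken from the patch):

  // Assumes "dwarf/debug_info_entry_writer.h" and "dwarf/headers.h" are included.
  std::vector<uint8_t> debug_abbrev, debug_info, debug_str;
  dwarf::DebugInfoEntryWriter<> entry(false /* is64bit */, &debug_abbrev);
  entry.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
  entry.WriteStrp(dwarf::DW_AT_producer, "Example producer", &debug_str);
  entry.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
  entry.WriteAddr(dwarf::DW_AT_low_pc, 0x1000);
  entry.WriteAddr(dwarf::DW_AT_high_pc, 0x2000);
  entry.EndTag();  // DW_TAG_subprogram
  entry.EndTag();  // DW_TAG_compile_unit
  // Prepend the compilation-unit header and append the entries.
  dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, entry, &debug_info);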
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
new file mode 100644
index 0000000..d866b91
--- /dev/null
+++ b/compiler/dwarf/headers.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_HEADERS_H_
+#define ART_COMPILER_DWARF_HEADERS_H_
+
+#include "debug_frame_opcode_writer.h"
+#include "debug_info_entry_writer.h"
+#include "debug_line_opcode_writer.h"
+#include "register.h"
+#include "writer.h"
+
+namespace art {
+namespace dwarf {
+
+// Write common information entry (CIE) to .eh_frame section.
+template<typename Allocator>
+void WriteEhFrameCIE(bool is64bit, Reg return_address_register,
+                     const DebugFrameOpCodeWriter<Allocator>& opcodes,
+                     std::vector<uint8_t>* eh_frame) {
+  Writer<> writer(eh_frame);
+  size_t cie_header_start = writer.data()->size();
+  if (is64bit) {
+    // TODO: using the 64-bit DWARF format here is unrelated to the target being 64-bit.
+    writer.PushUint32(0xffffffff);
+    writer.PushUint64(0);  // Length placeholder.
+    writer.PushUint64(0);  // CIE id.
+  } else {
+    writer.PushUint32(0);  // Length placeholder.
+    writer.PushUint32(0);  // CIE id.
+  }
+  writer.PushUint8(1);   // Version.
+  writer.PushString("zR");
+  writer.PushUleb128(DebugFrameOpCodeWriter<Allocator>::kCodeAlignmentFactor);
+  writer.PushSleb128(DebugFrameOpCodeWriter<Allocator>::kDataAlignmentFactor);
+  writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
+  writer.PushUleb128(1);  // z: Augmentation data size.
+  if (is64bit) {
+    writer.PushUint8(0x04);  // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8).
+  } else {
+    writer.PushUint8(0x03);  // R: ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
+  }
+  writer.PushData(opcodes.data());
+  writer.Pad(is64bit ? 8 : 4);
+  if (is64bit) {
+    writer.UpdateUint64(cie_header_start + 4, writer.data()->size() - cie_header_start - 12);
+  } else {
+    writer.UpdateUint32(cie_header_start, writer.data()->size() - cie_header_start - 4);
+  }
+}
+
+// Write frame description entry (FDE) to .eh_frame section.
+template<typename Allocator>
+void WriteEhFrameFDE(bool is64bit, size_t cie_offset,
+                     uint64_t initial_address, uint64_t address_range,
+                     const std::vector<uint8_t, Allocator>* opcodes,
+                     std::vector<uint8_t>* eh_frame) {
+  Writer<> writer(eh_frame);
+  size_t fde_header_start = writer.data()->size();
+  if (is64bit) {
+    // TODO: using the 64-bit DWARF format here is unrelated to the target being 64-bit.
+    writer.PushUint32(0xffffffff);
+    writer.PushUint64(0);  // Length placeholder.
+    uint64_t cie_pointer = writer.data()->size() - cie_offset;
+    writer.PushUint64(cie_pointer);
+  } else {
+    writer.PushUint32(0);  // Length placeholder.
+    uint32_t cie_pointer = writer.data()->size() - cie_offset;
+    writer.PushUint32(cie_pointer);
+  }
+  if (is64bit) {
+    writer.PushUint64(initial_address);
+    writer.PushUint64(address_range);
+  } else {
+    writer.PushUint32(initial_address);
+    writer.PushUint32(address_range);
+  }
+  writer.PushUleb128(0);  // Augmentation data size.
+  writer.PushData(opcodes);
+  writer.Pad(is64bit ? 8 : 4);
+  if (is64bit) {
+    writer.UpdateUint64(fde_header_start + 4, writer.data()->size() - fde_header_start - 12);
+  } else {
+    writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4);
+  }
+}
+
+// Write compilation unit (CU) to .debug_info section.
+template<typename Allocator>
+void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
+                      const DebugInfoEntryWriter<Allocator>& entries,
+                      std::vector<uint8_t>* debug_info) {
+  Writer<> writer(debug_info);
+  size_t start = writer.data()->size();
+  writer.PushUint32(0);  // Length placeholder.
+  writer.PushUint16(3);  // Version.
+  writer.PushUint32(debug_abbrev_offset);
+  writer.PushUint8(entries.is64bit() ? 8 : 4);
+  writer.PushData(entries.data());
+  writer.UpdateUint32(start, writer.data()->size() - start - 4);
+}
+
+struct FileEntry {
+  std::string file_name;
+  int directory_index;
+  int modification_time;
+  int file_size;
+};
+
+// Write line table to .debug_line section.
+template<typename Allocator>
+void WriteDebugLineTable(const std::vector<std::string>& include_directories,
+                         const std::vector<FileEntry>& files,
+                         const DebugLineOpCodeWriter<Allocator>& opcodes,
+                         std::vector<uint8_t>* debug_line) {
+  Writer<> writer(debug_line);
+  size_t header_start = writer.data()->size();
+  writer.PushUint32(0);  // Section-length placeholder.
+  // Claim DWARF-2 version even though we use some DWARF-3 features.
+  // DWARF-2 consumers will ignore the unknown opcodes.
+  // This is what clang currently does.
+  writer.PushUint16(2);  // .debug_line version.
+  size_t header_length_pos = writer.data()->size();
+  writer.PushUint32(0);  // Header-length placeholder.
+  writer.PushUint8(1 << opcodes.GetCodeFactorBits());
+  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kDefaultIsStmt ? 1 : 0);
+  writer.PushInt8(DebugLineOpCodeWriter<Allocator>::kLineBase);
+  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kLineRange);
+  writer.PushUint8(DebugLineOpCodeWriter<Allocator>::kOpcodeBase);
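+  // Number of ULEB128 arguments taken by each standard opcode 1 (DW_LNS_copy) .. kOpcodeBase - 1.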
+  static const int opcode_lengths[DebugLineOpCodeWriter<Allocator>::kOpcodeBase] = {
+      0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 };
+  for (int i = 1; i < DebugLineOpCodeWriter<Allocator>::kOpcodeBase; i++) {
+    writer.PushUint8(opcode_lengths[i]);
+  }
+  for (const std::string& directory : include_directories) {
+    writer.PushData(directory.data(), directory.size() + 1);
+  }
+  writer.PushUint8(0);  // Terminate include_directories list.
+  for (const FileEntry& file : files) {
+    writer.PushData(file.file_name.data(), file.file_name.size() + 1);
+    writer.PushUleb128(file.directory_index);
+    writer.PushUleb128(file.modification_time);
+    writer.PushUleb128(file.file_size);
+  }
+  writer.PushUint8(0);  // Terminate file list.
+  writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4);
+  writer.PushData(opcodes.data());
+  writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4);
+}
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_DWARF_HEADERS_H_
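All three writers above share the same initial-length idiom: push a zero length
placeholder, emit the body, then patch the length in place. A condensed sketch of the
32-bit variant, using only Writer methods from this patch (buffer name illustrative):

  std::vector<uint8_t> buffer;
  Writer<> writer(&buffer);
  size_t start = writer.data()->size();
  writer.PushUint32(0);  // Length placeholder.
  // ... emit the section body here ...
  // The DWARF initial length excludes the length field itself, hence the -4.
  writer.UpdateUint32(start, writer.data()->size() - start - 4);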
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index d8e29f0..3b9c558 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -116,6 +116,11 @@
     data_->insert(data_->end(), p, p + size);
   }
 
+  template<typename Allocator2>
+  void PushData(const std::vector<uint8_t, Allocator2>* buffer) {
+    data_->insert(data_->end(), buffer->begin(), buffer->end());
+  }
+
   void UpdateUint32(size_t offset, uint32_t value) {
     DCHECK_LT(offset + 3, data_->size());
     (*data_)[offset + 0] = (value >> 0) & 0xFF;
@@ -136,6 +141,15 @@
     (*data_)[offset + 7] = (value >> 56) & 0xFF;
   }
 
+  void UpdateUleb128(size_t offset, uint32_t value) {
+    DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size());
+    UpdateUnsignedLeb128(data_->data() + offset, value);
+  }
+
+  void Pop() {
+    data_->pop_back();
+  }
+
   void Pad(int alignment) {
     DCHECK_NE(alignment, 0);
     data_->resize(RoundUp(data_->size(), alignment), 0);
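The new UpdateUleb128 can only patch a value whose encoding is no longer than the bytes
originally reserved, which is what its DCHECK guards. A worked LEB128 example
(standard encoding, not taken from the patch):

  // ULEB128 stores 7 bits per byte, least-significant group first;
  // the high bit of each byte marks continuation.
  //   300 = 0b1'0010'1100  ->  0xAC 0x02
  //   byte 0: (300 & 0x7F) | 0x80 = 0xAC  (more bytes follow)
  //   byte 1:  300 >> 7           = 0x02  (final byte)
  // A two-byte slot can therefore be re-patched with any value < 2^14.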
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
new file mode 100644
index 0000000..5e8e24b
--- /dev/null
+++ b/compiler/elf_writer_debug.cc
@@ -0,0 +1,360 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "elf_writer_debug.h"
+
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "dex_file-inl.h"
+#include "dwarf/headers.h"
+#include "dwarf/register.h"
+#include "oat_writer.h"
+
+namespace art {
+namespace dwarf {
+
+static void WriteEhFrameCIE(InstructionSet isa, std::vector<uint8_t>* eh_frame) {
+  // Scratch registers should be marked as undefined.  This tells the
+  // debugger that their values in the previous frame are not recoverable.
+  bool is64bit = Is64BitInstructionSet(isa);
+  switch (isa) {
+    case kArm:
+    case kThumb2: {
+      DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::ArmCore(13), 0);  // R13(SP).
+      // core registers.
+      for (int reg = 0; reg < 13; reg++) {
+        if (reg < 4 || reg == 12) {
+          opcodes.Undefined(Reg::ArmCore(reg));
+        } else {
+          opcodes.SameValue(Reg::ArmCore(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 16) {
+          opcodes.Undefined(Reg::ArmFp(reg));
+        } else {
+          opcodes.SameValue(Reg::ArmFp(reg));
+        }
+      }
+      auto return_address_reg = Reg::ArmCore(14);  // R14(LR).
+      WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+      return;
+    }
+    case kArm64: {
+      DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::Arm64Core(31), 0);  // R31(SP).
+      // core registers.
+      for (int reg = 0; reg < 30; reg++) {
+        if (reg < 8 || reg == 16 || reg == 17) {
+          opcodes.Undefined(Reg::Arm64Core(reg));
+        } else {
+          opcodes.SameValue(Reg::Arm64Core(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 32; reg++) {
+        if (reg < 8 || reg >= 16) {
+          opcodes.Undefined(Reg::Arm64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::Arm64Fp(reg));
+        }
+      }
+      auto return_address_reg = Reg::Arm64Core(30);  // R30(LR).
+      WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+      return;
+    }
+    case kMips:
+    case kMips64: {
+      DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::MipsCore(29), 0);  // R29(SP).
+      // core registers.
+      for (int reg = 1; reg < 26; reg++) {
+        if (reg < 16 || reg == 24 || reg == 25) {  // AT, V*, A*, T*.
+          opcodes.Undefined(Reg::MipsCore(reg));
+        } else {
+          opcodes.SameValue(Reg::MipsCore(reg));
+        }
+      }
+      auto return_address_reg = Reg::MipsCore(31);  // R31(RA).
+      WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+      return;
+    }
+    case kX86: {
+      DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::X86Core(4), 4);   // R4(ESP).
+      opcodes.Offset(Reg::X86Core(8), -4);  // R8(EIP).
+      // core registers.
+      for (int reg = 0; reg < 8; reg++) {
+        if (reg <= 3) {
+          opcodes.Undefined(Reg::X86Core(reg));
+        } else if (reg == 4) {
+          // Stack pointer.
+        } else {
+          opcodes.SameValue(Reg::X86Core(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 8; reg++) {
+        opcodes.Undefined(Reg::X86Fp(reg));
+      }
+      auto return_address_reg = Reg::X86Core(8);  // R8(EIP).
+      WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+      return;
+    }
+    case kX86_64: {
+      DebugFrameOpCodeWriter<> opcodes;
+      opcodes.DefCFA(Reg::X86_64Core(4), 8);  // R4(RSP).
+      opcodes.Offset(Reg::X86_64Core(16), -8);  // R16(RIP).
+      // core registers.
+      for (int reg = 0; reg < 16; reg++) {
+        if (reg == 4) {
+          // Stack pointer.
+        } else if (reg < 12 && reg != 3 && reg != 5) {  // except RBX and RBP.
+          opcodes.Undefined(Reg::X86_64Core(reg));
+        } else {
+          opcodes.SameValue(Reg::X86_64Core(reg));
+        }
+      }
+      // fp registers.
+      for (int reg = 0; reg < 16; reg++) {
+        if (reg < 12) {
+          opcodes.Undefined(Reg::X86_64Fp(reg));
+        } else {
+          opcodes.SameValue(Reg::X86_64Fp(reg));
+        }
+      }
+      auto return_address_reg = Reg::X86_64Core(16);  // R16(RIP).
+      WriteEhFrameCIE(is64bit, return_address_reg, opcodes, eh_frame);
+      return;
+    }
+    case kNone:
+      break;
+  }
+  LOG(FATAL) << "Can not write CIE frame for ISA " << isa;
+  UNREACHABLE();
+}
+
+/*
+ * @brief Generate the DWARF debug sections.
+ * @param compiler The compiler driver, used to query the instruction set.
+ * @param oat_writer The Oat file writer, which supplies the method debug info.
+ * @param text_section_offset Offset of the .text section, added to method addresses.
+ * @param eh_frame Call frame information.
+ * @param debug_info Compilation unit information.
+ * @param debug_abbrev Abbreviations used to generate debug_info.
+ * @param debug_str Debug strings.
+ * @param debug_line Line number table.
+ */
+void WriteDebugSections(const CompilerDriver* compiler,
+                        const OatWriter* oat_writer,
+                        uint32_t text_section_offset,
+                        std::vector<uint8_t>* eh_frame,
+                        std::vector<uint8_t>* debug_info,
+                        std::vector<uint8_t>* debug_abbrev,
+                        std::vector<uint8_t>* debug_str,
+                        std::vector<uint8_t>* debug_line) {
+  const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo();
+  const InstructionSet isa = compiler->GetInstructionSet();
+  uint32_t cunit_low_pc = static_cast<uint32_t>(-1);
+  uint32_t cunit_high_pc = 0;
+  for (const auto& method_info : method_infos) {
+    cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_);
+    cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_);
+  }
+
+  // Write .eh_frame section.
+  size_t cie_offset = eh_frame->size();
+  WriteEhFrameCIE(isa, eh_frame);
+  for (const OatWriter::DebugInfo& mi : method_infos) {
+    const SwapVector<uint8_t>* opcodes = mi.compiled_method_->GetCFIInfo();
+    if (opcodes != nullptr) {
+      WriteEhFrameFDE(Is64BitInstructionSet(isa), cie_offset,
+                      text_section_offset + mi.low_pc_, mi.high_pc_ - mi.low_pc_,
+                      opcodes, eh_frame);
+    }
+  }
+
+  // Write .debug_info section.
+  size_t debug_abbrev_offset = debug_abbrev->size();
+  DebugInfoEntryWriter<> info(false /* 32 bit */, debug_abbrev);
+  info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+  info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str);
+  info.WriteData1(DW_AT_language, DW_LANG_Java);
+  info.WriteAddr(DW_AT_low_pc, cunit_low_pc + text_section_offset);
+  info.WriteAddr(DW_AT_high_pc, cunit_high_pc + text_section_offset);
+  info.WriteData4(DW_AT_stmt_list, debug_line->size());
+  for (const auto& method_info : method_infos) {
+    std::string method_name = PrettyMethod(method_info.dex_method_index_,
+                                           *method_info.dex_file_, true);
+    if (method_info.deduped_) {
+      // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol
+      // so that it will show up in a debuggerd crash report.
+      method_name += " [ DEDUPED ]";
+    }
+    info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+    info.WriteStrp(DW_AT_name, method_name.data(), debug_str);
+    info.WriteAddr(DW_AT_low_pc, method_info.low_pc_ + text_section_offset);
+    info.WriteAddr(DW_AT_high_pc, method_info.high_pc_ + text_section_offset);
+    info.EndTag();  // DW_TAG_subprogram
+  }
+  info.EndTag();  // DW_TAG_compile_unit
+  WriteDebugInfoCU(debug_abbrev_offset, info, debug_info);
+
+  // TODO: in gdb, "info functions <regexp>" reports Java functions, but the
+  // source file is <unknown> because .debug_line is formed as one
+  // compilation unit. To fix this, it is possible to generate
+  // a separate compilation unit for every distinct Java source.
+  // Each of these compilation units can have several non-adjacent
+  // method ranges.
+
+  // Write .debug_line section.
+  std::vector<FileEntry> files;
+  std::unordered_map<std::string, size_t> files_map;
+  std::vector<std::string> directories;
+  std::unordered_map<std::string, size_t> directories_map;
+  int code_factor_bits = 0;
+  int dwarf_isa = -1;
+  switch (isa) {
+    case kArm:  // arm actually means thumb2.
+    case kThumb2:
+      code_factor_bits = 1;  // 16-bit instructions
+      dwarf_isa = 1;  // DW_ISA_ARM_thumb.
+      break;
+    case kArm64:
+    case kMips:
+    case kMips64:
+      code_factor_bits = 2;  // 32-bit instructions
+      break;
+    case kNone:
+    case kX86:
+    case kX86_64:
+      break;
+  }
+  DebugLineOpCodeWriter<> opcodes(false /* 32bit */, code_factor_bits);
+  opcodes.SetAddress(text_section_offset + cunit_low_pc);
+  if (dwarf_isa != -1) {
+    opcodes.SetISA(dwarf_isa);
+  }
+  for (const OatWriter::DebugInfo& mi : method_infos) {
+    // Addresses in the line table should be unique and increasing.
+    if (mi.deduped_) {
+      continue;
+    }
+
+    struct DebugInfoCallbacks {
+      static bool NewPosition(void* ctx, uint32_t address, uint32_t line) {
+        auto* context = reinterpret_cast<DebugInfoCallbacks*>(ctx);
+        context->dex2line_.push_back({address, static_cast<int32_t>(line)});
+        return false;
+      }
+      DefaultSrcMap dex2line_;
+    } debug_info_callbacks;
+
+    const DexFile* dex = mi.dex_file_;
+    if (mi.code_item_ != nullptr) {
+      dex->DecodeDebugInfo(mi.code_item_,
+                           (mi.access_flags_ & kAccStatic) != 0,
+                           mi.dex_method_index_,
+                           DebugInfoCallbacks::NewPosition,
+                           nullptr,
+                           &debug_info_callbacks);
+    }
+
+    // Get and deduplicate directory and filename.
+    int file_index = 0;  // 0 - primary source file of the compilation.
+    auto& dex_class_def = dex->GetClassDef(mi.class_def_index_);
+    const char* source_file = dex->GetSourceFile(dex_class_def);
+    if (source_file != nullptr) {
+      std::string file_name(source_file);
+      size_t file_name_slash = file_name.find_last_of('/');
+      std::string class_name(dex->GetClassDescriptor(dex_class_def));
+      size_t class_name_slash = class_name.find_last_of('/');
+      std::string full_path(file_name);
+
+      // Guess directory from package name.
+      int directory_index = 0;  // 0 - current directory of the compilation.
+      if (file_name_slash == std::string::npos &&  // Just filename.
+          class_name.front() == 'L' &&  // Type descriptor for a class.
+          class_name_slash != std::string::npos) {  // Has package name.
+        std::string package_name = class_name.substr(1, class_name_slash - 1);
+        auto it = directories_map.find(package_name);
+        if (it == directories_map.end()) {
+          directory_index = 1 + directories.size();
+          directories_map.emplace(package_name, directory_index);
+          directories.push_back(package_name);
+        } else {
+          directory_index = it->second;
+        }
+        full_path = package_name + "/" + file_name;
+      }
+
+      // Add file entry.
+      auto it2 = files_map.find(full_path);
+      if (it2 == files_map.end()) {
+        file_index = 1 + files.size();
+        files_map.emplace(full_path, file_index);
+        files.push_back(FileEntry {
+          file_name,
+          directory_index,
+          0,  // Modification time - NA.
+          0,  // File size - NA.
+        });
+      } else {
+        file_index = it2->second;
+      }
+    }
+    opcodes.SetFile(file_index);
+
+    // Generate mapping opcodes from PC to Java lines.
+    const DefaultSrcMap& dex2line_map = debug_info_callbacks.dex2line_;
+    uint32_t low_pc = text_section_offset + mi.low_pc_;
+    if (file_index != 0 && !dex2line_map.empty()) {
+      bool first = true;
+      for (SrcMapElem pc2dex : mi.compiled_method_->GetSrcMappingTable()) {
+        uint32_t pc = pc2dex.from_;
+        int dex_pc = pc2dex.to_;
+        auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex_pc));
+        if (dex2line.first) {
+          int line = dex2line.second;
+          if (first) {
+            first = false;
+            if (pc > 0) {
+              // Assume that any preceding code is prologue.
+              int first_line = dex2line_map.front().to_;
+              // Prologue is not a sensible place for a breakpoint.
+              opcodes.NegateStmt();
+              opcodes.AddRow(low_pc, first_line);
+              opcodes.NegateStmt();
+              opcodes.SetPrologueEnd();
+            }
+            opcodes.AddRow(low_pc + pc, line);
+          } else if (line != opcodes.CurrentLine()) {
+            opcodes.AddRow(low_pc + pc, line);
+          }
+        }
+      }
+    } else {
+      // line 0 - instruction cannot be attributed to any source line.
+      opcodes.AddRow(low_pc, 0);
+    }
+  }
+  opcodes.AdvancePC(text_section_offset + cunit_high_pc);
+  opcodes.EndSequence();
+  WriteDebugLineTable(directories, files, opcodes, debug_line);
+}
+
+}  // namespace dwarf
+}  // namespace art
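As a worked reading of the x86 CIE above (an interpretation, not part of the patch),
the initial rules describe the frame state immediately after a call instruction:

  // DefCFA(Reg::X86Core(4), 4)   =>  CFA = ESP + 4 (the caller's stack pointer).
  // Offset(Reg::X86Core(8), -4)  =>  EIP saved at CFA - 4, i.e. at the current ESP.
  // An unwinder therefore recovers: return address = *ESP, caller's ESP = ESP + 4.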
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
new file mode 100644
index 0000000..39a99d6
--- /dev/null
+++ b/compiler/elf_writer_debug.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
+#define ART_COMPILER_ELF_WRITER_DEBUG_H_
+
+#include <vector>
+
+#include "oat_writer.h"
+
+namespace art {
+namespace dwarf {
+
+void WriteDebugSections(const CompilerDriver* compiler,
+                        const OatWriter* oat_writer,
+                        uint32_t text_section_offset,
+                        std::vector<uint8_t>* eh_frame_data,
+                        std::vector<uint8_t>* debug_info_data,
+                        std::vector<uint8_t>* debug_abbrev_data,
+                        std::vector<uint8_t>* debug_str_data,
+                        std::vector<uint8_t>* debug_line_data);
+
+}  // namespace dwarf
+}  // namespace art
+
+#endif  // ART_COMPILER_ELF_WRITER_DEBUG_H_
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 3ce19ab..e9af25f 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -22,14 +22,13 @@
 #include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "compiled_method.h"
+#include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
-#include "dwarf.h"
-#include "dwarf/debug_frame_writer.h"
-#include "dwarf/debug_line_writer.h"
 #include "elf_builder.h"
 #include "elf_file.h"
 #include "elf_utils.h"
+#include "elf_writer_debug.h"
 #include "file_output_stream.h"
 #include "globals.h"
 #include "leb128.h"
@@ -39,42 +38,6 @@
 
 namespace art {
 
-static void PushByte(std::vector<uint8_t>* buf, int data) {
-  buf->push_back(data & 0xff);
-}
-
-static uint32_t PushStr(std::vector<uint8_t>* buf, const char* str, const char* def = nullptr) {
-  if (str == nullptr) {
-    str = def;
-  }
-
-  uint32_t offset = buf->size();
-  for (size_t i = 0; str[i] != '\0'; ++i) {
-    buf->push_back(str[i]);
-  }
-  buf->push_back('\0');
-  return offset;
-}
-
-static uint32_t PushStr(std::vector<uint8_t>* buf, const std::string &str) {
-  uint32_t offset = buf->size();
-  buf->insert(buf->end(), str.begin(), str.end());
-  buf->push_back('\0');
-  return offset;
-}
-
-static void UpdateWord(std::vector<uint8_t>* buf, int offset, int data) {
-  (*buf)[offset+0] = data;
-  (*buf)[offset+1] = data >> 8;
-  (*buf)[offset+2] = data >> 16;
-  (*buf)[offset+3] = data >> 24;
-}
-
-static void PushHalf(std::vector<uint8_t>* buf, int data) {
-  buf->push_back(data & 0xff);
-  buf->push_back((data >> 8) & 0xff);
-}
-
 template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr,
           typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr,
           typename Elf_Phdr, typename Elf_Shdr>
@@ -89,116 +52,6 @@
   return elf_writer.Write(oat_writer, dex_files, android_root, is_host);
 }
 
-std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
-  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;
-
-  // Length (will be filled in later in this routine).
-  if (is_x86_64) {
-    Push32(cfi_info, 0xffffffff);  // Indicates 64bit
-    Push32(cfi_info, 0);
-    Push32(cfi_info, 0);
-  } else {
-    Push32(cfi_info, 0);
-  }
-
-  // CIE id: always 0.
-  if (is_x86_64) {
-    Push32(cfi_info, 0);
-    Push32(cfi_info, 0);
-  } else {
-    Push32(cfi_info, 0);
-  }
-
-  // Version: always 1.
-  cfi_info->push_back(0x01);
-
-  // Augmentation: 'zR\0'
-  cfi_info->push_back(0x7a);
-  cfi_info->push_back(0x52);
-  cfi_info->push_back(0x0);
-
-  // Code alignment: 1.
-  EncodeUnsignedLeb128(1, cfi_info);
-
-  // Data alignment.
-  if (is_x86_64) {
-    EncodeSignedLeb128(-8, cfi_info);
-  } else {
-    EncodeSignedLeb128(-4, cfi_info);
-  }
-
-  // Return address register.
-  if (is_x86_64) {
-    // R16(RIP)
-    cfi_info->push_back(0x10);
-  } else {
-    // R8(EIP)
-    cfi_info->push_back(0x08);
-  }
-
-  // Augmentation length: 1.
-  cfi_info->push_back(1);
-
-  // Augmentation data.
-  if (is_x86_64) {
-    // 0x04 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata8).
-    cfi_info->push_back(0x04);
-  } else {
-    // 0x03 ((DW_EH_PE_absptr << 4) | DW_EH_PE_udata4).
-    cfi_info->push_back(0x03);
-  }
-
-  // Initial instructions.
-  if (is_x86_64) {
-    // DW_CFA_def_cfa R7(RSP) 8.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x07);
-    cfi_info->push_back(0x08);
-
-    // DW_CFA_offset R16(RIP) 1 (* -8).
-    cfi_info->push_back(0x90);
-    cfi_info->push_back(0x01);
-  } else {
-    // DW_CFA_def_cfa R4(ESP) 4.
-    cfi_info->push_back(0x0c);
-    cfi_info->push_back(0x04);
-    cfi_info->push_back(0x04);
-
-    // DW_CFA_offset R8(EIP) 1 (* -4).
-    cfi_info->push_back(0x88);
-    cfi_info->push_back(0x01);
-  }
-
-  // Padding to a multiple of 4
-  while ((cfi_info->size() & 3) != 0) {
-    // DW_CFA_nop is encoded as 0.
-    cfi_info->push_back(0);
-  }
-
-  // Set the length of the CIE inside the generated bytes.
-  if (is_x86_64) {
-    uint32_t length = cfi_info->size() - 12;
-    UpdateWord(cfi_info, 4, length);
-  } else {
-    uint32_t length = cfi_info->size() - 4;
-    UpdateWord(cfi_info, 0, length);
-  }
-  return cfi_info;
-}
-
-std::vector<uint8_t>* ConstructCIEFrame(InstructionSet isa) {
-  switch (isa) {
-    case kX86:
-      return ConstructCIEFrameX86(false);
-    case kX86_64:
-      return ConstructCIEFrameX86(true);
-
-    default:
-      // Not implemented.
-      return nullptr;
-  }
-}
-
 class OatWriterWrapper FINAL : public CodeOutput {
  public:
   explicit OatWriterWrapper(OatWriter* oat_writer) : oat_writer_(oat_writer) {}
@@ -257,7 +110,8 @@
     return false;
   }
 
-  if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
+  if (compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols() &&
+      !oat_writer->GetMethodDebugInfo().empty()) {
     WriteDebugSymbols(compiler_driver_, builder.get(), oat_writer);
   }
 
@@ -277,344 +131,6 @@
   return builder->Write();
 }
 
-// TODO: rewriting it using DexFile::DecodeDebugInfo needs unneeded stuff.
-static void GetLineInfoForJava(const uint8_t* dbgstream, DefaultSrcMap* dex2line) {
-  if (dbgstream == nullptr) {
-    return;
-  }
-
-  int adjopcode;
-  uint32_t dex_offset = 0;
-  uint32_t java_line = DecodeUnsignedLeb128(&dbgstream);
-
-  // skip parameters
-  for (uint32_t param_count = DecodeUnsignedLeb128(&dbgstream); param_count != 0; --param_count) {
-    DecodeUnsignedLeb128(&dbgstream);
-  }
-
-  for (bool is_end = false; is_end == false; ) {
-    uint8_t opcode = *dbgstream;
-    dbgstream++;
-    switch (opcode) {
-    case DexFile::DBG_END_SEQUENCE:
-      is_end = true;
-      break;
-
-    case DexFile::DBG_ADVANCE_PC:
-      dex_offset += DecodeUnsignedLeb128(&dbgstream);
-      break;
-
-    case DexFile::DBG_ADVANCE_LINE:
-      java_line += DecodeSignedLeb128(&dbgstream);
-      break;
-
-    case DexFile::DBG_START_LOCAL:
-    case DexFile::DBG_START_LOCAL_EXTENDED:
-      DecodeUnsignedLeb128(&dbgstream);
-      DecodeUnsignedLeb128(&dbgstream);
-      DecodeUnsignedLeb128(&dbgstream);
-
-      if (opcode == DexFile::DBG_START_LOCAL_EXTENDED) {
-        DecodeUnsignedLeb128(&dbgstream);
-      }
-      break;
-
-    case DexFile::DBG_END_LOCAL:
-    case DexFile::DBG_RESTART_LOCAL:
-      DecodeUnsignedLeb128(&dbgstream);
-      break;
-
-    case DexFile::DBG_SET_PROLOGUE_END:
-    case DexFile::DBG_SET_EPILOGUE_BEGIN:
-    case DexFile::DBG_SET_FILE:
-      break;
-
-    default:
-      adjopcode = opcode - DexFile::DBG_FIRST_SPECIAL;
-      dex_offset += adjopcode / DexFile::DBG_LINE_RANGE;
-      java_line += DexFile::DBG_LINE_BASE + (adjopcode % DexFile::DBG_LINE_RANGE);
-      dex2line->push_back({dex_offset, static_cast<int32_t>(java_line)});
-      break;
-    }
-  }
-}
-
-/*
- * @brief Generate the DWARF debug_info and debug_abbrev sections
- * @param oat_writer The Oat file Writer.
- * @param dbg_info Compilation unit information.
- * @param dbg_abbrev Abbreviations used to generate dbg_info.
- * @param dbg_str Debug strings.
- */
-static void FillInCFIInformation(OatWriter* oat_writer,
-                                 std::vector<uint8_t>* dbg_info,
-                                 std::vector<uint8_t>* dbg_abbrev,
-                                 std::vector<uint8_t>* dbg_str,
-                                 std::vector<uint8_t>* dbg_line,
-                                 uint32_t text_section_offset) {
-  const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetCFIMethodInfo();
-
-  uint32_t producer_str_offset = PushStr(dbg_str, "Android dex2oat");
-
-  constexpr bool use_64bit_addresses = false;
-
-  // Create the debug_abbrev section with boilerplate information.
-  // We only care about low_pc and high_pc right now for the compilation
-  // unit and methods.
-
-  // Tag 1: Compilation unit: DW_TAG_compile_unit.
-  PushByte(dbg_abbrev, 1);
-  PushByte(dbg_abbrev, dwarf::DW_TAG_compile_unit);
-
-  // There are children (the methods).
-  PushByte(dbg_abbrev, dwarf::DW_CHILDREN_yes);
-
-  // DW_AT_producer DW_FORM_data1.
-  // REVIEW: we can get rid of dbg_str section if
-  // DW_FORM_string (immediate string) was used everywhere instead of
-  // DW_FORM_strp (ref to string from .debug_str section).
-  // DW_FORM_strp makes sense only if we reuse the strings.
-  PushByte(dbg_abbrev, dwarf::DW_AT_producer);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_strp);
-
-  // DW_LANG_Java DW_FORM_data1.
-  PushByte(dbg_abbrev, dwarf::DW_AT_language);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_data1);
-
-  // DW_AT_low_pc DW_FORM_addr.
-  PushByte(dbg_abbrev, dwarf::DW_AT_low_pc);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_addr);
-
-  // DW_AT_high_pc DW_FORM_addr.
-  PushByte(dbg_abbrev, dwarf::DW_AT_high_pc);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_addr);
-
-  if (dbg_line != nullptr) {
-    // DW_AT_stmt_list DW_FORM_sec_offset.
-    PushByte(dbg_abbrev, dwarf::DW_AT_stmt_list);
-    PushByte(dbg_abbrev, dwarf::DW_FORM_data4);
-  }
-
-  // End of DW_TAG_compile_unit.
-  PushByte(dbg_abbrev, 0);  // DW_AT.
-  PushByte(dbg_abbrev, 0);  // DW_FORM.
-
-  // Tag 2: Compilation unit: DW_TAG_subprogram.
-  PushByte(dbg_abbrev, 2);
-  PushByte(dbg_abbrev, dwarf::DW_TAG_subprogram);
-
-  // There are no children.
-  PushByte(dbg_abbrev, dwarf::DW_CHILDREN_no);
-
-  // Name of the method.
-  PushByte(dbg_abbrev, dwarf::DW_AT_name);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_strp);
-
-  // DW_AT_low_pc DW_FORM_addr.
-  PushByte(dbg_abbrev, dwarf::DW_AT_low_pc);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_addr);
-
-  // DW_AT_high_pc DW_FORM_addr.
-  PushByte(dbg_abbrev, dwarf::DW_AT_high_pc);
-  PushByte(dbg_abbrev, dwarf::DW_FORM_addr);
-
-  // End of DW_TAG_subprogram.
-  PushByte(dbg_abbrev, 0);  // DW_AT.
-  PushByte(dbg_abbrev, 0);  // DW_FORM.
-
-  // End of abbrevs for compilation unit
-  PushByte(dbg_abbrev, 0);
-
-  // Start the debug_info section with the header information
-  // 'unit_length' will be filled in later.
-  int cunit_length = dbg_info->size();
-  Push32(dbg_info, 0);
-
-  // 'version' - 3.
-  PushHalf(dbg_info, 3);
-
-  // Offset into .debug_abbrev section (always 0).
-  Push32(dbg_info, 0);
-
-  // Address size: 4 or 8.
-  PushByte(dbg_info, use_64bit_addresses ? 8 : 4);
-
-  // Start the description for the compilation unit.
-  // This uses tag 1.
-  PushByte(dbg_info, 1);
-
-  // The producer is Android dex2oat.
-  Push32(dbg_info, producer_str_offset);
-
-  // The language is Java.
-  PushByte(dbg_info, dwarf::DW_LANG_Java);
-
-  // low_pc and high_pc.
-  uint32_t cunit_low_pc = static_cast<uint32_t>(-1);
-  uint32_t cunit_high_pc = 0;
-  for (auto method_info : method_infos) {
-    cunit_low_pc = std::min(cunit_low_pc, method_info.low_pc_);
-    cunit_high_pc = std::max(cunit_high_pc, method_info.high_pc_);
-  }
-  Push32(dbg_info, cunit_low_pc + text_section_offset);
-  Push32(dbg_info, cunit_high_pc + text_section_offset);
-
-  if (dbg_line != nullptr) {
-    // Line number table offset.
-    Push32(dbg_info, dbg_line->size());
-  }
-
-  for (auto method_info : method_infos) {
-    // Start a new TAG: subroutine (2).
-    PushByte(dbg_info, 2);
-
-    // Enter name, low_pc, high_pc.
-    Push32(dbg_info, PushStr(dbg_str, method_info.method_name_));
-    Push32(dbg_info, method_info.low_pc_ + text_section_offset);
-    Push32(dbg_info, method_info.high_pc_ + text_section_offset);
-  }
-
-  if (dbg_line != nullptr) {
-    // TODO: in gdb info functions <regexp> - reports Java functions, but
-    // source file is <unknown> because .debug_line is formed as one
-    // compilation unit. To fix this it is possible to generate
-    // a separate compilation unit for every distinct Java source.
-    // Each of the these compilation units can have several non-adjacent
-    // method ranges.
-
-    std::vector<dwarf::DebugLineWriter<>::FileEntry> files;
-    std::unordered_map<std::string, size_t> files_map;
-    std::vector<std::string> directories;
-    std::unordered_map<std::string, size_t> directories_map;
-
-    int code_factor_bits_ = 0;
-    int isa = -1;
-    switch (oat_writer->GetOatHeader().GetInstructionSet()) {
-      case kThumb2:
-        code_factor_bits_ = 1;  // 16-bit instuctions
-        isa = 1;  // DW_ISA_ARM_thumb.
-        break;
-      case kArm:
-        code_factor_bits_ = 1;  // 16-bit instructions
-        isa = 2;  // DW_ISA_ARM_arm.
-        break;
-      case kArm64:
-      case kMips:
-      case kMips64:
-        code_factor_bits_ = 2;  // 32-bit instructions
-        break;
-      case kNone:
-      case kX86:
-      case kX86_64:
-        break;
-    }
-
-    dwarf::DebugLineOpCodeWriter<> opcodes(use_64bit_addresses, code_factor_bits_);
-    opcodes.SetAddress(text_section_offset + cunit_low_pc);
-    if (isa != -1) {
-      opcodes.SetISA(isa);
-    }
-    DefaultSrcMap dex2line_map;
-    for (size_t i = 0; i < method_infos.size(); i++) {
-      const OatWriter::DebugInfo& method_info = method_infos[i];
-
-      // Addresses in the line table should be unique and increasing.
-      if (method_info.deduped_) {
-        continue;
-      }
-
-      // Get and deduplicate directory and filename.
-      int file_index = 0;  // 0 - primary source file of the compilation.
-      if (method_info.src_file_name_ != nullptr) {
-        std::string file_name(method_info.src_file_name_);
-        size_t file_name_slash = file_name.find_last_of('/');
-        std::string class_name(method_info.class_descriptor_);
-        size_t class_name_slash = class_name.find_last_of('/');
-        std::string full_path(file_name);
-
-        // Guess directory from package name.
-        int directory_index = 0;  // 0 - current directory of the compilation.
-        if (file_name_slash == std::string::npos &&  // Just filename.
-            class_name.front() == 'L' &&  // Type descriptor for a class.
-            class_name_slash != std::string::npos) {  // Has package name.
-          std::string package_name = class_name.substr(1, class_name_slash - 1);
-          auto it = directories_map.find(package_name);
-          if (it == directories_map.end()) {
-            directory_index = 1 + directories.size();
-            directories_map.emplace(package_name, directory_index);
-            directories.push_back(package_name);
-          } else {
-            directory_index = it->second;
-          }
-          full_path = package_name + "/" + file_name;
-        }
-
-        // Add file entry.
-        auto it2 = files_map.find(full_path);
-        if (it2 == files_map.end()) {
-          file_index = 1 + files.size();
-          files_map.emplace(full_path, file_index);
-          files.push_back(dwarf::DebugLineWriter<>::FileEntry {
-            file_name,
-            directory_index,
-            0,  // Modification time - NA.
-            0,  // File size - NA.
-          });
-        } else {
-          file_index = it2->second;
-        }
-      }
-      opcodes.SetFile(file_index);
-
-      // Generate mapping opcodes from PC to Java lines.
-      dex2line_map.clear();
-      GetLineInfoForJava(method_info.dbgstream_, &dex2line_map);
-      uint32_t low_pc = text_section_offset + method_info.low_pc_;
-      if (file_index != 0 && !dex2line_map.empty()) {
-        bool first = true;
-        for (SrcMapElem pc2dex : method_info.compiled_method_->GetSrcMappingTable()) {
-          uint32_t pc = pc2dex.from_;
-          int dex = pc2dex.to_;
-          auto dex2line = dex2line_map.Find(static_cast<uint32_t>(dex));
-          if (dex2line.first) {
-            int line = dex2line.second;
-            if (first) {
-              first = false;
-              if (pc > 0) {
-                // Assume that any preceding code is prologue.
-                int first_line = dex2line_map.front().to_;
-                // Prologue is not a sensible place for a breakpoint.
-                opcodes.NegateStmt();
-                opcodes.AddRow(low_pc, first_line);
-                opcodes.NegateStmt();
-                opcodes.SetPrologueEnd();
-              }
-              opcodes.AddRow(low_pc + pc, line);
-            } else if (line != opcodes.CurrentLine()) {
-              opcodes.AddRow(low_pc + pc, line);
-            }
-          }
-        }
-      } else {
-        // line 0 - instruction cannot be attributed to any source line.
-        opcodes.AddRow(low_pc, 0);
-      }
-    }
-
-    opcodes.AdvancePC(text_section_offset + cunit_high_pc);
-    opcodes.EndSequence();
-
-    dwarf::DebugLineWriter<> dbg_line_writer(dbg_line);
-    dbg_line_writer.WriteTable(directories, files, opcodes);
-  }
-
-  // One byte terminator.
-  PushByte(dbg_info, 0);
-
-  // We have now walked all the methods.  Fill in lengths.
-  UpdateWord(dbg_info, cunit_length, dbg_info->size() - cunit_length - 4);
-}
-
 template <typename Elf_Word, typename Elf_Sword, typename Elf_Addr,
           typename Elf_Dyn, typename Elf_Sym, typename Elf_Ehdr,
           typename Elf_Phdr, typename Elf_Shdr>
@@ -624,20 +140,22 @@
                               ElfBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Dyn,
                                          Elf_Sym, Elf_Ehdr, Elf_Phdr, Elf_Shdr>* builder,
                               OatWriter* oat_writer) {
-  std::unique_ptr<std::vector<uint8_t>> cfi_info(
-      ConstructCIEFrame(compiler_driver->GetInstructionSet()));
-
-  Elf_Addr text_section_address = builder->GetTextBuilder().GetSection()->sh_addr;
-
   // Iterate over the compiled methods.
-  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetCFIMethodInfo();
+  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
   ElfSymtabBuilder<Elf_Word, Elf_Sword, Elf_Addr, Elf_Sym, Elf_Shdr>* symtab =
       builder->GetSymtabBuilder();
   for (auto it = method_info.begin(); it != method_info.end(); ++it) {
+    std::string name = PrettyMethod(it->dex_method_index_, *it->dex_file_, true);
+    if (it->deduped_) {
+      // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol
+      // so that it will show up in a debuggerd crash report.
+      name += " [ DEDUPED ]";
+    }
+
     uint32_t low_pc = it->low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(it->method_name_, &builder->GetTextBuilder(), low_pc,
+    symtab->AddSymbol(name, &builder->GetTextBuilder(), low_pc,
                       true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
@@ -646,109 +164,29 @@
       symtab->AddSymbol("$t", &builder->GetTextBuilder(), it->low_pc_ & ~1, true,
                         0, STB_LOCAL, STT_NOTYPE);
     }
-
-    // Include CFI for compiled method, if possible.
-    if (cfi_info.get() != nullptr) {
-      DCHECK(it->compiled_method_ != nullptr);
-
-      // Copy in the FDE, if present
-      const SwapVector<uint8_t>* fde = it->compiled_method_->GetCFIInfo();
-      if (fde != nullptr) {
-        // Copy the information into cfi_info and then fix the address in the new copy.
-        int cur_offset = cfi_info->size();
-        cfi_info->insert(cfi_info->end(), fde->begin(), fde->end());
-
-        bool is_64bit = *(reinterpret_cast<const uint32_t*>(fde->data())) == 0xffffffff;
-
-        // Set the 'CIE_pointer' field.
-        uint64_t CIE_pointer = cur_offset + (is_64bit ? 12 : 4);
-        uint64_t offset_to_update = CIE_pointer;
-        if (is_64bit) {
-          (*cfi_info)[offset_to_update+0] = CIE_pointer;
-          (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
-          (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
-          (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
-          (*cfi_info)[offset_to_update+4] = CIE_pointer >> 32;
-          (*cfi_info)[offset_to_update+5] = CIE_pointer >> 40;
-          (*cfi_info)[offset_to_update+6] = CIE_pointer >> 48;
-          (*cfi_info)[offset_to_update+7] = CIE_pointer >> 56;
-        } else {
-          (*cfi_info)[offset_to_update+0] = CIE_pointer;
-          (*cfi_info)[offset_to_update+1] = CIE_pointer >> 8;
-          (*cfi_info)[offset_to_update+2] = CIE_pointer >> 16;
-          (*cfi_info)[offset_to_update+3] = CIE_pointer >> 24;
-        }
-
-        // Set the 'initial_location' field.
-        offset_to_update += is_64bit ? 8 : 4;
-        if (is_64bit) {
-          const uint64_t quick_code_start = it->low_pc_ + text_section_address;
-          (*cfi_info)[offset_to_update+0] = quick_code_start;
-          (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
-          (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
-          (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
-          (*cfi_info)[offset_to_update+4] = quick_code_start >> 32;
-          (*cfi_info)[offset_to_update+5] = quick_code_start >> 40;
-          (*cfi_info)[offset_to_update+6] = quick_code_start >> 48;
-          (*cfi_info)[offset_to_update+7] = quick_code_start >> 56;
-        } else {
-          const uint32_t quick_code_start = it->low_pc_ + text_section_address;
-          (*cfi_info)[offset_to_update+0] = quick_code_start;
-          (*cfi_info)[offset_to_update+1] = quick_code_start >> 8;
-          (*cfi_info)[offset_to_update+2] = quick_code_start >> 16;
-          (*cfi_info)[offset_to_update+3] = quick_code_start >> 24;
-        }
-      }
-    }
   }
 
-  bool hasCFI = (cfi_info.get() != nullptr);
-  bool hasLineInfo = false;
-  for (auto& dbg_info : oat_writer->GetCFIMethodInfo()) {
-    if (dbg_info.dbgstream_ != nullptr &&
-        !dbg_info.compiled_method_->GetSrcMappingTable().empty()) {
-      hasLineInfo = true;
-      break;
-    }
-  }
+  typedef ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> Section;
+  Section eh_frame(".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0);
+  Section debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+  Section debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+  Section debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
+  Section debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
 
-  if (hasLineInfo || hasCFI) {
-    ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_info(".debug_info",
-                                                                   SHT_PROGBITS,
-                                                                   0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_abbrev(".debug_abbrev",
-                                                                     SHT_PROGBITS,
-                                                                     0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_str(".debug_str",
-                                                                  SHT_PROGBITS,
-                                                                  0, nullptr, 0, 1, 0);
-    ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> debug_line(".debug_line",
-                                                                   SHT_PROGBITS,
-                                                                   0, nullptr, 0, 1, 0);
+  dwarf::WriteDebugSections(compiler_driver,
+                            oat_writer,
+                            builder->GetTextBuilder().GetSection()->sh_addr,
+                            eh_frame.GetBuffer(),
+                            debug_info.GetBuffer(),
+                            debug_abbrev.GetBuffer(),
+                            debug_str.GetBuffer(),
+                            debug_line.GetBuffer());
 
-    FillInCFIInformation(oat_writer, debug_info.GetBuffer(),
-                         debug_abbrev.GetBuffer(), debug_str.GetBuffer(),
-                         hasLineInfo ? debug_line.GetBuffer() : nullptr,
-                         text_section_address);
-
-    builder->RegisterRawSection(debug_info);
-    builder->RegisterRawSection(debug_abbrev);
-
-    if (hasCFI) {
-      ElfRawSectionBuilder<Elf_Word, Elf_Sword, Elf_Shdr> eh_frame(".eh_frame",
-                                                                   SHT_PROGBITS,
-                                                                   SHF_ALLOC,
-                                                                   nullptr, 0, 4, 0);
-      eh_frame.SetBuffer(std::move(*cfi_info.get()));
-      builder->RegisterRawSection(eh_frame);
-    }
-
-    if (hasLineInfo) {
-      builder->RegisterRawSection(debug_line);
-    }
-
-    builder->RegisterRawSection(debug_str);
-  }
+  builder->RegisterRawSection(eh_frame);
+  builder->RegisterRawSection(debug_info);
+  builder->RegisterRawSection(debug_abbrev);
+  builder->RegisterRawSection(debug_str);
+  builder->RegisterRawSection(debug_line);
 }
 
 // Explicit instantiations
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index cf97943..627a42e 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -205,6 +205,7 @@
     uint32_t oat_file_end = ART_BASE_ADDRESS + (10 * KB);
     ImageHeader image_header(image_begin,
                              image_size_,
+                             0u, 0u,
                              image_bitmap_offset,
                              image_bitmap_size,
                              image_roots,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 1ede228..6f8884a 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -22,6 +22,7 @@
 #include <numeric>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -40,8 +41,8 @@
 #include "globals.h"
 #include "image.h"
 #include "intern_table.h"
+#include "linear_alloc.h"
 #include "lock_word.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
@@ -57,7 +58,6 @@
 #include "handle_scope-inl.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 
-using ::art::mirror::ArtField;
 using ::art::mirror::ArtMethod;
 using ::art::mirror::Class;
 using ::art::mirror::DexCache;
@@ -164,6 +164,9 @@
 
   Thread::Current()->TransitionFromSuspendedToRunnable();
   CreateHeader(oat_loaded_size, oat_data_offset);
+  // TODO: heap validation can't handle these fix-up passes.
+  Runtime::Current()->GetHeap()->DisableObjectValidation();
+  CopyAndFixupNativeData();
   CopyAndFixupObjects();
   Thread::Current()->TransitionFromRunnableToSuspended(kNative);
 
@@ -186,9 +189,10 @@
     return EXIT_FAILURE;
   }
 
-  // Write out the image.
+  // Write out the image + fields.
+  const auto write_count = image_header->GetImageSize() + image_header->GetArtFieldsSize();
   CHECK_EQ(image_end_, image_header->GetImageSize());
-  if (!image_file->WriteFully(image_->Begin(), image_end_)) {
+  if (!image_file->WriteFully(image_->Begin(), write_count)) {
     PLOG(ERROR) << "Failed to write image file " << image_filename;
     image_file->Erase();
     return false;
@@ -204,6 +208,8 @@
     return false;
   }
 
+  CHECK_EQ(image_header->GetImageBitmapOffset() + image_header->GetImageBitmapSize(),
+           static_cast<size_t>(image_file->GetLength()));
   if (image_file->FlushCloseOrErase() != 0) {
     PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
     return false;
@@ -219,6 +225,8 @@
   mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset);
   DCHECK_ALIGNED(obj, kObjectAlignment);
 
+  static size_t max_offset = 0;
+  max_offset = std::max(max_offset, offset);
   image_bitmap_->Set(obj);  // Mark the obj as mutated, since we will end up changing it.
   {
     // Remember the object-inside-of-the-image's hash code so we can restore it after the copy.
@@ -302,13 +310,26 @@
     DexCache* dex_cache = class_linker->GetDexCache(idx);
     const DexFile* dex_file = dex_cache->GetDexFile();
     dex_cache_array_starts_.Put(dex_file, size);
-    DexCacheArraysLayout layout(dex_file);
+    DexCacheArraysLayout layout(target_ptr_size_, dex_file);
     DCHECK(layout.Valid());
-    dex_cache_array_indexes_.Put(dex_cache->GetResolvedTypes(), size + layout.TypesOffset());
-    dex_cache_array_indexes_.Put(dex_cache->GetResolvedMethods(), size + layout.MethodsOffset());
-    dex_cache_array_indexes_.Put(dex_cache->GetResolvedFields(), size + layout.FieldsOffset());
-    dex_cache_array_indexes_.Put(dex_cache->GetStrings(), size + layout.StringsOffset());
+    auto types_size = layout.TypesSize(dex_file->NumTypeIds());
+    auto methods_size = layout.MethodsSize(dex_file->NumMethodIds());
+    auto fields_size = layout.FieldsSize(dex_file->NumFieldIds());
+    auto strings_size = layout.StringsSize(dex_file->NumStringIds());
+    dex_cache_array_indexes_.Put(
+        dex_cache->GetResolvedTypes(),
+        DexCacheArrayLocation {size + layout.TypesOffset(), types_size});
+    dex_cache_array_indexes_.Put(
+        dex_cache->GetResolvedMethods(),
+        DexCacheArrayLocation {size + layout.MethodsOffset(), methods_size});
+    dex_cache_array_indexes_.Put(
+        dex_cache->GetResolvedFields(),
+        DexCacheArrayLocation {size + layout.FieldsOffset(), fields_size});
+    dex_cache_array_indexes_.Put(
+        dex_cache->GetStrings(),
+        DexCacheArrayLocation {size + layout.StringsOffset(), strings_size});
     size += layout.Size();
+    CHECK_EQ(layout.Size(), types_size + methods_size + fields_size + strings_size);
   }
   // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned()
    // when AssignImageBinSlot() assigns their indexes out of order.
@@ -405,12 +426,20 @@
       }
     } else if (object->GetClass<kVerifyNone>()->IsStringClass()) {
       bin = kBinString;  // Strings are almost always immutable (except for object header).
-    } else if (object->IsObjectArray()) {
-      auto it = dex_cache_array_indexes_.find(object);
-      if (it != dex_cache_array_indexes_.end()) {
-        bin = kBinDexCacheArray;
-        current_offset = it->second;  // Use prepared offset defined by the DexCacheLayout.
-      }  // else bin = kBinRegular
+    } else if (object->IsArrayInstance()) {
+      mirror::Class* klass = object->GetClass<kVerifyNone>();
+      auto* component_type = klass->GetComponentType();
+      if (!component_type->IsPrimitive() || component_type->IsPrimitiveInt() ||
+          component_type->IsPrimitiveLong()) {
+        auto it = dex_cache_array_indexes_.find(object);
+        if (it != dex_cache_array_indexes_.end()) {
+          bin = kBinDexCacheArray;
+          // Use the prepared offset defined by the DexCacheArraysLayout.
+          current_offset = it->second.offset_;
+          // Override the size in case of cross-compilation.
+          object_size = it->second.length_;
+        }  // else bin = kBinRegular
+      }
     }  // else bin = kBinRegular
   }
 
@@ -465,7 +494,10 @@
 }
 
 bool ImageWriter::AllocMemory() {
-  size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
+  auto* runtime = Runtime::Current();
+  const size_t heap_size = runtime->GetHeap()->GetTotalMemory();
+  // Add linear alloc usage since we need to have room for the ArtFields.
+  const size_t length = RoundUp(heap_size + runtime->GetLinearAlloc()->GetUsedMemory(), kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE,
                                     false, false, &error_msg));
@@ -476,7 +508,7 @@
 
   // Create the image bitmap.
   image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(),
-                                                                    length));
+                                                                    RoundUp(length, kPageSize)));
   if (image_bitmap_.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate memory for image bitmap";
     return false;
@@ -698,9 +730,9 @@
       }
     }
     for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-      ArtField* field = dex_cache->GetResolvedField(i);
-      if (field != NULL && !IsImageClass(field->GetDeclaringClass())) {
-        dex_cache->SetResolvedField(i, NULL);
+      ArtField* field = dex_cache->GetResolvedField(i, sizeof(void*));
+      if (field != nullptr && !IsImageClass(field->GetDeclaringClass())) {
+        dex_cache->SetResolvedField(i, nullptr, sizeof(void*));
       }
     }
     // Clean the dex field. It might have been populated during the initialization phase, but
@@ -786,7 +818,7 @@
   // caches. We check that the number of dex caches does not change.
   size_t dex_cache_count;
   {
-    ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
+    ReaderMutexLock mu(self, *class_linker->DexLock());
     dex_cache_count = class_linker->GetDexCacheCount();
   }
   Handle<ObjectArray<Object>> dex_caches(
@@ -794,7 +826,7 @@
                                               dex_cache_count)));
   CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array.";
   {
-    ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
+    ReaderMutexLock mu(self, *class_linker->DexLock());
     CHECK_EQ(dex_cache_count, class_linker->GetDexCacheCount())
         << "The number of dex caches changed.";
     for (size_t i = 0; i < dex_cache_count; ++i) {
@@ -861,9 +893,9 @@
     WalkInstanceFields(h_obj.Get(), klass.Get());
     // Walk static fields of a Class.
     if (h_obj->IsClass()) {
-      size_t num_static_fields = klass->NumReferenceStaticFields();
+      size_t num_reference_static_fields = klass->NumReferenceStaticFields();
       MemberOffset field_offset = klass->GetFirstReferenceStaticFieldOffset();
-      for (size_t i = 0; i < num_static_fields; ++i) {
+      for (size_t i = 0; i < num_reference_static_fields; ++i) {
         mirror::Object* value = h_obj->GetFieldObject<mirror::Object>(field_offset);
         if (value != nullptr) {
           WalkFieldsInOrder(value);
@@ -871,6 +903,21 @@
         field_offset = MemberOffset(field_offset.Uint32Value() +
                                     sizeof(mirror::HeapReference<mirror::Object>));
       }
+
+      // Visit and assign offsets for fields.
+      ArtField* fields[2] = { h_obj->AsClass()->GetSFields(), h_obj->AsClass()->GetIFields() };
+      size_t num_fields[2] = { h_obj->AsClass()->NumStaticFields(),
+          h_obj->AsClass()->NumInstanceFields() };
+      for (size_t i = 0; i < 2; ++i) {
+        for (size_t j = 0; j < num_fields[i]; ++j) {
+          auto* field = fields[i] + j;
+          auto it = art_field_reloc_.find(field);
+          CHECK(it == art_field_reloc_.end()) << "Field at index " << i << ":" << j
+              << " already assigned " << PrettyField(field);
+          art_field_reloc_.emplace(field, bin_slot_sizes_[kBinArtField]);
+          bin_slot_sizes_[kBinArtField] += sizeof(ArtField);
+        }
+      }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
       int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength();
@@ -921,7 +968,6 @@
   // know where image_roots is going to end up
   image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment);  // 64-bit-alignment
 
-  // TODO: Image spaces only?
   DCHECK_LT(image_end_, image_->Size());
   image_objects_offset_begin_ = image_end_;
   // Prepare bin slots for dex cache arrays.
@@ -935,34 +981,47 @@
     previous_sizes += bin_slot_sizes_[i];
   }
   DCHECK_EQ(previous_sizes, GetBinSizeSum());
+  DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
+
   // Transform each object's bin slot into an offset which will be used to do the final copy.
   heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
   DCHECK(saved_hashes_map_.empty());  // All binslot hashes should've been put into vector by now.
 
-  DCHECK_GT(image_end_, GetBinSizeSum());
+  DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
 
   image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots.Get()));
 
-  // Note that image_end_ is left at end of used space
+  // Note that image_end_ is left at end of used mirror space
 }
 
 void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
   CHECK_NE(0U, oat_loaded_size);
   const uint8_t* oat_file_begin = GetOatFileBegin();
   const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
-
   oat_data_begin_ = oat_file_begin + oat_data_offset;
   const uint8_t* oat_data_end = oat_data_begin_ + oat_file_->Size();
-
+  // Write out sections.
+  size_t cur_pos = image_end_;
+  // Add fields.
+  auto fields_offset = cur_pos;
+  CHECK_EQ(image_objects_offset_begin_ + GetBinSizeSum(kBinArtField), fields_offset);
+  auto fields_size = bin_slot_sizes_[kBinArtField];
+  cur_pos += fields_size;
   // Return to write header at start of image with future location of image_roots. At this point,
-  // image_end_ is the size of the image (excluding bitmaps).
+  // image_end_ is the size of the image (excluding bitmaps and ArtFields).
+  /*
   const size_t heap_bytes_per_bitmap_byte = kBitsPerByte * kObjectAlignment;
   const size_t bitmap_bytes = RoundUp(image_end_, heap_bytes_per_bitmap_byte) /
       heap_bytes_per_bitmap_byte;
+      */
+  const size_t bitmap_bytes = image_bitmap_->Size();
+  auto bitmap_offset = RoundUp(cur_pos, kPageSize);
+  auto bitmap_size = RoundUp(bitmap_bytes, kPageSize);
+  cur_pos += bitmap_size;
   new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_),
                                     static_cast<uint32_t>(image_end_),
-                                    RoundUp(image_end_, kPageSize),
-                                    RoundUp(bitmap_bytes, kPageSize),
+                                    fields_offset, fields_size,
+                                    bitmap_offset, bitmap_size,
                                     image_roots_address_,
                                     oat_file_->GetOatHeader().GetChecksum(),
                                     PointerToLowMemUInt32(oat_file_begin),
@@ -972,11 +1031,21 @@
                                     compile_pic_);
 }
 
+void ImageWriter::CopyAndFixupNativeData() {
+  // Copy ArtFields to their final locations and update the relocation map for convenience.
+  auto fields_offset = image_objects_offset_begin_ + GetBinSizeSum(kBinArtField);
+  for (auto& pair : art_field_reloc_) {
+    pair.second += fields_offset;
+    auto* dest = image_->Begin() + pair.second;
+    DCHECK_GE(dest, image_->Begin() + image_end_);
+    memcpy(dest, pair.first, sizeof(ArtField));
+    reinterpret_cast<ArtField*>(dest)->SetDeclaringClass(
+        down_cast<Class*>(GetImageAddress(pair.first->GetDeclaringClass())));
+  }
+}
+
 void ImageWriter::CopyAndFixupObjects() {
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  // TODO: heap validation can't handle this fix up pass
-  heap->DisableObjectValidation();
-  // TODO: Image spaces only?
   heap->VisitObjects(CopyAndFixupObjectsCallback, this);
   // Fix up the object previously had hash codes.
   for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) {
@@ -990,26 +1059,88 @@
 void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
   DCHECK(obj != nullptr);
   DCHECK(arg != nullptr);
-  ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg);
+  reinterpret_cast<ImageWriter*>(arg)->CopyAndFixupObject(obj);
+}
+
+bool ImageWriter::CopyAndFixupIfDexCacheFieldArray(mirror::Object* dst, mirror::Object* obj,
+                                                   mirror::Class* klass) {
+  if (!klass->IsArrayClass()) {
+    return false;
+  }
+  auto* component_type = klass->GetComponentType();
+  bool is_int_arr = component_type->IsPrimitiveInt();
+  bool is_long_arr = component_type->IsPrimitiveLong();
+  if (!is_int_arr && !is_long_arr) {
+    return false;
+  }
+  auto it = dex_cache_array_indexes_.find(obj);  // Is this a dex cache array?
+  if (it == dex_cache_array_indexes_.end()) {
+    return false;
+  }
+  mirror::Array* arr = obj->AsArray();
+  CHECK_EQ(reinterpret_cast<Object*>(
+      image_->Begin() + it->second.offset_ + image_objects_offset_begin_), dst);
+  dex_cache_array_indexes_.erase(it);
+  // Fixup int pointers for the field array.
+  CHECK(!arr->IsObjectArray());
+  const size_t num_elements = arr->GetLength();
+  if (target_ptr_size_ == 4) {
+    // Will get fixed up by fixup object.
+    dst->SetClass(down_cast<mirror::Class*>(
+        GetImageAddress(mirror::IntArray::GetArrayClass())));
+  } else {
+    DCHECK_EQ(target_ptr_size_, 8u);
+    dst->SetClass(down_cast<mirror::Class*>(
+        GetImageAddress(mirror::LongArray::GetArrayClass())));
+  }
+  mirror::Array* dest_array = down_cast<mirror::Array*>(dst);
+  dest_array->SetLength(num_elements);
+  for (size_t i = 0, count = num_elements; i < count; ++i) {
+    ArtField* field = reinterpret_cast<ArtField*>(is_int_arr ?
+        arr->AsIntArray()->GetWithoutChecks(i) : arr->AsLongArray()->GetWithoutChecks(i));
+    uint8_t* fixup_location = nullptr;
+    if (field != nullptr) {
+      auto it2 = art_field_reloc_.find(field);
+      CHECK(it2 != art_field_reloc_.end()) << "No relocation for field " << PrettyField(field);
+      fixup_location = image_begin_ + it2->second;
+    }
+    if (target_ptr_size_ == 4) {
+      down_cast<mirror::IntArray*>(dest_array)->SetWithoutChecks<kVerifyNone>(
+          i, static_cast<uint32_t>(reinterpret_cast<uint64_t>(fixup_location)));
+    } else {
+      down_cast<mirror::LongArray*>(dest_array)->SetWithoutChecks<kVerifyNone>(
+          i, reinterpret_cast<uint64_t>(fixup_location));
+    }
+  }
+  dst->SetLockWord(LockWord::Default(), false);
+  return true;
+}
+
+void ImageWriter::CopyAndFixupObject(Object* obj) {
   // see GetLocalAddress for similar computation
-  size_t offset = image_writer->GetImageOffset(obj);
-  uint8_t* dst = image_writer->image_->Begin() + offset;
+  size_t offset = GetImageOffset(obj);
+  auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset);
   const uint8_t* src = reinterpret_cast<const uint8_t*>(obj);
   size_t n;
-  if (obj->IsArtMethod()) {
+  mirror::Class* klass = obj->GetClass();
+
+  if (CopyAndFixupIfDexCacheFieldArray(dst, obj, klass)) {
+    return;
+  }
+  if (klass->IsArtMethodClass()) {
     // Size without pointer fields since we don't want to overrun the buffer if target art method
     // is 32 bits but source is 64 bits.
-    n = mirror::ArtMethod::SizeWithoutPointerFields(image_writer->target_ptr_size_);
+    n = mirror::ArtMethod::SizeWithoutPointerFields(target_ptr_size_);
   } else {
     n = obj->SizeOf();
   }
-  DCHECK_LT(offset + n, image_writer->image_->Size());
+  DCHECK_LE(offset + n, image_->Size());
   memcpy(dst, src, n);
-  Object* copy = reinterpret_cast<Object*>(dst);
+
   // Write in a hash code of objects which have inflated monitors or a hash code in their monitor
   // word.
-  copy->SetLockWord(LockWord::Default(), false);
-  image_writer->FixupObject(obj, copy);
+  dst->SetLockWord(LockWord::Default(), false);
+  FixupObject(obj, dst);
 }
 
 // Rewrite all the references in the copied object to point to their image address equivalent
@@ -1045,15 +1176,10 @@
   FixupClassVisitor(ImageWriter* image_writer, Object* copy) : FixupVisitor(image_writer, copy) {
   }
 
-  void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const
+  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     DCHECK(obj->IsClass());
     FixupVisitor::operator()(obj, offset, /*is_static*/false);
-
-    // TODO: Remove dead code
-    if (offset.Uint32Value() < mirror::Class::EmbeddedVTableOffset().Uint32Value()) {
-      return;
-    }
   }
 
   void operator()(mirror::Class* klass ATTRIBUTE_UNUSED,
@@ -1064,6 +1190,31 @@
   }
 };
 
+void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
+  // Fix up the class's ArtField array pointers to point at the fields' image copies.
+  ArtField* fields[2] = { orig->AsClass()->GetSFields(), orig->AsClass()->GetIFields() };
+  size_t num_fields[2] = { orig->AsClass()->NumStaticFields(),
+      orig->AsClass()->NumInstanceFields() };
+  // Update the arrays.
+  for (size_t i = 0; i < 2; ++i) {
+    if (num_fields[i] == 0) {
+      CHECK(fields[i] == nullptr);
+      continue;
+    }
+    auto it = art_field_reloc_.find(fields[i]);
+    CHECK(it != art_field_reloc_.end()) << PrettyClass(orig->AsClass()) << " : "
+        << PrettyField(fields[i]);
+    auto* image_fields = reinterpret_cast<ArtField*>(image_begin_ + it->second);
+    if (i == 0) {
+      down_cast<Class*>(copy)->SetSFieldsUnchecked(image_fields);
+    } else {
+      down_cast<Class*>(copy)->SetIFieldsUnchecked(image_fields);
+    }
+  }
+  FixupClassVisitor visitor(this, copy);
+  static_cast<mirror::Object*>(orig)->VisitReferences<true /*visit class*/>(visitor, visitor);
+}
+
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != nullptr);
   DCHECK(copy != nullptr);
@@ -1075,9 +1226,8 @@
       DCHECK_EQ(copy->GetReadBarrierPointer(), GetImageAddress(orig));
     }
   }
-  if (orig->IsClass() && orig->AsClass()->ShouldHaveEmbeddedImtAndVTable()) {
-    FixupClassVisitor visitor(this, copy);
-    orig->VisitReferences<true /*visit class*/>(visitor, visitor);
+  if (orig->IsClass()) {
+    FixupClass(orig->AsClass<kVerifyNone>(), down_cast<mirror::Class*>(copy));
   } else {
     FixupVisitor visitor(this, copy);
     orig->VisitReferences<true /*visit class*/>(visitor, visitor);
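
The int-array/long-array switch in CopyAndFixupIfDexCacheFieldArray above exists because the
dex cache field arrays hold target pointers: when cross-compiling, host and target pointer
sizes can differ, so every slot is written at the target's width. A sketch of that slot write,
using a hypothetical helper rather than ART code:

#include <cstddef>
#include <cstdint>
#include <cstring>

// On a 32-bit target the field array is an int[] (uint32_t slots); on a
// 64-bit target it is a long[] (uint64_t slots).
void WritePointerSlot(uint8_t* slot, uint64_t target_address, size_t target_ptr_size) {
  if (target_ptr_size == 4u) {
    const uint32_t value = static_cast<uint32_t>(target_address);
    std::memcpy(slot, &value, sizeof(value));
  } else {
    std::memcpy(slot, &target_address, sizeof(target_address));
  }
}
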
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 71044f7..a2d99ee 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -90,7 +90,7 @@
   }
 
   uint8_t* GetOatFileBegin() const {
-    return image_begin_ + RoundUp(image_end_, kPageSize);
+    return image_begin_ + RoundUp(image_end_ + bin_slot_sizes_[kBinArtField], kPageSize);
   }
 
   bool Write(const std::string& image_filename,
@@ -127,12 +127,16 @@
     kBinArtMethodNative,          // Art method that is actually native
     kBinArtMethodNotInitialized,  // Art method with a declaring class that wasn't initialized
     // Add more bins here if we add more segregation code.
+    // Non-mirror fields must be below. ArtFields should always be clean.
+    kBinArtField,
     kBinSize,
+    // Number of bins which are for mirror objects.
+    kBinMirrorCount = kBinArtField,
   };
 
   friend std::ostream& operator<<(std::ostream& stream, const Bin& bin);
 
-  static constexpr size_t kBinBits = MinimumBitsToStore(kBinSize - 1);
+  static constexpr size_t kBinBits = MinimumBitsToStore(kBinMirrorCount - 1);
   // uint32 = typeof(lockword_)
   static constexpr size_t kBinShift = BitSizeOf<uint32_t>() - kBinBits;
   // 111000.....0
@@ -251,11 +255,18 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
+  void CopyAndFixupNativeData() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void CopyAndFixupObject(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool CopyAndFixupIfDexCacheFieldArray(mirror::Object* dst, mirror::Object* obj,
+                                        mirror::Class* klass)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void FixupClass(mirror::Class* orig, mirror::Class* copy)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupObject(mirror::Object* orig, mirror::Object* copy)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -295,8 +306,13 @@
   // Memory mapped for generating the image.
   std::unique_ptr<MemMap> image_;
 
-  // Indexes for dex cache arrays (objects are inside of the image so that they don't move).
-  SafeMap<mirror::Object*, size_t> dex_cache_array_indexes_;
+  // Offsets and lengths for dex cache arrays (objects are inside the image so that they don't
+  // move).
+  struct DexCacheArrayLocation {
+    size_t offset_;
+    size_t length_;
+  };
+  SafeMap<mirror::Object*, DexCacheArrayLocation> dex_cache_array_indexes_;
 
   // The start offsets of the dex cache arrays.
   SafeMap<const DexFile*, size_t> dex_cache_array_starts_;
@@ -331,6 +347,11 @@
   size_t bin_slot_previous_sizes_[kBinSize];  // Number of bytes in previous bins.
   size_t bin_slot_count_[kBinSize];  // Number of objects in a bin
 
+  // ArtField relocation map. ArtFields are allocated as arrays of structs, but we want one
+  // entry per ArtField for convenience.
+  // ArtFields are placed right after the end of the image objects (aka sum of bin_slot_sizes_).
+  std::unordered_map<ArtField*, uintptr_t> art_field_reloc_;
+
   void* string_data_array_;  // The backing for the interned strings.
 
   friend class FixupVisitor;
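
The kBinBits/kBinShift/kBinMask constants above stash a bin index in the top bits of the
32-bit lock word while objects are being laid out. A self-contained sketch of that packing,
assuming 3 bin bits (the real width comes from MinimumBitsToStore(kBinMirrorCount - 1)):

#include <cstdint>

constexpr uint32_t kSketchBinBits = 3u;  // Assumption; ART derives this from the bin count.
constexpr uint32_t kSketchBinShift = 32u - kSketchBinBits;
constexpr uint32_t kSketchBinMask = ((1u << kSketchBinBits) - 1u) << kSketchBinShift;

// Pack a bin index and an index within that bin into one lock-word value.
constexpr uint32_t MakeBinSlot(uint32_t bin, uint32_t index) {
  return (bin << kSketchBinShift) | (index & ~kSketchBinMask);
}
constexpr uint32_t BinOf(uint32_t slot) { return slot >> kSketchBinShift; }
constexpr uint32_t IndexOf(uint32_t slot) { return slot & ~kSketchBinMask; }

static_assert(BinOf(MakeBinSlot(5u, 0x1234u)) == 5u, "bin round-trips");
static_assert(IndexOf(MakeBinSlot(5u, 0x1234u)) == 0x1234u, "index round-trips");
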
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
new file mode 100644
index 0000000..3a0d520
--- /dev/null
+++ b/compiler/jni/jni_cfi_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "cfi_test.h"
+#include "gtest/gtest.h"
+#include "jni/quick/calling_convention.h"
+#include "utils/assembler.h"
+
+#include "jni/jni_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class JNICFITest : public CFITest {
+ public:
+  // Enable this flag to generate the expected outputs.
+  static constexpr bool kGenerateExpected = false;
+
+  void TestImpl(InstructionSet isa, const char* isa_str,
+                const std::vector<uint8_t>& expected_asm,
+                const std::vector<uint8_t>& expected_cfi) {
+    // Description of a simple method.
+    const bool is_static = true;
+    const bool is_synchronized = false;
+    const char* shorty = "IIFII";
+    std::unique_ptr<JniCallingConvention> jni_conv(
+        JniCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+    std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
+        ManagedRuntimeCallingConvention::Create(is_static, is_synchronized, shorty, isa));
+    const int frame_size(jni_conv->FrameSize());
+    const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+
+    // Assemble the method.
+    std::unique_ptr<Assembler> jni_asm(Assembler::Create(isa));
+    jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
+                        callee_save_regs, mr_conv->EntrySpills());
+    jni_asm->IncreaseFrameSize(32);
+    jni_asm->DecreaseFrameSize(32);
+    jni_asm->RemoveFrame(frame_size, callee_save_regs);
+    jni_asm->EmitSlowPaths();
+    std::vector<uint8_t> actual_asm(jni_asm->CodeSize());
+    MemoryRegion code(&actual_asm[0], actual_asm.size());
+    jni_asm->FinalizeInstructions(code);
+    ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size);
+    const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data());
+
+    if (kGenerateExpected) {
+      GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+    } else {
+      EXPECT_EQ(expected_asm, actual_asm);
+      EXPECT_EQ(expected_cfi, actual_cfi);
+    }
+  }
+};
+
+#define TEST_ISA(isa) \
+  TEST_F(JNICFITest, isa) { \
+    std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+        expected_asm_##isa + arraysize(expected_asm_##isa)); \
+    std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+        expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+    TestImpl(isa, #isa, expected_asm, expected_cfi); \
+  }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+TEST_ISA(kMips)
+TEST_ISA(kMips64)
+
+#endif  // HAVE_ANDROID_OS
+
+}  // namespace art
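
Since the stringizing and token pasting in TEST_ISA can be hard to read, its expansion for the
first instantiation is exactly:

TEST_F(JNICFITest, kThumb2) {
  std::vector<uint8_t> expected_asm(expected_asm_kThumb2,
      expected_asm_kThumb2 + arraysize(expected_asm_kThumb2));
  std::vector<uint8_t> expected_cfi(expected_cfi_kThumb2,
      expected_cfi_kThumb2 + arraysize(expected_cfi_kThumb2));
  TestImpl(kThumb2, "kThumb2", expected_asm, expected_cfi);
}
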
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
new file mode 100644
index 0000000..47e6f10
--- /dev/null
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -0,0 +1,505 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+    0x2D, 0xE9, 0xE0, 0x4D, 0x2D, 0xED, 0x10, 0x8A, 0x89, 0xB0, 0x00, 0x90,
+    0xCD, 0xF8, 0x84, 0x10, 0x8D, 0xED, 0x22, 0x0A, 0xCD, 0xF8, 0x8C, 0x20,
+    0xCD, 0xF8, 0x90, 0x30, 0x88, 0xB0, 0x08, 0xB0, 0x09, 0xB0, 0xBD, 0xEC,
+    0x10, 0x8A, 0xBD, 0xE8, 0xE0, 0x8D,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+    0x44, 0x0E, 0x1C, 0x85, 0x07, 0x86, 0x06, 0x87, 0x05, 0x88, 0x04, 0x8A,
+    0x03, 0x8B, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x5C, 0x05, 0x50, 0x17, 0x05,
+    0x51, 0x16, 0x05, 0x52, 0x15, 0x05, 0x53, 0x14, 0x05, 0x54, 0x13, 0x05,
+    0x55, 0x12, 0x05, 0x56, 0x11, 0x05, 0x57, 0x10, 0x05, 0x58, 0x0F, 0x05,
+    0x59, 0x0E, 0x05, 0x5A, 0x0D, 0x05, 0x5B, 0x0C, 0x05, 0x5C, 0x0B, 0x05,
+    0x5D, 0x0A, 0x05, 0x5E, 0x09, 0x05, 0x5F, 0x08, 0x42, 0x0E, 0x80, 0x01,
+    0x54, 0x0E, 0xA0, 0x01, 0x42, 0x0E, 0x80, 0x01, 0x0A, 0x42, 0x0E, 0x5C,
+    0x44, 0x0E, 0x1C, 0x06, 0x50, 0x06, 0x51, 0x06, 0x52, 0x06, 0x53, 0x06,
+    0x54, 0x06, 0x55, 0x06, 0x56, 0x06, 0x57, 0x06, 0x58, 0x06, 0x59, 0x06,
+    0x5A, 0x06, 0x5B, 0x06, 0x5C, 0x06, 0x5D, 0x06, 0x5E, 0x06, 0x5F, 0x44,
+    0x0B, 0x0E, 0x80, 0x01,
+};
+// 0x00000000: push {r5, r6, r7, r8, r10, r11, lr}
+// 0x00000004: .cfi_def_cfa_offset: 28
+// 0x00000004: .cfi_offset: r5 at cfa-28
+// 0x00000004: .cfi_offset: r6 at cfa-24
+// 0x00000004: .cfi_offset: r7 at cfa-20
+// 0x00000004: .cfi_offset: r8 at cfa-16
+// 0x00000004: .cfi_offset: r10 at cfa-12
+// 0x00000004: .cfi_offset: r11 at cfa-8
+// 0x00000004: .cfi_offset: r14 at cfa-4
+// 0x00000004: vpush.f32 {s16-s31}
+// 0x00000008: .cfi_def_cfa_offset: 92
+// 0x00000008: .cfi_offset_extended: r80 at cfa-92
+// 0x00000008: .cfi_offset_extended: r81 at cfa-88
+// 0x00000008: .cfi_offset_extended: r82 at cfa-84
+// 0x00000008: .cfi_offset_extended: r83 at cfa-80
+// 0x00000008: .cfi_offset_extended: r84 at cfa-76
+// 0x00000008: .cfi_offset_extended: r85 at cfa-72
+// 0x00000008: .cfi_offset_extended: r86 at cfa-68
+// 0x00000008: .cfi_offset_extended: r87 at cfa-64
+// 0x00000008: .cfi_offset_extended: r88 at cfa-60
+// 0x00000008: .cfi_offset_extended: r89 at cfa-56
+// 0x00000008: .cfi_offset_extended: r90 at cfa-52
+// 0x00000008: .cfi_offset_extended: r91 at cfa-48
+// 0x00000008: .cfi_offset_extended: r92 at cfa-44
+// 0x00000008: .cfi_offset_extended: r93 at cfa-40
+// 0x00000008: .cfi_offset_extended: r94 at cfa-36
+// 0x00000008: .cfi_offset_extended: r95 at cfa-32
+// 0x00000008: sub sp, sp, #36
+// 0x0000000a: .cfi_def_cfa_offset: 128
+// 0x0000000a: str r0, [sp, #0]
+// 0x0000000c: str.w r1, [sp, #132]
+// 0x00000010: vstr.f32 s0, [sp, #136]
+// 0x00000014: str.w r2, [sp, #140]
+// 0x00000018: str.w r3, [sp, #144]
+// 0x0000001c: sub sp, sp, #32
+// 0x0000001e: .cfi_def_cfa_offset: 160
+// 0x0000001e: add sp, sp, #32
+// 0x00000020: .cfi_def_cfa_offset: 128
+// 0x00000020: .cfi_remember_state
+// 0x00000020: add sp, sp, #36
+// 0x00000022: .cfi_def_cfa_offset: 92
+// 0x00000022: vpop.f32 {s16-s31}
+// 0x00000026: .cfi_def_cfa_offset: 28
+// 0x00000026: .cfi_restore_extended: r80
+// 0x00000026: .cfi_restore_extended: r81
+// 0x00000026: .cfi_restore_extended: r82
+// 0x00000026: .cfi_restore_extended: r83
+// 0x00000026: .cfi_restore_extended: r84
+// 0x00000026: .cfi_restore_extended: r85
+// 0x00000026: .cfi_restore_extended: r86
+// 0x00000026: .cfi_restore_extended: r87
+// 0x00000026: .cfi_restore_extended: r88
+// 0x00000026: .cfi_restore_extended: r89
+// 0x00000026: .cfi_restore_extended: r90
+// 0x00000026: .cfi_restore_extended: r91
+// 0x00000026: .cfi_restore_extended: r92
+// 0x00000026: .cfi_restore_extended: r93
+// 0x00000026: .cfi_restore_extended: r94
+// 0x00000026: .cfi_restore_extended: r95
+// 0x00000026: pop {r5, r6, r7, r8, r10, r11, pc}
+// 0x0000002a: .cfi_restore_state
+// 0x0000002a: .cfi_def_cfa_offset: 128
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+    0xFF, 0x03, 0x03, 0xD1, 0xFE, 0x5F, 0x00, 0xF9, 0xFD, 0x5B, 0x00, 0xF9,
+    0xFC, 0x57, 0x00, 0xF9, 0xFB, 0x53, 0x00, 0xF9, 0xFA, 0x4F, 0x00, 0xF9,
+    0xF9, 0x4B, 0x00, 0xF9, 0xF8, 0x47, 0x00, 0xF9, 0xF7, 0x43, 0x00, 0xF9,
+    0xF6, 0x3F, 0x00, 0xF9, 0xF5, 0x3B, 0x00, 0xF9, 0xF4, 0x37, 0x00, 0xF9,
+    0xEF, 0x33, 0x00, 0xFD, 0xEE, 0x2F, 0x00, 0xFD, 0xED, 0x2B, 0x00, 0xFD,
+    0xEC, 0x27, 0x00, 0xFD, 0xEB, 0x23, 0x00, 0xFD, 0xEA, 0x1F, 0x00, 0xFD,
+    0xE9, 0x1B, 0x00, 0xFD, 0xE8, 0x17, 0x00, 0xFD, 0xF5, 0x03, 0x12, 0xAA,
+    0xE0, 0x03, 0x00, 0xB9, 0xE1, 0xC7, 0x00, 0xB9, 0xE0, 0xCB, 0x00, 0xBD,
+    0xE2, 0xCF, 0x00, 0xB9, 0xE3, 0xD3, 0x00, 0xB9, 0xFF, 0x83, 0x00, 0xD1,
+    0xFF, 0x83, 0x00, 0x91, 0xF2, 0x03, 0x15, 0xAA, 0xFE, 0x5F, 0x40, 0xF9,
+    0xFD, 0x5B, 0x40, 0xF9, 0xFC, 0x57, 0x40, 0xF9, 0xFB, 0x53, 0x40, 0xF9,
+    0xFA, 0x4F, 0x40, 0xF9, 0xF9, 0x4B, 0x40, 0xF9, 0xF8, 0x47, 0x40, 0xF9,
+    0xF7, 0x43, 0x40, 0xF9, 0xF6, 0x3F, 0x40, 0xF9, 0xF5, 0x3B, 0x40, 0xF9,
+    0xF4, 0x37, 0x40, 0xF9, 0xEF, 0x33, 0x40, 0xFD, 0xEE, 0x2F, 0x40, 0xFD,
+    0xED, 0x2B, 0x40, 0xFD, 0xEC, 0x27, 0x40, 0xFD, 0xEB, 0x23, 0x40, 0xFD,
+    0xEA, 0x1F, 0x40, 0xFD, 0xE9, 0x1B, 0x40, 0xFD, 0xE8, 0x17, 0x40, 0xFD,
+    0xFF, 0x03, 0x03, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+    0x44, 0x0E, 0xC0, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x9D, 0x04, 0x44, 0x9C,
+    0x06, 0x44, 0x9B, 0x08, 0x44, 0x9A, 0x0A, 0x44, 0x99, 0x0C, 0x44, 0x98,
+    0x0E, 0x44, 0x97, 0x10, 0x44, 0x96, 0x12, 0x44, 0x95, 0x14, 0x44, 0x94,
+    0x16, 0x44, 0x05, 0x4F, 0x18, 0x44, 0x05, 0x4E, 0x1A, 0x44, 0x05, 0x4D,
+    0x1C, 0x44, 0x05, 0x4C, 0x1E, 0x44, 0x05, 0x4B, 0x20, 0x44, 0x05, 0x4A,
+    0x22, 0x44, 0x05, 0x49, 0x24, 0x44, 0x05, 0x48, 0x26, 0x5C, 0x0E, 0xE0,
+    0x01, 0x44, 0x0E, 0xC0, 0x01, 0x0A, 0x48, 0xDE, 0x44, 0xDD, 0x44, 0xDC,
+    0x44, 0xDB, 0x44, 0xDA, 0x44, 0xD9, 0x44, 0xD8, 0x44, 0xD7, 0x44, 0xD6,
+    0x44, 0xD5, 0x44, 0xD4, 0x44, 0x06, 0x4F, 0x44, 0x06, 0x4E, 0x44, 0x06,
+    0x4D, 0x44, 0x06, 0x4C, 0x44, 0x06, 0x4B, 0x44, 0x06, 0x4A, 0x44, 0x06,
+    0x49, 0x44, 0x06, 0x48, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0xC0, 0x01,
+};
+// 0x00000000: sub sp, sp, #0xc0 (192)
+// 0x00000004: .cfi_def_cfa_offset: 192
+// 0x00000004: str lr, [sp, #184]
+// 0x00000008: .cfi_offset: r30 at cfa-8
+// 0x00000008: str x29, [sp, #176]
+// 0x0000000c: .cfi_offset: r29 at cfa-16
+// 0x0000000c: str x28, [sp, #168]
+// 0x00000010: .cfi_offset: r28 at cfa-24
+// 0x00000010: str x27, [sp, #160]
+// 0x00000014: .cfi_offset: r27 at cfa-32
+// 0x00000014: str x26, [sp, #152]
+// 0x00000018: .cfi_offset: r26 at cfa-40
+// 0x00000018: str x25, [sp, #144]
+// 0x0000001c: .cfi_offset: r25 at cfa-48
+// 0x0000001c: str x24, [sp, #136]
+// 0x00000020: .cfi_offset: r24 at cfa-56
+// 0x00000020: str x23, [sp, #128]
+// 0x00000024: .cfi_offset: r23 at cfa-64
+// 0x00000024: str x22, [sp, #120]
+// 0x00000028: .cfi_offset: r22 at cfa-72
+// 0x00000028: str x21, [sp, #112]
+// 0x0000002c: .cfi_offset: r21 at cfa-80
+// 0x0000002c: str x20, [sp, #104]
+// 0x00000030: .cfi_offset: r20 at cfa-88
+// 0x00000030: str d15, [sp, #96]
+// 0x00000034: .cfi_offset_extended: r79 at cfa-96
+// 0x00000034: str d14, [sp, #88]
+// 0x00000038: .cfi_offset_extended: r78 at cfa-104
+// 0x00000038: str d13, [sp, #80]
+// 0x0000003c: .cfi_offset_extended: r77 at cfa-112
+// 0x0000003c: str d12, [sp, #72]
+// 0x00000040: .cfi_offset_extended: r76 at cfa-120
+// 0x00000040: str d11, [sp, #64]
+// 0x00000044: .cfi_offset_extended: r75 at cfa-128
+// 0x00000044: str d10, [sp, #56]
+// 0x00000048: .cfi_offset_extended: r74 at cfa-136
+// 0x00000048: str d9, [sp, #48]
+// 0x0000004c: .cfi_offset_extended: r73 at cfa-144
+// 0x0000004c: str d8, [sp, #40]
+// 0x00000050: .cfi_offset_extended: r72 at cfa-152
+// 0x00000050: mov x21, tr
+// 0x00000054: str w0, [sp]
+// 0x00000058: str w1, [sp, #196]
+// 0x0000005c: str s0, [sp, #200]
+// 0x00000060: str w2, [sp, #204]
+// 0x00000064: str w3, [sp, #208]
+// 0x00000068: sub sp, sp, #0x20 (32)
+// 0x0000006c: .cfi_def_cfa_offset: 224
+// 0x0000006c: add sp, sp, #0x20 (32)
+// 0x00000070: .cfi_def_cfa_offset: 192
+// 0x00000070: .cfi_remember_state
+// 0x00000070: mov tr, x21
+// 0x00000074: ldr lr, [sp, #184]
+// 0x00000078: .cfi_restore: r30
+// 0x00000078: ldr x29, [sp, #176]
+// 0x0000007c: .cfi_restore: r29
+// 0x0000007c: ldr x28, [sp, #168]
+// 0x00000080: .cfi_restore: r28
+// 0x00000080: ldr x27, [sp, #160]
+// 0x00000084: .cfi_restore: r27
+// 0x00000084: ldr x26, [sp, #152]
+// 0x00000088: .cfi_restore: r26
+// 0x00000088: ldr x25, [sp, #144]
+// 0x0000008c: .cfi_restore: r25
+// 0x0000008c: ldr x24, [sp, #136]
+// 0x00000090: .cfi_restore: r24
+// 0x00000090: ldr x23, [sp, #128]
+// 0x00000094: .cfi_restore: r23
+// 0x00000094: ldr x22, [sp, #120]
+// 0x00000098: .cfi_restore: r22
+// 0x00000098: ldr x21, [sp, #112]
+// 0x0000009c: .cfi_restore: r21
+// 0x0000009c: ldr x20, [sp, #104]
+// 0x000000a0: .cfi_restore: r20
+// 0x000000a0: ldr d15, [sp, #96]
+// 0x000000a4: .cfi_restore_extended: r79
+// 0x000000a4: ldr d14, [sp, #88]
+// 0x000000a8: .cfi_restore_extended: r78
+// 0x000000a8: ldr d13, [sp, #80]
+// 0x000000ac: .cfi_restore_extended: r77
+// 0x000000ac: ldr d12, [sp, #72]
+// 0x000000b0: .cfi_restore_extended: r76
+// 0x000000b0: ldr d11, [sp, #64]
+// 0x000000b4: .cfi_restore_extended: r75
+// 0x000000b4: ldr d10, [sp, #56]
+// 0x000000b8: .cfi_restore_extended: r74
+// 0x000000b8: ldr d9, [sp, #48]
+// 0x000000bc: .cfi_restore_extended: r73
+// 0x000000bc: ldr d8, [sp, #40]
+// 0x000000c0: .cfi_restore_extended: r72
+// 0x000000c0: add sp, sp, #0xc0 (192)
+// 0x000000c4: .cfi_def_cfa_offset: 0
+// 0x000000c4: ret
+// 0x000000c8: .cfi_restore_state
+// 0x000000c8: .cfi_def_cfa_offset: 192
+
+static constexpr uint8_t expected_asm_kX86[] = {
+    0x57, 0x56, 0x55, 0x83, 0xC4, 0xE4, 0x50, 0x89, 0x4C, 0x24, 0x34, 0xF3,
+    0x0F, 0x11, 0x44, 0x24, 0x38, 0x89, 0x54, 0x24, 0x3C, 0x89, 0x5C, 0x24,
+    0x40, 0x83, 0xC4, 0xE0, 0x83, 0xC4, 0x20, 0x83, 0xC4, 0x20, 0x5D, 0x5E,
+    0x5F, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+    0x41, 0x0E, 0x08, 0x87, 0x02, 0x41, 0x0E, 0x0C, 0x86, 0x03, 0x41, 0x0E,
+    0x10, 0x85, 0x04, 0x43, 0x0E, 0x2C, 0x41, 0x0E, 0x30, 0x55, 0x0E, 0x50,
+    0x43, 0x0E, 0x30, 0x0A, 0x43, 0x0E, 0x10, 0x41, 0x0E, 0x0C, 0xC5, 0x41,
+    0x0E, 0x08, 0xC6, 0x41, 0x0E, 0x04, 0xC7, 0x41, 0x0B, 0x0E, 0x30,
+};
+// 0x00000000: push edi
+// 0x00000001: .cfi_def_cfa_offset: 8
+// 0x00000001: .cfi_offset: r7 at cfa-8
+// 0x00000001: push esi
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r6 at cfa-12
+// 0x00000002: push ebp
+// 0x00000003: .cfi_def_cfa_offset: 16
+// 0x00000003: .cfi_offset: r5 at cfa-16
+// 0x00000003: add esp, -28
+// 0x00000006: .cfi_def_cfa_offset: 44
+// 0x00000006: push eax
+// 0x00000007: .cfi_def_cfa_offset: 48
+// 0x00000007: mov [esp + 52], ecx
+// 0x0000000b: movss [esp + 56], xmm0
+// 0x00000011: mov [esp + 60], edx
+// 0x00000015: mov [esp + 64], ebx
+// 0x00000019: add esp, -32
+// 0x0000001c: .cfi_def_cfa_offset: 80
+// 0x0000001c: add esp, 32
+// 0x0000001f: .cfi_def_cfa_offset: 48
+// 0x0000001f: .cfi_remember_state
+// 0x0000001f: add esp, 32
+// 0x00000022: .cfi_def_cfa_offset: 16
+// 0x00000022: pop ebp
+// 0x00000023: .cfi_def_cfa_offset: 12
+// 0x00000023: .cfi_restore: r5
+// 0x00000023: pop esi
+// 0x00000024: .cfi_def_cfa_offset: 8
+// 0x00000024: .cfi_restore: r6
+// 0x00000024: pop edi
+// 0x00000025: .cfi_def_cfa_offset: 4
+// 0x00000025: .cfi_restore: r7
+// 0x00000025: ret
+// 0x00000026: .cfi_restore_state
+// 0x00000026: .cfi_def_cfa_offset: 48
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+    0x41, 0x57, 0x41, 0x56, 0x41, 0x55, 0x41, 0x54, 0x55, 0x53, 0x48, 0x83,
+    0xEC, 0x48, 0xF2, 0x44, 0x0F, 0x11, 0x7C, 0x24, 0x40, 0xF2, 0x44, 0x0F,
+    0x11, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, 0x30, 0xF2,
+    0x44, 0x0F, 0x11, 0x64, 0x24, 0x28, 0x89, 0x3C, 0x24, 0x89, 0xB4, 0x24,
+    0x84, 0x00, 0x00, 0x00, 0xF3, 0x0F, 0x11, 0x84, 0x24, 0x88, 0x00, 0x00,
+    0x00, 0x89, 0x94, 0x24, 0x8C, 0x00, 0x00, 0x00, 0x89, 0x8C, 0x24, 0x90,
+    0x00, 0x00, 0x00, 0x48, 0x83, 0xC4, 0xE0, 0x48, 0x83, 0xC4, 0x20, 0xF2,
+    0x44, 0x0F, 0x10, 0x64, 0x24, 0x28, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
+    0x30, 0xF2, 0x44, 0x0F, 0x10, 0x74, 0x24, 0x38, 0xF2, 0x44, 0x0F, 0x10,
+    0x7C, 0x24, 0x40, 0x48, 0x83, 0xC4, 0x48, 0x5B, 0x5D, 0x41, 0x5C, 0x41,
+    0x5D, 0x41, 0x5E, 0x41, 0x5F, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+    0x42, 0x0E, 0x10, 0x8F, 0x04, 0x42, 0x0E, 0x18, 0x8E, 0x06, 0x42, 0x0E,
+    0x20, 0x8D, 0x08, 0x42, 0x0E, 0x28, 0x8C, 0x0A, 0x41, 0x0E, 0x30, 0x86,
+    0x0C, 0x41, 0x0E, 0x38, 0x83, 0x0E, 0x44, 0x0E, 0x80, 0x01, 0x47, 0xA0,
+    0x10, 0x47, 0x9F, 0x12, 0x47, 0x9E, 0x14, 0x47, 0x9D, 0x16, 0x65, 0x0E,
+    0xA0, 0x01, 0x44, 0x0E, 0x80, 0x01, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x47,
+    0xDF, 0x47, 0xE0, 0x44, 0x0E, 0x38, 0x41, 0x0E, 0x30, 0xC3, 0x41, 0x0E,
+    0x28, 0xC6, 0x42, 0x0E, 0x20, 0xCC, 0x42, 0x0E, 0x18, 0xCD, 0x42, 0x0E,
+    0x10, 0xCE, 0x42, 0x0E, 0x08, 0xCF, 0x41, 0x0B, 0x0E, 0x80, 0x01,
+};
+// 0x00000000: push r15
+// 0x00000002: .cfi_def_cfa_offset: 16
+// 0x00000002: .cfi_offset: r15 at cfa-16
+// 0x00000002: push r14
+// 0x00000004: .cfi_def_cfa_offset: 24
+// 0x00000004: .cfi_offset: r14 at cfa-24
+// 0x00000004: push r13
+// 0x00000006: .cfi_def_cfa_offset: 32
+// 0x00000006: .cfi_offset: r13 at cfa-32
+// 0x00000006: push r12
+// 0x00000008: .cfi_def_cfa_offset: 40
+// 0x00000008: .cfi_offset: r12 at cfa-40
+// 0x00000008: push rbp
+// 0x00000009: .cfi_def_cfa_offset: 48
+// 0x00000009: .cfi_offset: r6 at cfa-48
+// 0x00000009: push rbx
+// 0x0000000a: .cfi_def_cfa_offset: 56
+// 0x0000000a: .cfi_offset: r3 at cfa-56
+// 0x0000000a: subq rsp, 72
+// 0x0000000e: .cfi_def_cfa_offset: 128
+// 0x0000000e: movsd [rsp + 64], xmm15
+// 0x00000015: .cfi_offset: r32 at cfa-64
+// 0x00000015: movsd [rsp + 56], xmm14
+// 0x0000001c: .cfi_offset: r31 at cfa-72
+// 0x0000001c: movsd [rsp + 48], xmm13
+// 0x00000023: .cfi_offset: r30 at cfa-80
+// 0x00000023: movsd [rsp + 40], xmm12
+// 0x0000002a: .cfi_offset: r29 at cfa-88
+// 0x0000002a: mov [rsp], edi
+// 0x0000002d: mov [rsp + 132], esi
+// 0x00000034: movss [rsp + 136], xmm0
+// 0x0000003d: mov [rsp + 140], edx
+// 0x00000044: mov [rsp + 144], ecx
+// 0x0000004b: addq rsp, -32
+// 0x0000004f: .cfi_def_cfa_offset: 160
+// 0x0000004f: addq rsp, 32
+// 0x00000053: .cfi_def_cfa_offset: 128
+// 0x00000053: .cfi_remember_state
+// 0x00000053: movsd xmm12, [rsp + 40]
+// 0x0000005a: .cfi_restore: r29
+// 0x0000005a: movsd xmm13, [rsp + 48]
+// 0x00000061: .cfi_restore: r30
+// 0x00000061: movsd xmm14, [rsp + 56]
+// 0x00000068: .cfi_restore: r31
+// 0x00000068: movsd xmm15, [rsp + 64]
+// 0x0000006f: .cfi_restore: r32
+// 0x0000006f: addq rsp, 72
+// 0x00000073: .cfi_def_cfa_offset: 56
+// 0x00000073: pop rbx
+// 0x00000074: .cfi_def_cfa_offset: 48
+// 0x00000074: .cfi_restore: r3
+// 0x00000074: pop rbp
+// 0x00000075: .cfi_def_cfa_offset: 40
+// 0x00000075: .cfi_restore: r6
+// 0x00000075: pop r12
+// 0x00000077: .cfi_def_cfa_offset: 32
+// 0x00000077: .cfi_restore: r12
+// 0x00000077: pop r13
+// 0x00000079: .cfi_def_cfa_offset: 24
+// 0x00000079: .cfi_restore: r13
+// 0x00000079: pop r14
+// 0x0000007b: .cfi_def_cfa_offset: 16
+// 0x0000007b: .cfi_restore: r14
+// 0x0000007b: pop r15
+// 0x0000007d: .cfi_def_cfa_offset: 8
+// 0x0000007d: .cfi_restore: r15
+// 0x0000007d: ret
+// 0x0000007e: .cfi_restore_state
+// 0x0000007e: .cfi_def_cfa_offset: 128
+
+static constexpr uint8_t expected_asm_kMips[] = {
+    0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB8, 0xAF,
+    0x34, 0x00, 0xAF, 0xAF, 0x30, 0x00, 0xAE, 0xAF, 0x2C, 0x00, 0xAD, 0xAF,
+    0x28, 0x00, 0xAC, 0xAF, 0x24, 0x00, 0xAB, 0xAF, 0x20, 0x00, 0xAA, 0xAF,
+    0x1C, 0x00, 0xA9, 0xAF, 0x18, 0x00, 0xA8, 0xAF, 0x00, 0x00, 0xA4, 0xAF,
+    0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA6, 0xAF, 0x4C, 0x00, 0xA7, 0xAF,
+    0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27, 0x18, 0x00, 0xA8, 0x8F,
+    0x1C, 0x00, 0xA9, 0x8F, 0x20, 0x00, 0xAA, 0x8F, 0x24, 0x00, 0xAB, 0x8F,
+    0x28, 0x00, 0xAC, 0x8F, 0x2C, 0x00, 0xAD, 0x8F, 0x30, 0x00, 0xAE, 0x8F,
+    0x34, 0x00, 0xAF, 0x8F, 0x38, 0x00, 0xB8, 0x8F, 0x3C, 0x00, 0xBF, 0x8F,
+    0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x98, 0x02, 0x44, 0x8F, 0x03,
+    0x44, 0x8E, 0x04, 0x44, 0x8D, 0x05, 0x44, 0x8C, 0x06, 0x44, 0x8B, 0x07,
+    0x44, 0x8A, 0x08, 0x44, 0x89, 0x09, 0x44, 0x88, 0x0A, 0x54, 0x0E, 0x60,
+    0x44, 0x0E, 0x40, 0x0A, 0x44, 0xC8, 0x44, 0xC9, 0x44, 0xCA, 0x44, 0xCB,
+    0x44, 0xCC, 0x44, 0xCD, 0x44, 0xCE, 0x44, 0xCF, 0x44, 0xD8, 0x44, 0xDF,
+    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sw r31, +60(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r24, +56(r29)
+// 0x0000000c: .cfi_offset: r24 at cfa-8
+// 0x0000000c: sw r15, +52(r29)
+// 0x00000010: .cfi_offset: r15 at cfa-12
+// 0x00000010: sw r14, +48(r29)
+// 0x00000014: .cfi_offset: r14 at cfa-16
+// 0x00000014: sw r13, +44(r29)
+// 0x00000018: .cfi_offset: r13 at cfa-20
+// 0x00000018: sw r12, +40(r29)
+// 0x0000001c: .cfi_offset: r12 at cfa-24
+// 0x0000001c: sw r11, +36(r29)
+// 0x00000020: .cfi_offset: r11 at cfa-28
+// 0x00000020: sw r10, +32(r29)
+// 0x00000024: .cfi_offset: r10 at cfa-32
+// 0x00000024: sw r9, +28(r29)
+// 0x00000028: .cfi_offset: r9 at cfa-36
+// 0x00000028: sw r8, +24(r29)
+// 0x0000002c: .cfi_offset: r8 at cfa-40
+// 0x0000002c: sw r4, +0(r29)
+// 0x00000030: sw r5, +68(r29)
+// 0x00000034: sw r6, +72(r29)
+// 0x00000038: sw r7, +76(r29)
+// 0x0000003c: addiu r29, r29, -32
+// 0x00000040: .cfi_def_cfa_offset: 96
+// 0x00000040: addiu r29, r29, 32
+// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x00000044: .cfi_remember_state
+// 0x00000044: lw r8, +24(r29)
+// 0x00000048: .cfi_restore: r8
+// 0x00000048: lw r9, +28(r29)
+// 0x0000004c: .cfi_restore: r9
+// 0x0000004c: lw r10, +32(r29)
+// 0x00000050: .cfi_restore: r10
+// 0x00000050: lw r11, +36(r29)
+// 0x00000054: .cfi_restore: r11
+// 0x00000054: lw r12, +40(r29)
+// 0x00000058: .cfi_restore: r12
+// 0x00000058: lw r13, +44(r29)
+// 0x0000005c: .cfi_restore: r13
+// 0x0000005c: lw r14, +48(r29)
+// 0x00000060: .cfi_restore: r14
+// 0x00000060: lw r15, +52(r29)
+// 0x00000064: .cfi_restore: r15
+// 0x00000064: lw r24, +56(r29)
+// 0x00000068: .cfi_restore: r24
+// 0x00000068: lw r31, +60(r29)
+// 0x0000006c: .cfi_restore: r31
+// 0x0000006c: addiu r29, r29, 64
+// 0x00000070: .cfi_def_cfa_offset: 0
+// 0x00000070: jalr r0, r31
+// 0x00000074: nop
+// 0x00000078: .cfi_restore_state
+// 0x00000078: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+    0xA0, 0xFF, 0xBD, 0x67, 0x58, 0x00, 0xBF, 0xFF, 0x50, 0x00, 0xBE, 0xFF,
+    0x48, 0x00, 0xBC, 0xFF, 0x40, 0x00, 0xB7, 0xFF, 0x38, 0x00, 0xB6, 0xFF,
+    0x30, 0x00, 0xB5, 0xFF, 0x28, 0x00, 0xB4, 0xFF, 0x20, 0x00, 0xB3, 0xFF,
+    0x18, 0x00, 0xB2, 0xFF, 0x00, 0x00, 0xA4, 0xAF, 0x64, 0x00, 0xA5, 0xAF,
+    0x68, 0x00, 0xAE, 0xE7, 0x6C, 0x00, 0xA7, 0xAF, 0x70, 0x00, 0xA8, 0xAF,
+    0xE0, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBD, 0x67, 0x18, 0x00, 0xB2, 0xDF,
+    0x20, 0x00, 0xB3, 0xDF, 0x28, 0x00, 0xB4, 0xDF, 0x30, 0x00, 0xB5, 0xDF,
+    0x38, 0x00, 0xB6, 0xDF, 0x40, 0x00, 0xB7, 0xDF, 0x48, 0x00, 0xBC, 0xDF,
+    0x50, 0x00, 0xBE, 0xDF, 0x58, 0x00, 0xBF, 0xDF, 0x60, 0x00, 0xBD, 0x67,
+    0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+    0x44, 0x0E, 0x60, 0x44, 0x9F, 0x02, 0x44, 0x9E, 0x04, 0x44, 0x9C, 0x06,
+    0x44, 0x97, 0x08, 0x44, 0x96, 0x0A, 0x44, 0x95, 0x0C, 0x44, 0x94, 0x0E,
+    0x44, 0x93, 0x10, 0x44, 0x92, 0x12, 0x58, 0x0E, 0x80, 0x01, 0x44, 0x0E,
+    0x60, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6,
+    0x44, 0xD7, 0x44, 0xDC, 0x44, 0xDE, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48,
+    0x0B, 0x0E, 0x60,
+};
+// 0x00000000: daddiu r29, r29, -96
+// 0x00000004: .cfi_def_cfa_offset: 96
+// 0x00000004: sd r31, +88(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r30, +80(r29)
+// 0x0000000c: .cfi_offset: r30 at cfa-16
+// 0x0000000c: sd r28, +72(r29)
+// 0x00000010: .cfi_offset: r28 at cfa-24
+// 0x00000010: sd r23, +64(r29)
+// 0x00000014: .cfi_offset: r23 at cfa-32
+// 0x00000014: sd r22, +56(r29)
+// 0x00000018: .cfi_offset: r22 at cfa-40
+// 0x00000018: sd r21, +48(r29)
+// 0x0000001c: .cfi_offset: r21 at cfa-48
+// 0x0000001c: sd r20, +40(r29)
+// 0x00000020: .cfi_offset: r20 at cfa-56
+// 0x00000020: sd r19, +32(r29)
+// 0x00000024: .cfi_offset: r19 at cfa-64
+// 0x00000024: sd r18, +24(r29)
+// 0x00000028: .cfi_offset: r18 at cfa-72
+// 0x00000028: sw r4, +0(r29)
+// 0x0000002c: sw r5, +100(r29)
+// 0x00000030: swc1 f14, +104(r29)
+// 0x00000034: sw r7, +108(r29)
+// 0x00000038: sw r8, +112(r29)
+// 0x0000003c: daddiu r29, r29, -32
+// 0x00000040: .cfi_def_cfa_offset: 128
+// 0x00000040: daddiu r29, r29, 32
+// 0x00000044: .cfi_def_cfa_offset: 96
+// 0x00000044: .cfi_remember_state
+// 0x00000044: ld r18, +24(r29)
+// 0x00000048: .cfi_restore: r18
+// 0x00000048: ld r19, +32(r29)
+// 0x0000004c: .cfi_restore: r19
+// 0x0000004c: ld r20, +40(r29)
+// 0x00000050: .cfi_restore: r20
+// 0x00000050: ld r21, +48(r29)
+// 0x00000054: .cfi_restore: r21
+// 0x00000054: ld r22, +56(r29)
+// 0x00000058: .cfi_restore: r22
+// 0x00000058: ld r23, +64(r29)
+// 0x0000005c: .cfi_restore: r23
+// 0x0000005c: ld r28, +72(r29)
+// 0x00000060: .cfi_restore: r28
+// 0x00000060: ld r30, +80(r29)
+// 0x00000064: .cfi_restore: r30
+// 0x00000064: ld r31, +88(r29)
+// 0x00000068: .cfi_restore: r31
+// 0x00000068: daddiu r29, r29, 96
+// 0x0000006c: .cfi_def_cfa_offset: 0
+// 0x0000006c: jr r31
+// 0x00000070: nop
+// 0x00000074: .cfi_restore_state
+// 0x00000074: .cfi_def_cfa_offset: 96
+
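
The byte arrays above are raw DWARF call-frame opcodes; the // annotations are their decoded
form. As an aid to reading them by hand, here is a minimal decoder covering just the opcodes
these tables use. The alignment factors are assumptions (code alignment 1, data alignment -4,
which is what the annotations imply); the real factors live in the CIE, which these per-method
streams omit:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

void DecodeCfi(const std::vector<uint8_t>& ops) {
  size_t i = 0;
  auto uleb = [&]() {  // Minimal ULEB128 reader.
    uint32_t result = 0;
    int shift = 0;
    uint8_t byte;
    do {
      byte = ops[i++];
      result |= static_cast<uint32_t>(byte & 0x7f) << shift;
      shift += 7;
    } while ((byte & 0x80) != 0);
    return result;
  };
  while (i < ops.size()) {
    const uint8_t op = ops[i++];
    switch (op >> 6) {  // The high two bits select the short-form opcodes.
      case 1: std::printf(".cfi_advance_loc %u\n", op & 0x3fu); break;
      case 2: std::printf(".cfi_offset: r%u at cfa-%u\n", op & 0x3fu, 4u * uleb()); break;
      case 3: std::printf(".cfi_restore: r%u\n", op & 0x3fu); break;
      default:
        switch (op) {
          case 0x05: {  // DW_CFA_offset_extended.
            const uint32_t reg = uleb();
            std::printf(".cfi_offset_extended: r%u at cfa-%u\n", reg, 4u * uleb());
            break;
          }
          case 0x06: std::printf(".cfi_restore_extended: r%u\n", uleb()); break;
          case 0x0a: std::printf(".cfi_remember_state\n"); break;
          case 0x0b: std::printf(".cfi_restore_state\n"); break;
          case 0x0e: std::printf(".cfi_def_cfa_offset: %u\n", uleb()); break;
          default: std::printf("(unhandled opcode 0x%02x)\n", static_cast<unsigned>(op)); return;
        }
    }
  }
}

Fed the first bytes of expected_cfi_kX86 (0x41, 0x0E, 0x08, 0x87, 0x02), it prints
.cfi_advance_loc 1, .cfi_def_cfa_offset: 8 and .cfi_offset: r7 at cfa-8, matching the
"push edi" annotation above.
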
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 70bfb81..4186891 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -164,6 +164,7 @@
   void CheckParameterAlignImpl();
   void MaxParamNumberImpl();
   void WithoutImplementationImpl();
+  void WithoutImplementationRefReturnImpl();
   void StackArgsIntsFirstImpl();
   void StackArgsFloatsFirstImpl();
   void StackArgsMixedImpl();
@@ -1494,6 +1495,20 @@
 
 JNI_TEST(WithoutImplementation)
 
+void JniCompilerTest::WithoutImplementationRefReturnImpl() {
+  // This will lead to error messages in the log.
+  ScopedLogSeverity sls(LogSeverity::FATAL);
+
+  SetUpForTest(false, "withoutImplementationRefReturn", "()Ljava/lang/Object;", nullptr);
+
+  env_->CallObjectMethod(jobj_, jmethod_);
+
+  EXPECT_TRUE(Thread::Current()->IsExceptionPending());
+  EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
+}
+
+JNI_TEST(WithoutImplementationRefReturn)
+
 void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv*, jclass, jint i1, jint i2, jint i3,
                                             jint i4, jint i5, jint i6, jint i7, jint i8, jint i9,
                                             jint i10, jfloat f1, jfloat f2, jfloat f3, jfloat f4,
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 2d9e03a..8a14038 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -28,6 +28,7 @@
 #include "compiled_method.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "jni_env_ext.h"
 #include "mirror/art_method.h"
@@ -93,7 +94,7 @@
 
   // Assembler that holds generated instructions
   std::unique_ptr<Assembler> jni_asm(Assembler::Create(instruction_set));
-  jni_asm->InitializeFrameDescriptionEntry();
+  jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GetIncludeDebugSymbols());
 
   // Offsets into data structures
   // TODO: if cross compiling these offsets are for the host not the target
@@ -105,6 +106,7 @@
   const size_t frame_size(main_jni_conv->FrameSize());
   const std::vector<ManagedRegister>& callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
+  DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
   // 2. Set up the HandleScope
   mr_conv->ResetIterator(FrameOffset(frame_size));
@@ -424,7 +426,9 @@
 
   // 16. Remove activation - need to restore callee save registers since the GC may have changed
   //     them.
+  DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
   __ RemoveFrame(frame_size, callee_save_regs);
+  DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
   // 17. Finalize code generation
   __ EmitSlowPaths();
@@ -432,19 +436,19 @@
   std::vector<uint8_t> managed_code(cs);
   MemoryRegion code(&managed_code[0], managed_code.size());
   __ FinalizeInstructions(code);
-  jni_asm->FinalizeFrameDescriptionEntry();
-  std::vector<uint8_t>* fde(jni_asm->GetFrameDescriptionEntry());
-  ArrayRef<const uint8_t> cfi_ref;
-  if (fde != nullptr) {
-    cfi_ref = ArrayRef<const uint8_t>(*fde);
-  }
-  return CompiledMethod::SwapAllocCompiledMethodCFI(driver,
-                                                    instruction_set,
-                                                    ArrayRef<const uint8_t>(managed_code),
-                                                    frame_size,
-                                                    main_jni_conv->CoreSpillMask(),
-                                                    main_jni_conv->FpSpillMask(),
-                                                    cfi_ref);
+
+  return CompiledMethod::SwapAllocCompiledMethod(driver,
+                                                 instruction_set,
+                                                 ArrayRef<const uint8_t>(managed_code),
+                                                 frame_size,
+                                                 main_jni_conv->CoreSpillMask(),
+                                                 main_jni_conv->FpSpillMask(),
+                                                 nullptr,  // src_mapping_table.
+                                                 ArrayRef<const uint8_t>(),  // mapping_table.
+                                                 ArrayRef<const uint8_t>(),  // vmap_table.
+                                                 ArrayRef<const uint8_t>(),  // native_gc_map.
+                                                 ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
+                                                 ArrayRef<const LinkerPatch>());
 }
 
 // Copy a single parameter from the managed to the JNI calling convention
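
The DCHECKs added above pin down the invariant the new cfi() stream maintains: BuildFrame
raises the current CFA offset to frame_size, IncreaseFrameSize/DecreaseFrameSize adjust it
symmetrically, and RemoveFrame restores it so further code could follow. A toy model of that
bookkeeping, with opcode emission elided and names invented for the sketch:

#include <cassert>

class CfaTrackerSketch {
 public:
  // The real writer also emits a DW_CFA opcode for every adjustment.
  void AdjustCFAOffset(int delta) { cfa_offset_ += delta; }
  int GetCurrentCFAOffset() const { return cfa_offset_; }

 private:
  int cfa_offset_ = 0;
};

int main() {
  CfaTrackerSketch cfi;
  const int frame_size = 64;                         // Hypothetical frame size.
  cfi.AdjustCFAOffset(frame_size);                   // BuildFrame.
  assert(cfi.GetCurrentCFAOffset() == frame_size);   // DCHECK after BuildFrame.
  cfi.AdjustCFAOffset(32);                           // IncreaseFrameSize(32).
  cfi.AdjustCFAOffset(-32);                          // DecreaseFrameSize(32).
  assert(cfi.GetCurrentCFAOffset() == frame_size);   // DCHECK around RemoveFrame.
  return 0;
}
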
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 4267743..b17cbca 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -48,22 +48,30 @@
   uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11;
   value |= 0xf000d000;  // BL
 
-  uint8_t* addr = &(*code)[literal_offset];
   // Check that we're just overwriting an existing BL.
-  DCHECK_EQ(addr[1] & 0xf8, 0xf0);
-  DCHECK_EQ(addr[3] & 0xd0, 0xd0);
+  DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000);
   // Write the new BL.
-  addr[0] = (value >> 16) & 0xff;
-  addr[1] = (value >> 24) & 0xff;
-  addr[2] = (value >> 0) & 0xff;
-  addr[3] = (value >> 8) & 0xff;
+  SetInsn32(code, literal_offset, value);
 }
 
-void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                                   const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                                   uint32_t patch_offset ATTRIBUTE_UNUSED,
-                                                   uint32_t target_offset ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected relative dex cache array patch.";
+void Thumb2RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
+                                                   const LinkerPatch& patch,
+                                                   uint32_t patch_offset,
+                                                   uint32_t target_offset) {
+  uint32_t literal_offset = patch.LiteralOffset();
+  uint32_t pc_literal_offset = patch.PcInsnOffset();
+  uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */;
+  uint32_t diff = target_offset - pc_base;
+
+  uint32_t insn = GetInsn32(code, literal_offset);
+  DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u);  // MOVW/MOVT, unpatched (imm16 == 0).
+  uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu);
+  uint32_t imm4 = (diff16 >> 12) & 0xfu;
+  uint32_t imm = (diff16 >> 11) & 0x1u;
+  uint32_t imm3 = (diff16 >> 8) & 0x7u;
+  uint32_t imm8 = diff16 & 0xffu;
+  insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8;
+  SetInsn32(code, literal_offset, insn);
 }
 
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() {
@@ -80,5 +88,31 @@
   return thunk_code;
 }
 
+void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
+  DCHECK_LE(offset + 4u, code->size());
+  DCHECK_EQ(offset & 1u, 0u);
+  uint8_t* addr = &(*code)[offset];
+  addr[0] = (value >> 16) & 0xff;
+  addr[1] = (value >> 24) & 0xff;
+  addr[2] = (value >> 0) & 0xff;
+  addr[3] = (value >> 8) & 0xff;
+}
+
+uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
+  DCHECK_LE(offset + 4u, code.size());
+  DCHECK_EQ(offset & 1u, 0u);
+  const uint8_t* addr = &code[offset];
+  return
+      (static_cast<uint32_t>(addr[0]) << 16) +
+      (static_cast<uint32_t>(addr[1]) << 24) +
+      (static_cast<uint32_t>(addr[2]) << 0) +
+      (static_cast<uint32_t>(addr[3]) << 8);
+}
+
+template <typename Alloc>
+uint32_t Thumb2RelativePatcher::GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset) {
+  return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
+}
+
 }  // namespace linker
 }  // namespace art
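
The imm16 scattering in PatchDexCacheReference follows the Thumb2 MOVW/MOVT encoding, which
splits a 16-bit immediate into imm4:imm:imm3:imm8 fields. A standalone sketch with one worked
value:

#include <cassert>
#include <cstdint>

// Scatter a 16-bit immediate into the imm4/imm/imm3/imm8 fields of a Thumb2
// MOVW/MOVT instruction word, as the patcher above does.
uint32_t PackImm16(uint32_t insn, uint32_t imm16) {
  const uint32_t imm4 = (imm16 >> 12) & 0xfu;
  const uint32_t imm = (imm16 >> 11) & 0x1u;
  const uint32_t imm3 = (imm16 >> 8) & 0x7u;
  const uint32_t imm8 = imm16 & 0xffu;
  return (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8;
}

int main() {
  // Patch the placeholder MOVW r0, #0 (0xf2400000) to load #0x1234.
  assert(PackImm16(0xf2400000u, 0x1234u) == 0xf2412034u);
  return 0;
}

Note also the byte order handled by SetInsn32/GetInsn32: a 32-bit Thumb2 instruction is two
16-bit halfwords, each stored little-endian, high halfword first, which is why addr[0]
receives bits 16-23 rather than bits 0-7.
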
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 5611303..2d474c2 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -34,6 +34,12 @@
  private:
   static std::vector<uint8_t> CompileThunkCode();
 
+  void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
+  static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
+
+  template <typename Alloc>
+  static uint32_t GetInsn32(std::vector<uint8_t, Alloc>* code, uint32_t offset);
+
   // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
   static constexpr int32_t kPcDisplacement = 4;
 
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 3b397cc..a057a4c 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -121,6 +121,48 @@
     result.push_back(static_cast<uint8_t>(bl >> 8));
     return result;
   }
+
+  void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+    dex_cache_arrays_begin_ = dex_cache_arrays_begin;
+    static const uint8_t raw_code[] = {
+        0x40, 0xf2, 0x00, 0x00,   // MOVW r0, #0 (placeholder)
+        0xc0, 0xf2, 0x00, 0x00,   // MOVT r0, #0 (placeholder)
+        0x78, 0x44,               // ADD r0, pc
+    };
+    constexpr uint32_t pc_insn_offset = 8u;
+    const ArrayRef<const uint8_t> code(raw_code);
+    LinkerPatch patches[] = {
+        LinkerPatch::DexCacheArrayPatch(0u, nullptr, pc_insn_offset, element_offset),
+        LinkerPatch::DexCacheArrayPatch(4u, nullptr, pc_insn_offset, element_offset),
+    };
+    AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+    Link();
+
+    uint32_t method1_offset = GetMethodOffset(1u);
+    uint32_t pc_base_offset = method1_offset + pc_insn_offset + 4u /* PC adjustment */;
+    uint32_t diff = dex_cache_arrays_begin_ + element_offset - pc_base_offset;
+    // Distribute the bits of the diff between the MOVW and MOVT:
+    uint32_t diffw = diff & 0xffffu;
+    uint32_t difft = diff >> 16;
+    uint32_t movw = 0xf2400000u |           // MOVW r0, #0 (placeholder),
+        ((diffw & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
+        ((diffw & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
+        ((diffw & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
+        ((diffw & 0x00ffu));                // keep imm8 at bits 0-7.
+    uint32_t movt = 0xf2c00000u |           // MOVT r0, #0 (placeholder),
+        ((difft & 0xf000u) << (16 - 12)) |  // move imm4 from bits 12-15 to bits 16-19,
+        ((difft & 0x0800u) << (26 - 11)) |  // move imm from bit 11 to bit 26,
+        ((difft & 0x0700u) << (12 - 8)) |   // move imm3 from bits 8-10 to bits 12-14,
+        ((difft & 0x00ffu));                // keep imm8 at bits 0-7.
+    const uint8_t expected_code[] = {
+        static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24),
+        static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8),
+        static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24),
+        static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8),
+        0x78, 0x44,
+    };
+    EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+  }
 };
 
 const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -285,5 +327,25 @@
   EXPECT_TRUE(CheckThunk(thunk_offset));
 }
 
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm8) {
+  TestDexCacheReference(0x00ff0000u, 0x00fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm3) {
+  TestDexCacheReference(0x02ff0000u, 0x05fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm) {
+  TestDexCacheReference(0x08ff0000u, 0x08fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
+TEST_F(Thumb2RelativePatcherTest, DexCacheReferenceImm4) {
+  TestDexCacheReference(0xd0ff0000u, 0x60fcu);
+  ASSERT_LT(GetMethodOffset(1u), 0xfcu);
+}
+
 }  // namespace linker
 }  // namespace art
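
The bit-scattering applied to the MOVW/MOVT placeholders in TestDexCacheReference above follows the Thumb2 imm4:i:imm3:imm8 immediate split. As an illustrative sketch only — EncodeThumb2MovImm16 is a hypothetical name, not a helper in the patcher — the distribution can be factored as:

    #include <cstdint>

    // Scatters a 16-bit immediate into a Thumb2 MOVW (0xf2400000) or
    // MOVT (0xf2c00000) base opcode.
    static uint32_t EncodeThumb2MovImm16(uint32_t base_opcode, uint32_t imm16) {
      return base_opcode |
          ((imm16 & 0xf000u) << (16 - 12)) |  // imm4: bits 12-15 -> bits 16-19.
          ((imm16 & 0x0800u) << (26 - 11)) |  // i: bit 11 -> bit 26.
          ((imm16 & 0x0700u) << (12 - 8)) |   // imm3: bits 8-10 -> bits 12-14.
          (imm16 & 0x00ffu);                  // imm8: bits 0-7 stay in place.
    }

    // movw = EncodeThumb2MovImm16(0xf2400000u, diff & 0xffffu);
    // movt = EncodeThumb2MovImm16(0xf2c00000u, diff >> 16);
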
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 1cbe481..72ddf07 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -258,12 +258,36 @@
   if ((patch_offset & 0xff8) == 0xff8) {  // ...ff8 or ...ffc
     uint32_t adrp = GetInsn(code, literal_offset);
     DCHECK_EQ(adrp & 0xff000000, 0x90000000);
-    // TODO: Improve the check. For now, we're just checking if the next insn is
-    // the LDR using the result of the ADRP, otherwise we implement the workaround.
+    uint32_t next_offset = patch_offset + 4u;
     uint32_t next_insn = GetInsn(code, literal_offset + 4u);
-    bool ok = (next_insn & 0xffc00000) == 0xb9400000 &&  // LDR <Wt>, [<Xn>, #pimm]
-        (((next_insn >> 5) ^ adrp) & 0x1f) == 0;         // <Xn> == ADRP destination reg
-    return !ok;
+
+    // Below we avoid patching sequences where the ADRP is followed by a load which can
+    // easily be proved to be aligned.
+
+    // First check if the next insn is the LDR using the result of the ADRP.
+    // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg.
+    if ((next_insn & 0xffc00000) == 0xb9400000 &&
+        (((next_insn >> 5) ^ adrp) & 0x1f) == 0) {
+      return false;
+    }
+
+    // LDR <Wt>, <label> is always 4-byte aligned and thus never crosses a 64-bit boundary.
+    if ((next_insn & 0xff000000) == 0x18000000) {
+      return false;
+    }
+
+    // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8.
+    if ((next_insn & 0xff000000) == 0x58000000) {
+      bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0;
+      return !is_aligned_load;
+    }
+
+    // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is
+    // guaranteed to be 128-bit aligned and <pimm> is a multiple of the load size.
+    if ((next_insn & 0xbfc003e0) == 0xb94003e0) {
+      return false;
+    }
+    return true;
   }
   return false;
 }
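
Why the LDR <Xt>, <label> check works: the load target is next_offset plus the sign-extended imm19 field (insn bits 5-23) times 4. Since next_offset is itself 4-byte aligned, bit 2 of the target is bit 2 of next_offset XOR bit 0 of imm19, so the target is 8-byte aligned exactly when those two bits agree. A standalone restatement (the helper name is illustrative, not part of the patcher):

    #include <cstdint>

    // True iff the PC-relative LDR <Xt>, <label> at next_offset loads from an
    // 8-byte-aligned address: bit 2 of next_offset must match bit 0 of imm19.
    static bool IsLdrXLiteralTargetAligned(uint32_t next_offset, uint32_t next_insn) {
      return (((next_offset >> 2) ^ (next_insn >> 5)) & 1u) == 0;
    }
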
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index b36e6d0..21f9367 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -42,6 +42,15 @@
   // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp).
   static constexpr uint32_t kLdurInsn = 0xf840405fu;
 
+  // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units.
+  static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu;
+  static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu;
+
+  // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain the displacement
+  // from SP in units of 4 bytes (32-bit load) or 8 bytes (64-bit load).
+  static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu;
+  static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
+
   uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
                                  const ArrayRef<const LinkerPatch>& method1_patches,
                                  const ArrayRef<const uint8_t>& last_method_code,
@@ -260,20 +269,43 @@
     }
   }
 
-  void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk,
-                       uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+  void TestAdrpInsn2Ldr(uint32_t insn2, uint32_t adrp_offset, bool has_thunk,
+                        uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
     uint32_t method1_offset =
         CompiledCode::AlignCode(kTrampolineSize, kArm64) + sizeof(OatQuickMethodHeader);
     ASSERT_LT(method1_offset, adrp_offset);
     ASSERT_EQ(adrp_offset & 3u, 0u);
     uint32_t num_nops = (adrp_offset - method1_offset) / 4u;
     if (has_thunk) {
-      TestNopsAdrpInsn2LdrHasThunk(num_nops, kLdurInsn, dex_cache_arrays_begin, element_offset);
+      TestNopsAdrpInsn2LdrHasThunk(num_nops, insn2, dex_cache_arrays_begin, element_offset);
     } else {
-      TestNopsAdrpInsn2Ldr(num_nops, kLdurInsn, dex_cache_arrays_begin, element_offset);
+      TestNopsAdrpInsn2Ldr(num_nops, insn2, dex_cache_arrays_begin, element_offset);
     }
     ASSERT_EQ(method1_offset, GetMethodOffset(1u));  // If this fails, num_nops is wrong.
   }
+
+  void TestAdrpLdurLdr(uint32_t adrp_offset, bool has_thunk,
+                       uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+    TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+  }
+
+  void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, int32_t pcrel_disp,
+                           uint32_t adrp_offset, bool has_thunk,
+                           uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+    ASSERT_LT(pcrel_disp, 0x100000);
+    ASSERT_GE(pcrel_disp, -0x100000);
+    ASSERT_EQ(pcrel_disp & 0x3, 0);
+    uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5);
+    TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+  }
+
+  void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, uint32_t sprel_disp_in_load_units,
+                           uint32_t adrp_offset, bool has_thunk,
+                           uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
+    ASSERT_LT(sprel_disp_in_load_units, 0x1000u);
+    uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
+    TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, dex_cache_arrays_begin, element_offset);
+  }
 };
 
 const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -509,5 +541,42 @@
   TestAdrpLdurLdr(0x1000u, false, 0x12345678u, 0x1234u);
 }
 
+#define TEST_FOR_OFFSETS(test, disp1, disp2) \
+  test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \
+  test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2)
+
+// LDR <Wt>, <label> is always aligned. We should never have to use a fixup.
+#define LDRW_PCREL_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WPcRel ## disp) { \
+    TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \
+  }
+
+TEST_FOR_OFFSETS(LDRW_PCREL_TEST, 0x1234, 0x1238)
+
+// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8.
+#define LDRX_PCREL_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XPcRel ## disp) { \
+    bool unaligned = ((adrp_offset + 4u + static_cast<uint32_t>(disp)) & 7u) != 0; \
+    bool has_thunk = (adrp_offset == 0xff8u || adrp_offset == 0xffcu) && unaligned; \
+    TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \
+  }
+
+TEST_FOR_OFFSETS(LDRX_PCREL_TEST, 0x1234, 0x1238)
+
+// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed.
+#define LDRW_SPREL_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## WSpRel ## disp) { \
+    TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, disp >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \
+  }
+
+TEST_FOR_OFFSETS(LDRW_SPREL_TEST, 0, 4)
+
+#define LDRX_SPREL_TEST(adrp_offset, disp) \
+  TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference ## adrp_offset ## XSpRel ## disp) { \
+    TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, disp >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \
+  }
+
+TEST_FOR_OFFSETS(LDRX_SPREL_TEST, 0, 8)
+
 }  // namespace linker
 }  // namespace art
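
Each *_TEST macro above token-pastes its arguments into the test name, so every (adrp offset, displacement) pair becomes its own gtest. For illustration, LDRW_PCREL_TEST(0xff4u, 0x1234) expands to roughly:

    TEST_F(Arm64RelativePatcherTestDefault, DexCacheReference0xff4uWPcRel0x1234) {
      TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, 0x1234, 0xff4u, false, 0x12345678u, 0x1234u);
    }
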
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
index 246cf11..315585d 100644
--- a/compiler/linker/x86/relative_patcher_x86.cc
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -16,14 +16,43 @@
 
 #include "linker/x86/relative_patcher_x86.h"
 
+#include "compiled_method.h"
+
 namespace art {
 namespace linker {
 
-void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                                const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                                uint32_t patch_offset ATTRIBUTE_UNUSED,
-                                                uint32_t target_offset ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unexpected relative dex cache array patch.";
+void X86RelativePatcher::PatchDexCacheReference(std::vector<uint8_t>* code,
+                                                const LinkerPatch& patch,
+                                                uint32_t patch_offset,
+                                                uint32_t target_offset) {
+  uint32_t anchor_literal_offset = patch.PcInsnOffset();
+  uint32_t literal_offset = patch.LiteralOffset();
+
+  // Check that the anchor points to the pop in a "call +0; pop <reg>" sequence.
+  DCHECK_GE(anchor_literal_offset, 5u);
+  DCHECK_LT(anchor_literal_offset, code->size());
+  DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u);
+  DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u);
+  DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u);
+  DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u);
+  DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u);
+  DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u);
+
+  // Check that the patched data contains kDummy32BitOffset.
+  constexpr int kDummy32BitOffset = 256;  // Must match X86Mir2Lir::kDummy32BitOffset.
+  DCHECK_LE(literal_offset, code->size());
+  DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0));
+  DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8));
+  DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16));
+  DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24));
+
+  // Apply patch.
+  uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
+  uint32_t diff = target_offset - anchor_offset;
+  (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0);
+  (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8);
+  (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16);
+  (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24);
 }
 
 }  // namespace linker
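
The x86 patcher relies on the only way 32-bit x86 code can learn its own PC: a "call +0; pop <reg>" pair, where the call pushes the address of the following pop and the pop materializes it in <reg>. The imm32 being patched is therefore a displacement relative to that anchor. A minimal sketch of the arithmetic, with an illustrative helper name:

    #include <cstdint>

    // Chooses diff so that <reg> + diff == target at run time; <reg> holds the
    // run-time address of the pop (the anchor).
    static uint32_t ComputeAnchorRelativeDiff(uint32_t patch_offset,
                                              uint32_t literal_offset,
                                              uint32_t anchor_literal_offset,
                                              uint32_t target_offset) {
      uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
      return target_offset - anchor_offset;
    }
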
diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc
index 15ac47e..7acc330 100644
--- a/compiler/linker/x86/relative_patcher_x86_test.cc
+++ b/compiler/linker/x86/relative_patcher_x86_test.cc
@@ -101,5 +101,35 @@
   EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
 }
 
+TEST_F(X86RelativePatcherTest, DexCacheReference) {
+  dex_cache_arrays_begin_ = 0x12345678;
+  constexpr size_t kElementOffset = 0x1234;
+  static const uint8_t raw_code[] = {
+      0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
+      0x5b,                                 // pop ebx
+      0x8b, 0x83, 0x00, 0x01, 0x00, 0x00,   // mov eax, [ebx + 256 (kDummy32BitOffset)]
+  };
+  constexpr uint32_t anchor_offset = 5u;  // After call +0.
+  ArrayRef<const uint8_t> code(raw_code);
+  LinkerPatch patches[] = {
+      LinkerPatch::DexCacheArrayPatch(code.size() - 4u, nullptr, anchor_offset, kElementOffset),
+  };
+  AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches));
+  Link();
+
+  auto result = method_offset_map_.FindMethodOffset(MethodRef(1u));
+  ASSERT_TRUE(result.first);
+  uint32_t diff =
+      dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset);
+  static const uint8_t expected_code[] = {
+      0xe8, 0x00, 0x00, 0x00, 0x00,         // call +0
+      0x5b,                                 // pop ebx
+      0x8b, 0x83,                           // mov eax, [ebx + diff]
+      static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8),
+      static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24)
+  };
+  EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
 }  // namespace linker
 }  // namespace art
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 7120920..5b4cc54 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -450,24 +450,18 @@
 
       if (writer_->compiler_driver_->GetCompilerOptions().GetIncludeDebugSymbols()) {
         // Record debug information for this function if we are doing that.
-
-        std::string name = PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
-        if (deduped) {
-          // TODO We should place the DEDUPED tag on the first instance of a deduplicated symbol
-          // so that it will show up in a debuggerd crash report.
-          name += " [ DEDUPED ]";
-        }
-
         const uint32_t quick_code_start = quick_code_offset -
             writer_->oat_header_->GetExecutableOffset() - thumb_offset;
-        const DexFile::CodeItem *code_item = it.GetMethodCodeItem();
-        const DexFile::ClassDef& class_def = dex_file_->GetClassDef(class_def_index_);
-        writer_->method_info_.push_back(DebugInfo(name, deduped,
-              dex_file_->GetClassDescriptor(class_def),
-              dex_file_->GetSourceFile(class_def),
-              quick_code_start, quick_code_start + code_size,
-              code_item == nullptr ? nullptr : dex_file_->GetDebugInfoStream(code_item),
-              compiled_method));
+        writer_->method_info_.push_back(DebugInfo {
+            dex_file_,
+            class_def_index_,
+            it.GetMemberIndex(),
+            it.GetMethodAccessFlags(),
+            it.GetMethodCodeItem(),
+            deduped,
+            quick_code_start,
+            quick_code_start + code_size,
+            compiled_method});
       }
 
       if (kIsDebugBuild) {
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index c472000..51bc9b4 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -115,26 +115,18 @@
   ~OatWriter();
 
   struct DebugInfo {
-    DebugInfo(const std::string& method_name, bool deduped,
-              const char* class_descriptor, const char* src_file_name,
-              uint32_t low_pc, uint32_t high_pc,
-              const uint8_t* dbgstream, CompiledMethod* compiled_method)
-      : method_name_(method_name), deduped_(deduped),
-        class_descriptor_(class_descriptor), src_file_name_(src_file_name),
-        low_pc_(low_pc), high_pc_(high_pc),
-        dbgstream_(dbgstream), compiled_method_(compiled_method) {
-    }
-    std::string method_name_;  // Note: this name is a pretty-printed name.
-    bool        deduped_;
-    const char* class_descriptor_;
-    const char* src_file_name_;
-    uint32_t    low_pc_;
-    uint32_t    high_pc_;
-    const uint8_t* dbgstream_;
+    const DexFile* dex_file_;
+    size_t class_def_index_;
+    uint32_t dex_method_index_;
+    uint32_t access_flags_;
+    const DexFile::CodeItem* code_item_;
+    bool deduped_;
+    uint32_t low_pc_;
+    uint32_t high_pc_;
     CompiledMethod* compiled_method_;
   };
 
-  const std::vector<DebugInfo>& GetCFIMethodInfo() const {
+  const std::vector<DebugInfo>& GetMethodDebugInfo() const {
     return method_info_;
   }
 
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index a912d4c..8a64d81 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -16,16 +16,13 @@
 
 #include "builder.h"
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "class_linker.h"
-#include "dex_file.h"
 #include "dex_file-inl.h"
-#include "dex_instruction.h"
 #include "dex_instruction-inl.h"
 #include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
-#include "mirror/art_field.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "nodes.h"
@@ -656,11 +653,10 @@
   uint16_t field_index = instruction.VRegC_22c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<1> hs(soa.Self());
-  Handle<mirror::ArtField> resolved_field(hs.NewHandle(
-      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa)));
+  ArtField* resolved_field =
+      compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
 
-  if (resolved_field.Get() == nullptr) {
+  if (resolved_field == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField);
     return false;
   }
@@ -728,15 +724,15 @@
   uint16_t field_index = instruction.VRegB_21c();
 
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<5> hs(soa.Self());
+  StackHandleScope<4> hs(soa.Self());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(
       dex_compilation_unit_->GetClassLinker()->FindDexCache(*dex_compilation_unit_->GetDexFile())));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
-  Handle<mirror::ArtField> resolved_field(hs.NewHandle(compiler_driver_->ResolveField(
-      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true)));
+  ArtField* resolved_field = compiler_driver_->ResolveField(
+      soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
 
-  if (resolved_field.Get() == nullptr) {
+  if (resolved_field == nullptr) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedField);
     return false;
   }
@@ -758,7 +754,7 @@
     std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
         outer_dex_cache.Get(),
         referrer_class.Get(),
-        resolved_field.Get(),
+        resolved_field,
         field_index,
         &storage_index);
     bool can_easily_access = is_put ? pair.second : pair.first;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index da28dc7..8736374 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -82,6 +82,7 @@
   HGraphVisitor* instruction_visitor = GetInstructionVisitor();
   DCHECK_EQ(current_block_index_, 0u);
   GenerateFrameEntry();
+  DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_));
   for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) {
     HBasicBlock* block = block_order_->Get(current_block_index_);
     // Don't generate code for an empty block. Its predecessors will branch to its successor
@@ -415,7 +416,16 @@
   }
 }
 
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const {
+void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
+  for (size_t i = 0; i < pc_infos_.Size(); i++) {
+    struct PcInfo pc_info = pc_infos_.Get(i);
+    uint32_t pc2dex_offset = pc_info.native_pc;
+    int32_t pc2dex_dalvik_offset = pc_info.dex_pc;
+    src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
+  }
+}
+
+void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
   uint32_t pc2dex_data_size = 0u;
   uint32_t pc2dex_entries = pc_infos_.Size();
   uint32_t pc2dex_offset = 0u;
@@ -425,19 +435,12 @@
   uint32_t dex2pc_offset = 0u;
   int32_t dex2pc_dalvik_offset = 0;
 
-  if (src_map != nullptr) {
-    src_map->reserve(pc2dex_entries);
-  }
-
   for (size_t i = 0; i < pc2dex_entries; i++) {
     struct PcInfo pc_info = pc_infos_.Get(i);
     pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
     pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
     pc2dex_offset = pc_info.native_pc;
     pc2dex_dalvik_offset = pc_info.dex_pc;
-    if (src_map != nullptr) {
-      src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
-    }
   }
 
   // Walk over the blocks and find which ones correspond to catch block entries.
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 07ca6b1..b888aca 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -205,7 +205,8 @@
     slow_paths_.Add(slow_path);
   }
 
-  void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
+  void BuildSourceMap(DefaultSrcMap* src_map) const;
+  void BuildMappingTable(std::vector<uint8_t>* vector) const;
   void BuildVMapTable(std::vector<uint8_t>* vector) const;
   void BuildNativeGCMap(
       std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
@@ -425,6 +426,8 @@
 
   StackMapStream stack_map_stream_;
 
+  friend class OptimizingCFITest;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index cfc798a..a799a51 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -513,6 +513,14 @@
   }
 }
 
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+  return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
 void CodeGeneratorARM::GenerateFrameEntry() {
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
@@ -531,12 +539,19 @@
 
   // PC is in the list of callee-save to mimic Quick, but we need to push
   // LR at entry instead.
-  __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR);
+  uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR;
+  __ PushList(push_mask);
+  __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask));
+  __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize);
   if (fpu_spill_mask_ != 0) {
     SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
     __ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
+    __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
+    __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize);
   }
-  __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
+  int adjust = GetFrameSize() - FrameEntrySpillSize();
+  __ AddConstant(SP, -adjust);
+  __ cfi().AdjustCFAOffset(adjust);
   __ StoreToOffset(kStoreWord, R0, SP, 0);
 }
 
@@ -545,10 +560,14 @@
     __ bx(LR);
     return;
   }
-  __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
+  int adjust = GetFrameSize() - FrameEntrySpillSize();
+  __ AddConstant(SP, adjust);
+  __ cfi().AdjustCFAOffset(-adjust);
   if (fpu_spill_mask_ != 0) {
     SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
     __ vpops(start_register, POPCOUNT(fpu_spill_mask_));
+    __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+    __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
   }
   __ PopList(core_spill_mask_);
 }
@@ -1190,7 +1209,10 @@
 
 void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) {
   UNUSED(ret);
+  __ cfi().RememberState();
   codegen_->GenerateFrameExit();
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderARM::VisitReturn(HReturn* ret) {
@@ -1201,7 +1223,10 @@
 
 void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
   UNUSED(ret);
+  __ cfi().RememberState();
   codegen_->GenerateFrameExit();
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
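
The RememberState/RestoreState pair around each frame exit is needed because a method may return from several basic blocks: GenerateFrameExit emits CFI opcodes describing the torn-down frame, yet the code generator keeps emitting blocks after the return, and those still execute under the full frame. The pattern, common to every back end in this change:

    __ cfi().RememberState();               // Snapshot the CFI state at full frame size.
    codegen_->GenerateFrameExit();          // Epilogue; CFI now describes no frame.
    // ... return instruction ...
    __ cfi().RestoreState();                // Subsequent blocks run with the frame intact.
    __ cfi().DefCFAOffset(codegen_->GetFrameSize());
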
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 439e85c..5fe8adc 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -465,20 +465,67 @@
     //      ...                       : reserved frame space.
     //      sp[0]                     : current method.
     __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
-    __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
-    __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+    GetAssembler()->cfi().AdjustCFAOffset(frame_size);
+    SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+    SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
   }
 }
 
 void CodeGeneratorARM64::GenerateFrameExit() {
   if (!HasEmptyFrame()) {
     int frame_size = GetFrameSize();
-    __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
-    __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+    UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+    UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
     __ Drop(frame_size);
+    GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
   }
 }
 
+static inline dwarf::Reg DWARFReg(CPURegister reg) {
+  if (reg.IsFPRegister()) {
+    return dwarf::Reg::Arm64Fp(reg.code());
+  } else {
+    DCHECK_LT(reg.code(), 31u);  // X0 - X30.
+    return dwarf::Reg::Arm64Core(reg.code());
+  }
+}
+
+void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) {
+  int size = registers.RegisterSizeInBytes();
+  while (registers.Count() >= 2) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    const CPURegister& dst1 = registers.PopLowestIndex();
+    __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+    GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+    GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size);
+    offset += 2 * size;
+  }
+  if (!registers.IsEmpty()) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    __ Str(dst0, MemOperand(__ StackPointer(), offset));
+    GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+  }
+  DCHECK(registers.IsEmpty());
+}
+
+void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) {
+  int size = registers.RegisterSizeInBytes();
+  while (registers.Count() >= 2) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    const CPURegister& dst1 = registers.PopLowestIndex();
+    __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+    GetAssembler()->cfi().Restore(DWARFReg(dst0));
+    GetAssembler()->cfi().Restore(DWARFReg(dst1));
+    offset += 2 * size;
+  }
+  if (!registers.IsEmpty()) {
+    const CPURegister& dst0 = registers.PopLowestIndex();
+    __ Ldr(dst0, MemOperand(__ StackPointer(), offset));
+    GetAssembler()->cfi().Restore(DWARFReg(dst0));
+  }
+  DCHECK(registers.IsEmpty());
+}
+
 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
   __ Bind(GetLabelOf(block));
 }
@@ -1659,11 +1706,26 @@
     Register lhs = InputRegisterAt(condition, 0);
     Operand rhs = InputOperandAt(condition, 1);
     Condition arm64_cond = ARM64Condition(condition->GetCondition());
-    if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
-      if (arm64_cond == eq) {
-        __ Cbz(lhs, true_target);
-      } else {
-        __ Cbnz(lhs, true_target);
+    if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+      switch (arm64_cond) {
+        case eq:
+          __ Cbz(lhs, true_target);
+          break;
+        case ne:
+          __ Cbnz(lhs, true_target);
+          break;
+        case lt:
+          // Test the sign bit and branch accordingly.
+          __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+          break;
+        case ge:
+          // Test the sign bit and branch accordingly.
+          __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+          break;
+        default:
+          // The `static_cast` is needed to avoid a compilation error with
+          // `-Werror=sign-promo`.
+          LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
       }
     } else {
       __ Cmp(lhs, rhs);
@@ -2403,8 +2465,11 @@
 
 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
   UNUSED(instruction);
+  GetAssembler()->cfi().RememberState();
   codegen_->GenerateFrameExit();
   __ Ret();
+  GetAssembler()->cfi().RestoreState();
+  GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
@@ -2413,8 +2478,11 @@
 
 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
   UNUSED(instruction);
+  GetAssembler()->cfi().RememberState();
   codegen_->GenerateFrameExit();
   __ Ret();
+  GetAssembler()->cfi().RestoreState();
+  GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderARM64::VisitShl(HShl* shl) {
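
The VisitIf change above extends compare-against-zero lowering from eq/ne (CBZ/CBNZ) to lt/ge: for a signed value, `x < 0` holds exactly when the sign bit is set, so a single TBNZ/TBZ on bit 31 (or 63 for an X register) replaces the CMP plus conditional branch. gt and le are excluded because they cannot be decided from the sign bit alone. Roughly:

    // if (x < 0) goto target;   // w0 holds x
    //   before: cmp w0, #0 ; b.lt target
    //   after:  tbnz w0, #31, target
    // if (x >= 0) uses the complement: tbz w0, #31, target
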
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7edb129..9430e31 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -227,6 +227,8 @@
 
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
+  void SpillRegisters(vixl::CPURegList registers, int offset);
+  void UnspillRegisters(vixl::CPURegList registers, int offset);
 
   vixl::CPURegList GetFramePreservedCoreRegisters() const {
     return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
new file mode 100644
index 0000000..921c1d8
--- /dev/null
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_utils.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
+                                     int64_t* magic, int* shift) {
+  // It does not make sense to calculate magic and shift for zero divisor.
+  DCHECK_NE(divisor, 0);
+
+  /* Implementation according to H. S. Warren's "Hacker's Delight" (Addison-Wesley, 2002),
+   * Chapter 10, and T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using
+   * Multiplication" (PLDI 1994).
+   * The magic number M and shift S can be calculated in the following way:
+   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
+   * where divisor(d) >= 2.
+   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
+   * where divisor(d) <= -2.
+   * Thus nc can be calculated like:
+   * nc = exp - exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
+   * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
+   *
+   * So the shift p is the smallest p satisfying
+   * 2^p > nc * (d - 2^p % d), where d >= 2
+   * 2^p > nc * (d + 2^p % d), where d <= -2.
+   *
+   * The magic number M is calculated by
+   * M = (2^p + d - 2^p % d) / d, where d >= 2
+   * M = (2^p - d - 2^p % d) / d, where d <= -2.
+   *
+   * Notice that p is always greater than or equal to 32 (resp. 64), so we just return
+   * p - 32 (resp. p - 64) as the shift number S.
+   */
+
+  int64_t p = is_long ? 63 : 31;
+  const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31);
+
+  // Initialize the computations.
+  uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
+  uint64_t sign_bit = is_long ? static_cast<uint64_t>(divisor) >> 63 :
+                                static_cast<uint32_t>(divisor) >> 31;
+  uint64_t tmp = exp + sign_bit;
+  uint64_t abs_nc = tmp - 1 - (tmp % abs_d);
+  uint64_t quotient1 = exp / abs_nc;
+  uint64_t remainder1 = exp % abs_nc;
+  uint64_t quotient2 = exp / abs_d;
+  uint64_t remainder2 = exp % abs_d;
+
+  /*
+   * To avoid handling both positive and negative divisor, "Hacker's Delight"
+   * introduces a method to handle these 2 cases together to avoid duplication.
+   */
+  uint64_t delta;
+  do {
+    p++;
+    quotient1 = 2 * quotient1;
+    remainder1 = 2 * remainder1;
+    if (remainder1 >= abs_nc) {
+      quotient1++;
+      remainder1 = remainder1 - abs_nc;
+    }
+    quotient2 = 2 * quotient2;
+    remainder2 = 2 * remainder2;
+    if (remainder2 >= abs_d) {
+      quotient2++;
+      remainder2 = remainder2 - abs_d;
+    }
+    delta = abs_d - remainder2;
+  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
+
+  *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
+
+  if (!is_long) {
+    *magic = static_cast<int>(*magic);
+  }
+
+  *shift = is_long ? p - 64 : p - 32;
+}
+
+}  // namespace art
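
A hedged usage sketch: the pair (magic, shift) can be sanity-checked by emulating the 32-bit sequence that GenerateDivRemWithAnyConstant emits below — imull leaving the high half in EDX, the conditional EDX +/- n correction, the arithmetic shift, and the final add of the sign bit. CheckMagicDiv is an illustrative test helper, not part of the compiler:

    #include <cassert>
    #include <cstdint>

    void CheckMagicDiv(int32_t divisor, int32_t n) {
      int64_t magic;
      int shift;
      CalculateMagicAndShiftForDivRem(divisor, /* is_long */ false, &magic, &shift);
      int64_t product = static_cast<int64_t>(static_cast<int32_t>(magic)) * n;
      int32_t high = static_cast<int32_t>(product >> 32);  // EDX after imull.
      if (divisor > 0 && magic < 0) high += n;             // addl edx, num.
      if (divisor < 0 && magic > 0) high -= n;             // subl edx, num.
      high >>= shift;                                      // sarl edx, shift.
      high += static_cast<int32_t>(static_cast<uint32_t>(high) >> 31);  // +1 if negative.
      assert(high == n / divisor);                         // e.g. divisor 7, n -100 -> -14.
    }
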
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
new file mode 100644
index 0000000..59b495c
--- /dev/null
+++ b/compiler/optimizing/code_generator_utils.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
+
+#include <cstdint>
+
+namespace art {
+
+// Computes the magic number and the shift needed in the div/rem-by-constant algorithm,
+// returning them through the out arguments `magic` and `shift`.
+void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 92b62e2..48445d7 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_x86.h"
 
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
@@ -459,7 +460,12 @@
         assembler_(codegen->GetAssembler()),
         codegen_(codegen) {}
 
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
 void CodeGeneratorX86::GenerateFrameEntry() {
+  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -478,10 +484,14 @@
     Register reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
       __ pushl(reg);
+      __ cfi().AdjustCFAOffset(kX86WordSize);
+      __ cfi().RelOffset(DWARFReg(reg), 0);
     }
   }
 
-  __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+  int adjust = GetFrameSize() - FrameEntrySpillSize();
+  __ subl(ESP, Immediate(adjust));
+  __ cfi().AdjustCFAOffset(adjust);
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
 }
 
@@ -490,12 +500,16 @@
     return;
   }
 
-  __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+  int adjust = GetFrameSize() - FrameEntrySpillSize();
+  __ addl(ESP, Immediate(adjust));
+  __ cfi().AdjustCFAOffset(-adjust);
 
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     Register reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
       __ popl(reg);
+      __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
+      __ cfi().Restore(DWARFReg(reg));
     }
   }
 }
@@ -1102,8 +1116,11 @@
 
 void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) {
   UNUSED(ret);
+  __ cfi().RememberState();
   codegen_->GenerateFrameExit();
   __ ret();
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderX86::VisitReturn(HReturn* ret) {
@@ -1161,8 +1178,11 @@
         LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
     }
   }
+  __ cfi().RememberState();
   codegen_->GenerateFrameExit();
   __ ret();
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -2278,6 +2298,133 @@
   __ addl(ESP, Immediate(2 * elem_size));
 }
 
+
+void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(1).IsConstant());
+  DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
+
+  Register out_register = locations->Out().AsRegister<Register>();
+  Register input_register = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+  DCHECK(imm == 1 || imm == -1);
+
+  if (instruction->IsRem()) {
+    __ xorl(out_register, out_register);
+  } else {
+    __ movl(out_register, input_register);
+    if (imm == -1) {
+      __ negl(out_register);
+    }
+  }
+}
+
+
+void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+
+  Register out_register = locations->Out().AsRegister<Register>();
+  Register input_register = locations->InAt(0).AsRegister<Register>();
+  int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+  DCHECK(IsPowerOfTwo(std::abs(imm)));
+  Register num = locations->GetTemp(0).AsRegister<Register>();
+
+  __ leal(num, Address(input_register, std::abs(imm) - 1));
+  __ testl(input_register, input_register);
+  __ cmovl(kGreaterEqual, num, input_register);
+  int shift = CTZ(imm);
+  __ sarl(num, Immediate(shift));
+
+  if (imm < 0) {
+    __ negl(num);
+  }
+
+  __ movl(out_register, num);
+}
+
+void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+  Register eax = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+  Register num;
+  Register edx;
+
+  if (instruction->IsDiv()) {
+    edx = locations->GetTemp(0).AsRegister<Register>();
+    num = locations->GetTemp(1).AsRegister<Register>();
+  } else {
+    edx = locations->Out().AsRegister<Register>();
+    num = locations->GetTemp(0).AsRegister<Register>();
+  }
+
+  DCHECK_EQ(EAX, eax);
+  DCHECK_EQ(EDX, edx);
+  if (instruction->IsDiv()) {
+    DCHECK_EQ(EAX, out);
+  } else {
+    DCHECK_EQ(EDX, out);
+  }
+
+  int64_t magic;
+  int shift;
+  CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+  Label ndiv;
+  Label end;
+  // If numerator is 0, the result is 0, no computation needed.
+  __ testl(eax, eax);
+  __ j(kNotEqual, &ndiv);
+
+  __ xorl(out, out);
+  __ jmp(&end);
+
+  __ Bind(&ndiv);
+
+  // Save the numerator.
+  __ movl(num, eax);
+
+  // EAX = magic
+  __ movl(eax, Immediate(magic));
+
+  // EDX:EAX = magic * numerator
+  __ imull(num);
+
+  if (imm > 0 && magic < 0) {
+    // EDX += num
+    __ addl(edx, num);
+  } else if (imm < 0 && magic > 0) {
+    __ subl(edx, num);
+  }
+
+  // Shift if needed.
+  if (shift != 0) {
+    __ sarl(edx, Immediate(shift));
+  }
+
+  // EDX += 1 if EDX < 0
+  __ movl(eax, edx);
+  __ shrl(edx, Immediate(31));
+  __ addl(edx, eax);
+
+  if (instruction->IsRem()) {
+    __ movl(eax, num);
+    __ imull(edx, Immediate(imm));
+    __ subl(eax, edx);
+    __ movl(edx, eax);
+  } else {
+    __ movl(eax, edx);
+  }
+  __ Bind(&end);
+}
+
 void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
 
@@ -2289,28 +2436,42 @@
 
   switch (instruction->GetResultType()) {
     case Primitive::kPrimInt: {
-      Register second_reg = second.AsRegister<Register>();
       DCHECK_EQ(EAX, first.AsRegister<Register>());
       DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
 
-      SlowPathCodeX86* slow_path =
+      if (instruction->InputAt(1)->IsIntConstant()) {
+        int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+        if (imm == 0) {
+          // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
+        } else if (imm == 1 || imm == -1) {
+          DivRemOneOrMinusOne(instruction);
+        } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+          DivByPowerOfTwo(instruction->AsDiv());
+        } else {
+          DCHECK(imm <= -2 || imm >= 2);
+          GenerateDivRemWithAnyConstant(instruction);
+        }
+      } else {
+        SlowPathCodeX86* slow_path =
           new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
-                                                                 is_div);
-      codegen_->AddSlowPath(slow_path);
+              is_div);
+        codegen_->AddSlowPath(slow_path);
 
-      // 0x80000000/-1 triggers an arithmetic exception!
-      // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
-      // it's safe to just use negl instead of more complex comparisons.
+        Register second_reg = second.AsRegister<Register>();
+        // 0x80000000/-1 triggers an arithmetic exception!
+        // Dividing by -1 is actually negation and -0x80000000 == 0x80000000 so
+        // it's safe to just use negl instead of more complex comparisons.
 
-      __ cmpl(second_reg, Immediate(-1));
-      __ j(kEqual, slow_path->GetEntryLabel());
+        __ cmpl(second_reg, Immediate(-1));
+        __ j(kEqual, slow_path->GetEntryLabel());
 
-      // edx:eax <- sign-extended of eax
-      __ cdq();
-      // eax = quotient, edx = remainder
-      __ idivl(second_reg);
-
-      __ Bind(slow_path->GetExitLabel());
+        // edx:eax <- sign-extended of eax
+        __ cdq();
+        // eax = quotient, edx = remainder
+        __ idivl(second_reg);
+        __ Bind(slow_path->GetExitLabel());
+      }
       break;
     }
 
@@ -2350,10 +2511,16 @@
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RegisterLocation(EAX));
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       // Intel uses edx:eax as the dividend.
       locations->AddTemp(Location::RegisterLocation(EDX));
+      // We need to save the numerator while we tweak EAX and EDX. Since imul forces its
+      // results into EAX and EDX, things are simpler if we also use EAX as the output and
+      // request another temp.
+      if (div->InputAt(1)->IsIntConstant()) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
       break;
     }
     case Primitive::kPrimLong: {
@@ -2411,6 +2578,7 @@
 
 void LocationsBuilderX86::VisitRem(HRem* rem) {
   Primitive::Type type = rem->GetResultType();
+
   LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
       ? LocationSummary::kCall
       : LocationSummary::kNoCall;
@@ -2419,8 +2587,14 @@
   switch (type) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RegisterLocation(EAX));
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       locations->SetOut(Location::RegisterLocation(EDX));
+      // We need to save the numerator while we tweak EAX and EDX. Since imul forces its
+      // results into EAX and EDX, things are simpler if we also use EDX as the output and
+      // request another temp.
+      if (rem->InputAt(1)->IsIntConstant()) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
       break;
     }
     case Primitive::kPrimLong: {
@@ -3388,7 +3562,13 @@
       // Ensure the value is in a byte register.
       locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
     } else {
-      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+      bool is_fp_type = (value_type == Primitive::kPrimFloat)
+          || (value_type == Primitive::kPrimDouble);
+      if (is_fp_type) {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+      }
     }
     // Temporary registers for the write barrier.
     if (needs_write_barrier) {
@@ -3667,23 +3847,43 @@
 }
 
 void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
-  ScratchRegisterScope ensure_scratch(
-      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, src + stack_offset));
-  __ movl(Address(ESP, dst + stack_offset), temp_reg);
+  ScratchRegisterScope possible_scratch(
+      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+  int temp = possible_scratch.GetRegister();
+  if (temp == kNoRegister) {
+    // Use the stack.
+    __ pushl(Address(ESP, src));
+    __ popl(Address(ESP, dst));
+  } else {
+    Register temp_reg = static_cast<Register>(temp);
+    __ movl(temp_reg, Address(ESP, src));
+    __ movl(Address(ESP, dst), temp_reg);
+  }
 }
 
 void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
-  ScratchRegisterScope ensure_scratch(
-      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, src + stack_offset));
-  __ movl(Address(ESP, dst + stack_offset), temp_reg);
-  __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
-  __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+  ScratchRegisterScope possible_scratch(
+      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+  int temp = possible_scratch.GetRegister();
+  if (temp == kNoRegister) {
+    // Use the stack instead.
+    // Push src low word.
+    __ pushl(Address(ESP, src));
+    // Push src high word. Stack offset = 4.
+    __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
+
+    // Pop into dst high word. Stack offset = 8.
+    // Pop with ESP address uses the 'after increment' value of ESP.
+    __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
+    // Finally dst low word. Stack offset = 4.
+    __ popl(Address(ESP, dst));
+  } else {
+    Register temp_reg = static_cast<Register>(temp);
+    __ movl(temp_reg, Address(ESP, src));
+    __ movl(Address(ESP, dst), temp_reg);
+    __ movl(temp_reg, Address(ESP, src + kX86WordSize));
+    __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
+  }
 }
 
 void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -3748,10 +3948,18 @@
           __ xorps(dest, dest);
         } else {
           ScratchRegisterScope ensure_scratch(
-              this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-          Register temp = static_cast<Register>(ensure_scratch.GetRegister());
-          __ movl(temp, Immediate(value));
-          __ movd(dest, temp);
+              this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+          int temp_reg = ensure_scratch.GetRegister();
+          if (temp_reg == kNoRegister) {
+            // Avoid spilling/restoring a scratch register by using the stack.
+            __ pushl(Immediate(value));
+            __ movss(dest, Address(ESP, 0));
+            __ addl(ESP, Immediate(4));
+          } else {
+            Register temp = static_cast<Register>(temp_reg);
+            __ movl(temp, Immediate(value));
+            __ movd(dest, temp);
+          }
         }
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
@@ -3800,42 +4008,96 @@
   }
 }
 
-void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
-  Register suggested_scratch = reg == EAX ? EBX : EAX;
-  ScratchRegisterScope ensure_scratch(
-      this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
+  // Prefer to avoid xchg as it isn't speedy on smaller processors.
+  ScratchRegisterScope possible_scratch(
+      this, reg1, codegen_->GetNumberOfCoreRegisters());
+  int temp_reg = possible_scratch.GetRegister();
+  if (temp_reg == kNoRegister || temp_reg == reg2) {
+    __ pushl(reg1);
+    __ movl(reg1, reg2);
+    __ popl(reg2);
+  } else {
+    Register temp = static_cast<Register>(temp_reg);
+    __ movl(temp, reg1);
+    __ movl(reg1, reg2);
+    __ movl(reg2, temp);
+  }
+}
 
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
-  __ movl(Address(ESP, mem + stack_offset), reg);
-  __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
+void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
+  ScratchRegisterScope possible_scratch(
+      this, reg, codegen_->GetNumberOfCoreRegisters());
+  int temp_reg = possible_scratch.GetRegister();
+  if (temp_reg == kNoRegister) {
+    __ pushl(Address(ESP, mem));
+    __ movl(Address(ESP, mem + kX86WordSize), reg);
+    __ popl(reg);
+  } else {
+    Register temp = static_cast<Register>(temp_reg);
+    __ movl(temp, Address(ESP, mem));
+    __ movl(Address(ESP, mem), reg);
+    __ movl(reg, temp);
+  }
 }
 
 void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
-  ScratchRegisterScope ensure_scratch(
-      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
-  Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
-  __ movl(temp_reg, Address(ESP, mem + stack_offset));
-  __ movss(Address(ESP, mem + stack_offset), reg);
-  __ movd(reg, temp_reg);
+  ScratchRegisterScope possible_scratch(
+      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+  int temp_reg = possible_scratch.GetRegister();
+  if (temp_reg == kNoRegister) {
+    __ pushl(Address(ESP, mem));
+    __ movss(Address(ESP, mem + kX86WordSize), reg);
+    __ movss(reg, Address(ESP, 0));
+    __ addl(ESP, Immediate(kX86WordSize));
+  } else {
+    Register temp = static_cast<Register>(temp_reg);
+    __ movl(temp, Address(ESP, mem));
+    __ movss(Address(ESP, mem), reg);
+    __ movd(reg, temp);
+  }
 }
 
 void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
-  ScratchRegisterScope ensure_scratch1(
-      this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
+  ScratchRegisterScope possible_scratch1(
+      this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+  int temp_reg1 = possible_scratch1.GetRegister();
+  if (temp_reg1 == kNoRegister) {
+    // No free registers.  Use the stack.
+    __ pushl(Address(ESP, mem1));
+    __ pushl(Address(ESP, mem2 + kX86WordSize));
+    // Pop with ESP address uses the 'after increment' value of ESP.
+    __ popl(Address(ESP, mem1 + kX86WordSize));
+    __ popl(Address(ESP, mem2));
+  } else {
+    // Got the first one.  Try for a second.
+    ScratchRegisterScope possible_scratch2(
+        this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
+    int temp_reg2 = possible_scratch2.GetRegister();
+    if (temp_reg2 == kNoRegister) {
+      Register temp = static_cast<Register>(temp_reg1);
+      // Bummer.  Only have one free register to use.
+      // Save mem1 on the stack.
+      __ pushl(Address(ESP, mem1));
 
-  Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
-  ScratchRegisterScope ensure_scratch2(
-      this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+      // Copy mem2 into mem1.
+      __ movl(temp, Address(ESP, mem2 + kX86WordSize));
+      __ movl(Address(ESP, mem1 + kX86WordSize), temp);
 
-  int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
-  stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
-  __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
-  __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
-  __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
-  __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+      // Now pop mem1 into mem2.
+      // Pop with ESP address uses the 'after increment' value of ESP.
+      __ popl(Address(ESP, mem2));
+    } else {
+      // Great.  We have 2 registers to play with.
+      Register temp1 = static_cast<Register>(temp_reg1);
+      Register temp2 = static_cast<Register>(temp_reg2);
+      DCHECK_NE(temp1, temp2);
+      __ movl(temp1, Address(ESP, mem1));
+      __ movl(temp2, Address(ESP, mem2));
+      __ movl(Address(ESP, mem2), temp1);
+      __ movl(Address(ESP, mem1), temp2);
+    }
+  }
 }
 
 void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -3844,7 +4106,7 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
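
The stack-based fallback paths above lean on two x86 details worth spelling out: each pushl lowers ESP by one word, so later ESP-relative operands must be rebased, and popl with an ESP-based memory operand uses the value of ESP after the increment. A worked trace of the no-register path of Exchange(mem1, mem2), with S = ESP on entry, M1 = [S + mem1], M2 = [S + mem2]:

    // pushl [ESP + mem1]        // ESP = S - 4; top of stack = M1.
    // pushl [ESP + mem2 + 4]    // mem2 rebased by the first push; ESP = S - 8; top = M2.
    // popl  [ESP + mem1 + 4]    // ESP first rises to S - 4, then [S + mem1] = M2.
    // popl  [ESP + mem2]        // ESP back to S; [S + mem2] = M1.
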
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0cc3c65..00a4323 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,6 +106,7 @@
   X86Assembler* GetAssembler() const;
 
  private:
+  void Exchange(Register reg1, Register reg2);
   void Exchange(Register reg, int mem);
   void Exchange(int mem1, int mem2);
   void Exchange32(XmmRegister reg, int mem);
@@ -163,6 +164,9 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivByPowerOfTwo(HDiv* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem *rem);
   void HandleShift(HBinaryOperation* instruction);
   void GenerateShlLong(const Location& loc, Register shifter);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cdbc778..01b24ea 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_x86_64.h"
 
+#include "code_generator_utils.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
@@ -428,7 +429,8 @@
         location_builder_(graph, this),
         instruction_visitor_(graph, this),
         move_resolver_(graph->GetArena(), this),
-        isa_features_(isa_features) {
+        isa_features_(isa_features),
+        constant_area_start_(0) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -481,7 +483,15 @@
   }
 }
 
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
 void CodeGeneratorX86_64::GenerateFrameEntry() {
+  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check = IsLeafMethod()
       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -501,17 +511,22 @@
     Register reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
       __ pushq(CpuRegister(reg));
+      __ cfi().AdjustCFAOffset(kX86_64WordSize);
+      __ cfi().RelOffset(DWARFReg(reg), 0);
     }
   }
 
-  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+  int adjust = GetFrameSize() - GetCoreSpillSize();
+  __ subq(CpuRegister(RSP), Immediate(adjust));
+  __ cfi().AdjustCFAOffset(adjust);
   uint32_t xmm_spill_location = GetFpuSpillStart();
   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
 
   for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
-      __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
-               XmmRegister(kFpuCalleeSaves[i]));
+      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
     }
   }
 
@@ -526,17 +541,22 @@
   size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
   for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
     if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
-      __ movsd(XmmRegister(kFpuCalleeSaves[i]),
-               Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+      __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
+      __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
     }
   }
 
-  __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+  int adjust = GetFrameSize() - GetCoreSpillSize();
+  __ addq(CpuRegister(RSP), Immediate(adjust));
+  __ cfi().AdjustCFAOffset(-adjust);
 
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
     Register reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
       __ popq(CpuRegister(reg));
+      __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+      __ cfi().Restore(DWARFReg(reg));
     }
   }
 }
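
The arithmetic these cfi() calls maintain can be checked by hand. Since the epilogue re-asserts DefCFAOffset(GetFrameSize()) after each ret (see VisitReturnVoid below), the CFA offset must equal GetFrameSize() throughout the body. A toy model makes that explicit; this is not the ART dwarf API, the sizes are hypothetical, and it assumes GetCoreSpillSize() counts the return-address slot, which the DefCFAOffset invariant implies:

#include <cassert>

int main() {
  const int kWordSize = 8;        // kX86_64WordSize
  const int frame_size = 64;      // hypothetical GetFrameSize()
  const int num_core_saves = 2;   // callee-saves actually pushed
  const int core_spill_size = (num_core_saves + 1) * kWordSize;  // assumed: + ret addr

  int cfa_offset = kWordSize;                  // SetCurrentCFAOffset: return address
  cfa_offset += num_core_saves * kWordSize;    // one AdjustCFAOffset per pushq
  cfa_offset += frame_size - core_spill_size;  // subq rsp + AdjustCFAOffset
  assert(cfa_offset == frame_size);            // what DefCFAOffset re-asserts
  return 0;
}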
@@ -1123,8 +1143,11 @@
 
 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
   UNUSED(ret);
+  __ cfi().RememberState();
   codegen_->GenerateFrameExit();
   __ ret();
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
@@ -1175,8 +1198,11 @@
         LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
     }
   }
+  __ cfi().RememberState();
   codegen_->GenerateFrameExit();
   __ ret();
+  __ cfi().RestoreState();
+  __ cfi().DefCFAOffset(codegen_->GetFrameSize());
 }
 
 Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -1951,7 +1977,7 @@
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2015,12 +2041,30 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ addss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ addss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ addsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ addsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2048,7 +2092,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2086,12 +2130,30 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ subss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ subss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ subsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ subsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2124,7 +2186,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2169,13 +2231,31 @@
 
     case Primitive::kPrimFloat: {
       DCHECK(first.Equals(locations->Out()));
-      __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ mulss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ mulss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
       DCHECK(first.Equals(locations->Out()));
-      __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ mulsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ mulsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2259,6 +2339,216 @@
   __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
 }
 
+void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  DCHECK(second.IsConstant());
+
+  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  DCHECK(imm == 1 || imm == -1);
+
+  switch (instruction->GetResultType()) {
+    case Primitive::kPrimInt: {
+      if (instruction->IsRem()) {
+        __ xorl(output_register, output_register);
+      } else {
+        __ movl(output_register, input_register);
+        if (imm == -1) {
+          __ negl(output_register);
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (instruction->IsRem()) {
+        __ xorq(output_register, output_register);
+      } else {
+        __ movq(output_register, input_register);
+        if (imm == -1) {
+          __ negq(output_register);
+        }
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
+  }
+}
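
A quick sanity check (not ART code) of why the +/-1 case needs no idiv: the quotient is just the (possibly negated) numerator, the remainder is always zero, and even INT32_MIN / -1, which faults under idiv, is well defined as a plain negation in wrapping arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  // uint32_t models Java's wrapping two's-complement int exactly.
  uint32_t min_int = 0x80000000u;      // INT32_MIN bit pattern
  assert(-min_int == min_int);         // negl: INT32_MIN / -1 == INT32_MIN, no fault
  assert(7 % 1 == 0 && -7 % -1 == 0);  // rem by +/-1 is always 0 -> xorl/xorq
  return 0;
}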
+
+void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+
+  CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+
+  int64_t imm = Int64FromConstant(second.GetConstant());
+
+  DCHECK(IsPowerOfTwo(std::abs(imm)));
+
+  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+    __ testl(numerator, numerator);
+    __ cmov(kGreaterEqual, tmp, numerator);
+    int shift = CTZ(imm);
+    __ sarl(tmp, Immediate(shift));
+
+    if (imm < 0) {
+      __ negl(tmp);
+    }
+
+    __ movl(output_register, tmp);
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+    __ movq(rdx, Immediate(std::abs(imm) - 1));
+    __ addq(rdx, numerator);
+    __ testq(numerator, numerator);
+    __ cmov(kGreaterEqual, rdx, numerator);
+    int shift = CTZ(imm);
+    __ sarq(rdx, Immediate(shift));
+
+    if (imm < 0) {
+      __ negq(rdx);
+    }
+
+    __ movq(output_register, rdx);
+  }
+}
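
The sequence above is the classic bias trick: for d == +/-2^k, a plain arithmetic shift rounds toward negative infinity, so negative numerators are biased by |d| - 1 first to recover the round-toward-zero semantics Java requires. A C++ sketch (not ART code, using GCC/Clang's __builtin_ctz, and assuming >> on a negative int32_t is an arithmetic shift, as it is on ART's targets) shows the correspondence instruction by instruction:

#include <cassert>
#include <cstdint>

int32_t DivByPowerOfTwoSketch(int32_t n, int32_t d) {
  uint32_t abs_d = (d < 0) ? -static_cast<uint32_t>(d) : static_cast<uint32_t>(d);
  int shift = __builtin_ctz(abs_d);                 // CTZ(imm) above
  // leal tmp, [n + |d|-1]: a wrapping add, modeled via uint32_t.
  int32_t biased = static_cast<int32_t>(static_cast<uint32_t>(n) + (abs_d - 1));
  int32_t t = (n >= 0) ? n : biased;                // testl + cmov(kGreaterEqual)
  t >>= shift;                                      // sarl tmp, shift
  return (d < 0) ? -t : t;                          // negl when imm < 0
}

int main() {
  assert(DivByPowerOfTwoSketch(-7, 2) == -3);  // not -4: rounds toward zero
  assert(DivByPowerOfTwoSketch(7, -4) == -1);
  assert(DivByPowerOfTwoSketch(-8, 8) == -1);
  return 0;
}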
+
+void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+
+  CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
+      : locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
+      : locations->Out().AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  DCHECK_EQ(RAX, eax.AsRegister());
+  DCHECK_EQ(RDX, edx.AsRegister());
+  if (instruction->IsDiv()) {
+    DCHECK_EQ(RAX, out.AsRegister());
+  } else {
+    DCHECK_EQ(RDX, out.AsRegister());
+  }
+
+  int64_t magic;
+  int shift;
+
+  // TODO: can these branches be written as one?
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    int imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+    CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+    __ movl(numerator, eax);
+
+    Label no_div;
+    Label end;
+    __ testl(eax, eax);
+    __ j(kNotEqual, &no_div);
+
+    __ xorl(out, out);
+    __ jmp(&end);
+
+    __ Bind(&no_div);
+
+    __ movl(eax, Immediate(magic));
+    __ imull(numerator);
+
+    if (imm > 0 && magic < 0) {
+      __ addl(edx, numerator);
+    } else if (imm < 0 && magic > 0) {
+      __ subl(edx, numerator);
+    }
+
+    if (shift != 0) {
+      __ sarl(edx, Immediate(shift));
+    }
+
+    __ movl(eax, edx);
+    __ shrl(edx, Immediate(31));
+    __ addl(edx, eax);
+
+    if (instruction->IsRem()) {
+      __ movl(eax, numerator);
+      __ imull(edx, Immediate(imm));
+      __ subl(eax, edx);
+      __ movl(edx, eax);
+    } else {
+      __ movl(eax, edx);
+    }
+    __ Bind(&end);
+  } else {
+    int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
+
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+
+    CpuRegister rax = eax;
+    CpuRegister rdx = edx;
+
+    CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
+
+    // Save the numerator.
+    __ movq(numerator, rax);
+
+    // RAX = magic
+    __ movq(rax, Immediate(magic));
+
+    // RDX:RAX = magic * numerator
+    __ imulq(numerator);
+
+    if (imm > 0 && magic < 0) {
+      // RDX += numerator
+      __ addq(rdx, numerator);
+    } else if (imm < 0 && magic > 0) {
+      // RDX -= numerator
+      __ subq(rdx, numerator);
+    }
+
+    // Shift if needed.
+    if (shift != 0) {
+      __ sarq(rdx, Immediate(shift));
+    }
+
+    // RDX += 1 if RDX < 0
+    __ movq(rax, rdx);
+    __ shrq(rdx, Immediate(63));
+    __ addq(rdx, rax);
+
+    if (instruction->IsRem()) {
+      __ movq(rax, numerator);
+
+      if (IsInt<32>(imm)) {
+        __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
+      } else {
+        __ movq(numerator, Immediate(imm));
+        __ imulq(rdx, numerator);
+      }
+
+      __ subq(rax, rdx);
+      __ movq(rdx, rax);
+    } else {
+      __ movq(rax, rdx);
+    }
+  }
+}
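
For a concrete divisor, the int path above reduces to the following sketch (not ART code). The (magic, shift) pair comes from CalculateMagicAndShiftForDivRem, per Hacker's Delight; for d == 7 it is magic = 0x92492493, shift = 2, and since d > 0 while magic < 0 the numerator is added back, matching the "addl edx, numerator" branch. The emitted code additionally short-circuits a zero numerator, which the sketch omits:

#include <cassert>
#include <cstdint>

int32_t DivBy7(int32_t n) {
  const int32_t magic = static_cast<int32_t>(0x92492493);
  const int shift = 2;
  // imull leaves the high 32 bits of the 64-bit product in EDX.
  int32_t hi = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);
  hi += n;                                                      // addl edx, numerator
  hi >>= shift;                                                 // sarl edx, shift
  hi += static_cast<int32_t>(static_cast<uint32_t>(hi) >> 31);  // shrl 31 + addl
  return hi;
}

int main() {
  assert(DivBy7(21) == 3);
  assert(DivBy7(-22) == -3);  // rounds toward zero
  assert(DivBy7(6) == 0);
  return 0;
}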
+
 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   Primitive::Type type = instruction->GetResultType();
@@ -2267,37 +2557,52 @@
   bool is_div = instruction->IsDiv();
   LocationSummary* locations = instruction->GetLocations();
 
-  CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>();
-  CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location second = locations->InAt(1);
 
   DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
-  DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister());
+  DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
 
-  SlowPathCodeX86_64* slow_path =
-      new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
-          out_reg.AsRegister(), type, is_div);
-  codegen_->AddSlowPath(slow_path);
+  if (second.IsConstant()) {
+    int64_t imm = Int64FromConstant(second.GetConstant());
 
-  // 0x80000000(00000000)/-1 triggers an arithmetic exception!
-  // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
-  // so it's safe to just use negl instead of more complex comparisons.
-  if (type == Primitive::kPrimInt) {
-    __ cmpl(second_reg, Immediate(-1));
-    __ j(kEqual, slow_path->GetEntryLabel());
-    // edx:eax <- sign-extended of eax
-    __ cdq();
-    // eax = quotient, edx = remainder
-    __ idivl(second_reg);
+    if (imm == 0) {
+      // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+    } else if (imm == 1 || imm == -1) {
+      DivRemOneOrMinusOne(instruction);
+    } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+      DivByPowerOfTwo(instruction->AsDiv());
+    } else {
+      DCHECK(imm <= -2 || imm >= 2);
+      GenerateDivRemWithAnyConstant(instruction);
+    }
   } else {
-    __ cmpq(second_reg, Immediate(-1));
-    __ j(kEqual, slow_path->GetEntryLabel());
-    // rdx:rax <- sign-extended of rax
-    __ cqo();
-    // rax = quotient, rdx = remainder
-    __ idivq(second_reg);
-  }
+    SlowPathCodeX86_64* slow_path =
+        new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
+            out.AsRegister(), type, is_div);
+    codegen_->AddSlowPath(slow_path);
 
-  __ Bind(slow_path->GetExitLabel());
+    CpuRegister second_reg = second.AsRegister<CpuRegister>();
+    // 0x80000000(00000000)/-1 triggers an arithmetic exception!
+    // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
+    // so it's safe to just use negl instead of more complex comparisons.
+    if (type == Primitive::kPrimInt) {
+      __ cmpl(second_reg, Immediate(-1));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      // edx:eax <- sign-extended of eax
+      __ cdq();
+      // eax = quotient, edx = remainder
+      __ idivl(second_reg);
+    } else {
+      __ cmpq(second_reg, Immediate(-1));
+      __ j(kEqual, slow_path->GetEntryLabel());
+      // rdx:rax <- sign-extended of rax
+      __ cqo();
+      // rax = quotient, rdx = remainder
+      __ idivq(second_reg);
+    }
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
@@ -2307,17 +2612,23 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RegisterLocation(RAX));
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       // Intel uses edx:eax as the dividend.
       locations->AddTemp(Location::RegisterLocation(RDX));
+      // We need to save the numerator while we tweak RAX and RDX. As imul forces its
+      // results into RAX and RDX, things are simpler if we also use RDX as an output
+      // and request another temp.
+      if (div->InputAt(1)->IsConstant()) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
       break;
     }
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2342,12 +2653,30 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ divss(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsStackSlot());
+        __ divss(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      if (second.IsFpuRegister()) {
+        __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+      } else if (second.IsConstant()) {
+        __ divsd(first.AsFpuRegister<XmmRegister>(),
+                 codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+      } else {
+        DCHECK(second.IsDoubleStackSlot());
+        __ divsd(first.AsFpuRegister<XmmRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
     }
 
@@ -2365,9 +2694,15 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RegisterLocation(RAX));
-      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
       // Intel uses rdx:rax as the dividend and puts the remainder in rdx
       locations->SetOut(Location::RegisterLocation(RDX));
+      // We need to save the numerator while we tweak RAX and RDX. As imul forces its
+      // results into RAX and RDX, things are simpler if we also clobber RAX and
+      // request another temp.
+      if (rem->InputAt(1)->IsConstant()) {
+        locations->AddTemp(Location::RequiresRegister());
+      }
       break;
     }
 
@@ -3486,15 +3821,27 @@
 
 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch(
-      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+      this, TMP, codegen_->GetNumberOfCoreRegisters());
 
-  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
-  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
-  __ movq(CpuRegister(ensure_scratch.GetRegister()),
-          Address(CpuRegister(RSP), mem2 + stack_offset));
-  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
-  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
-          CpuRegister(ensure_scratch.GetRegister()));
+  int temp_reg = ensure_scratch.GetRegister();
+  if (temp_reg == kNoRegister) {
+    // Use the stack as a temporary.
+    // Save mem1 on the stack.
+    __ pushq(Address(CpuRegister(RSP), mem1));
+
+    // Copy mem2 into mem1.
+    __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
+    __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
+
+    // Now pop mem1 into mem2.
+    __ popq(Address(CpuRegister(RSP), mem2));
+  } else {
+    CpuRegister temp = CpuRegister(temp_reg);
+    __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
+    __ movq(temp, Address(CpuRegister(RSP), mem2));
+    __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
+    __ movq(Address(CpuRegister(RSP), mem1), temp);
+  }
 }
 
 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3503,6 +3850,13 @@
   __ movd(reg, CpuRegister(TMP));
 }
 
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+  // Prefer to avoid xchg as it isn't speedy on smaller processors.
+  __ movq(CpuRegister(TMP), reg1);
+  __ movq(reg1, reg2);
+  __ movq(reg2, CpuRegister(TMP));
+}
+
 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
   __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
   __ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3515,7 +3869,7 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+    Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
     Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
@@ -3880,5 +4234,66 @@
   LOG(FATAL) << "Unreachable";
 }
 
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+  // Generate the constant area if needed.
+  X86_64Assembler* assembler = GetAssembler();
+  if (!assembler->IsConstantAreaEmpty()) {
+    // Align to a 4-byte boundary to reduce cache misses, as the data consists of
+    // 4- and 8-byte values.  If used for vectors at a later time, this will need
+    // to be updated to 16 bytes with the appropriate offset.
+    assembler->Align(4, 0);
+    constant_area_start_ = assembler->CodeSize();
+    assembler->AddConstantArea();
+  }
+
+  // And finish up.
+  CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+  public:
+    RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
+      : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+  private:
+    void Process(const MemoryRegion& region, int pos) OVERRIDE {
+      // Patch the correct offset for the instruction.  We use the address of the
+      // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+      int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+      int relative_position = constant_offset - pos;
+
+      // Patch in the right value.
+      region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+    }
+
+    const CodeGeneratorX86_64& codegen_;
+
+    // Location in constant area that the fixup refers to.
+    int offset_into_constant_area_;
+};
+
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+  return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+  return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+  return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+  return Address::RIP(fixup);
+}
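
The patch arithmetic in RIPFixup::Process is easy to check in isolation. A toy illustration (not ART code): RIP-relative addressing is relative to the next instruction, which is why 'pos', the offset just past the 4-byte displacement, is subtracted directly; the layout numbers below are hypothetical:

#include <cassert>
#include <cstdint>
#include <cstring>

void PatchRipDisplacement(uint8_t* code, int pos,
                          int constant_area_start, int offset_in_area) {
  int32_t disp = (constant_area_start + offset_in_area) - pos;
  std::memcpy(code + pos - 4, &disp, sizeof(disp));  // StoreUnaligned<int32_t>
}

int main() {
  uint8_t code[64] = {};
  // Assume the instruction ends at byte 12, the constant area begins at
  // byte 24, and the literal sits 8 bytes into it.
  PatchRipDisplacement(code, /*pos=*/12, /*constant_area_start=*/24,
                       /*offset_in_area=*/8);
  int32_t disp;
  std::memcpy(&disp, code + 12 - 4, sizeof(disp));
  assert(disp == 20);  // 24 + 8 - 12
  return 0;
}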
+
 }  // namespace x86_64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 375c0b0..61bf6ac 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,6 +118,7 @@
   void Exchange32(CpuRegister reg, int mem);
   void Exchange32(XmmRegister reg, int mem);
   void Exchange32(int mem1, int mem2);
+  void Exchange64(CpuRegister reg1, CpuRegister reg2);
   void Exchange64(CpuRegister reg, int mem);
   void Exchange64(XmmRegister reg, int mem);
   void Exchange64(int mem1, int mem2);
@@ -173,6 +174,9 @@
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void GenerateRemFP(HRem *rem);
+  void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+  void DivByPowerOfTwo(HDiv* instruction);
+  void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -243,6 +247,7 @@
   Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
 
   InstructionSet GetInstructionSet() const OVERRIDE {
     return InstructionSet::kX86_64;
@@ -274,6 +279,15 @@
     return isa_features_;
   }
 
+  int ConstantAreaStart() const {
+    return constant_area_start_;
+  }
+
+  Address LiteralDoubleAddress(double v);
+  Address LiteralFloatAddress(float v);
+  Address LiteralInt32Address(int32_t v);
+  Address LiteralInt64Address(int64_t v);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray<Label> block_labels_;
@@ -284,6 +298,10 @@
   X86_64Assembler assembler_;
   const X86_64InstructionSetFeatures& isa_features_;
 
+  // Offset to the start of the constant area in the assembled code.
+  // Used for fixups to the constant area.
+  int constant_area_start_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 49c0d38..4c28378 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -337,13 +337,11 @@
 
 HGraphVisualizer::HGraphVisualizer(std::ostream* output,
                                    HGraph* graph,
-                                   const CodeGenerator& codegen,
-                                   const char* method_name)
-  : output_(output), graph_(graph), codegen_(codegen) {
-  if (output == nullptr) {
-    return;
-  }
+                                   const CodeGenerator& codegen)
+  : output_(output), graph_(graph), codegen_(codegen) {}
 
+void HGraphVisualizer::PrintHeader(const char* method_name) const {
+  DCHECK(output_ != nullptr);
   HGraphVisualizerPrinter printer(graph_, *output_, "", true, codegen_);
   printer.StartTag("compilation");
   printer.PrintProperty("name", method_name);
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index bc553ae..513bceb 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -35,9 +35,9 @@
  public:
   HGraphVisualizer(std::ostream* output,
                    HGraph* graph,
-                   const CodeGenerator& codegen,
-                   const char* method_name);
+                   const CodeGenerator& codegen);
 
+  void PrintHeader(const char* method_name) const;
   void DumpGraph(const char* pass_name, bool is_after_pass = true) const;
 
  private:
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 56ec8a7..afbc490 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -24,9 +24,21 @@
 class InstructionSimplifierVisitor : public HGraphVisitor {
  public:
   InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
-      : HGraphVisitor(graph), stats_(stats) {}
+      : HGraphVisitor(graph),
+        stats_(stats) {}
+
+  void Run();
 
  private:
+  void RecordSimplification() {
+    simplification_occurred_ = true;
+    simplifications_at_current_position_++;
+    if (stats_) {
+      stats_->RecordStat(kInstructionSimplifications);
+    }
+  }
+
+  bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
   void VisitShift(HBinaryOperation* shift);
 
   void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -40,6 +52,8 @@
   void VisitAnd(HAnd* instruction) OVERRIDE;
   void VisitDiv(HDiv* instruction) OVERRIDE;
   void VisitMul(HMul* instruction) OVERRIDE;
+  void VisitNeg(HNeg* instruction) OVERRIDE;
+  void VisitNot(HNot* instruction) OVERRIDE;
   void VisitOr(HOr* instruction) OVERRIDE;
   void VisitShl(HShl* instruction) OVERRIDE;
   void VisitShr(HShr* instruction) OVERRIDE;
@@ -48,11 +62,38 @@
   void VisitXor(HXor* instruction) OVERRIDE;
 
   OptimizingCompilerStats* stats_;
+  bool simplification_occurred_ = false;
+  int simplifications_at_current_position_ = 0;
+  // We ensure we do not loop infinitely. The value is a finger-in-the-air guess
+  // that should allow enough simplification.
+  static constexpr int kMaxSamePositionSimplifications = 10;
 };
 
 void InstructionSimplifier::Run() {
   InstructionSimplifierVisitor visitor(graph_, stats_);
-  visitor.VisitInsertionOrder();
+  visitor.Run();
+}
+
+void InstructionSimplifierVisitor::Run() {
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) {
+    // The simplification of an instruction into another instruction may yield
+    // possibilities for other simplifications. So although we perform a reverse
+    // post-order visit, we sometimes need to revisit an instruction index.
+    simplification_occurred_ = false;
+    VisitBasicBlock(it.Current());
+    if (simplification_occurred_ &&
+        (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) {
+      // New simplifications may be applicable to the instruction at the
+      // current index, so don't advance the iterator.
+      continue;
+    }
+    if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) {
+      LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_
+          << ") occurred at the current position.";
+    }
+    simplifications_at_current_position_ = 0;
+    it.Advance();
+  }
 }
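
The shape of this loop generalizes to any rewrite pass. A sketch (not ART code) of the same capped-revisit pattern, where visit returns true whenever it changed something:

#include <cstddef>
#include <vector>

template <typename Block, typename VisitFn>
void VisitToFixpoint(const std::vector<Block>& blocks, VisitFn visit) {
  constexpr int kMax = 10;  // mirrors kMaxSamePositionSimplifications
  for (size_t i = 0; i < blocks.size();) {
    int attempts = 1;
    while (visit(blocks[i]) && attempts < kMax) {
      ++attempts;  // a simplification occurred: revisit without advancing
    }
    ++i;  // quiescent (or capped): move to the next position
  }
}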
 
 namespace {
@@ -63,6 +104,35 @@
 
 }  // namespace
 
+// Returns true if the code was simplified to use only one negation operation
+// after the binary operation instead of one on each of the inputs.
+bool InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop) {
+  DCHECK(binop->IsAdd() || binop->IsSub());
+  DCHECK(binop->GetLeft()->IsNeg() && binop->GetRight()->IsNeg());
+  HNeg* left_neg = binop->GetLeft()->AsNeg();
+  HNeg* right_neg = binop->GetRight()->AsNeg();
+  if (!left_neg->HasOnlyOneNonEnvironmentUse() ||
+      !right_neg->HasOnlyOneNonEnvironmentUse()) {
+    return false;
+  }
+  // Replace code looking like
+  //    NEG tmp1, a
+  //    NEG tmp2, b
+  //    ADD dst, tmp1, tmp2
+  // with
+  //    ADD tmp, a, b
+  //    NEG dst, tmp
+  binop->ReplaceInput(left_neg->GetInput(), 0);
+  binop->ReplaceInput(right_neg->GetInput(), 1);
+  left_neg->GetBlock()->RemoveInstruction(left_neg);
+  right_neg->GetBlock()->RemoveInstruction(right_neg);
+  HNeg* neg = new (GetGraph()->GetArena()) HNeg(binop->GetType(), binop);
+  binop->GetBlock()->InsertInstructionBefore(neg, binop->GetNext());
+  binop->ReplaceWithExceptInReplacementAtIndex(neg, 0);
+  RecordSimplification();
+  return true;
+}
+
 void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
   DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
   HConstant* input_cst = instruction->GetConstantRight();
@@ -182,6 +252,36 @@
     //    src
     instruction->ReplaceWith(input_other);
     instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
+  }
+
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  bool left_is_neg = left->IsNeg();
+  bool right_is_neg = right->IsNeg();
+
+  if (left_is_neg && right_is_neg) {
+    if (TryMoveNegOnInputsAfterBinop(instruction)) {
+      return;
+    }
+  }
+
+  HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
+  if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NEG tmp, b
+    //    ADD dst, a, tmp
+    // with
+    //    SUB dst, a, b
+    // We do not perform the optimization if the input negation has environment
+    // uses or multiple non-environment uses as it could lead to worse code. In
+    // particular, we do not want the live range of `b` to be extended if we are
+    // not sure the initial 'NEG' instruction can be removed.
+    HInstruction* other = left_is_neg ? right : left;
+    HSub* sub = new(GetGraph()->GetArena()) HSub(instruction->GetType(), other, neg->GetInput());
+    instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
+    RecordSimplification();
+    neg->GetBlock()->RemoveInstruction(neg);
   }
 }
 
@@ -201,7 +301,7 @@
 
   // We assume that GVN has run before, so we only perform a pointer comparison.
   // If for some reason the values are equal but the pointers are different, we
-  // are still correct and only miss an optimisation opportunity.
+  // are still correct and only miss an optimization opportunity.
   if (instruction->GetLeft() == instruction->GetRight()) {
     // Replace code looking like
     //    AND dst, src, src
@@ -235,6 +335,7 @@
     //    NEG dst, src
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
         instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+    RecordSimplification();
   }
 }
 
@@ -267,6 +368,7 @@
     //    NEG dst, src
     HNeg* neg = new (allocator) HNeg(type, input_other);
     block->ReplaceAndRemoveInstructionWith(instruction, neg);
+    RecordSimplification();
     return;
   }
 
@@ -280,6 +382,7 @@
     // The 'int' and 'long' cases are handled below.
     block->ReplaceAndRemoveInstructionWith(instruction,
                                            new (allocator) HAdd(type, input_other, input_other));
+    RecordSimplification();
     return;
   }
 
@@ -295,10 +398,75 @@
       HIntConstant* shift = GetGraph()->GetIntConstant(WhichPowerOf2(factor));
       HShl* shl = new(allocator) HShl(type, input_other, shift);
       block->ReplaceAndRemoveInstructionWith(instruction, shl);
+      RecordSimplification();
     }
   }
 }
 
+void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
+  HInstruction* input = instruction->GetInput();
+  if (input->IsNeg()) {
+    // Replace code looking like
+    //    NEG tmp, src
+    //    NEG dst, tmp
+    // with
+    //    src
+    HNeg* previous_neg = input->AsNeg();
+    instruction->ReplaceWith(previous_neg->GetInput());
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    // We perform the optimization even if the input negation has environment
+    // uses, since it allows removing the current instruction. But we only
+    // delete the input negation if it does not have any uses left.
+    if (!previous_neg->HasUses()) {
+      previous_neg->GetBlock()->RemoveInstruction(previous_neg);
+    }
+    RecordSimplification();
+    return;
+  }
+
+  if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    SUB tmp, a, b
+    //    NEG dst, tmp
+    // with
+    //    SUB dst, b, a
+    // We do not perform the optimization if the input subtraction has
+    // environment uses or multiple non-environment uses as it could lead to
+    // worse code. In particular, we do not want the live ranges of `a` and `b`
+    // to be extended if we are not sure the initial 'SUB' instruction can be
+    // removed.
+    HSub* sub = input->AsSub();
+    HSub* new_sub =
+        new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft());
+    instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_sub);
+    if (!sub->HasUses()) {
+      sub->GetBlock()->RemoveInstruction(sub);
+    }
+    RecordSimplification();
+  }
+}
+
+void InstructionSimplifierVisitor::VisitNot(HNot* instruction) {
+  HInstruction* input = instruction->GetInput();
+  if (input->IsNot()) {
+    // Replace code looking like
+    //    NOT tmp, src
+    //    NOT dst, tmp
+    // with
+    //    src
+    // We perform the optimization even if the input Not has environment uses,
+    // since it allows removing the current instruction. But we only delete the
+    // input Not if it does not have any uses left.
+    HNot* previous_not = input->AsNot();
+    instruction->ReplaceWith(previous_not->GetInput());
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    if (!previous_not->HasUses()) {
+      previous_not->GetBlock()->RemoveInstruction(previous_not);
+    }
+    RecordSimplification();
+  }
+}
+
 void InstructionSimplifierVisitor::VisitOr(HOr* instruction) {
   HConstant* input_cst = instruction->GetConstantRight();
   HInstruction* input_other = instruction->GetLeastConstantLeft();
@@ -315,7 +483,7 @@
 
   // We assume that GVN has run before, so we only perform a pointer comparison.
   // If for some reason the values are equal but the pointers are different, we
-  // are still correct and only miss an optimisation opportunity.
+  // are still correct and only miss an optimization opportunity.
   if (instruction->GetLeft() == instruction->GetRight()) {
     // Replace code looking like
     //    OR dst, src, src
@@ -356,20 +524,61 @@
   HBasicBlock* block = instruction->GetBlock();
   ArenaAllocator* allocator = GetGraph()->GetArena();
 
-  if (instruction->GetLeft()->IsConstant()) {
-    int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant());
-    if (left == 0) {
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  if (left->IsConstant()) {
+    if (Int64FromConstant(left->AsConstant()) == 0) {
       // Replace code looking like
       //    SUB dst, 0, src
       // with
       //    NEG dst, src
-      // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When
+      // Note that we cannot optimize `0.0 - x` to `-x` for floating-point. When
       // `x` is `0.0`, the former expression yields `0.0`, while the later
       // yields `-0.0`.
-      HNeg* neg = new (allocator) HNeg(type, instruction->GetRight());
+      HNeg* neg = new (allocator) HNeg(type, right);
       block->ReplaceAndRemoveInstructionWith(instruction, neg);
+      RecordSimplification();
+      return;
     }
   }
+
+  if (left->IsNeg() && right->IsNeg()) {
+    if (TryMoveNegOnInputsAfterBinop(instruction)) {
+      return;
+    }
+  }
+
+  if (right->IsNeg() && right->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NEG tmp, b
+    //    SUB dst, a, tmp
+    // with
+    //    ADD dst, a, b
+    HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left, right->AsNeg()->GetInput());
+    instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+    RecordSimplification();
+    right->GetBlock()->RemoveInstruction(right);
+    return;
+  }
+
+  if (left->IsNeg() && left->HasOnlyOneNonEnvironmentUse()) {
+    // Replace code looking like
+    //    NEG tmp, a
+    //    SUB dst, tmp, b
+    // with
+    //    ADD tmp, a, b
+    //    NEG dst, tmp
+    // The second version is not intrinsically better, but enables more
+    // transformations.
+    HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left->AsNeg()->GetInput(), right);
+    instruction->GetBlock()->InsertInstructionBefore(add, instruction);
+    HNeg* neg = new (GetGraph()->GetArena()) HNeg(instruction->GetType(), add);
+    instruction->GetBlock()->InsertInstructionBefore(neg, instruction);
+    instruction->ReplaceWith(neg);
+    instruction->GetBlock()->RemoveInstruction(instruction);
+    RecordSimplification();
+    left->GetBlock()->RemoveInstruction(left);
+  }
 }
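
All of these Neg/Add/Sub rewrites lean on identities of wrapping two's-complement arithmetic. A quick sanity check (not ART code; uint32_t models Java's wrapping int exactly, since signed overflow would be UB in C++):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t a : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) {
    for (uint32_t b : {0u, 3u, 0x80000000u, 0xDEADBEEFu}) {
      assert(-a + -b == -(a + b));  // TryMoveNegOnInputsAfterBinop on ADD
      assert(a + -b == a - b);      // ADD with one negated input -> SUB
      assert(a - -b == a + b);      // SUB with negated right input -> ADD
      assert(-a - b == -(a + b));   // SUB with negated left input -> ADD + NEG
      assert(-(a - b) == b - a);    // VisitNeg of a SUB -> swapped SUB
    }
  }
  return 0;
}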
 
 void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
@@ -397,6 +606,7 @@
     //    NOT dst, src
     HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other);
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not);
+    RecordSimplification();
     return;
   }
 }
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index b6e4510..aec2d19 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -320,6 +320,27 @@
   GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
 }
 
+void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
+  CreateLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location input = locations->InAt(0);
+  Register input_lo = input.AsRegisterPairLow<Register>();
+  Register input_hi = input.AsRegisterPairHigh<Register>();
+  Location output = locations->Out();
+  Register output_lo = output.AsRegisterPairLow<Register>();
+  Register output_hi = output.AsRegisterPairHigh<Register>();
+
+  X86Assembler* assembler = GetAssembler();
+  // Assign the inputs to the outputs, mixing low/high.
+  __ movl(output_lo, input_hi);
+  __ movl(output_hi, input_lo);
+  __ bswapl(output_lo);
+  __ bswapl(output_hi);
+}
+
 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
 }
@@ -1330,6 +1351,181 @@
   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
 }
 
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+                                       HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Offset is a long, but in 32-bit mode, we only need the low word.
+  // TODO: Can we update the invoke here to remove a TypeConvert to Long?
+  locations->SetInAt(2, Location::RequiresRegister());
+  // Expected value must be in EAX or EDX:EAX.
+  // For long, new value must be in ECX:EBX.
+  if (type == Primitive::kPrimLong) {
+    locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
+    locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
+  } else {
+    locations->SetInAt(3, Location::RegisterLocation(EAX));
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+
+  // Force a byte register for the output.
+  locations->SetOut(Location::RegisterLocation(EAX));
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    locations->AddTemp(Location::RequiresRegister());
+    // Need a byte register for marking.
+    locations->AddTemp(Location::RegisterLocation(ECX));
+  }
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
+  X86Assembler* assembler =
+    reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register base = locations->InAt(1).AsRegister<Register>();
+  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location out = locations->Out();
+  DCHECK_EQ(out.AsRegister<Register>(), EAX);
+
+  if (type == Primitive::kPrimLong) {
+    DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
+    DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
+    DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
+    DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
+    __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+  } else {
+    // Integer or object.
+    DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
+    Register value = locations->InAt(4).AsRegister<Register>();
+    if (type == Primitive::kPrimNot) {
+      // Mark card for object assuming new value is stored.
+      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
+                          locations->GetTemp(1).AsRegister<Register>(),
+                          base,
+                          value);
+    }
+
+    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+  }
+
+  // locked cmpxchg has full barrier semantics, and we don't need scheduling
+  // barriers at this time.
+
+  // Convert ZF into the boolean result.
+  __ setb(kZero, out.AsRegister<Register>());
+  __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
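
The contract these intrinsics implement is that of a boolean compare-and-set. A minimal sketch (not ART code) using the standard atomic API, which on x86 typically lowers to the same lock cmpxchg, with the boolean corresponding to the ZF flag that the setb(kZero)/movzxb pair materializes above:

#include <atomic>
#include <cassert>
#include <cstdint>

bool CompareAndSwapInt(std::atomic<int32_t>* addr, int32_t expected,
                       int32_t new_value) {
  return addr->compare_exchange_strong(expected, new_value);
}

int main() {
  std::atomic<int32_t> value{41};
  assert(CompareAndSwapInt(&value, 41, 42));   // hit: ZF set -> true
  assert(!CompareAndSwapInt(&value, 41, 43));  // miss: ZF clear -> false
  assert(value.load() == 42);
  return 0;
}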
+
+void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
+                     X86Assembler* assembler) {
+  Immediate imm_shift(shift);
+  Immediate imm_mask(mask);
+  __ movl(temp, reg);
+  __ shrl(reg, imm_shift);
+  __ andl(temp, imm_mask);
+  __ andl(reg, imm_mask);
+  __ shll(temp, imm_shift);
+  __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
+  X86Assembler* assembler =
+    reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register reg = locations->InAt(0).AsRegister<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+  /*
+   * Use one bswap instruction to reverse byte order first and then use 3 rounds of
+   * swapping bits to reverse the bits in a number x. Using bswap saves instructions
+   * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
+   * x = bswap x
+   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+   */
+  __ bswapl(reg);
+  SwapBits(reg, temp, 1, 0x55555555, assembler);
+  SwapBits(reg, temp, 2, 0x33333333, assembler);
+  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
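
The same sequence written as portable C++ (not ART code; __builtin_bswap32 stands in for bswapl) both documents the rounds and is easy to unit-test:

#include <cassert>
#include <cstdint>

uint32_t Reverse32(uint32_t x) {
  x = __builtin_bswap32(x);                                  // reverse byte order
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);   // swap adjacent bits
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);   // swap bit pairs
  x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);   // swap nibbles
  return x;
}

int main() {
  assert(Reverse32(0x80000000u) == 1u);
  assert(Reverse32(0x00000001u) == 0x80000000u);
  assert(Reverse32(0x12345678u) == 0x1E6A2C48u);
  return 0;
}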
+
+void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
+  X86Assembler* assembler =
+    reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+  // We want to swap high/low, then bswap each one, and then do the same
+  // as a 32-bit reverse.
+  // Exchange high and low.
+  __ movl(temp, reg_low);
+  __ movl(reg_low, reg_high);
+  __ movl(reg_high, temp);
+
+  // bit-reverse low
+  __ bswapl(reg_low);
+  SwapBits(reg_low, temp, 1, 0x55555555, assembler);
+  SwapBits(reg_low, temp, 2, 0x33333333, assembler);
+  SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
+
+  // bit-reverse high
+  __ bswapl(reg_high);
+  SwapBits(reg_high, temp, 1, 0x55555555, assembler);
+  SwapBits(reg_high, temp, 2, 0x33333333, assembler);
+  SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
+}
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1338,16 +1534,10 @@
 void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
 }  // namespace x86
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index f6fa013..cbf94f0 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -298,25 +298,27 @@
   // TODO: Allow x86 to work with memory. This requires assembler support, see below.
   // locations->SetInAt(0, Location::Any());               // X86 can work on memory directly.
   locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
-  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
+  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
 }
 
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+                      bool is64bit,
+                      X86_64Assembler* assembler,
+                      CodeGeneratorX86_64* codegen) {
   Location output = locations->Out();
-  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
 
   if (output.IsFpuRegister()) {
     // In-register
-    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
 
+    // TODO: Can mask directly with the constant area using pand if we can
+    // guarantee that the literal is aligned on a 16-byte boundary.  This will
+    // avoid a temporary.
     if (is64bit) {
-      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
-      __ movd(xmm_temp, cpu_temp);
+      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
       __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
     } else {
-      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
-      __ movd(xmm_temp, cpu_temp);
+      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
       __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
     }
   } else {
@@ -341,7 +343,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -349,7 +351,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
 }
 
 static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -399,8 +401,11 @@
   GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
 }
 
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
-                        X86_64Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+                        bool is_min,
+                        bool is_double,
+                        X86_64Assembler* assembler,
+                        CodeGeneratorX86_64* codegen) {
   Location op1_loc = locations->InAt(0);
   Location op2_loc = locations->InAt(1);
   Location out_loc = locations->Out();
@@ -427,7 +432,7 @@
   //
   // This removes one jmp, but needs to copy one input (op1) to out.
   //
-  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
 
   XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
 
@@ -461,14 +466,11 @@
 
   // NaN handling.
   __ Bind(&nan);
-  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
   if (is_double) {
-    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
   } else {
-    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
   }
-  __ movd(out, cpu_temp, is_double);
   __ jmp(&done);
 
   // out := op2;
@@ -483,7 +485,7 @@
   __ Bind(&done);
 }
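Side note (a sketch assuming IEEE-754 semantics, not part of this change): the two literals loaded in the NaN branch are the canonical quiet-NaN bit patterns, which Math.min/max must return when either input is NaN.

    #include <cmath>
    #include <cstdint>
    #include <cstring>

    // 0x7FF8000000000000 (double) and 0x7FC00000 (float) decode to quiet NaNs,
    // matching the values movsd/movss load from the constant area above.
    bool CheckCanonicalNans() {
      uint64_t dbits = UINT64_C(0x7FF8000000000000);
      uint32_t fbits = UINT32_C(0x7FC00000);
      double d;
      float f;
      std::memcpy(&d, &dbits, sizeof(d));
      std::memcpy(&f, &fbits, sizeof(f));
      return std::isnan(d) && std::isnan(f);
    }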
 
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
@@ -492,39 +494,38 @@
   // The following is sub-optimal, but it is all we can do for now. It would be fine to also
   // accept the second input as the output (we could simply swap the inputs).
   locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  CreateFPFPToFPPlusTempLocations(arena_, invoke);
+  CreateFPFPToFP(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
 }
 
 static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
@@ -1202,6 +1203,175 @@
   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
 }
 
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+                                       HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  // The expected value must be in EAX/RAX.
+  locations->SetInAt(3, Location::RegisterLocation(RAX));
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+  if (type == Primitive::kPrimNot) {
+    // Need temp registers for card-marking.
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
+}
+
+static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler =
+    reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
+  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
+  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
+  DCHECK_EQ(expected.AsRegister(), RAX);
+  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  if (type == Primitive::kPrimLong) {
+    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
+  } else {
+    // Integer or object.
+    if (type == Primitive::kPrimNot) {
+      // Mark card for object assuming new value is stored.
+      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
+                          locations->GetTemp(1).AsRegister<CpuRegister>(),
+                          base,
+                          value);
+    }
+
+    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+  }
+
+  // Locked cmpxchg has full barrier semantics, so we do not need scheduling
+  // barriers at this time.
+
+  // Convert ZF into the boolean result.
+  __ setcc(kZero, out);
+  __ movzxb(out, out);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimInt, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimLong, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
+  GenCAS(Primitive::kPrimNot, invoke, codegen_);
+}
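As a rough analogue (a sketch that mirrors, rather than reproduces, the generated code): lock cmpxchg implements a strong compare-and-swap whose expected value is pinned to EAX/RAX by the instruction encoding, with ZF reporting success; the setcc/movzxb pair above converts that flag into the boolean result.

    #include <atomic>
    #include <cstdint>

    // Behavioral sketch of GenCAS for the int case.
    bool UnsafeCasIntSketch(std::atomic<int32_t>* addr,
                            int32_t expected, int32_t value) {
      return addr->compare_exchange_strong(expected, value,
                                           std::memory_order_seq_cst);
    }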
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
+                     X86_64Assembler* assembler) {
+  Immediate imm_shift(shift);
+  Immediate imm_mask(mask);
+  __ movl(temp, reg);
+  __ shrl(reg, imm_shift);
+  __ andl(temp, imm_mask);
+  __ andl(reg, imm_mask);
+  __ shll(temp, imm_shift);
+  __ orl(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
+  X86_64Assembler* assembler =
+    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+  /*
+   * Use one bswap instruction to reverse the byte order first, then three
+   * rounds of bit swapping to reverse the bits of a number x. Using bswap
+   * saves instructions over the generic libcore (luni) implementation, which
+   * needs five rounds of bit swapping.
+   * x = bswap x
+   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
+   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
+   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
+   */
+  __ bswapl(reg);
+  SwapBits(reg, temp, 1, 0x55555555, assembler);
+  SwapBits(reg, temp, 2, 0x33333333, assembler);
+  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
+}
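A portable sketch of the emitted sequence (illustration only; the GCC/Clang builtin stands in for bswapl):

    #include <cstdint>

    uint32_t ReverseBits32(uint32_t x) {
      x = __builtin_bswap32(x);                                 // bswapl
      x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);  // swap adjacent bits
      x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);  // swap bit pairs
      x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);  // swap nibbles
      return x;
    }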
+
+void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
+                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
+  Immediate imm_shift(shift);
+  __ movq(temp_mask, Immediate(mask));
+  __ movq(temp, reg);
+  __ shrq(reg, imm_shift);
+  __ andq(temp, temp_mask);
+  __ andq(reg, temp_mask);
+  __ shlq(temp, imm_shift);
+  __ orq(reg, temp);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
+  X86_64Assembler* assembler =
+    reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
+  LocationSummary* locations = invoke->GetLocations();
+
+  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+  /*
+   * Use one bswap instruction to reverse the byte order first, then three
+   * rounds of bit swapping to reverse the bits of a long number x. Using
+   * bswap saves instructions over the generic libcore (luni) implementation,
+   * which needs five rounds of bit swapping.
+   * x = bswap x
+   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
+   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
+   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
+   */
+  __ bswapq(reg);
+  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
+  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
+  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
+}
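The 64-bit variant has the same shape (again a sketch, illustration only). The extra temp_mask register in SwapBits64 exists because x86-64 andq cannot take a 64-bit immediate, so each mask must first be materialized with movq.

    #include <cstdint>

    uint64_t ReverseBits64(uint64_t x) {
      uint64_t m;
      x = __builtin_bswap64(x);  // bswapq
      m = UINT64_C(0x5555555555555555); x = ((x & m) << 1) | ((x >> 1) & m);
      m = UINT64_C(0x3333333333333333); x = ((x & m) << 2) | ((x >> 2) & m);
      m = UINT64_C(0x0F0F0F0F0F0F0F0F); x = ((x & m) << 4) | ((x >> 4) & m);
      return x;
    }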
+
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -1210,14 +1380,9 @@
 void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
-UNIMPLEMENTED_INTRINSIC(IntegerReverse)
-UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(StringIndexOf)
 UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
-UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
 
 }  // namespace x86_64
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f764eb4..5f50494 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1177,6 +1177,9 @@
   bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
   bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
   bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); }
+  bool HasOnlyOneNonEnvironmentUse() const {
+    return !HasEnvironmentUses() && GetUses().HasOnlyOneUse();
+  }
 
   // Does this instruction strictly dominate `other_instruction`?
   // Returns false if this instruction and `other_instruction` are the same.
@@ -1214,6 +1217,13 @@
   void ReplaceWith(HInstruction* instruction);
   void ReplaceInput(HInstruction* replacement, size_t index);
 
+  // This is almost the same as doing `ReplaceWith()`, except that in this
+  // helper the use of this instruction by `other` at `use_index` is *not*
+  // updated: `other` keeps this instruction as its input at that index.
+  void ReplaceWithExceptInReplacementAtIndex(HInstruction* other, size_t use_index) {
+    ReplaceWith(other);
+    other->ReplaceInput(this, use_index);
+  }
+
   // Move `this` instruction before `cursor`.
   void MoveBefore(HInstruction* cursor);
 
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
new file mode 100644
index 0000000..6d986ba
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "cfi_test.h"
+#include "gtest/gtest.h"
+#include "optimizing/code_generator.h"
+#include "utils/assembler.h"
+
+#include "optimizing/optimizing_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class OptimizingCFITest : public CFITest {
+ public:
+  // Enable this flag to generate the expected outputs.
+  static constexpr bool kGenerateExpected = false;
+
+  void TestImpl(InstructionSet isa, const char* isa_str,
+                const std::vector<uint8_t>& expected_asm,
+                const std::vector<uint8_t>& expected_cfi) {
+    // Set up a simple context.
+    ArenaPool pool;
+    ArenaAllocator allocator(&pool);
+    CompilerOptions opts;
+    std::unique_ptr<const InstructionSetFeatures> isa_features;
+    std::string error;
+    isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+    HGraph graph(&allocator);
+    // Generate simple frame with some spills.
+    std::unique_ptr<CodeGenerator> code_gen(
+        CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+    const int frame_size = 64;
+    int core_reg = 0;
+    int fp_reg = 0;
+    for (int i = 0; i < 2; i++) {  // Two registers of each kind.
+      for (; core_reg < 32; core_reg++) {
+        if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+          auto location = Location::RegisterLocation(core_reg);
+          code_gen->AddAllocatedRegister(location);
+          core_reg++;
+          break;
+        }
+      }
+      for (; fp_reg < 32; fp_reg++) {
+        if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+          auto location = Location::FpuRegisterLocation(fp_reg);
+          code_gen->AddAllocatedRegister(location);
+          fp_reg++;
+          break;
+        }
+      }
+    }
+    code_gen->ComputeSpillMask();
+    code_gen->SetFrameSize(frame_size);
+    code_gen->GenerateFrameEntry();
+    code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid());
+    // Get the outputs.
+    InternalCodeAllocator code_allocator;
+    code_gen->Finalize(&code_allocator);
+    const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
+    Assembler* opt_asm = code_gen->GetAssembler();
+    const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+
+    if (kGenerateExpected) {
+      GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+    } else {
+      EXPECT_EQ(expected_asm, actual_asm);
+      EXPECT_EQ(expected_cfi, actual_cfi);
+    }
+  }
+
+ private:
+  class InternalCodeAllocator : public CodeAllocator {
+   public:
+    InternalCodeAllocator() {}
+
+    virtual uint8_t* Allocate(size_t size) {
+      memory_.resize(size);
+      return memory_.data();
+    }
+
+    const std::vector<uint8_t>& GetMemory() { return memory_; }
+
+   private:
+    std::vector<uint8_t> memory_;
+
+    DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
+  };
+};
+
+#define TEST_ISA(isa) \
+  TEST_F(OptimizingCFITest, isa) { \
+    std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+        expected_asm_##isa + arraysize(expected_asm_##isa)); \
+    std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+        expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+    TestImpl(isa, #isa, expected_asm, expected_cfi); \
+  }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+
+#endif  // HAVE_ANDROID_OS
+
+}  // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
new file mode 100644
index 0000000..2125f6e
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -0,0 +1,141 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+    0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+    0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+    0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E,
+    0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: .cfi_remember_state
+// 0x0000000a: add sp, sp, #44
+// 0x0000000c: .cfi_def_cfa_offset: 20
+// 0x0000000c: vpop.f32 {s16-s17}
+// 0x00000010: .cfi_def_cfa_offset: 12
+// 0x00000010: .cfi_restore_extended: r80
+// 0x00000010: .cfi_restore_extended: r81
+// 0x00000010: pop {r5, r6, pc}
+// 0x00000012: .cfi_restore_state
+// 0x00000012: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+    0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+    0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
+    0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+    0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44,
+    0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
+    0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: str w0, [sp, #-64]!
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: stp x19, x20, [sp, #40]
+// 0x00000008: .cfi_offset: r19 at cfa-24
+// 0x00000008: .cfi_offset: r20 at cfa-16
+// 0x00000008: str lr, [sp, #56]
+// 0x0000000c: .cfi_offset: r30 at cfa-8
+// 0x0000000c: stp d8, d9, [sp, #24]
+// 0x00000010: .cfi_offset_extended: r72 at cfa-40
+// 0x00000010: .cfi_offset_extended: r73 at cfa-32
+// 0x00000010: .cfi_remember_state
+// 0x00000010: ldp d8, d9, [sp, #24]
+// 0x00000014: .cfi_restore_extended: r72
+// 0x00000014: .cfi_restore_extended: r73
+// 0x00000014: ldp x19, x20, [sp, #40]
+// 0x00000018: .cfi_restore: r19
+// 0x00000018: .cfi_restore: r20
+// 0x00000018: ldr lr, [sp, #56]
+// 0x0000001c: .cfi_restore: r30
+// 0x0000001c: add sp, sp, #0x40 (64)
+// 0x00000020: .cfi_def_cfa_offset: 0
+// 0x00000020: ret
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86[] = {
+    0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D,
+    0x5E, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+    0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E,
+    0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
+    0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push esi
+// 0x00000001: .cfi_def_cfa_offset: 8
+// 0x00000001: .cfi_offset: r6 at cfa-8
+// 0x00000001: push ebp
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: sub esp, 52
+// 0x00000005: .cfi_def_cfa_offset: 64
+// 0x00000005: mov [esp], eax
+// 0x00000008: .cfi_remember_state
+// 0x00000008: add esp, 52
+// 0x0000000b: .cfi_def_cfa_offset: 12
+// 0x0000000b: pop ebp
+// 0x0000000c: .cfi_def_cfa_offset: 8
+// 0x0000000c: .cfi_restore: r5
+// 0x0000000c: pop esi
+// 0x0000000d: .cfi_def_cfa_offset: 4
+// 0x0000000d: .cfi_restore: r6
+// 0x0000000d: ret
+// 0x0000000e: .cfi_restore_state
+// 0x0000000e: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+    0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
+    0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2,
+    0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
+    0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+    0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
+    0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47,
+    0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
+    0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push rbp
+// 0x00000001: .cfi_def_cfa_offset: 16
+// 0x00000001: .cfi_offset: r6 at cfa-16
+// 0x00000001: push rbx
+// 0x00000002: .cfi_def_cfa_offset: 24
+// 0x00000002: .cfi_offset: r3 at cfa-24
+// 0x00000002: subq rsp, 40
+// 0x00000006: .cfi_def_cfa_offset: 64
+// 0x00000006: movsd [rsp + 32], xmm13
+// 0x0000000d: .cfi_offset: r30 at cfa-32
+// 0x0000000d: movsd [rsp + 24], xmm12
+// 0x00000014: .cfi_offset: r29 at cfa-40
+// 0x00000014: mov [rsp], edi
+// 0x00000017: .cfi_remember_state
+// 0x00000017: movsd xmm12, [rsp + 24]
+// 0x0000001e: .cfi_restore: r29
+// 0x0000001e: movsd xmm13, [rsp + 32]
+// 0x00000025: .cfi_restore: r30
+// 0x00000025: addq rsp, 40
+// 0x00000029: .cfi_def_cfa_offset: 24
+// 0x00000029: pop rbx
+// 0x0000002a: .cfi_def_cfa_offset: 16
+// 0x0000002a: .cfi_restore: r3
+// 0x0000002a: pop rbp
+// 0x0000002b: .cfi_def_cfa_offset: 8
+// 0x0000002b: .cfi_restore: r6
+// 0x0000002b: ret
+// 0x0000002c: .cfi_restore_state
+// 0x0000002c: .cfi_def_cfa_offset: 64
+
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index e474c49..c2b5c99 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -50,6 +50,7 @@
 #include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
+#include "utils/assembler.h"
 #include "reference_type_propagation.h"
 
 namespace art {
@@ -96,10 +97,13 @@
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
         timing_logger_(method_name, true, true),
         visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
-        visualizer_(visualizer_output, graph, codegen, method_name_) {
+        visualizer_(visualizer_output, graph, codegen) {
     if (strstr(method_name, kStringFilter) == nullptr) {
       timing_logger_enabled_ = visualizer_enabled_ = false;
     }
+    if (visualizer_enabled_) {
+      visualizer_.PrintHeader(method_name_);
+    }
   }
 
   ~PassInfoPrinter() {
@@ -196,15 +200,6 @@
         InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
   }
 
-  bool WriteElf(art::File* file,
-                OatWriter* oat_writer,
-                const std::vector<const art::DexFile*>& dex_files,
-                const std::string& android_root,
-                bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
-                                        *GetCompilerDriver());
-  }
-
   void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
 
   void Init() OVERRIDE;
@@ -362,6 +357,20 @@
   return ArrayRef<const uint8_t>(vector);
 }
 
+static void AllocateRegisters(HGraph* graph,
+                              CodeGenerator* codegen,
+                              PassInfoPrinter* pass_info_printer) {
+  PrepareForRegisterAllocation(graph).Run();
+  SsaLivenessAnalysis liveness(*graph, codegen);
+  {
+    PassInfo pass_info(SsaLivenessAnalysis::kLivenessPassName, pass_info_printer);
+    liveness.Analyze();
+  }
+  {
+    PassInfo pass_info(RegisterAllocator::kRegisterAllocatorPassName, pass_info_printer);
+    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+  }
+}
 
 CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
                                                      CodeGenerator* codegen,
@@ -373,26 +382,22 @@
   RunOptimizations(graph, compiler_driver, &compilation_stats_,
                    dex_file, dex_compilation_unit, pass_info_printer, &handles);
 
-  PrepareForRegisterAllocation(graph).Run();
-  SsaLivenessAnalysis liveness(*graph, codegen);
-  {
-    PassInfo pass_info(SsaLivenessAnalysis::kLivenessPassName, pass_info_printer);
-    liveness.Analyze();
-  }
-  {
-    PassInfo pass_info(RegisterAllocator::kRegisterAllocatorPassName, pass_info_printer);
-    RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
-  }
+  AllocateRegisters(graph, codegen, pass_info_printer);
 
   CodeVectorAllocator allocator;
   codegen->CompileOptimized(&allocator);
 
+  DefaultSrcMap src_mapping_table;
+  if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+    codegen->BuildSourceMap(&src_mapping_table);
+  }
+
   std::vector<uint8_t> stack_map;
   codegen->BuildStackMaps(&stack_map);
 
   compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
 
-  return CompiledMethod::SwapAllocCompiledMethodStackMap(
+  return CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
       codegen->GetInstructionSet(),
       ArrayRef<const uint8_t>(allocator.GetMemory()),
@@ -402,10 +407,14 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const uint8_t>(stack_map));
+      &src_mapping_table,
+      ArrayRef<const uint8_t>(),  // mapping_table.
+      ArrayRef<const uint8_t>(stack_map),
+      ArrayRef<const uint8_t>(),  // native_gc_map.
+      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+      ArrayRef<const LinkerPatch>());
 }
 
-
 CompiledMethod* OptimizingCompiler::CompileBaseline(
     CodeGenerator* codegen,
     CompilerDriver* compiler_driver,
@@ -414,9 +423,11 @@
   codegen->CompileBaseline(&allocator);
 
   std::vector<uint8_t> mapping_table;
+  codegen->BuildMappingTable(&mapping_table);
   DefaultSrcMap src_mapping_table;
-  bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
-  codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
+  if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+    codegen->BuildSourceMap(&src_mapping_table);
+  }
   std::vector<uint8_t> vmap_table;
   codegen->BuildVMapTable(&vmap_table);
   std::vector<uint8_t> gc_map;
@@ -437,7 +448,8 @@
       AlignVectorSize(mapping_table),
       AlignVectorSize(vmap_table),
       AlignVectorSize(gc_map),
-      ArrayRef<const uint8_t>());
+      ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+      ArrayRef<const LinkerPatch>());
 }
 
 CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
@@ -503,6 +515,8 @@
     compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
     return nullptr;
   }
+  codegen->GetAssembler()->cfi().SetEnabled(
+      compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols());
 
   PassInfoPrinter pass_info_printer(graph,
                                     method_name.c_str(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index b97a667..4d5b8d0 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -47,6 +47,7 @@
   kNotCompiledUnhandledInstruction,
   kRemovedCheckedCast,
   kRemovedNullCheck,
+  kInstructionSimplifications,
   kLastStat
 };
 
@@ -110,6 +111,7 @@
       case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
       case kRemovedCheckedCast: return "kRemovedCheckedCast";
       case kRemovedNullCheck: return "kRemovedNullCheck";
+      case kInstructionSimplifications: return "kInstructionSimplifications";
       default: LOG(FATAL) << "invalid stat";
     }
     return "";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9df8f56..4936685 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -269,6 +269,20 @@
 }
 
 
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+                                                  int register_count) {
+  int scratch = -1;
+  for (int reg = 0; reg < register_count; ++reg) {
+    if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
+      scratch = reg;
+      break;
+    }
+  }
+
+  return scratch;
+}
+
+
 ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
     ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
     : resolver_(resolver),
@@ -282,6 +296,16 @@
 }
 
 
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+    ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+    : resolver_(resolver),
+      reg_(kNoRegister),
+      spilled_(false) {
+      // Unlike the constructor above, we do not spill a register if none are free.
+  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
+}
+
+
 ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
   if (spilled_) {
     resolver_->RestoreScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 3fa1b37..173cffc 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,10 +42,15 @@
  protected:
   class ScratchRegisterScope : public ValueObject {
    public:
+    // Spill a scratch register if no regs are free.
     ScratchRegisterScope(ParallelMoveResolver* resolver,
                          int blocked,
                          int if_scratch,
                          int number_of_registers);
+    // Grab a scratch register only if one is available; this never spills.
+    ScratchRegisterScope(ParallelMoveResolver* resolver,
+                         int blocked,
+                         int number_of_registers);
     ~ScratchRegisterScope();
 
     int GetRegister() const { return reg_; }
@@ -62,6 +67,8 @@
   // Allocate a scratch register for performing a move. The method will try to use
   // a register that is the destination of a move, but that move has not been emitted yet.
   int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
+  // As above, but returns -1 (and never spills) if no register is free.
+  int AllocateScratchRegister(int blocked, int register_count);
 
   // Emit a move.
   virtual void EmitMove(size_t index) = 0;
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 8059289..c410660 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -370,40 +370,46 @@
   }
 }
 
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+  return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
 constexpr size_t kFramePointerSize = 4;
 
 void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& callee_save_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
+  CHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet
   CHECK_ALIGNED(frame_size, kStackAlignment);
   CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
 
   // Push callee saves and link register.
-  RegList push_list = 1 << LR;
-  size_t pushed_values = 1;
-  int32_t min_s = kNumberOfSRegisters;
-  int32_t max_s = -1;
-  for (size_t i = 0; i < callee_save_regs.size(); i++) {
-    if (callee_save_regs.at(i).AsArm().IsCoreRegister()) {
-      Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister();
-      push_list |= 1 << reg;
-      pushed_values++;
+  RegList core_spill_mask = 1 << LR;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
     } else {
-      CHECK(callee_save_regs.at(i).AsArm().IsSRegister());
-      min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s);
-      max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s);
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
     }
   }
-  PushList(push_list);
-  if (max_s != -1) {
-    pushed_values += 1 + max_s - min_s;
-    vpushs(static_cast<SRegister>(min_s), 1 + max_s - min_s);
+  PushList(core_spill_mask);
+  cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
+  cfi_.RelOffsetForMany(DWARFReg(Register(0)), 0, core_spill_mask, kFramePointerSize);
+  if (fp_spill_mask != 0) {
+    vpushs(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi_.AdjustCFAOffset(POPCOUNT(fp_spill_mask) * kFramePointerSize);
+    cfi_.RelOffsetForMany(DWARFReg(SRegister(0)), 0, fp_spill_mask, kFramePointerSize);
   }
 
   // Increase frame to required size.
+  int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
   CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
-  size_t adjust = frame_size - (pushed_values * kFramePointerSize);
-  IncreaseFrameSize(adjust);
+  IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
 
   // Write out Method*.
   StoreToOffset(kStoreWord, R0, SP, 0);
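A small sketch of the bookkeeping the rewritten prologue relies on (GCC/Clang builtins stand in for ART's POPCOUNT/CTZ macros): each set bit in a spill mask contributes one 4-byte slot to the CFA offset, and the lowest set bit of fp_spill_mask names the first S-register for the vpush.

    #include <cstdint>

    constexpr int kFramePointerSizeArm = 4;  // AArch32.

    int SpilledBytes(uint32_t core_spill_mask, uint32_t fp_spill_mask) {
      return (__builtin_popcount(core_spill_mask) +
              __builtin_popcount(fp_spill_mask)) * kFramePointerSizeArm;
    }

    int FirstSpilledSRegister(uint32_t fp_spill_mask) {
      return __builtin_ctz(fp_spill_mask);  // Only valid for a non-zero mask.
    }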
@@ -432,46 +438,46 @@
 void ArmAssembler::RemoveFrame(size_t frame_size,
                               const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi_.RememberState();
+
   // Compute callee saves to pop and PC.
-  RegList pop_list = 1 << PC;
-  size_t pop_values = 1;
-  int32_t min_s = kNumberOfSRegisters;
-  int32_t max_s = -1;
-  for (size_t i = 0; i < callee_save_regs.size(); i++) {
-    if (callee_save_regs.at(i).AsArm().IsCoreRegister()) {
-      Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister();
-      pop_list |= 1 << reg;
-      pop_values++;
+  RegList core_spill_mask = 1 << PC;
+  uint32_t fp_spill_mask = 0;
+  for (const ManagedRegister& reg : callee_save_regs) {
+    if (reg.AsArm().IsCoreRegister()) {
+      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
     } else {
-      CHECK(callee_save_regs.at(i).AsArm().IsSRegister());
-      min_s = std::min(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), min_s);
-      max_s = std::max(static_cast<int>(callee_save_regs.at(i).AsArm().AsSRegister()), max_s);
+      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
     }
   }
 
-  if (max_s != -1) {
-    pop_values += 1 + max_s - min_s;
-  }
-
   // Decrease frame to start of callee saves.
+  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
   CHECK_GT(frame_size, pop_values * kFramePointerSize);
-  size_t adjust = frame_size - (pop_values * kFramePointerSize);
-  DecreaseFrameSize(adjust);
+  DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
 
-  if (max_s != -1) {
-    vpops(static_cast<SRegister>(min_s), 1 + max_s - min_s);
+  if (fp_spill_mask != 0) {
+    vpops(SRegister(CTZ(fp_spill_mask)), POPCOUNT(fp_spill_mask));
+    cfi_.AdjustCFAOffset(-kFramePointerSize * POPCOUNT(fp_spill_mask));
+    cfi_.RestoreMany(DWARFReg(SRegister(0)), fp_spill_mask);
   }
 
   // Pop callee saves and PC.
-  PopList(pop_list);
+  PopList(core_spill_mask);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 void ArmAssembler::IncreaseFrameSize(size_t adjust) {
   AddConstant(SP, -adjust);
+  cfi_.AdjustCFAOffset(adjust);
 }
 
 void ArmAssembler::DecreaseFrameSize(size_t adjust) {
   AddConstant(SP, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 void ArmAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 6286b10..3b42f63 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -373,24 +373,34 @@
 
 
 void Thumb2Assembler::ldrd(Register rd, const Address& ad, Condition cond) {
+  ldrd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::ldrd(Register rd, Register rd2, const Address& ad, Condition cond) {
   CheckCondition(cond);
-  CHECK_EQ(rd % 2, 0);
+  // Encoding T1.
   // This is different from other loads.  The encoding is like ARM.
   int32_t encoding = B31 | B30 | B29 | B27 | B22 | B20 |
       static_cast<int32_t>(rd) << 12 |
-      (static_cast<int32_t>(rd) + 1) << 8 |
+      static_cast<int32_t>(rd2) << 8 |
       ad.encodingThumbLdrdStrd();
   Emit32(encoding);
 }
 
 
 void Thumb2Assembler::strd(Register rd, const Address& ad, Condition cond) {
+  strd(rd, Register(rd + 1), ad, cond);
+}
+
+
+void Thumb2Assembler::strd(Register rd, Register rd2, const Address& ad, Condition cond) {
   CheckCondition(cond);
-  CHECK_EQ(rd % 2, 0);
+  // Encoding T1.
   // This is different from other loads.  The encoding is like ARM.
   int32_t encoding = B31 | B30 | B29 | B27 | B22 |
       static_cast<int32_t>(rd) << 12 |
-      (static_cast<int32_t>(rd) + 1) << 8 |
+      static_cast<int32_t>(rd2) << 8 |
       ad.encodingThumbLdrdStrd();
   Emit32(encoding);
 }
@@ -2613,14 +2623,16 @@
   Register tmp_reg = kNoRegister;
   if (!Address::CanHoldStoreOffsetThumb(type, offset)) {
     CHECK_NE(base, IP);
-    if (reg != IP) {
+    if (reg != IP &&
+        (type != kStoreWordPair || reg + 1 != IP)) {
       tmp_reg = IP;
     } else {
-      // Be careful not to use IP twice (for `reg` and to build the
-      // Address object used by the store instruction(s) below).
-      // Instead, save R5 on the stack (or R6 if R5 is not available),
-      // use it as secondary temporary register, and restore it after
-      // the store instruction has been emitted.
+      // Be careful not to use IP twice: once for `reg` (or `reg` + 1 in
+      // the case of a word-pair store) and once to build the Address
+      // object used by the store instruction(s) below.  Instead,
+      // save R5 on the stack (or R6 if R5 is not available), use it
+      // as a secondary temporary register, and restore it after the
+      // store instruction has been emitted.
       tmp_reg = base != R5 ? R5 : R6;
       Push(tmp_reg);
       if (base == SP) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 81dd138..e33c240 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -135,9 +135,17 @@
   void ldrsb(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void ldrsh(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
 
+  // Load/store register dual instructions using registers `rd` and `rd` + 1.
   void ldrd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void strd(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
 
+  // Load/store register dual instructions using registers `rd` and `rd2`.
+  // Note that contrary to the ARM A1 encoding, the Thumb-2 T1 encoding
+  // does not require `rd` to be even, nor `rd2` to be equal to `rd` + 1.
+  void ldrd(Register rd, Register rd2, const Address& ad, Condition cond);
+  void strd(Register rd, Register rd2, const Address& ad, Condition cond);
+
   void ldm(BlockAddressMode am, Register base,
            RegList regs, Condition cond = AL) OVERRIDE;
   void stm(BlockAddressMode am, Register base,
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 813996b..5f5561a 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -247,4 +247,103 @@
   DriverStr(expected, "add");
 }
 
+TEST_F(AssemblerThumb2Test, StoreWordToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+  int32_t offset = 4092;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+  const char* expected =
+      "str r0, [sp, #4092]\n"
+      "str ip, [sp, #4092]\n"
+      "str ip, [r5, #4092]\n";
+  DriverStr(expected, "StoreWordToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWord;
+  int32_t offset = 4096;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::SP, offset);
+  __ StoreToOffset(type, arm::IP, arm::R5, offset);
+
+  const char* expected =
+      "mov ip, #4096\n"       // LoadImmediate(ip, 4096)
+      "add ip, ip, sp\n"
+      "str r0, [ip, #0]\n"
+
+      "str r5, [sp, #-4]!\n"  // Push(r5)
+      "movw r5, #4100\n"      // LoadImmediate(r5, 4096 + kRegisterSize)
+      "add r5, r5, sp\n"
+      "str ip, [r5, #0]\n"
+      "ldr r5, [sp], #4\n"    // Pop(r5)
+
+      "str r6, [sp, #-4]!\n"  // Push(r6)
+      "mov r6, #4096\n"       // LoadImmediate(r6, 4096)
+      "add r6, r6, r5\n"
+      "str ip, [r6, #0]\n"
+      "ldr r6, [sp], #4\n";   // Pop(r6)
+  DriverStr(expected, "StoreWordToNonThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1020;
+  ASSERT_TRUE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // We cannot use IP (i.e. R12) as the first source register, as it would
+  // force us to use SP (i.e. R13) as the second source register, which
+  // would have an "unpredictable" effect according to the ARMv7
+  // specification (the T1 encoding describes the result as
+  // UNPREDICTABLE when one of the source registers is R13).
+  //
+  // So we use (R11, IP) (i.e. (R11, R12)) as source registers in the
+  // following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "strd r0, r1, [sp, #1020]\n"
+      "strd r11, ip, [sp, #1020]\n"
+      "strd r11, ip, [r5, #1020]\n";
+  DriverStr(expected, "StoreWordPairToThumbOffset");
+}
+
+TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) {
+  arm::StoreOperandType type = arm::kStoreWordPair;
+  int32_t offset = 1024;
+  ASSERT_FALSE(arm::Address::CanHoldStoreOffsetThumb(type, offset));
+
+  __ StoreToOffset(type, arm::R0, arm::SP, offset);
+  // Same comment as in AssemblerThumb2Test.StoreWordPairToThumbOffset
+  // regarding the use of (R11, IP) (i.e. (R11, R12)) as source
+  // registers in the following instructions.
+  __ StoreToOffset(type, arm::R11, arm::SP, offset);
+  __ StoreToOffset(type, arm::R11, arm::R5, offset);
+
+  const char* expected =
+      "mov ip, #1024\n"           // LoadImmediate(ip, 1024)
+      "add ip, ip, sp\n"
+      "strd r0, r1, [ip, #0]\n"
+
+      "str r5, [sp, #-4]!\n"      // Push(r5)
+      "movw r5, #1028\n"          // LoadImmediate(r5, 1024 + kRegisterSize)
+      "add r5, r5, sp\n"
+      "strd r11, ip, [r5, #0]\n"
+      "ldr r5, [sp], #4\n"        // Pop(r5)
+
+      "str r6, [sp, #-4]!\n"      // Push(r6)
+      "mov r6, #1024\n"           // LoadImmediate(r6, 1024)
+      "add r6, r6, r5\n"
+      "strd r11, ip, [r6, #0]\n"
+      "ldr r6, [sp], #4\n";       // Pop(r6)
+  DriverStr(expected, "StoreWordPairToNonThumbOffset");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h
index a496c87..5fde9e8 100644
--- a/compiler/utils/arm/managed_register_arm.h
+++ b/compiler/utils/arm/managed_register_arm.h
@@ -19,6 +19,7 @@
 
 #include "base/logging.h"
 #include "constants_arm.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 58c7367..fbd0411 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -63,12 +63,14 @@
 void Arm64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   AddConstant(SP, -adjust);
+  cfi().AdjustCFAOffset(adjust);
 }
 
 // See Arm64 PCS Section 5.2.2.1.
 void Arm64Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   AddConstant(SP, adjust);
+  cfi().AdjustCFAOffset(-adjust);
 }
 
 void Arm64Assembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
@@ -638,6 +640,14 @@
   ___ Brk();
 }
 
+static dwarf::Reg DWARFReg(XRegister reg) {
+  return dwarf::Reg::Arm64Core(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(DRegister reg) {
+  return dwarf::Reg::Arm64Fp(static_cast<int>(reg));
+}
+
 constexpr size_t kFramePointerSize = 8;
 constexpr unsigned int kJniRefSpillRegsSize = 11 + 8;
 
@@ -660,45 +670,20 @@
   // TUNING: Use stp.
   // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
   size_t reg_offset = frame_size;
-  reg_offset -= 8;
-  StoreToOffset(LR, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X29, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X28, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X27, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X26, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X25, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X24, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X23, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X22, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X21, SP, reg_offset);
-  reg_offset -= 8;
-  StoreToOffset(X20, SP, reg_offset);
-
-  reg_offset -= 8;
-  StoreDToOffset(D15, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D14, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D13, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D12, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D11, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D10, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D9, SP, reg_offset);
-  reg_offset -= 8;
-  StoreDToOffset(D8, SP, reg_offset);
+  static constexpr XRegister x_spills[] = {
+      LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 };
+  for (size_t i = 0; i < arraysize(x_spills); i++) {
+    XRegister reg = x_spills[i];
+    reg_offset -= 8;
+    StoreToOffset(reg, SP, reg_offset);
+    cfi_.RelOffset(DWARFReg(reg), reg_offset);
+  }
+  for (int d = 15; d >= 8; d--) {
+    DRegister reg = static_cast<DRegister>(d);
+    reg_offset -= 8;
+    StoreDToOffset(reg, SP, reg_offset);
+    cfi_.RelOffset(DWARFReg(reg), reg_offset);
+  }
 
   // Move TR(Caller saved) to ETR(Callee saved). The original (ETR)X21 has been saved on stack.
   // This way we make sure that TR is not trashed by native code.
@@ -734,6 +719,7 @@
 
 void Arm64Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi_.RememberState();
 
   // For now we only check that the size of the frame is greater than the spill size.
   CHECK_EQ(callee_save_regs.size(), kJniRefSpillRegsSize);
@@ -748,51 +734,30 @@
   // TUNING: Use ldp.
   // Note: Must match Arm64JniCallingConvention::CoreSpillMask().
   size_t reg_offset = frame_size;
-  reg_offset -= 8;
-  LoadFromOffset(LR, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X29, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X28, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X27, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X26, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X25, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X24, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X23, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X22, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X21, SP, reg_offset);
-  reg_offset -= 8;
-  LoadFromOffset(X20, SP, reg_offset);
-
-  reg_offset -= 8;
-  LoadDFromOffset(D15, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D14, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D13, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D12, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D11, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D10, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D9, SP, reg_offset);
-  reg_offset -= 8;
-  LoadDFromOffset(D8, SP, reg_offset);
+  static constexpr XRegister x_spills[] = {
+      LR, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20 };
+  for (size_t i = 0; i < arraysize(x_spills); i++) {
+    XRegister reg = x_spills[i];
+    reg_offset -= 8;
+    LoadFromOffset(reg, SP, reg_offset);
+    cfi_.Restore(DWARFReg(reg));
+  }
+  for (int d = 15; d >= 8; d--) {
+    DRegister reg = static_cast<DRegister>(d);
+    reg_offset -= 8;
+    LoadDFromOffset(reg, SP, reg_offset);
+    cfi_.Restore(DWARFReg(reg));
+  }
 
   // Decrease frame size to start of callee saved regs.
   DecreaseFrameSize(frame_size);
 
   // Pop callee saved and return to LR.
   ___ Ret();
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 }  // namespace arm64
diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h
index e1d6f31..62c1d4d 100644
--- a/compiler/utils/arm64/managed_register_arm64.h
+++ b/compiler/utils/arm64/managed_register_arm64.h
@@ -19,6 +19,7 @@
 
 #include "base/logging.h"
 #include "constants_arm64.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 5340dd3..36342c6 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -105,6 +105,9 @@
   CHECK_EQ(Size(), old_size);
 }
 
+void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() {
+  this->AdvancePC(assembler_->CodeSize());
+}
 
 Assembler* Assembler::Create(InstructionSet instruction_set) {
   switch (instruction_set) {
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 923ecdb..ebafd3d 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -29,6 +29,7 @@
 #include "offsets.h"
 #include "x86/constants_x86.h"
 #include "x86_64/constants_x86_64.h"
+#include "dwarf/debug_frame_opcode_writer.h"
 
 namespace art {
 
@@ -354,6 +355,23 @@
   friend class AssemblerFixup;
 };
 
+// The purpose of this class is to ensure that we do not have to explicitly
+// call the AdvancePC method (which is good for convenience and correctness).
+class DebugFrameOpCodeWriterForAssembler FINAL
+    : public dwarf::DebugFrameOpCodeWriter<> {
+ public:
+  // This method is called by the opcode writers.
+  virtual void ImplicitlyAdvancePC() FINAL;
+
+  explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer)
+      : dwarf::DebugFrameOpCodeWriter<>(),
+        assembler_(buffer) {
+  }
+
+ private:
+  Assembler* assembler_;
+};
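Usage pattern, as it appears elsewhere in this change (see ArmAssembler::BuildFrame above): the caller records only the CFI effect of each emitted instruction and never calls AdvancePC, since ImplicitlyAdvancePC() derives the PC delta from the assembler's current code size.

    // PushList(core_spill_mask);  // Emit the instruction first, then:
    // cfi_.AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
    // No AdvancePC() call: the writer queries assembler_->CodeSize() itself.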
+
 class Assembler {
  public:
   static Assembler* Create(InstructionSet instruction_set);
@@ -504,18 +522,20 @@
   // and branch to a ExceptionSlowPath if it is.
   virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
-  virtual void InitializeFrameDescriptionEntry() {}
-  virtual void FinalizeFrameDescriptionEntry() {}
-  // Give a vector containing FDE data, or null if not used. Note: the assembler must take care
-  // of handling the lifecycle.
-  virtual std::vector<uint8_t>* GetFrameDescriptionEntry() { return nullptr; }
-
   virtual ~Assembler() {}
 
+  /**
+   * @brief Buffer of DWARF's Call Frame Information opcodes.
+   * @details It is used by debuggers and other tools to unwind the call stack.
+   */
+  DebugFrameOpCodeWriterForAssembler& cfi() { return cfi_; }
+
  protected:
-  Assembler() : buffer_() {}
+  Assembler() : buffer_(), cfi_(this) {}
 
   AssemblerBuffer buffer_;
+
+  DebugFrameOpCodeWriterForAssembler cfi_;
 };
 
 }  // namespace art
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index b13edb6..3fe1a31 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -44,7 +44,9 @@
 
 enum class RegisterView {  // private
   kUsePrimaryName,
-  kUseSecondaryName
+  kUseSecondaryName,
+  kUseTertiaryName,
+  kUseQuaternaryName,
 };
 
 template<typename Ass, typename Reg, typename FPReg, typename Imm>
@@ -97,6 +99,15 @@
         fmt);
   }
 
+  std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
+    return RepeatTemplatedRegisters<Reg, Reg>(f,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+        fmt);
+  }
+
   std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
@@ -240,6 +251,18 @@
     UNREACHABLE();
   }
 
+  // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems.
+  virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+    UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers";
+    UNREACHABLE();
+  }
+
+  // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems.
+  virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+    UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers";
+    UNREACHABLE();
+  }
+
   std::string GetRegisterName(const Reg& reg) {
     return GetRegName<RegisterView::kUsePrimaryName>(reg);
   }
@@ -520,6 +543,14 @@
       case RegisterView::kUseSecondaryName:
         sreg << GetSecondaryRegisterName(reg);
         break;
+
+      case RegisterView::kUseTertiaryName:
+        sreg << GetTertiaryRegisterName(reg);
+        break;
+
+      case RegisterView::kUseQuaternaryName:
+        sreg << GetQuaternaryRegisterName(reg);
+        break;
     }
     return sreg.str();
   }
diff --git a/compiler/utils/dex_cache_arrays_layout-inl.h b/compiler/utils/dex_cache_arrays_layout-inl.h
index 7d02ce3..2c50c96 100644
--- a/compiler/utils/dex_cache_arrays_layout-inl.h
+++ b/compiler/utils/dex_cache_arrays_layout-inl.h
@@ -26,7 +26,6 @@
 #include "utils.h"
 
 namespace mirror {
-class ArtField;
 class ArtMethod;
 class Class;
 class String;
@@ -34,40 +33,55 @@
 
 namespace art {
 
-inline DexCacheArraysLayout::DexCacheArraysLayout(const DexFile* dex_file)
+inline DexCacheArraysLayout::DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file)
     : /* types_offset_ is always 0u */
-      methods_offset_(types_offset_ + ArraySize<mirror::Class>(dex_file->NumTypeIds())),
-      strings_offset_(methods_offset_ + ArraySize<mirror::ArtMethod>(dex_file->NumMethodIds())),
-      fields_offset_(strings_offset_ + ArraySize<mirror::String>(dex_file->NumStringIds())),
-      size_(fields_offset_ + ArraySize<mirror::ArtField>(dex_file->NumFieldIds())) {
+      pointer_size_(pointer_size),
+      methods_offset_(types_offset_ + TypesSize(dex_file->NumTypeIds())),
+      strings_offset_(methods_offset_ + MethodsSize(dex_file->NumMethodIds())),
+      fields_offset_(strings_offset_ + StringsSize(dex_file->NumStringIds())),
+      size_(fields_offset_ + FieldsSize(dex_file->NumFieldIds())) {
+  DCHECK(pointer_size == 4u || pointer_size == 8u);
 }
 
 inline size_t DexCacheArraysLayout::TypeOffset(uint32_t type_idx) const {
-  return types_offset_ + ElementOffset<mirror::Class>(type_idx);
+  return types_offset_ + ElementOffset(sizeof(mirror::HeapReference<mirror::Class>), type_idx);
+}
+
+inline size_t DexCacheArraysLayout::TypesSize(size_t num_elements) const {
+  return ArraySize(sizeof(mirror::HeapReference<mirror::Class>), num_elements);
 }
 
 inline size_t DexCacheArraysLayout::MethodOffset(uint32_t method_idx) const {
-  return methods_offset_ + ElementOffset<mirror::ArtMethod>(method_idx);
+  return methods_offset_ + ElementOffset(
+      sizeof(mirror::HeapReference<mirror::ArtMethod>), method_idx);
+}
+
+inline size_t DexCacheArraysLayout::MethodsSize(size_t num_elements) const {
+  return ArraySize(sizeof(mirror::HeapReference<mirror::ArtMethod>), num_elements);
 }
 
 inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const {
-  return strings_offset_ + ElementOffset<mirror::String>(string_idx);
+  return strings_offset_ + ElementOffset(sizeof(mirror::HeapReference<mirror::String>), string_idx);
+}
+
+inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const {
+  return ArraySize(sizeof(mirror::HeapReference<mirror::String>), num_elements);
 }
 
 inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
-  return fields_offset_ + ElementOffset<mirror::ArtField>(field_idx);
+  return fields_offset_ + ElementOffset(pointer_size_, field_idx);
 }
 
-template <typename MirrorType>
-inline size_t DexCacheArraysLayout::ElementOffset(uint32_t idx) {
-  return mirror::Array::DataOffset(sizeof(mirror::HeapReference<MirrorType>)).Uint32Value() +
-      sizeof(mirror::HeapReference<MirrorType>) * idx;
+inline size_t DexCacheArraysLayout::FieldsSize(size_t num_elements) const {
+  return ArraySize(pointer_size_, num_elements);
 }
 
-template <typename MirrorType>
-inline size_t DexCacheArraysLayout::ArraySize(uint32_t num_elements) {
-  size_t array_size = mirror::ComputeArraySize(
-      num_elements, ComponentSizeShiftWidth<sizeof(mirror::HeapReference<MirrorType>)>());
+inline size_t DexCacheArraysLayout::ElementOffset(size_t element_size, uint32_t idx) {
+  return mirror::Array::DataOffset(element_size).Uint32Value() + element_size * idx;
+}
+
+inline size_t DexCacheArraysLayout::ArraySize(size_t element_size, uint32_t num_elements) {
+  size_t array_size = mirror::ComputeArraySize(num_elements, ComponentSizeShiftWidth(element_size));
   DCHECK_NE(array_size, 0u);  // No overflow expected for dex cache arrays.
   return RoundUp(array_size, kObjectAlignment);
 }
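
[Note on the hunk above: the layout is a running sum of rounded-up array sizes, and the change makes field entries pointer_size wide instead of sizeof(HeapReference<>). A minimal sketch of that arithmetic follows; the counts and the 8-byte alignment constant are illustrative, not taken from a real dex file, and the array header is omitted for brevity.]

  #include <cstddef>
  #include <cstdio>

  size_t ArraySizeRounded(size_t element_size, size_t n, size_t alignment) {
    size_t raw = element_size * n;                    // Header omitted.
    return (raw + alignment - 1) & ~(alignment - 1);  // Round up to alignment.
  }

  int main() {
    const size_t kHeapRef = 4u;   // Stand-in for sizeof(mirror::HeapReference<T>).
    const size_t kPointer = 8u;   // pointer_size on a 64-bit target.
    const size_t kAlign = 8u;     // Stand-in for kObjectAlignment.
    size_t types_offset = 0u;
    size_t methods_offset = types_offset + ArraySizeRounded(kHeapRef, 100, kAlign);
    size_t strings_offset = methods_offset + ArraySizeRounded(kHeapRef, 500, kAlign);
    size_t fields_offset = strings_offset + ArraySizeRounded(kHeapRef, 300, kAlign);
    size_t size = fields_offset + ArraySizeRounded(kPointer, 200, kAlign);
    std::printf("methods=%zu strings=%zu fields=%zu total=%zu\n",
                methods_offset, strings_offset, fields_offset, size);
    return 0;
  }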
diff --git a/compiler/utils/dex_cache_arrays_layout.h b/compiler/utils/dex_cache_arrays_layout.h
index b461256..8f98ea1 100644
--- a/compiler/utils/dex_cache_arrays_layout.h
+++ b/compiler/utils/dex_cache_arrays_layout.h
@@ -29,6 +29,7 @@
   // Construct an invalid layout.
   DexCacheArraysLayout()
       : /* types_offset_ is always 0u */
+        pointer_size_(0u),
         methods_offset_(0u),
         strings_offset_(0u),
         fields_offset_(0u),
@@ -36,7 +37,7 @@
   }
 
   // Construct a layout for a particular dex file.
-  explicit DexCacheArraysLayout(const DexFile* dex_file);
+  explicit DexCacheArraysLayout(size_t pointer_size, const DexFile* dex_file);
 
   bool Valid() const {
     return Size() != 0u;
@@ -52,36 +53,43 @@
 
   size_t TypeOffset(uint32_t type_idx) const;
 
+  size_t TypesSize(size_t num_elements) const;
+
   size_t MethodsOffset() const {
     return methods_offset_;
   }
 
   size_t MethodOffset(uint32_t method_idx) const;
 
+  size_t MethodsSize(size_t num_elements) const;
+
   size_t StringsOffset() const {
     return strings_offset_;
   }
 
   size_t StringOffset(uint32_t string_idx) const;
 
+  size_t StringsSize(size_t num_elements) const;
+
   size_t FieldsOffset() const {
     return fields_offset_;
   }
 
   size_t FieldOffset(uint32_t field_idx) const;
 
+  size_t FieldsSize(size_t num_elements) const;
+
  private:
   static constexpr size_t types_offset_ = 0u;
+  const size_t pointer_size_;  // Must be first for construction initialization order.
   const size_t methods_offset_;
   const size_t strings_offset_;
   const size_t fields_offset_;
   const size_t size_;
 
-  template <typename MirrorType>
-  static size_t ElementOffset(uint32_t idx);
+  static size_t ElementOffset(size_t element_size, uint32_t idx);
 
-  template <typename MirrorType>
-  static size_t ArraySize(uint32_t num_elements);
+  static size_t ArraySize(size_t element_size, uint32_t num_elements);
 };
 
 }  // namespace art
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
deleted file mode 100644
index a7e09c6..0000000
--- a/compiler/utils/dwarf_cfi.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "leb128.h"
-#include "utils.h"
-
-#include "dwarf_cfi.h"
-
-namespace art {
-
-void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) {
-  if (increment < 64) {
-    // Encoding in opcode.
-    buf->push_back(0x1 << 6 | increment);
-  } else if (increment < 256) {
-    // Single byte delta.
-    buf->push_back(0x02);
-    buf->push_back(increment);
-  } else if (increment < 256 * 256) {
-    // Two byte delta.
-    buf->push_back(0x03);
-    buf->push_back(increment & 0xff);
-    buf->push_back((increment >> 8) & 0xff);
-  } else {
-    // Four byte delta.
-    buf->push_back(0x04);
-    Push32(buf, increment);
-  }
-}
-
-void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset) {
-  buf->push_back(0x11);
-  EncodeUnsignedLeb128(reg, buf);
-  EncodeSignedLeb128(offset, buf);
-}
-
-void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset) {
-  buf->push_back((0x2 << 6) | reg);
-  EncodeUnsignedLeb128(offset, buf);
-}
-
-void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset) {
-  buf->push_back(0x0e);
-  EncodeUnsignedLeb128(offset, buf);
-}
-
-void DW_CFA_remember_state(std::vector<uint8_t>* buf) {
-  buf->push_back(0x0a);
-}
-
-void DW_CFA_restore_state(std::vector<uint8_t>* buf) {
-  buf->push_back(0x0b);
-}
-
-void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) {
-  // 'length' (filled in by other functions).
-  if (is_64bit) {
-    Push32(buf, 0xffffffff);  // Indicates 64bit
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // 'CIE_pointer' (filled in by linker).
-  if (is_64bit) {
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // 'initial_location' (filled in by linker).
-  if (is_64bit) {
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // 'address_range' (filled in by other functions).
-  if (is_64bit) {
-    Push32(buf, 0);
-    Push32(buf, 0);
-  } else {
-    Push32(buf, 0);
-  }
-
-  // Augmentation length: 0
-  buf->push_back(0);
-}
-
-void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit) {
-  const size_t kOffsetOfAddressRange = is_64bit? 28 : 12;
-  CHECK(buf->size() >= kOffsetOfAddressRange + (is_64bit? 8 : 4));
-
-  uint8_t *p = buf->data() + kOffsetOfAddressRange;
-  if (is_64bit) {
-    p[0] = data;
-    p[1] = data >> 8;
-    p[2] = data >> 16;
-    p[3] = data >> 24;
-    p[4] = data >> 32;
-    p[5] = data >> 40;
-    p[6] = data >> 48;
-    p[7] = data >> 56;
-  } else {
-    p[0] = data;
-    p[1] = data >> 8;
-    p[2] = data >> 16;
-    p[3] = data >> 24;
-  }
-}
-
-void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit) {
-  uint64_t length = is_64bit ? buf->size() - 12 : buf->size() - 4;
-  DCHECK_EQ((length & 0x3), 0U);
-
-  uint8_t *p = is_64bit? buf->data() + 4 : buf->data();
-  if (is_64bit) {
-    p[0] = length;
-    p[1] = length >> 8;
-    p[2] = length >> 16;
-    p[3] = length >> 24;
-    p[4] = length >> 32;
-    p[5] = length >> 40;
-    p[6] = length >> 48;
-    p[7] = length >> 56;
-  } else {
-    p[0] = length;
-    p[1] = length >> 8;
-    p[2] = length >> 16;
-    p[3] = length >> 24;
-  }
-}
-
-void PadCFI(std::vector<uint8_t>* buf) {
-  while (buf->size() & 0x3) {
-    buf->push_back(0);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/utils/dwarf_cfi.h b/compiler/utils/dwarf_cfi.h
deleted file mode 100644
index 0c8b151..0000000
--- a/compiler/utils/dwarf_cfi.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_DWARF_CFI_H_
-#define ART_COMPILER_UTILS_DWARF_CFI_H_
-
-#include <vector>
-
-namespace art {
-
-/**
- * @brief Enter a 'DW_CFA_advance_loc' into an FDE buffer
- * @param buf FDE buffer.
- * @param increment Amount by which to increase the current location.
- */
-void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment);
-
-/**
- * @brief Enter a 'DW_CFA_offset_extended_sf' into an FDE buffer
- * @param buf FDE buffer.
- * @param reg Register number.
- * @param offset Offset of register address from CFA.
- */
-void DW_CFA_offset_extended_sf(std::vector<uint8_t>* buf, int reg, int32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_offset' into an FDE buffer
- * @param buf FDE buffer.
- * @param reg Register number.
- * @param offset Offset of register address from CFA.
- */
-void DW_CFA_offset(std::vector<uint8_t>* buf, int reg, uint32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_def_cfa_offset' into an FDE buffer
- * @param buf FDE buffer.
- * @param offset New offset of CFA.
- */
-void DW_CFA_def_cfa_offset(std::vector<uint8_t>* buf, int32_t offset);
-
-/**
- * @brief Enter a 'DW_CFA_remember_state' into an FDE buffer
- * @param buf FDE buffer.
- */
-void DW_CFA_remember_state(std::vector<uint8_t>* buf);
-
-/**
- * @brief Enter a 'DW_CFA_restore_state' into an FDE buffer
- * @param buf FDE buffer.
- */
-void DW_CFA_restore_state(std::vector<uint8_t>* buf);
-
-/**
- * @brief Write FDE header into an FDE buffer
- * @param buf FDE buffer.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit);
-
-/**
- * @brief Set 'address_range' field of an FDE buffer
- * @param buf FDE buffer.
- * @param data Data value.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteFDEAddressRange(std::vector<uint8_t>* buf, uint64_t data, bool is_64bit);
-
-/**
- * @brief Set 'length' field of an FDE buffer
- * @param buf FDE buffer.
- * @param is_64bit If FDE is for 64bit application.
- */
-void WriteCFILength(std::vector<uint8_t>* buf, bool is_64bit);
-
-/**
- * @brief Pad an FDE buffer with 0 until its size is a multiple of 4
- * @param buf FDE buffer.
- */
-void PadCFI(std::vector<uint8_t>* buf);
-}  // namespace art
-
-#endif  // ART_COMPILER_UTILS_DWARF_CFI_H_
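
[Note on the two deletions above: the hand-rolled FDE helpers give way to the new dwarf/ opcode writers added elsewhere in this change. For reference, the core DW_CFA_advance_loc encoding they implemented, and which any replacement writer must reproduce: deltas below 64 fit in the opcode byte itself, larger deltas use the explicit 1-, 2- or 4-byte forms. Standalone sketch, not ART code.]

  #include <cstdint>
  #include <vector>

  void AdvanceLoc(std::vector<uint8_t>* buf, uint32_t delta) {
    if (delta < 64u) {
      buf->push_back((0x1 << 6) | delta);             // DW_CFA_advance_loc.
    } else if (delta < 256u) {
      buf->push_back(0x02);                           // DW_CFA_advance_loc1.
      buf->push_back(static_cast<uint8_t>(delta));
    } else if (delta < 65536u) {
      buf->push_back(0x03);                           // DW_CFA_advance_loc2.
      buf->push_back(delta & 0xff);
      buf->push_back((delta >> 8) & 0xff);
    } else {
      buf->push_back(0x04);                           // DW_CFA_advance_loc4.
      for (int i = 0; i < 4; ++i) {
        buf->push_back((delta >> (8 * i)) & 0xff);
      }
    }
  }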
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index b5437b0..709a911 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -536,6 +536,10 @@
   Sdc1(reg, base, offset);
 }
 
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::MipsCore(static_cast<int>(reg));
+}
+
 constexpr size_t kFramePointerSize = 4;
 
 void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
@@ -549,10 +553,12 @@
   // Push callee saves and return address
   int stack_offset = frame_size - kFramePointerSize;
   StoreToOffset(kStoreWord, RA, SP, stack_offset);
+  cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
     Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
     StoreToOffset(kStoreWord, reg, SP, stack_offset);
+    cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
 
   // Write out Method*.
@@ -568,31 +574,40 @@
 void MipsAssembler::RemoveFrame(size_t frame_size,
                                 const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi_.RememberState();
 
   // Pop callee saves and return address
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
     Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
     LoadFromOffset(kLoadWord, reg, SP, stack_offset);
+    cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
   }
   LoadFromOffset(kLoadWord, RA, SP, stack_offset);
+  cfi_.Restore(DWARFReg(RA));
 
   // Decrease frame to required size.
   DecreaseFrameSize(frame_size);
 
   // Then jump to the return address.
   Jr(RA);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 void MipsAssembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   AddConstant(SP, SP, -adjust);
+  cfi_.AdjustCFAOffset(adjust);
 }
 
 void MipsAssembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   AddConstant(SP, SP, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
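
[Note on the hunk above, and the matching mips64/x86/x86_64 hunks below: every stack-mutating instruction is mirrored by a CFI note, and the epilogue is bracketed by RememberState/RestoreState so that any code emitted after the return still describes the in-frame state. A compilable sketch of the pattern against a hypothetical writer interface; the method names mirror the cfi_ calls above but the empty bodies are stand-ins.]

  struct CfiWriter {  // Stand-in for the assembler's cfi_ member.
    void AdjustCFAOffset(int) {}
    void RelOffset(int, int) {}
    void Restore(int) {}
    void RememberState() {}
    void RestoreState() {}
    void DefCFAOffset(int) {}
  };

  void EmitEpilogue(CfiWriter* cfi, int frame_size) {
    cfi->RememberState();               // Snapshot the in-frame CFI state.
    // ... pop callee saves, each followed by cfi->Restore(reg) ...
    cfi->AdjustCFAOffset(-frame_size);  // SP back to its entry value.
    // ... emit the return instruction ...
    cfi->RestoreState();                // Code after the ret is still in-frame.
    cfi->DefCFAOffset(frame_size);
  }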
diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h
index dd55cc4..40d39e3 100644
--- a/compiler/utils/mips/managed_register_mips.h
+++ b/compiler/utils/mips/managed_register_mips.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_
 
 #include "constants_mips.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 388d274..282ab96 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -568,6 +568,10 @@
   }
 }
 
+static dwarf::Reg DWARFReg(GpuRegister reg) {
+  return dwarf::Reg::Mips64Core(static_cast<int>(reg));
+}
+
 constexpr size_t kFramePointerSize = 8;
 
 void Mips64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
@@ -581,10 +585,12 @@
   // Push callee saves and return address
   int stack_offset = frame_size - kFramePointerSize;
   StoreToOffset(kStoreDoubleword, RA, SP, stack_offset);
+  cfi_.RelOffset(DWARFReg(RA), stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
     stack_offset -= kFramePointerSize;
     GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
     StoreToOffset(kStoreDoubleword, reg, SP, stack_offset);
+    cfi_.RelOffset(DWARFReg(reg), stack_offset);
   }
 
   // Write out Method*.
@@ -612,31 +618,40 @@
 void Mips64Assembler::RemoveFrame(size_t frame_size,
                                   const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi_.RememberState();
 
   // Pop callee saves and return address
   int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
     GpuRegister reg = callee_save_regs.at(i).AsMips64().AsGpuRegister();
     LoadFromOffset(kLoadDoubleword, reg, SP, stack_offset);
+    cfi_.Restore(DWARFReg(reg));
     stack_offset += kFramePointerSize;
   }
   LoadFromOffset(kLoadDoubleword, RA, SP, stack_offset);
+  cfi_.Restore(DWARFReg(RA));
 
   // Decrease frame to required size.
   DecreaseFrameSize(frame_size);
 
   // Then jump to the return address.
   Jr(RA);
+
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 void Mips64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   AddConstant64(SP, SP, -adjust);
+  cfi_.AdjustCFAOffset(adjust);
 }
 
 void Mips64Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   AddConstant64(SP, SP, adjust);
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 void Mips64Assembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index 924a928..4c4705b 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_
 
 #include "constants_mips64.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b3a1376..51cc7ac 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -20,7 +20,6 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "memory_region.h"
 #include "thread.h"
-#include "utils/dwarf_cfi.h"
 
 namespace art {
 namespace x86 {
@@ -1467,6 +1466,15 @@
   EmitOperand(reg, address);
 }
 
+
+void X86Assembler::cmpxchg8b(const Address& address) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xC7);
+  EmitOperand(1, address);
+}
+
+
 void X86Assembler::mfence() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1631,14 +1639,8 @@
   EmitOperand(reg_or_opcode, Operand(operand));
 }
 
-void X86Assembler::InitializeFrameDescriptionEntry() {
-  WriteFDEHeader(&cfi_info_, false /* is_64bit */);
-}
-
-void X86Assembler::FinalizeFrameDescriptionEntry() {
-  WriteFDEAddressRange(&cfi_info_, buffer_.Size(), false /* is_64bit */);
-  PadCFI(&cfi_info_);
-  WriteCFILength(&cfi_info_, false /* is_64bit */);
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86Core(static_cast<int>(reg));
 }
 
 constexpr size_t kFramePointerSize = 4;
@@ -1646,54 +1648,33 @@
 void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& spill_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
-  cfi_cfa_offset_ = kFramePointerSize;  // Only return address on stack
-  cfi_pc_ = buffer_.Size();  // Nothing emitted yet
-  DCHECK_EQ(cfi_pc_, 0U);
-
-  uint32_t reg_offset = 1;
+  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
+  cfi_.SetCurrentCFAOffset(4);  // Return address on stack.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
-    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
-    DCHECK(spill.IsCpuRegister());
-    pushl(spill.AsCpuRegister());
+    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    pushl(spill);
     gpr_count++;
-
-    // DW_CFA_advance_loc
-    DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-    cfi_pc_ = buffer_.Size();
-    // DW_CFA_def_cfa_offset
-    cfi_cfa_offset_ += kFramePointerSize;
-    DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-    // DW_CFA_offset reg offset
-    reg_offset++;
-    DW_CFA_offset(&cfi_info_, spill_regs.at(i).AsX86().DWARFRegId(), reg_offset);
+    cfi_.AdjustCFAOffset(kFramePointerSize);
+    cfi_.RelOffset(DWARFReg(spill), 0);
   }
 
-  // return address then method on stack
+  // return address then method on stack.
   int32_t adjust = frame_size - (gpr_count * kFramePointerSize) -
                    sizeof(StackReference<mirror::ArtMethod>) /*method*/ -
                    kFramePointerSize /*return address*/;
   addl(ESP, Immediate(-adjust));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += adjust;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-
+  cfi_.AdjustCFAOffset(adjust);
   pushl(method_reg.AsX86().AsCpuRegister());
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += kFramePointerSize;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+  cfi_.AdjustCFAOffset(kFramePointerSize);
+  DCHECK_EQ(static_cast<size_t>(cfi_.GetCurrentCFAOffset()), frame_size);
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     ManagedRegisterSpill spill = entry_spills.at(i);
     if (spill.AsX86().IsCpuRegister()) {
-      movl(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsCpuRegister());
+      int offset = frame_size + spill.getSpillOffset();
+      movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
     } else {
       DCHECK(spill.AsX86().IsXmmRegister());
       if (spill.getSize() == 8) {
@@ -1709,30 +1690,33 @@
 void X86Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) -
-                      sizeof(StackReference<mirror::ArtMethod>)));
+  cfi_.RememberState();
+  int adjust = frame_size - (spill_regs.size() * kFramePointerSize) -
+               sizeof(StackReference<mirror::ArtMethod>);
+  addl(ESP, Immediate(adjust));
+  cfi_.AdjustCFAOffset(-adjust);
   for (size_t i = 0; i < spill_regs.size(); ++i) {
-    x86::X86ManagedRegister spill = spill_regs.at(i).AsX86();
-    DCHECK(spill.IsCpuRegister());
-    popl(spill.AsCpuRegister());
+    Register spill = spill_regs.at(i).AsX86().AsCpuRegister();
+    popl(spill);
+    cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+    cfi_.Restore(DWARFReg(spill));
   }
   ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 void X86Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addl(ESP, Immediate(-adjust));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += adjust;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+  cfi_.AdjustCFAOffset(adjust);
 }
 
 void X86Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addl(ESP, Immediate(adjust));
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 void X86Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
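
[Note on the cmpxchg8b addition above: the instruction is 0F C7 with /1 in the ModRM reg field, and all data operands are implicit — EDX:EAX is compared against the m64 operand and ECX:EBX is written on success. A byte-level check of the encoding for the simple (%EBP) form, hand-building the ModRM byte rather than using the assembler's Operand machinery.]

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<uint8_t> code;
    code.push_back(0xF0);  // lock prefix, as LockCmpxchg8b() emits via lock().
    code.push_back(0x0F);
    code.push_back(0xC7);
    // ModRM: mod=01 (disp8), reg=001 (/1 opcode extension), rm=101 (EBP).
    code.push_back((0x1 << 6) | (0x1 << 3) | 0x5);
    code.push_back(0x00);  // disp8 = 0; (%EBP) always needs an explicit disp.
    for (uint8_t b : code) std::printf("%02x ", b);
    std::printf("\n");  // Expect: f0 0f c7 4d 00 == lock cmpxchg8b 0x0(%ebp).
    return 0;
  }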
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index bdf8843..f3675ae 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -205,7 +205,7 @@
 
 class X86Assembler FINAL : public Assembler {
  public:
-  explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
+  explicit X86Assembler() {}
   virtual ~X86Assembler() {}
 
   /*
@@ -457,6 +457,7 @@
 
   X86Assembler* lock();
   void cmpxchgl(const Address& address, Register reg);
+  void cmpxchg8b(const Address& address);
 
   void mfence();
 
@@ -476,6 +477,10 @@
     lock()->cmpxchgl(address, reg);
   }
 
+  void LockCmpxchg8b(const Address& address) {
+    lock()->cmpxchg8b(address);
+  }
+
   //
   // Misc. functionality
   //
@@ -599,12 +604,6 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
-  void InitializeFrameDescriptionEntry() OVERRIDE;
-  void FinalizeFrameDescriptionEntry() OVERRIDE;
-  std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
-    return &cfi_info_;
-  }
-
  private:
   inline void EmitUint8(uint8_t value);
   inline void EmitInt32(int32_t value);
@@ -623,9 +622,6 @@
   void EmitGenericShift(int rm, Register reg, const Immediate& imm);
   void EmitGenericShift(int rm, Register operand, Register shifter);
 
-  std::vector<uint8_t> cfi_info_;
-  uint32_t cfi_cfa_offset_, cfi_pc_;
-
   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
 };
 
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index fccb510..dba3b6b 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -127,4 +127,49 @@
   DriverStr(expected, "LoadLongConstant");
 }
 
+TEST_F(AssemblerX86Test, LockCmpxchgl) {
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12),
+      x86::Register(x86::ESI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+      x86::Register(x86::ESI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12),
+      x86::Register(x86::EDI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+      x86::Register(x86::EBP), 0), x86::Register(x86::ESI));
+  GetAssembler()->LockCmpxchgl(x86::Address(
+        x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0),
+      x86::Register(x86::ESI));
+  const char* expected =
+    "lock cmpxchgl %ESI, 0xc(%EDI,%EBX,4)\n"
+    "lock cmpxchgl %ESI, 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchgl %EDI, 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchgl %ESI, (%EBP)\n"
+    "lock cmpxchgl %ESI, (%EBP,%ESI,1)\n";
+
+  DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86Test, LockCmpxchg8b) {
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::ESI), x86::TIMES_4, 12));
+  GetAssembler()->LockCmpxchg8b(x86::Address(x86::Register(x86::EBP), 0));
+  GetAssembler()->LockCmpxchg8b(x86::Address(
+      x86::Register(x86::EBP), x86::Register(x86::ESI), x86::TIMES_1, 0));
+  const char* expected =
+    "lock cmpxchg8b 0xc(%EDI,%EBX,4)\n"
+    "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchg8b 0xc(%EDI,%ESI,4)\n"
+    "lock cmpxchg8b (%EBP)\n"
+    "lock cmpxchg8b (%EBP,%ESI,1)\n";
+
+  DriverStr(expected, "lock_cmpxchg8b");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h
index 5d46ee2..4e8c41e 100644
--- a/compiler/utils/x86/managed_register_x86.h
+++ b/compiler/utils/x86/managed_register_x86.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_
 
 #include "constants_x86.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -88,14 +89,6 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86ManagedRegister : public ManagedRegister {
  public:
-  int DWARFRegId() const {
-    CHECK(IsCpuRegister());
-    // For all the X86 registers we care about:
-    // EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
-    // DWARF register id is the same as id_.
-    return static_cast<int>(id_);
-  }
-
   ByteRegister AsByteRegister() const {
     CHECK(IsCpuRegister());
     CHECK_LT(AsCpuRegister(), ESP);  // ESP, EBP, ESI and EDI cannot be encoded as byte registers.
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index e82d90c..638659d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -20,7 +20,6 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "memory_region.h"
 #include "thread.h"
-#include "utils/dwarf_cfi.h"
 
 namespace art {
 namespace x86_64 {
@@ -210,7 +209,9 @@
 
 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalByteRegNormalizingRex32(dst, src);
+  // The byte operand appears only in the register-source form, so we don't
+  // use EmitOptionalByteRegNormalizingRex32(dst, src);
+  EmitOptionalRex32(dst, src);
   EmitUint8(0x0F);
   EmitUint8(0xB6);
   EmitOperand(dst.LowBits(), src);
@@ -228,7 +229,9 @@
 
 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalByteRegNormalizingRex32(dst, src);
+  // The byte operand appears only in the register-source form, so we don't
+  // use EmitOptionalByteRegNormalizingRex32(dst, src);
+  EmitOptionalRex32(dst, src);
   EmitUint8(0x0F);
   EmitUint8(0xBE);
   EmitOperand(dst.LowBits(), src);
@@ -1621,6 +1624,14 @@
 }
 
 
+void X86_64Assembler::imulq(CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0xF7);
+  EmitOperand(5, Operand(reg));
+}
+
+
 void X86_64Assembler::imull(const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(address);
@@ -1854,11 +1865,22 @@
 
 void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(reg, address);
   EmitUint8(0x0F);
   EmitUint8(0xB1);
   EmitOperand(reg.LowBits(), address);
 }
 
+
+void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg, address);
+  EmitUint8(0x0F);
+  EmitUint8(0xB1);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
 void X86_64Assembler::mfence() {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1957,6 +1979,10 @@
   for (int i = 1; i < length; i++) {
     EmitUint8(operand.encoding_[i]);
   }
+  AssemblerFixup* fixup = operand.GetFixup();
+  if (fixup != nullptr) {
+    EmitFixup(fixup);
+  }
 }
 
 
@@ -2155,11 +2181,18 @@
 }
 
 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
-  EmitOptionalRex(true, false, dst.NeedsRex(), false, src.NeedsRex());
+  // For src, SPL, BPL, SIL, DIL need the rex prefix.
+  bool force = src.AsRegister() > 3;
+  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
 }
 
 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
-  uint8_t rex = 0x40 | operand.rex();  // REX.0000
+  uint8_t rex = operand.rex();
+  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
+  bool force = dst.AsRegister() > 3;
+  if (force) {
+    rex |= 0x40;  // REX.0000
+  }
   if (dst.NeedsRex()) {
     rex |= 0x44;  // REX.0R00
   }
@@ -2168,14 +2201,11 @@
   }
 }
 
-void X86_64Assembler::InitializeFrameDescriptionEntry() {
-  WriteFDEHeader(&cfi_info_, true /* is_64bit */);
+static dwarf::Reg DWARFReg(Register reg) {
+  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
 }
-
-void X86_64Assembler::FinalizeFrameDescriptionEntry() {
-  WriteFDEAddressRange(&cfi_info_, buffer_.Size(), true /* is_64bit */);
-  PadCFI(&cfi_info_);
-  WriteCFILength(&cfi_info_, true /* is_64bit */);
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
 }
 
 constexpr size_t kFramePointerSize = 8;
@@ -2183,11 +2213,8 @@
 void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                                  const std::vector<ManagedRegister>& spill_regs,
                                  const ManagedRegisterEntrySpills& entry_spills) {
-  cfi_cfa_offset_ = kFramePointerSize;  // Only return address on stack
-  cfi_pc_ = buffer_.Size();  // Nothing emitted yet
-  DCHECK_EQ(cfi_pc_, 0U);
-
-  uint32_t reg_offset = 1;
+  DCHECK_EQ(buffer_.Size(), 0U);  // Nothing emitted yet.
+  cfi_.SetCurrentCFAOffset(8);  // Return address on stack.
   CHECK_ALIGNED(frame_size, kStackAlignment);
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
@@ -2195,29 +2222,16 @@
     if (spill.IsCpuRegister()) {
       pushq(spill.AsCpuRegister());
       gpr_count++;
-
-      // DW_CFA_advance_loc
-      DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-      cfi_pc_ = buffer_.Size();
-      // DW_CFA_def_cfa_offset
-      cfi_cfa_offset_ += kFramePointerSize;
-      DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
-      // DW_CFA_offset reg offset
-      reg_offset++;
-      DW_CFA_offset(&cfi_info_, spill.DWARFRegId(), reg_offset);
+      cfi_.AdjustCFAOffset(kFramePointerSize);
+      cfi_.RelOffset(DWARFReg(spill.AsCpuRegister().AsRegister()), 0);
     }
   }
-  // return address then method on stack
+  // return address then method on stack.
   int64_t rest_of_frame = static_cast<int64_t>(frame_size)
                           - (gpr_count * kFramePointerSize)
                           - kFramePointerSize /*return address*/;
   subq(CpuRegister(RSP), Immediate(rest_of_frame));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += rest_of_frame;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+  cfi_.AdjustCFAOffset(rest_of_frame);
 
   // spill xmms
   int64_t offset = rest_of_frame;
@@ -2226,6 +2240,7 @@
     if (spill.IsXmmRegister()) {
       offset -= sizeof(double);
       movsd(Address(CpuRegister(RSP), offset), spill.AsXmmRegister());
+      cfi_.RelOffset(DWARFReg(spill.AsXmmRegister().AsFloatRegister()), offset);
     }
   }
 
@@ -2257,6 +2272,7 @@
 void X86_64Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  cfi_.RememberState();
   int gpr_count = 0;
   // unspill xmms
   int64_t offset = static_cast<int64_t>(frame_size) - (spill_regs.size() * kFramePointerSize) - 2 * kFramePointerSize;
@@ -2265,34 +2281,38 @@
     if (spill.IsXmmRegister()) {
       offset += sizeof(double);
       movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
+      cfi_.Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
     } else {
       gpr_count++;
     }
   }
-  addq(CpuRegister(RSP), Immediate(static_cast<int64_t>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize));
+  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
+  addq(CpuRegister(RSP), Immediate(adjust));
+  cfi_.AdjustCFAOffset(-adjust);
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     x86_64::X86_64ManagedRegister spill = spill_regs.at(i).AsX86_64();
     if (spill.IsCpuRegister()) {
       popq(spill.AsCpuRegister());
+      cfi_.AdjustCFAOffset(-static_cast<int>(kFramePointerSize));
+      cfi_.Restore(DWARFReg(spill.AsCpuRegister().AsRegister()));
     }
   }
   ret();
+  // The CFI should be restored for any code that follows the exit block.
+  cfi_.RestoreState();
+  cfi_.DefCFAOffset(frame_size);
 }
 
 void X86_64Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
-  // DW_CFA_advance_loc
-  DW_CFA_advance_loc(&cfi_info_, buffer_.Size() - cfi_pc_);
-  cfi_pc_ = buffer_.Size();
-  // DW_CFA_def_cfa_offset
-  cfi_cfa_offset_ += adjust;
-  DW_CFA_def_cfa_offset(&cfi_info_, cfi_cfa_offset_);
+  cfi_.AdjustCFAOffset(adjust);
 }
 
 void X86_64Assembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kStackAlignment);
   addq(CpuRegister(RSP), Immediate(adjust));
+  cfi_.AdjustCFAOffset(-adjust);
 }
 
 void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
@@ -2720,5 +2740,55 @@
 #undef __
 }
 
+void X86_64Assembler::AddConstantArea() {
+  const std::vector<int32_t>& area = constant_area_.GetBuffer();
+  for (size_t i = 0, e = area.size(); i < e; i++) {
+    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    EmitInt32(area[i]);
+  }
+}
+
+int ConstantArea::AddInt32(int32_t v) {
+  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
+    if (v == buffer_[i]) {
+      return i * elem_size_;
+    }
+  }
+
+  // Didn't match anything.
+  int result = buffer_.size() * elem_size_;
+  buffer_.push_back(v);
+  return result;
+}
+
+int ConstantArea::AddInt64(int64_t v) {
+  int32_t v_low = v;
+  int32_t v_high = v >> 32;
+  if (buffer_.size() > 1) {
+    // Ensure we don't pass the end of the buffer.
+    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
+      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
+        return i * elem_size_;
+      }
+    }
+  }
+
+  // Didn't match anything.
+  int result = buffer_.size() * elem_size_;
+  buffer_.push_back(v_low);
+  buffer_.push_back(v_high);
+  return result;
+}
+
+int ConstantArea::AddDouble(double v) {
+  // Treat the value as a 64-bit integer value.
+  return AddInt64(bit_cast<int64_t, double>(v));
+}
+
+int ConstantArea::AddFloat(float v) {
+  // Treat the value as a 32-bit integer value.
+  return AddInt32(bit_cast<int32_t, float>(v));
+}
+
 }  // namespace x86_64
 }  // namespace art
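
[Note on the EmitOptionalByteRegNormalizingRex32 fix above: it hinges on an x86-64 quirk. Without a REX prefix, byte-register encodings 4-7 name AH/CH/DH/BH, but with any REX prefix present they name SPL/BPL/SIL/DIL — so a byte op on those four must force an empty REX (0x40) even when no extension bits are set. A standalone predicate mirroring the `force = reg > 3` test above.]

  #include <cstdio>

  bool NeedsRexForByteReg(int low_encoding, bool needs_rex_bit) {
    // needs_rex_bit covers R8B-R15B; low encodings 4-7 cover SPL/BPL/SIL/DIL.
    return needs_rex_bit || low_encoding > 3;
  }

  int main() {
    std::printf("AL:  %d\n", NeedsRexForByteReg(0, false));  // 0: no REX.
    std::printf("SIL: %d\n", NeedsRexForByteReg(6, false));  // 1: force 0x40.
    std::printf("R9B: %d\n", NeedsRexForByteReg(1, true));   // 1: REX bit set.
    return 0;
  }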
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 39f781c..15b8b15 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -97,9 +97,13 @@
         && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
   }
 
+  AssemblerFixup* GetFixup() const {
+    return fixup_;
+  }
+
  protected:
   // Operand can be sub classed (e.g: Address).
-  Operand() : rex_(0), length_(0) { }
+  Operand() : rex_(0), length_(0), fixup_(nullptr) { }
 
   void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
     CHECK_EQ(mod_in & ~3, 0);
@@ -136,12 +140,17 @@
     length_ += disp_size;
   }
 
+  void SetFixup(AssemblerFixup* fixup) {
+    fixup_ = fixup;
+  }
+
  private:
   uint8_t rex_;
   uint8_t length_;
   uint8_t encoding_[6];
+  AssemblerFixup* fixup_;
 
-  explicit Operand(CpuRegister reg) : rex_(0), length_(0) { SetModRM(3, reg); }
+  explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }
 
   // Get the operand encoding byte at the given index.
   uint8_t encoding_at(int index_in) const {
@@ -226,12 +235,25 @@
       result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
       result.SetDisp32(addr);
     } else {
+      // RIP addressing is done using RBP as the base register.
+      // The value in RBP isn't used.  Instead the offset is added to RIP.
       result.SetModRM(0, CpuRegister(RBP));
       result.SetDisp32(addr);
     }
     return result;
   }
 
+  // An RIP relative address that will be fixed up later.
+  static Address RIP(AssemblerFixup* fixup) {
+    Address result;
+    // RIP addressing is done using RBP as the base register.
+    // The value in RBP isn't used.  Instead the offset is added to RIP.
+    result.SetModRM(0, CpuRegister(RBP));
+    result.SetDisp32(0);
+    result.SetFixup(fixup);
+    return result;
+  }
+
   // If no_rip is true then the Absolute address isn't RIP relative.
   static Address Absolute(ThreadOffset<8> addr, bool no_rip = false) {
     return Absolute(addr.Int32Value(), no_rip);
@@ -242,9 +264,46 @@
 };
 
 
+/**
+ * Class to handle constant area values.
+ */
+class ConstantArea {
+  public:
+    ConstantArea() {}
+
+    // Add a double to the constant area, returning the offset into
+    // the constant area where the literal resides.
+    int AddDouble(double v);
+
+    // Add a float to the constant area, returning the offset into
+    // the constant area where the literal resides.
+    int AddFloat(float v);
+
+    // Add an int32_t to the constant area, returning the offset into
+    // the constant area where the literal resides.
+    int AddInt32(int32_t v);
+
+    // Add an int64_t to the constant area, returning the offset into
+    // the constant area where the literal resides.
+    int AddInt64(int64_t v);
+
+    int GetSize() const {
+      return buffer_.size() * elem_size_;
+    }
+
+    const std::vector<int32_t>& GetBuffer() const {
+      return buffer_;
+    }
+
+  private:
+    static constexpr size_t elem_size_ = sizeof(int32_t);
+    std::vector<int32_t> buffer_;
+};
+
+
 class X86_64Assembler FINAL : public Assembler {
  public:
-  X86_64Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
+  X86_64Assembler() {}
   virtual ~X86_64Assembler() {}
 
   /*
@@ -468,6 +527,7 @@
   void imull(CpuRegister reg, const Immediate& imm);
   void imull(CpuRegister reg, const Address& address);
 
+  void imulq(CpuRegister src);
   void imulq(CpuRegister dst, CpuRegister src);
   void imulq(CpuRegister reg, const Immediate& imm);
   void imulq(CpuRegister reg, const Address& address);
@@ -517,6 +577,7 @@
 
   X86_64Assembler* lock();
   void cmpxchgl(const Address& address, CpuRegister reg);
+  void cmpxchgq(const Address& address, CpuRegister reg);
 
   void mfence();
 
@@ -539,6 +600,10 @@
     lock()->cmpxchgl(address, reg);
   }
 
+  void LockCmpxchgq(const Address& address, CpuRegister reg) {
+    lock()->cmpxchgq(address, reg);
+  }
+
   //
   // Misc. functionality
   //
@@ -663,11 +728,27 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
-  void InitializeFrameDescriptionEntry() OVERRIDE;
-  void FinalizeFrameDescriptionEntry() OVERRIDE;
-  std::vector<uint8_t>* GetFrameDescriptionEntry() OVERRIDE {
-    return &cfi_info_;
-  }
+  // Add a double to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+
+  // Add a float to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+
+  // Add an int32_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+
+  // Add an int64_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+
+  // Add the contents of the constant area to the assembler buffer.
+  void AddConstantArea();
+
+  // Is the constant area empty? Return true if there are no literals in the constant area.
+  bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
 
  private:
   void EmitUint8(uint8_t value);
@@ -714,8 +795,7 @@
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
   void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);
 
-  std::vector<uint8_t> cfi_info_;
-  uint32_t cfi_cfa_offset_, cfi_pc_;
+  ConstantArea constant_area_;
 
   DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
 };
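
[Note on the ConstantArea/RIP additions above, as I read the API: literals are pooled and deduplicated, callers get back byte offsets into the pool, and Address::RIP(fixup) lets the displacement be patched once the pool's final position is known. A simplified stand-in showing the offsets AddInt64/AddDouble return for one insertion order; the assembler itself is not exercised.]

  #include <cassert>
  #include <cstdint>
  #include <cstring>
  #include <vector>

  struct Pool {  // Simplified stand-in for x86_64::ConstantArea.
    std::vector<int32_t> buf;
    int AddInt64(int64_t v) {
      int32_t lo = static_cast<int32_t>(v), hi = static_cast<int32_t>(v >> 32);
      for (size_t i = 0; i + 1 < buf.size(); ++i) {
        if (buf[i] == lo && buf[i + 1] == hi) return static_cast<int>(i * 4);
      }
      int off = static_cast<int>(buf.size() * 4);
      buf.push_back(lo);
      buf.push_back(hi);
      return off;
    }
    int AddDouble(double v) {
      int64_t bits;
      std::memcpy(&bits, &v, sizeof(bits));  // bit_cast<int64_t, double>(v).
      return AddInt64(bits);
    }
  };

  int main() {
    Pool pool;
    assert(pool.AddDouble(1.5) == 0);  // First literal lands at offset 0.
    assert(pool.AddInt64(42) == 8);    // Appended after the double.
    assert(pool.AddDouble(1.5) == 0);  // Deduplicated: same offset again.
    return 0;
  }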
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 4402dfc..116190a 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -174,6 +174,40 @@
       secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d");
       secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d");
 
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "ax");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bx");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cx");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dx");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bp");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "sp");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "si");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "di");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14w");
+      tertiary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15w");
+
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "al");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "bl");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "cl");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "dl");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "bpl");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "spl");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "sil");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "dil");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b");
+      quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b");
+
       fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0));
       fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1));
       fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2));
@@ -216,9 +250,21 @@
     return secondary_register_names_[reg];
   }
 
+  std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+    CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end());
+    return tertiary_register_names_[reg];
+  }
+
+  std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+    CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end());
+    return quaternary_register_names_[reg];
+  }
+
  private:
   std::vector<x86_64::CpuRegister*> registers_;
   std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_;
+  std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_;
+  std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_;
 
   std::vector<x86_64::XmmRegister*> fp_registers_;
 };
@@ -269,6 +315,10 @@
   DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli");
 }
 
+TEST_F(AssemblerX86_64Test, ImulqReg1) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::imulq, "imulq %{reg}"), "imulq");
+}
+
 TEST_F(AssemblerX86_64Test, ImulqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq");
 }
@@ -539,6 +589,56 @@
   // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl");
 }
 
+TEST_F(AssemblerX86_64Test, LockCmpxchgl) {
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::R8));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgl(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+      x86_64::CpuRegister(x86_64::RSI));
+  const char* expected =
+    "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n"
+    "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchgl %ESI, (%R13)\n"
+    "lock cmpxchgl %ESI, (%R13,%R9,1)\n";
+
+  DriverStr(expected, "lock_cmpxchgl");
+}
+
+TEST_F(AssemblerX86_64Test, LockCmpxchgq) {
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
+      x86_64::CpuRegister(x86_64::R8));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
+  GetAssembler()->LockCmpxchgq(x86_64::Address(
+      x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
+      x86_64::CpuRegister(x86_64::RSI));
+  const char* expected =
+    "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n"
+    "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n"
+    "lock cmpxchg %RSI, (%R13)\n"
+    "lock cmpxchg %RSI, (%R13,%R9,1)\n";
+
+  DriverStr(expected, "lock_cmpxchg");
+}
+
 TEST_F(AssemblerX86_64Test, Movl) {
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
       x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
@@ -824,31 +924,12 @@
                                "l", "ge", "le" };
 
   std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
-  std::string byte_regs[16];
-  byte_regs[x86_64::RAX] = "al";
-  byte_regs[x86_64::RBX] = "bl";
-  byte_regs[x86_64::RCX] = "cl";
-  byte_regs[x86_64::RDX] = "dl";
-  byte_regs[x86_64::RBP] = "bpl";
-  byte_regs[x86_64::RSP] = "spl";
-  byte_regs[x86_64::RSI] = "sil";
-  byte_regs[x86_64::RDI] = "dil";
-  byte_regs[x86_64::R8] = "r8b";
-  byte_regs[x86_64::R9] = "r9b";
-  byte_regs[x86_64::R10] = "r10b";
-  byte_regs[x86_64::R11] = "r11b";
-  byte_regs[x86_64::R12] = "r12b";
-  byte_regs[x86_64::R13] = "r13b";
-  byte_regs[x86_64::R14] = "r14b";
-  byte_regs[x86_64::R15] = "r15b";
-
   std::ostringstream str;
 
   for (auto reg : registers) {
     for (size_t i = 0; i < 15; ++i) {
       assembler->setcc(static_cast<x86_64::Condition>(i), *reg);
-      str << "set" << suffixes[i] << " %" << byte_regs[reg->AsRegister()] << "\n";
+      str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n";
     }
   }
 
@@ -979,4 +1060,12 @@
   DriverFn(&decreaseframe_test_fn, "DecreaseFrame");
 }
 
+TEST_F(AssemblerX86_64Test, MovzxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movzxb, "movzbl %{reg2}, %{reg1}"), "movzxb");
+}
+
+TEST_F(AssemblerX86_64Test, MovsxbRegs) {
+  DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h
index 3a96ad0..47bbb44 100644
--- a/compiler/utils/x86_64/managed_register_x86_64.h
+++ b/compiler/utils/x86_64/managed_register_x86_64.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_
 
 #include "constants_x86_64.h"
+#include "dwarf/register.h"
 #include "utils/managed_register.h"
 
 namespace art {
@@ -87,21 +88,6 @@
 // There is a one-to-one mapping between ManagedRegister and register id.
 class X86_64ManagedRegister : public ManagedRegister {
  public:
-  int DWARFRegId() const {
-    CHECK(IsCpuRegister());
-    switch (id_) {
-      case RAX: return  0;
-      case RDX: return  1;
-      case RCX: return  2;
-      case RBX: return  3;
-      case RSI: return  4;
-      case RDI: return  5;
-      case RBP: return  6;
-      case RSP: return  7;
-      default: return static_cast<int>(id_);  // R8 ~ R15
-    }
-  }
-
   CpuRegister AsCpuRegister() const {
     CHECK(IsCpuRegister());
     return CpuRegister(static_cast<Register>(id_));
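
[Note on the deletion above: the removed switch encoded the System V AMD64 DWARF register numbering, which differs from the hardware encoding for everything between RCX and RDI; that mapping now lives behind dwarf::Reg::X86_64Core. For reference, a table-driven version of the same remap (hardware id to DWARF id), reconstructed from the deleted cases.]

  #include <cstdio>

  int X86_64DwarfCore(int machine_id) {
    static const int kMap[16] = {
        0 /*RAX*/, 2 /*RCX*/, 1 /*RDX*/, 3 /*RBX*/,
        7 /*RSP*/, 6 /*RBP*/, 4 /*RSI*/, 5 /*RDI*/,
        8, 9, 10, 11, 12, 13, 14, 15 /*R8-R15 unchanged*/};
    return kMap[machine_id];
  }

  int main() {
    std::printf("RSP(4) -> %d\n", X86_64DwarfCore(4));  // 7 in DWARF.
    return 0;
  }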
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 2e1b7ae..6af6f18 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -109,6 +109,11 @@
 
   UsageError("Usage: dex2oat [options]...");
   UsageError("");
+  UsageError("  -j<number>: specifies the number of threads used for compilation.");
+  UsageError("       Default is the number of detected hardware threads available on the");
+  UsageError("       host system.");
+  UsageError("      Example: -j12");
+  UsageError("");
   UsageError("  --dex-file=<dex-file>: specifies a .dex, .jar, or .apk file to compile.");
   UsageError("      Example: --dex-file=/system/framework/core.jar");
   UsageError("");
@@ -188,11 +193,6 @@
   UsageError("      Example: --compiler-filter=everything");
   UsageError("      Default: speed");
   UsageError("");
-  UsageError("  --huge-method-max=<method-instruction-count>: the threshold size for a huge");
-  UsageError("      method for compiler filter tuning.");
-  UsageError("      Example: --huge-method-max=%d", CompilerOptions::kDefaultHugeMethodThreshold);
-  UsageError("      Default: %d", CompilerOptions::kDefaultHugeMethodThreshold);
-  UsageError("");
   UsageError("  --huge-method-max=<method-instruction-count>: threshold size for a huge");
   UsageError("      method for compiler filter tuning.");
   UsageError("      Example: --huge-method-max=%d", CompilerOptions::kDefaultHugeMethodThreshold);
@@ -1503,11 +1503,16 @@
     return failure_count;
   }
 
-  // Returns true if dex_files has a dex with the named location.
+  // Returns true if dex_files has a dex with the named location. We compare canonical locations,
+  // so that relative and absolute paths will match. Not caching for the dex_files isn't very
+  // efficient, but under normal circumstances the list is neither large nor is this part too
+  // sensitive.
   static bool DexFilesContains(const std::vector<const DexFile*>& dex_files,
                                const std::string& location) {
+    std::string canonical_location(DexFile::GetDexCanonicalLocation(location.c_str()));
     for (size_t i = 0; i < dex_files.size(); ++i) {
-      if (dex_files[i]->GetLocation() == location) {
+      if (DexFile::GetDexCanonicalLocation(dex_files[i]->GetLocation().c_str()) ==
+          canonical_location) {
         return true;
       }
     }
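
[Note on the DexFilesContains change above: relative, absolute, and symlinked spellings of the same dex file should compare equal, hence canonical locations. A POSIX-only sketch of the comparison using realpath(3) as a stand-in for DexFile::GetDexCanonicalLocation(); error handling is elided.]

  #include <climits>
  #include <cstdlib>
  #include <string>

  std::string Canonical(const std::string& path) {
    char buf[PATH_MAX];
    // realpath resolves symlinks and "."/".." components; fall back to the
    // input if resolution fails (GetDexCanonicalLocation behaves similarly).
    return realpath(path.c_str(), buf) != nullptr ? std::string(buf) : path;
  }

  bool SameDexLocation(const std::string& a, const std::string& b) {
    return Canonical(a) == Canonical(b);
  }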
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index c9aa8c8..691c43f 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -23,7 +23,6 @@
 	disassembler_arm.cc \
 	disassembler_arm64.cc \
 	disassembler_mips.cc \
-	disassembler_mips64.cc \
 	disassembler_x86.cc
 
 # $(1): target or host
@@ -81,6 +80,8 @@
   endif
 
   LOCAL_C_INCLUDES += $(ART_C_INCLUDES) art/runtime
+  LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)
+  LOCAL_MULTILIB := both
 
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc
index fbc8dbb..c05c3ed 100644
--- a/disassembler/disassembler.cc
+++ b/disassembler/disassembler.cc
@@ -23,7 +23,6 @@
 #include "disassembler_arm.h"
 #include "disassembler_arm64.h"
 #include "disassembler_mips.h"
-#include "disassembler_mips64.h"
 #include "disassembler_x86.h"
 
 namespace art {
@@ -34,9 +33,9 @@
   } else if (instruction_set == kArm64) {
     return new arm64::DisassemblerArm64(options);
   } else if (instruction_set == kMips) {
-    return new mips::DisassemblerMips(options);
+    return new mips::DisassemblerMips(options, false);
   } else if (instruction_set == kMips64) {
-    return new mips64::DisassemblerMips64(options);
+    return new mips::DisassemblerMips(options, true);
   } else if (instruction_set == kX86) {
     return new x86::DisassemblerX86(options, false);
   } else if (instruction_set == kX86_64) {
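
The hunk above folds the MIPS64 disassembler into the MIPS one, selected by a flag fixed at construction. A hedged sketch of the pattern (class and enum names here are illustrative, not ART's):

    #include <memory>

    enum class Isa { kMips, kMips64 };

    // One class serves both widths; the flag is fixed at construction.
    class MipsDisasm {
     public:
      explicit MipsDisasm(bool is64bit) : is64bit_(is64bit) {}
      bool Is64Bit() const { return is64bit_; }
     private:
      const bool is64bit_;
    };

    std::unique_ptr<MipsDisasm> Create(Isa isa) {
      // kMips64 reuses the 32-bit class with the wide-mode flag set,
      // the same shape as the Disassembler::Create change above.
      return std::make_unique<MipsDisasm>(isa == Isa::kMips64);
    }
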
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index e2b7341..ac81737 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -44,6 +44,7 @@
 static const uint32_t kITypeMask = (0x3f << kOpcodeShift);
 static const uint32_t kJTypeMask = (0x3f << kOpcodeShift);
 static const uint32_t kRTypeMask = ((0x3f << kOpcodeShift) | (0x3f));
+static const uint32_t kSpecial0Mask = (0x3f << kOpcodeShift);
 static const uint32_t kSpecial2Mask = (0x3f << kOpcodeShift);
 static const uint32_t kFpMask = kRTypeMask;
 
@@ -61,6 +62,7 @@
   { kRTypeMask, 7, "srav", "DTS", },
   { kRTypeMask, 8, "jr", "S", },
   { kRTypeMask | (0x1f << 11), 9 | (31 << 11), "jalr", "S", },  // rd = 31 is implicit.
+  { kRTypeMask | (0x1f << 11), 9, "jr", "S", },  // rd = 0 is implicit.
   { kRTypeMask, 9, "jalr", "DS", },  // General case.
   { kRTypeMask | (0x1f << 6), 10, "movz", "DST", },
   { kRTypeMask | (0x1f << 6), 11, "movn", "DST", },
@@ -71,6 +73,9 @@
   { kRTypeMask, 17, "mthi", "S", },
   { kRTypeMask, 18, "mflo", "D", },
   { kRTypeMask, 19, "mtlo", "S", },
+  { kRTypeMask, 20, "dsllv", "DTS", },
+  { kRTypeMask, 22, "dsrlv", "DTS", },
+  { kRTypeMask, 23, "dsrav", "DTS", },
   { kRTypeMask | (0x1f << 6), 24, "mult", "ST", },
   { kRTypeMask | (0x1f << 6), 25, "multu", "ST", },
   { kRTypeMask | (0x1f << 6), 26, "div", "ST", },
@@ -89,12 +94,38 @@
   { kRTypeMask, 39, "nor", "DST", },
   { kRTypeMask, 42, "slt", "DST", },
   { kRTypeMask, 43, "sltu", "DST", },
-  // 0, 48, tge
-  // 0, 49, tgeu
-  // 0, 50, tlt
-  // 0, 51, tltu
-  // 0, 52, teq
-  // 0, 54, tne
+  { kRTypeMask, 45, "daddu", "DST", },
+  { kRTypeMask, 46, "dsub", "DST", },
+  { kRTypeMask, 47, "dsubu", "DST", },
+  // TODO: tge[u], tlt[u], teq, tne
+  // TODO: seleqz, selnez
+  { kRTypeMask, 56, "dsll", "DTA", },
+  { kRTypeMask, 58, "dsrl", "DTA", },
+  { kRTypeMask, 59, "dsra", "DTA", },
+  { kRTypeMask, 60, "dsll32", "DTA", },
+  { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", },
+  { kRTypeMask, 62, "dsrl32", "DTA", },
+  { kRTypeMask, 63, "dsra32", "DTA", },
+
+  // SPECIAL0
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 25, "muhu", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 26, "div", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 26, "mod", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 27, "divu", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 27, "modu", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 28, "dmul", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 28, "dmuh", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 29, "dmulu", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 29, "dmuhu", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 30, "ddiv", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 30, "dmod", "DST" },
+  { kSpecial0Mask | 0x7ff, (2 << 6) | 31, "ddivu", "DST" },
+  { kSpecial0Mask | 0x7ff, (3 << 6) | 31, "dmodu", "DST" },
+  // TODO: [d]clz, [d]clo
+  // TODO: sdbbp
 
   // SPECIAL2
   { kSpecial2Mask | 0x7ff, (28 << kOpcodeShift) | 2, "mul", "DST" },
@@ -120,6 +151,8 @@
   { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (18 << 16), "bltzall", "SB" },
   { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" },
   { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" },
+  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", },
+  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", },
 
   { 0xffff0000, (4 << kOpcodeShift), "b", "B" },
   { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" },
@@ -130,27 +163,35 @@
   { kITypeMask, 11 << kOpcodeShift, "sltiu", "TSi", },
   { kITypeMask, 12 << kOpcodeShift, "andi", "TSi", },
   { kITypeMask, 13 << kOpcodeShift, "ori", "TSi", },
-  { kITypeMask, 14 << kOpcodeShift, "ori", "TSi", },
-  { kITypeMask, 15 << kOpcodeShift, "lui", "TI", },
+  { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", },
+  { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", },
+  { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", },
+  { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", },
+  { kITypeMask, 29 << kOpcodeShift, "daui", "TSi", },
 
   { kITypeMask, 32u << kOpcodeShift, "lb", "TO", },
   { kITypeMask, 33u << kOpcodeShift, "lh", "TO", },
   { kITypeMask, 35u << kOpcodeShift, "lw", "TO", },
   { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", },
   { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", },
+  { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", },
   { kITypeMask, 40u << kOpcodeShift, "sb", "TO", },
   { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
   { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
   { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
   { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
+  { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
   { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
   { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
+  { kITypeMask, 63u << kOpcodeShift, "sd", "TO", },
 
   // Floating point.
-  { kFpMask | (0x1f << 21), kCop1 | (0x00 << 21) | 0, "mfc1", "Td" },
-  { kFpMask | (0x1f << 21), kCop1 | (0x03 << 21) | 0, "mfhc1", "Td" },
-  { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21) | 0, "mtc1", "Td" },
-  { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21) | 0, "mthc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x00 << 21), "mfc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x01 << 21), "dmfc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x03 << 21), "mfhc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" },
+  { kFpMask | (0x1f << 21), kCop1 | (0x07 << 21), "mthc1", "Td" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" },
   { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" },
@@ -249,7 +290,11 @@
               args << StringPrintf("%+d(r%d)", offset, rs);
               if (rs == 17) {
                 args << "  ; ";
-                Thread::DumpThreadOffset<4>(args, offset);
+                if (is64bit_) {
+                  Thread::DumpThreadOffset<8>(args, offset);
+                } else {
+                  Thread::DumpThreadOffset<4>(args, offset);
+                }
               }
             }
             break;
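
The opcode table above is searched linearly; an entry matches when the instruction's fixed bits agree with the entry's value under its mask, i.e. the predicate (instruction & mask) == value that appears verbatim in the disassembler sources. A small worked example for the newly added dsll32 entry:

    #include <cstdint>
    #include <cstdio>

    struct Entry {
      uint32_t mask;
      uint32_t value;
      const char* name;
      bool Matches(uint32_t instruction) const {
        return (instruction & mask) == value;
      }
    };

    int main() {
      // R-type entries check the opcode field (bits 26-31) and the
      // function field (bits 0-5).
      const uint32_t kRTypeMask = (0x3fu << 26) | 0x3fu;
      const Entry dsll32 = { kRTypeMask, 60u, "dsll32" };
      // "dsll32 r1, r2, 4": opcode 0 (SPECIAL), rt=2, rd=1, sa=4, function 60.
      const uint32_t instruction = (2u << 16) | (1u << 11) | (4u << 6) | 60u;
      std::printf("%s\n", dsll32.Matches(instruction) ? dsll32.name : "?");
      return 0;
    }
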
diff --git a/disassembler/disassembler_mips.h b/disassembler/disassembler_mips.h
index 00b2f8d..67c3fcb 100644
--- a/disassembler/disassembler_mips.h
+++ b/disassembler/disassembler_mips.h
@@ -26,12 +26,15 @@
 
 class DisassemblerMips FINAL : public Disassembler {
  public:
-  explicit DisassemblerMips(DisassemblerOptions* options) : Disassembler(options) {}
+  explicit DisassemblerMips(DisassemblerOptions* options, bool is64bit) : Disassembler(options),
+      is64bit_(is64bit) {}
 
   size_t Dump(std::ostream& os, const uint8_t* begin) OVERRIDE;
   void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
 
  private:
+  const bool is64bit_;
+
   DISALLOW_COPY_AND_ASSIGN(DisassemblerMips);
 };
 
diff --git a/disassembler/disassembler_mips64.cc b/disassembler/disassembler_mips64.cc
deleted file mode 100644
index 1b6e6be..0000000
--- a/disassembler/disassembler_mips64.cc
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "disassembler_mips64.h"
-
-#include <ostream>
-#include <sstream>
-
-#include "base/logging.h"
-#include "base/stringprintf.h"
-#include "thread.h"
-
-namespace art {
-namespace mips64 {
-
-
-struct Mips64Instruction {
-  uint32_t    mask;
-  uint32_t    value;
-  const char* name;
-  const char* args_fmt;
-
-  bool Matches(uint32_t instruction) const {
-    return (instruction & mask) == value;
-  }
-};
-
-static const uint32_t kOpcodeShift = 26;
-static const uint32_t kCop1 = (17 << kOpcodeShift);
-static const uint32_t kITypeMask = (0x3f << kOpcodeShift);
-static const uint32_t kJTypeMask = (0x3f << kOpcodeShift);
-static const uint32_t kRTypeMask = ((0x3f << kOpcodeShift) | (0x3f));
-static const uint32_t kSpecial0Mask = (0x3f << kOpcodeShift);
-static const uint32_t kFpMask = kRTypeMask;
-
-static const Mips64Instruction gMips64Instructions[] = {
-  // "sll r0, r0, 0" is the canonical "nop", used in delay slots.
-  { 0xffffffff, 0, "nop", "" },
-
-  // R-type instructions.
-  { kRTypeMask, 0, "sll", "DTA", },
-  // 0, 1, movci
-  { kRTypeMask, 2, "srl", "DTA", },
-  { kRTypeMask, 3, "sra", "DTA", },
-  { kRTypeMask, 4, "sllv", "DTS", },
-  { kRTypeMask, 6, "srlv", "DTS", },
-  { kRTypeMask, 7, "srav", "DTS", },
-  { kRTypeMask | (0x1f << 11), 9 | (31 << 11), "jalr", "S", },  // rd = 31 is implicit.
-  { kRTypeMask | (0x1f << 11), 9, "jr", "S", },  // rd = 0 is implicit.
-  { kRTypeMask, 9, "jalr", "DS", },  // General case.
-  { kRTypeMask, 12, "syscall", "", },  // TODO: code
-  { kRTypeMask, 13, "break", "", },  // TODO: code
-  { kRTypeMask, 15, "sync", "", },  // TODO: type
-  { kRTypeMask, 20, "dsllv", "DTS", },
-  { kRTypeMask, 22, "dsrlv", "DTS", },
-  { kRTypeMask, 23, "dsrav", "DTS", },
-  { kRTypeMask, 33, "addu", "DST", },
-  { kRTypeMask, 34, "sub", "DST", },
-  { kRTypeMask, 35, "subu", "DST", },
-  { kRTypeMask, 36, "and", "DST", },
-  { kRTypeMask, 37, "or", "DST", },
-  { kRTypeMask, 38, "xor", "DST", },
-  { kRTypeMask, 39, "nor", "DST", },
-  { kRTypeMask, 42, "slt", "DST", },
-  { kRTypeMask, 43, "sltu", "DST", },
-  { kRTypeMask, 45, "daddu", "DST", },
-  { kRTypeMask, 46, "dsub", "DST", },
-  { kRTypeMask, 47, "dsubu", "DST", },
-  // TODO: seleqz, selnez
-  { kRTypeMask, 56, "dsll", "DTA", },
-  { kRTypeMask, 58, "dsrl", "DTA", },
-  { kRTypeMask, 59, "dsra", "DTA", },
-  { kRTypeMask, 60, "dsll32", "DTA", },
-  { kRTypeMask | (0x1f << 21), 62 | (1 << 21), "drotr32", "DTA", },
-  { kRTypeMask, 62, "dsrl32", "DTA", },
-  { kRTypeMask, 63, "dsra32", "DTA", },
-
-  // SPECIAL0
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 24, "mul", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 24, "muh", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 25, "mulu", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 25, "muhu", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 26, "div", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 26, "mod", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 27, "divu", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 27, "modu", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 28, "dmul", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 28, "dmuh", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 29, "dmulu", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 29, "dmuhu", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 30, "ddiv", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 30, "dmod", "DST" },
-  { kSpecial0Mask | 0x7ff, (2 << 6) | 31, "ddivu", "DST" },
-  { kSpecial0Mask | 0x7ff, (3 << 6) | 31, "dmodu", "DST" },
-  // TODO: [d]clz, [d]clo
-  // TODO: sdbbp
-
-  // J-type instructions.
-  { kJTypeMask, 2 << kOpcodeShift, "j", "L" },
-  { kJTypeMask, 3 << kOpcodeShift, "jal", "L" },
-
-  // I-type instructions.
-  { kITypeMask, 4 << kOpcodeShift, "beq", "STB" },
-  { kITypeMask, 5 << kOpcodeShift, "bne", "STB" },
-  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (1 << 16), "bgez", "SB" },
-  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (0 << 16), "bltz", "SB" },
-  { kITypeMask | (0x1f << 16), 6 << kOpcodeShift | (0 << 16), "blez", "SB" },
-  { kITypeMask | (0x1f << 16), 7 << kOpcodeShift | (0 << 16), "bgtz", "SB" },
-  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (6 << 16), "dahi", "Si", },
-  { kITypeMask | (0x1f << 16), 1 << kOpcodeShift | (30 << 16), "dati", "Si", },
-
-  { 0xffff0000, (4 << kOpcodeShift), "b", "B" },
-  { 0xffff0000, (1 << kOpcodeShift) | (17 << 16), "bal", "B" },
-
-  { kITypeMask, 9 << kOpcodeShift, "addiu", "TSi", },
-  { kITypeMask, 10 << kOpcodeShift, "slti", "TSi", },
-  { kITypeMask, 11 << kOpcodeShift, "sltiu", "TSi", },
-  { kITypeMask, 12 << kOpcodeShift, "andi", "TSi", },
-  { kITypeMask, 13 << kOpcodeShift, "ori", "TSi", },
-  { kITypeMask, 14 << kOpcodeShift, "xori", "TSi", },
-  { kITypeMask | (0x1f << 21), 15 << kOpcodeShift, "lui", "TI", },
-  { kITypeMask, 15 << kOpcodeShift, "aui", "TSI", },
-  { kITypeMask, 25 << kOpcodeShift, "daddiu", "TSi", },
-  { kITypeMask, 29 << kOpcodeShift, "daui", "TSi", },
-
-  { kITypeMask, 32u << kOpcodeShift, "lb", "TO", },
-  { kITypeMask, 33u << kOpcodeShift, "lh", "TO", },
-  { kITypeMask, 35u << kOpcodeShift, "lw", "TO", },
-  { kITypeMask, 36u << kOpcodeShift, "lbu", "TO", },
-  { kITypeMask, 37u << kOpcodeShift, "lhu", "TO", },
-  { kITypeMask, 39u << kOpcodeShift, "lwu", "TO", },
-  { kITypeMask, 40u << kOpcodeShift, "sb", "TO", },
-  { kITypeMask, 41u << kOpcodeShift, "sh", "TO", },
-  { kITypeMask, 43u << kOpcodeShift, "sw", "TO", },
-  { kITypeMask, 49u << kOpcodeShift, "lwc1", "tO", },
-  { kITypeMask, 53u << kOpcodeShift, "ldc1", "tO", },
-  { kITypeMask, 55u << kOpcodeShift, "ld", "TO", },
-  { kITypeMask, 57u << kOpcodeShift, "swc1", "tO", },
-  { kITypeMask, 61u << kOpcodeShift, "sdc1", "tO", },
-  { kITypeMask, 63u << kOpcodeShift, "sd", "TO", },
-
-  // Floating point.
-  { kFpMask | (0x1f << 21), kCop1 | (0x00 << 21), "mfc1", "Td" },
-  { kFpMask | (0x1f << 21), kCop1 | (0x01 << 21), "dmfc1", "Td" },
-  { kFpMask | (0x1f << 21), kCop1 | (0x04 << 21), "mtc1", "Td" },
-  { kFpMask | (0x1f << 21), kCop1 | (0x05 << 21), "dmtc1", "Td" },
-  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 0, "add", "fadt" },
-  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 1, "sub", "fadt" },
-  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 2, "mul", "fadt" },
-  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 3, "div", "fadt" },
-  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 4, "sqrt", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 5, "abs", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 6, "mov", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 7, "neg", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 8, "round.l", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 9, "trunc.l", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 10, "ceil.l", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 11, "floor.l", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 12, "round.w", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 13, "trunc.w", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 14, "ceil.w", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 15, "floor.w", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 32, "cvt.s", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 33, "cvt.d", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 36, "cvt.w", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 37, "cvt.l", "fad" },
-  { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 38, "cvt.ps", "fad" },
-};
-
-static uint32_t ReadU32(const uint8_t* ptr) {
-  // We only support little-endian MIPS64.
-  return ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) | (ptr[3] << 24);
-}
-
-static void DumpMips64(std::ostream& os, const uint8_t* instr_ptr) {
-  uint32_t instruction = ReadU32(instr_ptr);
-
-  uint32_t rs = (instruction >> 21) & 0x1f;  // I-type, R-type.
-  uint32_t rt = (instruction >> 16) & 0x1f;  // I-type, R-type.
-  uint32_t rd = (instruction >> 11) & 0x1f;  // R-type.
-  uint32_t sa = (instruction >>  6) & 0x1f;  // R-type.
-
-  std::string opcode;
-  std::ostringstream args;
-
-  // TODO: remove this!
-  uint32_t op = (instruction >> 26) & 0x3f;
-  uint32_t function = (instruction & 0x3f);  // R-type.
-  opcode = StringPrintf("op=%d fn=%d", op, function);
-
-  for (size_t i = 0; i < arraysize(gMips64Instructions); ++i) {
-    if (gMips64Instructions[i].Matches(instruction)) {
-      opcode = gMips64Instructions[i].name;
-      for (const char* args_fmt = gMips64Instructions[i].args_fmt; *args_fmt; ++args_fmt) {
-        switch (*args_fmt) {
-          case 'A':  // sa (shift amount).
-            args << sa;
-            break;
-          case 'B':  // Branch offset.
-            {
-              int32_t offset = static_cast<int16_t>(instruction & 0xffff);
-              offset <<= 2;
-              offset += 4;  // Delay slot.
-              args << StringPrintf("%p  ; %+d", instr_ptr + offset, offset);
-            }
-            break;
-          case 'D': args << 'r' << rd; break;
-          case 'd': args << 'f' << rd; break;
-          case 'a': args << 'f' << sa; break;
-          case 'f':  // Floating point "fmt".
-            {
-              size_t fmt = (instruction >> 21) & 0x7;  // TODO: other fmts?
-              switch (fmt) {
-                case 0: opcode += ".s"; break;
-                case 1: opcode += ".d"; break;
-                case 4: opcode += ".w"; break;
-                case 5: opcode += ".l"; break;
-                case 6: opcode += ".ps"; break;
-                default: opcode += ".?"; break;
-              }
-              continue;  // No ", ".
-            }
-          case 'I':  // Upper 16-bit immediate.
-            args << reinterpret_cast<void*>((instruction & 0xffff) << 16);
-            break;
-          case 'i':  // Sign-extended lower 16-bit immediate.
-            args << static_cast<int16_t>(instruction & 0xffff);
-            break;
-          case 'L':  // Jump label.
-            {
-              // TODO: is this right?
-              uint32_t instr_index = (instruction & 0x1ffffff);
-              uint32_t target = (instr_index << 2);
-              target |= (reinterpret_cast<uintptr_t>(instr_ptr + 4)
-                        & 0xf0000000);
-              args << reinterpret_cast<void*>(target);
-            }
-            break;
-          case 'O':  // +x(rs)
-            {
-              int32_t offset = static_cast<int16_t>(instruction & 0xffff);
-              args << StringPrintf("%+d(r%d)", offset, rs);
-              if (rs == 17) {
-                args << "  ; ";
-                Thread::DumpThreadOffset<8>(args, offset);
-              }
-            }
-            break;
-          case 'S': args << 'r' << rs; break;
-          case 's': args << 'f' << rs; break;
-          case 'T': args << 'r' << rt; break;
-          case 't': args << 'f' << rt; break;
-        }
-        if (*(args_fmt + 1)) {
-          args << ", ";
-        }
-      }
-      break;
-    }
-  }
-
-  os << StringPrintf("%p: %08x\t%-7s ", instr_ptr, instruction, opcode.c_str())
-     << args.str() << '\n';
-}
-
-size_t DisassemblerMips64::Dump(std::ostream& os, const uint8_t* begin) {
-  DumpMips64(os, begin);
-  return 4;
-}
-
-void DisassemblerMips64::Dump(std::ostream& os, const uint8_t* begin,
-                            const uint8_t* end) {
-  for (const uint8_t* cur = begin; cur < end; cur += 4) {
-    DumpMips64(os, cur);
-  }
-}
-
-}  // namespace mips64
-}  // namespace art
diff --git a/disassembler/disassembler_mips64.h b/disassembler/disassembler_mips64.h
deleted file mode 100644
index 06efdc8..0000000
--- a/disassembler/disassembler_mips64.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_DISASSEMBLER_DISASSEMBLER_MIPS64_H_
-#define ART_DISASSEMBLER_DISASSEMBLER_MIPS64_H_
-
-#include <vector>
-
-#include "disassembler.h"
-
-namespace art {
-namespace mips64 {
-
-class DisassemblerMips64 FINAL : public Disassembler {
- public:
-  explicit DisassemblerMips64(DisassemblerOptions* options) : Disassembler(options) {}
-
-  size_t Dump(std::ostream& os, const uint8_t* begin) OVERRIDE;
-  void Dump(std::ostream& os, const uint8_t* begin, const uint8_t* end) OVERRIDE;
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(DisassemblerMips64);
-};
-
-}  // namespace mips64
-}  // namespace art
-
-#endif  // ART_DISASSEMBLER_DISASSEMBLER_MIPS64_H_
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index a1834e1..ba0c0bd 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -942,7 +942,7 @@
           opcode1 = "pextrw";
           prefix[2] = 0;
           has_modrm = true;
-          store = true;
+          load = true;
           src_reg_file = SSE;
           immediate_bytes = 1;
         } else {
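
The pextrw fix above flips the operand direction: pextrw extracts a word from an XMM source into a general-purpose destination, so the ModRM reg field is the destination, which the disassembler treats as a load rather than a store. A sketch of that operand-ordering convention under that assumption (the Format helper and register names are illustrative):

    #include <cstdio>
    #include <string>

    // If 'load' is set, the ModRM reg operand is the destination and is
    // printed first; otherwise the r/m operand is.
    static std::string Format(const char* op, const char* reg,
                              const char* rm, bool load) {
      return std::string(op) + " " + (load ? reg : rm) + ", " +
             (load ? rm : reg);
    }

    int main() {
      // pextrw reads a word out of an XMM register into a GP register.
      std::printf("%s\n", Format("pextrw", "eax", "xmm1", /*load=*/true).c_str());
      return 0;
    }
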
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 9b57ecb..34a4c14 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -168,7 +168,10 @@
        << "\n\n";
 
     const uint8_t* image_begin_unaligned = boot_image_header.GetImageBegin();
-    const uint8_t* image_end_unaligned = image_begin_unaligned + boot_image_header.GetImageSize();
+    const uint8_t* image_mirror_end_unaligned = image_begin_unaligned +
+        boot_image_header.GetImageSize();
+    const uint8_t* image_end_unaligned = image_mirror_end_unaligned +
+        boot_image_header.GetArtFieldsSize();
 
     // Adjust range to nearest page
     const uint8_t* image_begin = AlignDown(image_begin_unaligned, kPageSize);
@@ -350,7 +353,7 @@
     size_t dirty_object_bytes = 0;
     {
       const uint8_t* begin_image_ptr = image_begin_unaligned;
-      const uint8_t* end_image_ptr = image_end_unaligned;
+      const uint8_t* end_image_ptr = image_mirror_end_unaligned;
 
       const uint8_t* current = begin_image_ptr + RoundUp(sizeof(ImageHeader), kObjectAlignment);
       while (reinterpret_cast<const uintptr_t>(current)
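
The imgdiag change above distinguishes the end of the mirror-object region from the end of the whole image, which now also carries the native ArtField section; the dirty-object walk stops at the mirror end while the page-level diff spans the full image. A sketch of the page rounding involved, assuming a 4 KiB page and power-of-two alignment (ART's AlignDown/RoundUp helpers behave like these in that case):

    #include <cstdint>

    constexpr uintptr_t kPageSize = 4096;  // Assumed here; ART queries the OS.

    constexpr uintptr_t AlignDown(uintptr_t p) { return p & ~(kPageSize - 1); }
    constexpr uintptr_t AlignUp(uintptr_t p) {
      return (p + kPageSize - 1) & ~(kPageSize - 1);
    }

    static_assert(AlignDown(0x12345) == 0x12000, "begin rounds down to a page");
    static_assert(AlignUp(0x12345) == 0x13000, "end rounds up to a page");
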
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 322d3aa..a36e5b1 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "arch/instruction_set_features.h"
+#include "art_field-inl.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
@@ -40,7 +41,6 @@
 #include "image.h"
 #include "indenter.h"
 #include "mapping_table.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
@@ -1549,9 +1549,6 @@
     } else if (type->IsClassClass()) {
       mirror::Class* klass = value->AsClass();
       os << StringPrintf("%p   Class: %s\n", klass, PrettyDescriptor(klass).c_str());
-    } else if (type->IsArtFieldClass()) {
-      mirror::ArtField* field = value->AsArtField();
-      os << StringPrintf("%p   Field: %s\n", field, PrettyField(field).c_str());
     } else if (type->IsArtMethodClass()) {
       mirror::ArtMethod* method = value->AsArtMethod();
       os << StringPrintf("%p   Method: %s\n", method, PrettyMethod(method).c_str());
@@ -1560,7 +1557,7 @@
     }
   }
 
-  static void PrintField(std::ostream& os, mirror::ArtField* field, mirror::Object* obj)
+  static void PrintField(std::ostream& os, ArtField* field, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     os << StringPrintf("%s: ", field->GetName());
     switch (field->GetTypeAsPrimitiveType()) {
@@ -1619,12 +1616,9 @@
     if (super != nullptr) {
       DumpFields(os, obj, super);
     }
-    mirror::ObjectArray<mirror::ArtField>* fields = klass->GetIFields();
-    if (fields != nullptr) {
-      for (int32_t i = 0; i < fields->GetLength(); i++) {
-        mirror::ArtField* field = fields->Get(i);
-        PrintField(os, field, obj);
-      }
+    ArtField* fields = klass->GetIFields();
+    for (size_t i = 0, count = klass->NumInstanceFields(); i < count; i++) {
+      PrintField(os, &fields[i], obj);
     }
   }
 
@@ -1686,9 +1680,6 @@
       mirror::Class* klass = obj->AsClass();
       os << StringPrintf("%p: java.lang.Class \"%s\" (", obj, PrettyDescriptor(klass).c_str())
          << klass->GetStatus() << ")\n";
-    } else if (obj->IsArtField()) {
-      os << StringPrintf("%p: java.lang.reflect.ArtField %s\n", obj,
-                         PrettyField(obj->AsArtField()).c_str());
     } else if (obj->IsArtMethod()) {
       os << StringPrintf("%p: java.lang.reflect.ArtMethod %s\n", obj,
                          PrettyMethod(obj->AsArtMethod()).c_str());
@@ -1725,14 +1716,15 @@
         PrettyObjectValue(indent_os, value_class, value);
       }
     } else if (obj->IsClass()) {
-      mirror::ObjectArray<mirror::ArtField>* sfields = obj->AsClass()->GetSFields();
-      if (sfields != nullptr) {
+      mirror::Class* klass = obj->AsClass();
+      ArtField* sfields = klass->GetSFields();
+      const size_t num_fields = klass->NumStaticFields();
+      if (num_fields != 0) {
         indent_os << "STATICS:\n";
         Indenter indent2_filter(indent_os.rdbuf(), kIndentChar, kIndentBy1Count);
         std::ostream indent2_os(&indent2_filter);
-        for (int32_t i = 0; i < sfields->GetLength(); i++) {
-          mirror::ArtField* field = sfields->Get(i);
-          PrintField(indent2_os, field, field->GetDeclaringClass());
+        for (size_t i = 0; i < num_fields; i++) {
+          PrintField(indent2_os, &sfields[i], sfields[i].GetDeclaringClass());
         }
       }
     } else if (obj->IsArtMethod()) {
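
Throughout the oatdump changes above, fields move from a managed ObjectArray to a flat native array walked by index against a separate count. A minimal sketch of the new shape (FieldInfo and the field names below are hypothetical stand-ins for ART's ArtField):

    #include <cstddef>
    #include <cstdio>

    struct FieldInfo {
      const char* name;  // Stand-in for ArtField's richer state.
    };

    // Equivalent of the new loops: GetIFields()/GetSFields() return a raw
    // array, and NumInstanceFields()/NumStaticFields() bound the walk.
    static void PrintFields(const FieldInfo* fields, size_t count) {
      for (size_t i = 0; i < count; ++i) {
        std::printf("%s\n", fields[i].name);
      }
    }

    int main() {
      const FieldInfo fields[] = { {"x"}, {"y"} };
      PrintFields(fields, sizeof(fields) / sizeof(fields[0]));
      return 0;
    }
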
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 9584064..74c9c38 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -24,6 +24,7 @@
 #include <string>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "base/dumpable.h"
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
@@ -34,7 +35,6 @@
 #include "elf_file_impl.h"
 #include "gc/space/image_space.h"
 #include "image.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference.h"
@@ -415,13 +415,64 @@
   return true;
 }
 
+void PatchOat::PatchArtFields(const ImageHeader* image_header) {
+  const size_t art_field_size = image_header->GetArtFieldsSize();
+  const size_t art_field_offset = image_header->GetArtFieldsOffset();
+  for (size_t pos = 0; pos < art_field_size; pos += sizeof(ArtField)) {
+    auto* field = reinterpret_cast<ArtField*>(heap_->Begin() + art_field_offset + pos);
+    auto* dest_field = RelocatedCopyOf(field);
+    dest_field->SetDeclaringClass(RelocatedAddressOfPointer(field->GetDeclaringClass()));
+  }
+}
+
+void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) {
+  auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>(
+      img_roots->Get(ImageHeader::kDexCaches));
+  for (size_t i = 0, count = dex_caches->GetLength(); i < count; ++i) {
+    auto* dex_cache = dex_caches->GetWithoutChecks(i);
+    auto* fields = dex_cache->GetResolvedFields();
+    if (fields == nullptr) {
+      continue;
+    }
+    CHECK(!fields->IsObjectArray());
+    CHECK(fields->IsArrayInstance());
+    auto* component_type = fields->GetClass()->GetComponentType();
+    if (component_type->IsPrimitiveInt()) {
+      mirror::IntArray* arr = fields->AsIntArray();
+      mirror::IntArray* copy_arr = down_cast<mirror::IntArray*>(RelocatedCopyOf(arr));
+      for (size_t j = 0, count2 = arr->GetLength(); j < count2; ++j) {
+        auto f = arr->GetWithoutChecks(j);
+        if (f != 0) {
+          copy_arr->SetWithoutChecks<false>(j, f + delta_);
+        }
+      }
+    } else {
+      CHECK(component_type->IsPrimitiveLong());
+      mirror::LongArray* arr = fields->AsLongArray();
+      mirror::LongArray* copy_arr = down_cast<mirror::LongArray*>(RelocatedCopyOf(arr));
+      for (size_t j = 0, count2 = arr->GetLength(); j < count2; ++j) {
+        auto f = arr->GetWithoutChecks(j);
+        if (f != 0) {
+          copy_arr->SetWithoutChecks<false>(j, f + delta_);
+        }
+      }
+    }
+  }
+}
+
 bool PatchOat::PatchImage() {
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   CHECK_GT(image_->Size(), sizeof(ImageHeader));
   // These are the roots from the original file.
-  mirror::Object* img_roots = image_header->GetImageRoots();
+  auto* img_roots = image_header->GetImageRoots();
   image_header->RelocateImage(delta_);
 
+  // Patch and update ArtFields.
+  PatchArtFields(image_header);
+
+  // Patch dex file int/long arrays which point to ArtFields.
+  PatchDexFileArrays(img_roots);
+
   VisitObject(img_roots);
   if (!image_header->IsValid()) {
     LOG(ERROR) << "reloction renders image header invalid";
@@ -448,7 +499,7 @@
                                          bool is_static_unused ATTRIBUTE_UNUSED) const {
   mirror::Object* referent = obj->GetFieldObject<mirror::Object, kVerifyNone>(off);
   DCHECK(patcher_->InHeap(referent)) << "Referent is not in the heap.";
-  mirror::Object* moved_object = patcher_->RelocatedAddressOf(referent);
+  mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
 
@@ -457,30 +508,10 @@
   MemberOffset off = mirror::Reference::ReferentOffset();
   mirror::Object* referent = ref->GetReferent();
   DCHECK(patcher_->InHeap(referent)) << "Referent is not in the heap.";
-  mirror::Object* moved_object = patcher_->RelocatedAddressOf(referent);
+  mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
 
-mirror::Object* PatchOat::RelocatedCopyOf(mirror::Object* obj) {
-  if (obj == nullptr) {
-    return nullptr;
-  }
-  DCHECK_GT(reinterpret_cast<uintptr_t>(obj), reinterpret_cast<uintptr_t>(heap_->Begin()));
-  DCHECK_LT(reinterpret_cast<uintptr_t>(obj), reinterpret_cast<uintptr_t>(heap_->End()));
-  uintptr_t heap_off =
-      reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(heap_->Begin());
-  DCHECK_LT(heap_off, image_->Size());
-  return reinterpret_cast<mirror::Object*>(image_->Begin() + heap_off);
-}
-
-mirror::Object* PatchOat::RelocatedAddressOf(mirror::Object* obj) {
-  if (obj == nullptr) {
-    return nullptr;
-  } else {
-    return reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(obj) + delta_);
-  }
-}
-
 const OatHeader* PatchOat::GetOatHeader(const ElfFile* elf_file) {
   if (elf_file->Is64Bit()) {
     return GetOatHeader<ElfFileImpl64>(elf_file->GetImpl64());
@@ -507,7 +538,7 @@
   if (kUseBakerOrBrooksReadBarrier) {
     object->AssertReadBarrierPointer();
     if (kUseBrooksReadBarrier) {
-      mirror::Object* moved_to = RelocatedAddressOf(object);
+      mirror::Object* moved_to = RelocatedAddressOfPointer(object);
       copy->SetReadBarrierPointer(moved_to);
       DCHECK_EQ(copy->GetReadBarrierPointer(), moved_to);
     }
@@ -516,6 +547,12 @@
   object->VisitReferences<true, kVerifyNone>(visitor, visitor);
   if (object->IsArtMethod<kVerifyNone>()) {
     FixupMethod(down_cast<mirror::ArtMethod*>(object), down_cast<mirror::ArtMethod*>(copy));
+  } else if (object->IsClass<kVerifyNone>()) {
+    mirror::Class* klass = down_cast<mirror::Class*>(object);
+    down_cast<mirror::Class*>(copy)->SetSFieldsUnchecked(
+        RelocatedAddressOfPointer(klass->GetSFields()));
+    down_cast<mirror::Class*>(copy)->SetIFieldsUnchecked(
+        RelocatedAddressOfPointer(klass->GetIFields()));
   }
 }
 
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 578df3a..418650a 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -117,12 +117,31 @@
   bool PatchOatHeader(ElfFileImpl* oat_file);
 
   bool PatchImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PatchArtFields(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool WriteElf(File* out);
   bool WriteImage(File* out);
 
-  mirror::Object* RelocatedCopyOf(mirror::Object*);
-  mirror::Object* RelocatedAddressOf(mirror::Object* obj);
+  template <typename T>
+  T* RelocatedCopyOf(T* obj) {
+    if (obj == nullptr) {
+      return nullptr;
+    }
+    DCHECK_GT(reinterpret_cast<uintptr_t>(obj), reinterpret_cast<uintptr_t>(heap_->Begin()));
+    DCHECK_LT(reinterpret_cast<uintptr_t>(obj), reinterpret_cast<uintptr_t>(heap_->End()));
+    uintptr_t heap_off =
+        reinterpret_cast<uintptr_t>(obj) - reinterpret_cast<uintptr_t>(heap_->Begin());
+    DCHECK_LT(heap_off, image_->Size());
+    return reinterpret_cast<T*>(image_->Begin() + heap_off);
+  }
+
+  template <typename T>
+  T* RelocatedAddressOfPointer(T* obj) {
+    return obj == nullptr ? nullptr :
+        reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(obj) + delta_);
+  }
 
   // Look up the oat header from any elf file.
   static const OatHeader* GetOatHeader(const ElfFile* elf_file);
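
The two templated helpers above replace the Object-only versions deleted from patchoat.cc: RelocatedCopyOf maps an address in the mapped heap into the staged output image, and RelocatedAddressOfPointer shifts a pointer by the relocation delta. A minimal sketch of the delta form, with the delta passed explicitly instead of read from the patcher's delta_ member:

    #include <cstdint>

    template <typename T>
    T* RelocatedAddressOfPointer(T* obj, intptr_t delta) {
      // Null stays null; everything else moves by the base-address shift.
      return obj == nullptr
          ? nullptr
          : reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(obj) + delta);
    }

    // Example: with delta = 0x1000, a pointer at 0x70000000 in the old
    // mapping lands at 0x70001000 in the relocated one.
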
diff --git a/runtime/Android.mk b/runtime/Android.mk
index c0e7f47..d3488fc 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -19,6 +19,7 @@
 include art/build/Android.common_build.mk
 
 LIBART_COMMON_SRC_FILES := \
+  art_field.cc \
   atomic.cc.arm \
   barrier.cc \
   base/allocator.cc \
@@ -96,9 +97,9 @@
   jit/jit_instrumentation.cc \
   jni_internal.cc \
   jobject_comparator.cc \
+  linear_alloc.cc \
   mem_map.cc \
   memory_region.cc \
-  mirror/art_field.cc \
   mirror/art_method.cc \
   mirror/array.cc \
   mirror/class.cc \
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.cc b/runtime/arch/mips64/instruction_set_features_mips64.cc
index 8c48a08..5c0c914 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64.cc
@@ -27,7 +27,6 @@
 const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
   if (variant != "default" && variant != "mips64r6") {
-    std::ostringstream os;
     LOG(WARNING) << "Unexpected CPU variant for Mips64 using defaults: " << variant;
   }
   bool smp = true;  // Conservative default.
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 0769687..d7de119 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -16,9 +16,9 @@
 
 #include <cstdio>
 
+#include "art_field-inl.h"
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
@@ -1305,7 +1305,7 @@
 }
 
 
-static void GetSetBooleanStatic(Handle<mirror::ArtField>* f, Thread* self,
+static void GetSetBooleanStatic(ArtField* f, Thread* self,
                                 mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
@@ -1313,14 +1313,14 @@
   uint8_t values[num_values] = { 0, 1, 2, 128, 0xFF };
 
   for (size_t i = 0; i < num_values; ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
                               StubTest::GetEntrypoint(self, kQuickSet8Static),
                               self,
                               referrer);
 
-    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
                                            StubTest::GetEntrypoint(self, kQuickGetBooleanStatic),
                                            self,
@@ -1335,21 +1335,21 @@
   std::cout << "Skipping set_boolean_static as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
-static void GetSetByteStatic(Handle<mirror::ArtField>* f, Thread* self,
-                             mirror::ArtMethod* referrer, StubTest* test)
+static void GetSetByteStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
+                             StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   int8_t values[] = { -128, -64, 0, 64, 127 };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
                               StubTest::GetEntrypoint(self, kQuickSet8Static),
                               self,
                               referrer);
 
-    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
                                            StubTest::GetEntrypoint(self, kQuickGetByteStatic),
                                            self,
@@ -1365,26 +1365,26 @@
 }
 
 
-static void GetSetBooleanInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
-                                  Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+static void GetSetBooleanInstance(Handle<mirror::Object>* obj, ArtField* f, Thread* self,
+                                  mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   uint8_t values[] = { 0, true, 2, 128, 0xFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
                               StubTest::GetEntrypoint(self, kQuickSet8Instance),
                               self,
                               referrer);
 
-    uint8_t res = f->Get()->GetBoolean(obj->Get());
+    uint8_t res = f->GetBoolean(obj->Get());
     EXPECT_EQ(values[i], res) << "Iteration " << i;
 
-    f->Get()->SetBoolean<false>(obj->Get(), res);
+    f->SetBoolean<false>(obj->Get(), res);
 
-    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                             reinterpret_cast<size_t>(obj->Get()),
                                             0U,
                                             StubTest::GetEntrypoint(self, kQuickGetBooleanInstance),
@@ -1399,25 +1399,25 @@
   std::cout << "Skipping set_boolean_instance as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
-static void GetSetByteInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+static void GetSetByteInstance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   int8_t values[] = { -128, -64, 0, 64, 127 };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
                               StubTest::GetEntrypoint(self, kQuickSet8Instance),
                               self,
                               referrer);
 
-    int8_t res = f->Get()->GetByte(obj->Get());
+    int8_t res = f->GetByte(obj->Get());
     EXPECT_EQ(res, values[i]) << "Iteration " << i;
-    f->Get()->SetByte<false>(obj->Get(), ++res);
+    f->SetByte<false>(obj->Get(), ++res);
 
-    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                             reinterpret_cast<size_t>(obj->Get()),
                                             0U,
                                             StubTest::GetEntrypoint(self, kQuickGetByteInstance),
@@ -1433,21 +1433,21 @@
 #endif
 }
 
-static void GetSetCharStatic(Handle<mirror::ArtField>* f, Thread* self, mirror::ArtMethod* referrer,
+static void GetSetCharStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                              StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
                               StubTest::GetEntrypoint(self, kQuickSet16Static),
                               self,
                               referrer);
 
-    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
                                            StubTest::GetEntrypoint(self, kQuickGetCharStatic),
                                            self,
@@ -1462,21 +1462,21 @@
   std::cout << "Skipping set_char_static as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
-static void GetSetShortStatic(Handle<mirror::ArtField>* f, Thread* self,
+static void GetSetShortStatic(ArtField* f, Thread* self,
                               mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
                               StubTest::GetEntrypoint(self, kQuickSet16Static),
                               self,
                               referrer);
 
-    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
                                            StubTest::GetEntrypoint(self, kQuickGetShortStatic),
                                            self,
@@ -1492,25 +1492,25 @@
 #endif
 }
 
-static void GetSetCharInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
-                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+static void GetSetCharInstance(Handle<mirror::Object>* obj, ArtField* f,
+                               Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   uint16_t values[] = { 0, 1, 2, 255, 32768, 0xFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
                               StubTest::GetEntrypoint(self, kQuickSet16Instance),
                               self,
                               referrer);
 
-    uint16_t res = f->Get()->GetChar(obj->Get());
+    uint16_t res = f->GetChar(obj->Get());
     EXPECT_EQ(res, values[i]) << "Iteration " << i;
-    f->Get()->SetChar<false>(obj->Get(), ++res);
+    f->SetChar<false>(obj->Get(), ++res);
 
-    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                             reinterpret_cast<size_t>(obj->Get()),
                                             0U,
                                             StubTest::GetEntrypoint(self, kQuickGetCharInstance),
@@ -1525,25 +1525,25 @@
   std::cout << "Skipping set_char_instance as I don't know how to do that on " << kRuntimeISA << std::endl;
 #endif
 }
-static void GetSetShortInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+static void GetSetShortInstance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   int16_t values[] = { -0x7FFF, -32768, 0, 255, 32767, 0x7FFE };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
                               StubTest::GetEntrypoint(self, kQuickSet16Instance),
                               self,
                               referrer);
 
-    int16_t res = f->Get()->GetShort(obj->Get());
+    int16_t res = f->GetShort(obj->Get());
     EXPECT_EQ(res, values[i]) << "Iteration " << i;
-    f->Get()->SetShort<false>(obj->Get(), ++res);
+    f->SetShort<false>(obj->Get(), ++res);
 
-    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                             reinterpret_cast<size_t>(obj->Get()),
                                             0U,
                                             StubTest::GetEntrypoint(self, kQuickGetShortInstance),
@@ -1559,21 +1559,21 @@
 #endif
 }
 
-static void GetSet32Static(Handle<mirror::ArtField>* f, Thread* self, mirror::ArtMethod* referrer,
+static void GetSet32Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                            StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               static_cast<size_t>(values[i]),
                               0U,
                               StubTest::GetEntrypoint(self, kQuickSet32Static),
                               self,
                               referrer);
 
-    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
                                            StubTest::GetEntrypoint(self, kQuickGet32Static),
                                            self,
@@ -1590,27 +1590,27 @@
 }
 
 
-static void GetSet32Instance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+static void GetSet32Instance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
   uint32_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
                               StubTest::GetEntrypoint(self, kQuickSet32Instance),
                               self,
                               referrer);
 
-    int32_t res = f->Get()->GetInt(obj->Get());
+    int32_t res = f->GetInt(obj->Get());
     EXPECT_EQ(res, static_cast<int32_t>(values[i])) << "Iteration " << i;
 
     res++;
-    f->Get()->SetInt<false>(obj->Get(), res);
+    f->SetInt<false>(obj->Get(), res);
 
-    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                             reinterpret_cast<size_t>(obj->Get()),
                                             0U,
                                             StubTest::GetEntrypoint(self, kQuickGet32Instance),
@@ -1649,17 +1649,17 @@
 }
 #endif
 
-static void GetSetObjStatic(Handle<mirror::ArtField>* f, Thread* self, mirror::ArtMethod* referrer,
+static void GetSetObjStatic(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                             StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+  set_and_check_static(f->GetDexFieldIndex(), nullptr, self, referrer, test);
 
   // Allocate a string object for simplicity.
   mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
-  set_and_check_static((*f)->GetDexFieldIndex(), str, self, referrer, test);
+  set_and_check_static(f->GetDexFieldIndex(), str, self, referrer, test);
 
-  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+  set_and_check_static(f->GetDexFieldIndex(), nullptr, self, referrer, test);
 #else
   UNUSED(f, self, referrer, test);
   LOG(INFO) << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA;
@@ -1670,18 +1670,18 @@
 
 
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
-static void set_and_check_instance(Handle<mirror::ArtField>* f, mirror::Object* trg,
+static void set_and_check_instance(ArtField* f, mirror::Object* trg,
                                    mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
                                    StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+  test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                             reinterpret_cast<size_t>(trg),
                             reinterpret_cast<size_t>(val),
                             StubTest::GetEntrypoint(self, kQuickSetObjInstance),
                             self,
                             referrer);
 
-  size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                          reinterpret_cast<size_t>(trg),
                                          0U,
                                          StubTest::GetEntrypoint(self, kQuickGetObjInstance),
@@ -1690,11 +1690,11 @@
 
   EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
 
-  EXPECT_EQ(val, f->Get()->GetObj(trg));
+  EXPECT_EQ(val, f->GetObj(trg));
 }
 #endif
 
-static void GetSetObjInstance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+static void GetSetObjInstance(Handle<mirror::Object>* obj, ArtField* f,
                               Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || (defined(__x86_64__) && !defined(__APPLE__))
@@ -1716,20 +1716,20 @@
 
 // TODO: Complete these tests for 32b architectures.
 
-static void GetSet64Static(Handle<mirror::ArtField>* f, Thread* self, mirror::ArtMethod* referrer,
+static void GetSet64Static(ArtField* f, Thread* self, mirror::ArtMethod* referrer,
                            StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3UWithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3UWithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                values[i],
                                StubTest::GetEntrypoint(self, kQuickSet64Static),
                                self,
                                referrer);
 
-    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                            0U, 0U,
                                            StubTest::GetEntrypoint(self, kQuickGet64Static),
                                            self,
@@ -1746,27 +1746,27 @@
 }
 
 
-static void GetSet64Instance(Handle<mirror::Object>* obj, Handle<mirror::ArtField>* f,
+static void GetSet64Instance(Handle<mirror::Object>* obj, ArtField* f,
                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if (defined(__x86_64__) && !defined(__APPLE__)) || defined(__aarch64__)
   uint64_t values[] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
 
   for (size_t i = 0; i < arraysize(values); ++i) {
-    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                               reinterpret_cast<size_t>(obj->Get()),
                               static_cast<size_t>(values[i]),
                               StubTest::GetEntrypoint(self, kQuickSet64Instance),
                               self,
                               referrer);
 
-    int64_t res = f->Get()->GetLong(obj->Get());
+    int64_t res = f->GetLong(obj->Get());
     EXPECT_EQ(res, static_cast<int64_t>(values[i])) << "Iteration " << i;
 
     res++;
-    f->Get()->SetLong<false>(obj->Get(), res);
+    f->SetLong<false>(obj->Get(), res);
 
-    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>(f->GetDexFieldIndex()),
                                             reinterpret_cast<size_t>(obj->Get()),
                                             0U,
                                             StubTest::GetEntrypoint(self, kQuickGet64Instance),
@@ -1792,7 +1792,7 @@
   CHECK(o != NULL);
 
   ScopedObjectAccess soa(self);
-  StackHandleScope<5> hs(self);
+  StackHandleScope<4> hs(self);
   Handle<mirror::Object> obj(hs.NewHandle(soa.Decode<mirror::Object*>(o)));
   Handle<mirror::Class> c(hs.NewHandle(obj->GetClass()));
   // Need a method as a referrer
@@ -1801,112 +1801,80 @@
   // Play with it...
 
   // Static fields.
-  {
-    Handle<mirror::ObjectArray<mirror::ArtField>> fields(hs.NewHandle(c.Get()->GetSFields()));
-    int32_t num_fields = fields->GetLength();
-    for (int32_t i = 0; i < num_fields; ++i) {
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::ArtField> f(hs2.NewHandle(fields->Get(i)));
-
-      Primitive::Type type = f->GetTypeAsPrimitiveType();
-      switch (type) {
-        case Primitive::Type::kPrimBoolean:
-          if (test_type == type) {
-            GetSetBooleanStatic(&f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimByte:
-          if (test_type == type) {
-            GetSetByteStatic(&f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimChar:
-          if (test_type == type) {
-            GetSetCharStatic(&f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimShort:
-          if (test_type == type) {
-            GetSetShortStatic(&f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimInt:
-          if (test_type == type) {
-            GetSet32Static(&f, self, m.Get(), test);
-          }
-          break;
-
-        case Primitive::Type::kPrimLong:
-          if (test_type == type) {
-            GetSet64Static(&f, self, m.Get(), test);
-          }
-          break;
-
-        case Primitive::Type::kPrimNot:
-          // Don't try array.
-          if (test_type == type && f->GetTypeDescriptor()[0] != '[') {
-            GetSetObjStatic(&f, self, m.Get(), test);
-          }
-          break;
-
-        default:
-          break;  // Skip.
-      }
+  ArtField* fields = c->GetSFields();
+  size_t num_fields = c->NumStaticFields();
+  for (size_t i = 0; i < num_fields; ++i) {
+    ArtField* f = &fields[i];
+    Primitive::Type type = f->GetTypeAsPrimitiveType();
+    if (test_type != type) {
+      continue;
+    }
+    switch (type) {
+      case Primitive::Type::kPrimBoolean:
+        GetSetBooleanStatic(f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimByte:
+        GetSetByteStatic(f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimChar:
+        GetSetCharStatic(f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimShort:
+        GetSetShortStatic(f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimInt:
+        GetSet32Static(f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimLong:
+        GetSet64Static(f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimNot:
+        // Don't try array.
+        if (f->GetTypeDescriptor()[0] != '[') {
+          GetSetObjStatic(f, self, m.Get(), test);
+        }
+        break;
+      default:
+        break;  // Skip.
     }
   }
 
   // Instance fields.
-  {
-    Handle<mirror::ObjectArray<mirror::ArtField>> fields(hs.NewHandle(c.Get()->GetIFields()));
-    int32_t num_fields = fields->GetLength();
-    for (int32_t i = 0; i < num_fields; ++i) {
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::ArtField> f(hs2.NewHandle(fields->Get(i)));
-
-      Primitive::Type type = f->GetTypeAsPrimitiveType();
-      switch (type) {
-        case Primitive::Type::kPrimBoolean:
-          if (test_type == type) {
-            GetSetBooleanInstance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimByte:
-          if (test_type == type) {
-            GetSetByteInstance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimChar:
-          if (test_type == type) {
-            GetSetCharInstance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimShort:
-          if (test_type == type) {
-            GetSetShortInstance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-        case Primitive::Type::kPrimInt:
-          if (test_type == type) {
-            GetSet32Instance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-
-        case Primitive::Type::kPrimLong:
-          if (test_type == type) {
-            GetSet64Instance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-
-        case Primitive::Type::kPrimNot:
-          // Don't try array.
-          if (test_type == type && f->GetTypeDescriptor()[0] != '[') {
-            GetSetObjInstance(&obj, &f, self, m.Get(), test);
-          }
-          break;
-
-        default:
-          break;  // Skip.
-      }
+  fields = c->GetIFields();
+  num_fields = c->NumInstanceFields();
+  for (size_t i = 0; i < num_fields; ++i) {
+    ArtField* f = &fields[i];
+    Primitive::Type type = f->GetTypeAsPrimitiveType();
+    if (test_type != type) {
+      continue;
+    }
+    switch (type) {
+      case Primitive::Type::kPrimBoolean:
+        GetSetBooleanInstance(&obj, f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimByte:
+        GetSetByteInstance(&obj, f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimChar:
+        GetSetCharInstance(&obj, f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimShort:
+        GetSetShortInstance(&obj, f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimInt:
+        GetSet32Instance(&obj, f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimLong:
+        GetSet64Instance(&obj, f, self, m.Get(), test);
+        break;
+      case Primitive::Type::kPrimNot:
+        // Don't try array.
+        if (f->GetTypeDescriptor()[0] != '[') {
+          GetSetObjInstance(&obj, f, self, m.Get(), test);
+        }
+        break;
+      default:
+        break;  // Skip.
     }
   }
 
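The hunk above replaces the handle-wrapped ObjectArray<ArtField> walk with direct iteration over the class's native field arrays. A minimal sketch of the new access pattern, assuming a resolved mirror::Class* klass held under the mutator lock:

    // Fields are plain native objects now, so no handles or per-element
    // GC bookkeeping are needed while iterating.
    ArtField* fields = klass->GetSFields();
    for (size_t i = 0, n = klass->NumStaticFields(); i < n; ++i) {
      ArtField* f = &fields[i];
      if (f->GetTypeAsPrimitiveType() == Primitive::kPrimInt) {
        // ... exercise the field ...
      }
    }
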
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 8227633..ef39999 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -63,7 +63,6 @@
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
-    std::ostringstream os;
     LOG(WARNING) << "Unexpected CPU variant for X86 using defaults: " << variant;
   }
 
diff --git a/runtime/mirror/art_field-inl.h b/runtime/art_field-inl.h
similarity index 78%
rename from runtime/mirror/art_field-inl.h
rename to runtime/art_field-inl.h
index 986852f..aeb1273 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -14,57 +14,51 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_MIRROR_ART_FIELD_INL_H_
-#define ART_RUNTIME_MIRROR_ART_FIELD_INL_H_
+#ifndef ART_RUNTIME_ART_FIELD_INL_H_
+#define ART_RUNTIME_ART_FIELD_INL_H_
 
 #include "art_field.h"
 
 #include "base/logging.h"
 #include "class_linker.h"
-#include "dex_cache.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jvalue.h"
-#include "object-inl.h"
+#include "mirror/dex_cache.h"
+#include "mirror/object-inl.h"
 #include "primitive.h"
 #include "thread-inl.h"
 #include "scoped_thread_state_change.h"
 #include "well_known_classes.h"
 
 namespace art {
-namespace mirror {
 
-inline uint32_t ArtField::ClassSize() {
-  uint32_t vtable_entries = Object::kVTableLength;
-  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0);
-}
-
-inline Class* ArtField::GetDeclaringClass() {
-  Class* result = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_));
-  DCHECK(result != NULL);
+inline mirror::Class* ArtField::GetDeclaringClass() {
+  mirror::Class* result = declaring_class_.Read();
+  DCHECK(result != nullptr);
   DCHECK(result->IsLoaded() || result->IsErroneous());
   return result;
 }
 
-inline void ArtField::SetDeclaringClass(Class *new_declaring_class) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ArtField, declaring_class_), new_declaring_class);
+inline void ArtField::SetDeclaringClass(mirror::Class* new_declaring_class) {
+  declaring_class_ = GcRoot<mirror::Class>(new_declaring_class);
 }
 
 inline uint32_t ArtField::GetAccessFlags() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
-  return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, access_flags_));
+  return access_flags_;
 }
 
 inline MemberOffset ArtField::GetOffset() {
   DCHECK(GetDeclaringClass()->IsResolved() || GetDeclaringClass()->IsErroneous());
-  return MemberOffset(GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_)));
+  return MemberOffset(offset_);
 }
 
 inline MemberOffset ArtField::GetOffsetDuringLinking() {
   DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
-  return MemberOffset(GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_)));
+  return MemberOffset(offset_);
 }
 
-inline uint32_t ArtField::Get32(Object* object) {
+inline uint32_t ArtField::Get32(mirror::Object* object) {
   DCHECK(object != nullptr) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
@@ -74,7 +68,7 @@
 }
 
 template<bool kTransactionActive>
-inline void ArtField::Set32(Object* object, uint32_t new_value) {
+inline void ArtField::Set32(mirror::Object* object, uint32_t new_value) {
   DCHECK(object != nullptr) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
@@ -84,7 +78,7 @@
   }
 }
 
-inline uint64_t ArtField::Get64(Object* object) {
+inline uint64_t ArtField::Get64(mirror::Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
@@ -94,7 +88,7 @@
 }
 
 template<bool kTransactionActive>
-inline void ArtField::Set64(Object* object, uint64_t new_value) {
+inline void ArtField::Set64(mirror::Object* object, uint64_t new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
@@ -104,17 +98,17 @@
   }
 }
 
-inline Object* ArtField::GetObj(Object* object) {
+inline mirror::Object* ArtField::GetObj(mirror::Object* object) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
-    return object->GetFieldObjectVolatile<Object>(GetOffset());
+    return object->GetFieldObjectVolatile<mirror::Object>(GetOffset());
   }
-  return object->GetFieldObject<Object>(GetOffset());
+  return object->GetFieldObject<mirror::Object>(GetOffset());
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetObj(Object* object, Object* new_value) {
+inline void ArtField::SetObj(mirror::Object* object, mirror::Object* new_value) {
   DCHECK(object != NULL) << PrettyField(this);
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
@@ -143,46 +137,46 @@
     object->SetField ## type<kTransactionActive>(GetOffset(), value); \
   }
 
-inline uint8_t ArtField::GetBoolean(Object* object) {
+inline uint8_t ArtField::GetBoolean(mirror::Object* object) {
   FIELD_GET(object, Boolean);
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetBoolean(Object* object, uint8_t z) {
+inline void ArtField::SetBoolean(mirror::Object* object, uint8_t z) {
   FIELD_SET(object, Boolean, z);
 }
 
-inline int8_t ArtField::GetByte(Object* object) {
+inline int8_t ArtField::GetByte(mirror::Object* object) {
   FIELD_GET(object, Byte);
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetByte(Object* object, int8_t b) {
+inline void ArtField::SetByte(mirror::Object* object, int8_t b) {
   FIELD_SET(object, Byte, b);
 }
 
-inline uint16_t ArtField::GetChar(Object* object) {
+inline uint16_t ArtField::GetChar(mirror::Object* object) {
   FIELD_GET(object, Char);
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetChar(Object* object, uint16_t c) {
+inline void ArtField::SetChar(mirror::Object* object, uint16_t c) {
   FIELD_SET(object, Char, c);
 }
 
-inline int16_t ArtField::GetShort(Object* object) {
+inline int16_t ArtField::GetShort(mirror::Object* object) {
   FIELD_GET(object, Short);
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetShort(Object* object, int16_t s) {
+inline void ArtField::SetShort(mirror::Object* object, int16_t s) {
   FIELD_SET(object, Short, s);
 }
 
 #undef FIELD_GET
 #undef FIELD_SET
 
-inline int32_t ArtField::GetInt(Object* object) {
+inline int32_t ArtField::GetInt(mirror::Object* object) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
@@ -191,7 +185,7 @@
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetInt(Object* object, int32_t i) {
+inline void ArtField::SetInt(mirror::Object* object, int32_t i) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
@@ -199,7 +193,7 @@
   Set32<kTransactionActive>(object, i);
 }
 
-inline int64_t ArtField::GetLong(Object* object) {
+inline int64_t ArtField::GetLong(mirror::Object* object) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
@@ -208,7 +202,7 @@
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetLong(Object* object, int64_t j) {
+inline void ArtField::SetLong(mirror::Object* object, int64_t j) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
     CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
@@ -216,7 +210,7 @@
   Set64<kTransactionActive>(object, j);
 }
 
-inline float ArtField::GetFloat(Object* object) {
+inline float ArtField::GetFloat(mirror::Object* object) {
   DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetI(Get32(object));
@@ -224,14 +218,14 @@
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetFloat(Object* object, float f) {
+inline void ArtField::SetFloat(mirror::Object* object, float f) {
   DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetF(f);
   Set32<kTransactionActive>(object, bits.GetI());
 }
 
-inline double ArtField::GetDouble(Object* object) {
+inline double ArtField::GetDouble(mirror::Object* object) {
   DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetJ(Get64(object));
@@ -239,20 +233,20 @@
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetDouble(Object* object, double d) {
+inline void ArtField::SetDouble(mirror::Object* object, double d) {
   DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
   JValue bits;
   bits.SetD(d);
   Set64<kTransactionActive>(object, bits.GetJ());
 }
 
-inline Object* ArtField::GetObject(Object* object) {
+inline mirror::Object* ArtField::GetObject(mirror::Object* object) {
   DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
   return GetObj(object);
 }
 
 template<bool kTransactionActive>
-inline void ArtField::SetObject(Object* object, Object* l) {
+inline void ArtField::SetObject(mirror::Object* object, mirror::Object* l) {
   DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
   SetObj<kTransactionActive>(object, l);
 }
@@ -291,7 +285,7 @@
 }
 
 template <bool kResolve>
-inline Class* ArtField::GetType() {
+inline mirror::Class* ArtField::GetType() {
   const uint32_t field_index = GetDexFieldIndex();
   auto* declaring_class = GetDeclaringClass();
   if (UNLIKELY(declaring_class->IsProxyClass())) {
@@ -321,7 +315,7 @@
   return GetDexCache()->GetDexFile();
 }
 
-inline String* ArtField::GetStringName(Thread* self, bool resolve) {
+inline mirror::String* ArtField::GetStringName(Thread* self, bool resolve) {
   auto dex_field_index = GetDexFieldIndex();
   CHECK_NE(dex_field_index, DexFile::kDexNoIndex);
   auto* dex_cache = GetDexCache();
@@ -336,7 +330,6 @@
   return name;
 }
 
-}  // namespace mirror
 }  // namespace art
 
-#endif  // ART_RUNTIME_MIRROR_ART_FIELD_INL_H_
+#endif  // ART_RUNTIME_ART_FIELD_INL_H_
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
new file mode 100644
index 0000000..cdf8967
--- /dev/null
+++ b/runtime/art_field.cc
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_field.h"
+
+#include "art_field-inl.h"
+#include "gc/accounting/card_table-inl.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "runtime.h"
+#include "scoped_thread_state_change.h"
+#include "utils.h"
+#include "well_known_classes.h"
+
+namespace art {
+
+ArtField::ArtField() : access_flags_(0), field_dex_idx_(0), offset_(0) {
+  declaring_class_ = GcRoot<mirror::Class>(nullptr);
+}
+
+void ArtField::SetOffset(MemberOffset num_bytes) {
+  DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
+  if (kIsDebugBuild && Runtime::Current()->IsAotCompiler() &&
+      Runtime::Current()->IsCompilingBootImage()) {
+    Primitive::Type type = GetTypeAsPrimitiveType();
+    if (type == Primitive::kPrimDouble || type == Primitive::kPrimLong) {
+      DCHECK_ALIGNED(num_bytes.Uint32Value(), 8);
+    }
+  }
+  // Not called within a transaction.
+  offset_ = num_bytes.Uint32Value();
+}
+
+void ArtField::VisitRoots(RootVisitor* visitor) {
+  declaring_class_.VisitRoot(visitor, RootInfo(kRootStickyClass));
+}
+
+ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
+  DCHECK(klass != nullptr);
+  auto* instance_fields = klass->GetIFields();
+  for (size_t i = 0, count = klass->NumInstanceFields(); i < count; ++i) {
+    if (instance_fields[i].GetOffset().Uint32Value() == field_offset) {
+      return &instance_fields[i];
+    }
+  }
+  // We did not find field in the class: look into superclass.
+  return (klass->GetSuperClass() != nullptr) ?
+      FindInstanceFieldWithOffset(klass->GetSuperClass(), field_offset) : nullptr;
+}
+
+}  // namespace art
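FindInstanceFieldWithOffset recurses through the superclass chain comparing raw byte offsets. A hedged usage sketch (klass and the offset value are hypothetical):

    // Map a raw byte offset inside an instance back to its ArtField,
    // e.g. when diagnosing a fault address; nullptr means no field there.
    ArtField* f = ArtField::FindInstanceFieldWithOffset(klass, 12u);
    if (f != nullptr) {
      LOG(INFO) << "Offset 12 belongs to " << PrettyField(f);
    }
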
diff --git a/runtime/art_field.h b/runtime/art_field.h
new file mode 100644
index 0000000..5bdbe71
--- /dev/null
+++ b/runtime/art_field.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ART_FIELD_H_
+#define ART_RUNTIME_ART_FIELD_H_
+
+#include <jni.h>
+
+#include "gc_root.h"
+#include "modifiers.h"
+#include "object_callbacks.h"
+#include "offsets.h"
+#include "primitive.h"
+#include "read_barrier_option.h"
+
+namespace art {
+
+class DexFile;
+class ScopedObjectAccessAlreadyRunnable;
+
+namespace mirror {
+class Class;
+class DexCache;
+class Object;
+class String;
+}  // namespace mirror
+
+class ArtField {
+ public:
+  ArtField();
+
+  mirror::Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetDeclaringClass(mirror::Class *new_declaring_class)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Not called within a transaction.
+    access_flags_ = new_access_flags;
+  }
+
+  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccPublic) != 0;
+  }
+
+  bool IsStatic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccStatic) != 0;
+  }
+
+  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccFinal) != 0;
+  }
+
+  uint32_t GetDexFieldIndex() {
+    return field_dex_idx_;
+  }
+
+  void SetDexFieldIndex(uint32_t new_idx) {
+    // Not called within a transaction.
+    field_dex_idx_ = new_idx;
+  }
+
+  // Offset to field within an Object.
+  MemberOffset GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  static MemberOffset OffsetOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(ArtField, offset_));
+  }
+
+  MemberOffset GetOffsetDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void SetOffset(MemberOffset num_bytes) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Field access; null object for static fields.
+  uint8_t GetBoolean(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetBoolean(mirror::Object* object, uint8_t z) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  int8_t GetByte(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetByte(mirror::Object* object, int8_t b) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  uint16_t GetChar(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetChar(mirror::Object* object, uint16_t c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  int16_t GetShort(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetShort(mirror::Object* object, int16_t s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  int32_t GetInt(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetInt(mirror::Object* object, int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  int64_t GetLong(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetLong(mirror::Object* object, int64_t j) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  float GetFloat(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetFloat(mirror::Object* object, float f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  double GetDouble(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetDouble(mirror::Object* object, double d) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::Object* GetObject(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetObject(mirror::Object* object, mirror::Object* l)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Raw field accesses.
+  uint32_t Get32(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void Set32(mirror::Object* object, uint32_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  uint64_t Get64(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void Set64(mirror::Object* object, uint64_t new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::Object* GetObj(mirror::Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive>
+  void SetObj(mirror::Object* object, mirror::Object* new_value)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void VisitRoots(RootVisitor* visitor)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return (GetAccessFlags() & kAccVolatile) != 0;
+  }
+
+  // Returns an instance field with this offset in the given class or nullptr if not found.
+  static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Resolves / returns the name from the dex cache.
+  mirror::String* GetStringName(Thread* self, bool resolve)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  Primitive::Type GetTypeAsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <bool kResolve>
+  mirror::Class* GetType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  GcRoot<mirror::Class>& DeclaringClassRoot() {
+    return declaring_class_;
+  }
+
+ private:
+  GcRoot<mirror::Class> declaring_class_;
+
+  uint32_t access_flags_;
+
+  // Dex cache index of the field id.
+  uint32_t field_dex_idx_;
+
+  // Offset of the field within an instance or in the Class' static fields.
+  uint32_t offset_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ART_FIELD_H_
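With ArtField moved out of the mirror:: hierarchy, an instance is just the four members declared above. A sketch of a compile-time size check one could add, assuming GcRoot stores a 32-bit compressed reference as elsewhere in the runtime:

    // One GcRoot (compressed 32-bit reference) plus three uint32_t members:
    // 16 bytes per field on both 32-bit and 64-bit targets.
    static_assert(sizeof(art::ArtField) == 16, "Unexpected ArtField size");
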
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index dba4af8..4c83e88 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -135,13 +135,13 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
-#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (52 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (44 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
-#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (80 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_OBJECT_SIZE_OFFSET (96 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_OBJECT_SIZE_OFFSET,
             art::mirror::Class::ObjectSizeOffset().Int32Value())
-#define MIRROR_CLASS_STATUS_OFFSET (92 + MIRROR_OBJECT_HEADER_SIZE)
+#define MIRROR_CLASS_STATUS_OFFSET (108 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_STATUS_OFFSET,
             art::mirror::Class::StatusOffset().Int32Value())
 
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index dd29404..59d38ad 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -132,11 +132,10 @@
   free(reinterpret_cast<void*>(memory_));
 }
 
-MemMapArena::MemMapArena(size_t size) {
+MemMapArena::MemMapArena(size_t size, bool low_4gb) {
   std::string error_msg;
-  map_.reset(
-      MemMap::MapAnonymous("dalvik-LinearAlloc", nullptr, size, PROT_READ | PROT_WRITE, false,
-                           false, &error_msg));
+  map_.reset(MemMap::MapAnonymous(
+      "LinearAlloc", nullptr, size, PROT_READ | PROT_WRITE, low_4gb, false, &error_msg));
   CHECK(map_.get() != nullptr) << error_msg;
   memory_ = map_->Begin();
   size_ = map_->Size();
@@ -156,8 +155,12 @@
   }
 }
 
-ArenaPool::ArenaPool(bool use_malloc)
-    : use_malloc_(use_malloc), lock_("Arena pool lock"), free_arenas_(nullptr) {
+ArenaPool::ArenaPool(bool use_malloc, bool low_4gb)
+    : use_malloc_(use_malloc), lock_("Arena pool lock", kArenaPoolLock), free_arenas_(nullptr),
+      low_4gb_(low_4gb) {
+  if (low_4gb) {
+    CHECK(!use_malloc) << "low4gb must use map implementation";
+  }
   if (!use_malloc) {
     MemMap::Init();
   }
@@ -182,7 +185,8 @@
     }
   }
   if (ret == nullptr) {
-    ret = use_malloc_ ? static_cast<Arena*>(new MallocArena(size)) : new MemMapArena(size);
+    ret = use_malloc_ ? static_cast<Arena*>(new MallocArena(size)) :
+        new MemMapArena(size, low_4gb_);
   }
   ret->Reset();
   return ret;
@@ -229,6 +233,17 @@
   return ArenaAllocatorStats::BytesAllocated();
 }
 
+size_t ArenaAllocator::BytesUsed() const {
+  size_t total = ptr_ - begin_;
+  if (arena_head_ != nullptr) {
+    for (Arena* cur_arena = arena_head_->next_; cur_arena != nullptr;
+         cur_arena = cur_arena->next_) {
+      total += cur_arena->GetBytesAllocated();
+    }
+  }
+  return total;
+}
+
 ArenaAllocator::ArenaAllocator(ArenaPool* pool)
   : pool_(pool),
     begin_(nullptr),
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index cc7b856..3a86b61 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -165,7 +165,7 @@
 
 class MemMapArena FINAL : public Arena {
  public:
-  explicit MemMapArena(size_t size = Arena::kDefaultSize);
+  explicit MemMapArena(size_t size, bool low_4gb);
   virtual ~MemMapArena() { }
   void Release() OVERRIDE;
 
@@ -175,7 +175,7 @@
 
 class ArenaPool {
  public:
-  explicit ArenaPool(bool use_malloc = true);
+  explicit ArenaPool(bool use_malloc = true, bool low_4gb = false);
   ~ArenaPool();
   Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_);
   void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_);
@@ -188,6 +188,7 @@
   const bool use_malloc_;
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   Arena* free_arenas_ GUARDED_BY(lock_);
+  const bool low_4gb_;
   DISALLOW_COPY_AND_ASSIGN(ArenaPool);
 };
 
@@ -227,6 +228,9 @@
   void ObtainNewArenaForAllocation(size_t allocation_size);
   size_t BytesAllocated() const;
   MemStats GetMemStats() const;
+  // Sums the bytes used in the head arena with the bytes allocated in every
+  // arena chained after it.
+  // TODO: Change BytesAllocated to this behavior?
+  size_t BytesUsed() const;
 
  private:
   static constexpr size_t kAlignment = 8;
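The low_4gb flag exists so that pools backing 32-bit-addressable metadata can guarantee their arenas map below 4GB, which malloc cannot. A minimal construction sketch under that assumption:

    // MemMap-backed pool whose arenas are mapped in the low 4GB; malloc
    // arenas give no address guarantee, hence use_malloc must be false.
    ArenaPool pool(/*use_malloc=*/ false, /*low_4gb=*/ true);
    Arena* arena = pool.AllocArena(Arena::kDefaultSize);
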
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 6e7b04f..6e4b96c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -73,6 +73,7 @@
   kRosAllocBulkFreeLock,
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
+  kArenaPoolLock,
   kDexFileMethodInlinerLock,
   kDexFileToMethodInlinerMapLock,
   kMarkSweepMarkStackLock,
@@ -97,6 +98,7 @@
   kAllocTrackerLock,
   kDeoptimizationLock,
   kProfilerLock,
+  kJdwpShutdownLock,
   kJdwpEventListLock,
   kJdwpAttachLock,
   kJdwpStartLock,
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 2b0167d..f94ebea 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -19,6 +19,7 @@
 #include <sys/mman.h>
 #include <zlib.h>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/to_str.h"
 #include "class_linker.h"
@@ -27,7 +28,6 @@
 #include "gc/space/space.h"
 #include "java_vm_ext.h"
 #include "jni_internal.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -169,7 +169,7 @@
       return false;
     }
 
-    mirror::ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(soa, fid);
     if (f == nullptr) {
       return false;
     }
@@ -248,7 +248,7 @@
   bool CheckStaticFieldID(ScopedObjectAccess& soa, jclass java_class, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     mirror::Class* c = soa.Decode<mirror::Class*>(java_class);
-    mirror::ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(soa, fid);
     if (f == nullptr) {
       return false;
     }
@@ -565,7 +565,7 @@
     if (!is_static && !CheckInstanceFieldID(soa, obj, fid)) {
       return false;
     }
-    mirror::ArtField* field = soa.DecodeField(fid);
+    ArtField* field = soa.DecodeField(fid);
     DCHECK(field != nullptr);  // Already checked by Check.
     if (is_static != field->IsStatic()) {
       AbortF("attempt to access %s field %s: %p",
@@ -817,7 +817,7 @@
       }
       case 'f': {  // jfieldID
         jfieldID fid = arg.f;
-        mirror::ArtField* f = soa.DecodeField(fid);
+        ArtField* f = soa.DecodeField(fid);
         *msg += PrettyField(f);
         if (!entry) {
           StringAppendF(msg, " (%p)", fid);
@@ -986,14 +986,15 @@
     return true;
   }
 
-  mirror::ArtField* CheckFieldID(ScopedObjectAccess& soa, jfieldID fid)
+  ArtField* CheckFieldID(ScopedObjectAccess& soa, jfieldID fid)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (fid == nullptr) {
       AbortF("jfieldID was NULL");
       return nullptr;
     }
-    mirror::ArtField* f = soa.DecodeField(fid);
-    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f) || !f->IsArtField()) {
+    ArtField* f = soa.DecodeField(fid);
+    // TODO: Better check here.
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f->GetDeclaringClass())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG(ERROR));
       AbortF("invalid jfieldID: %p", fid);
       return nullptr;
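Since ArtField no longer lives on the managed heap, a jfieldID itself can no longer be address-checked as an object; only its declaring class can be, which is what the new check above does. A sketch of the caller-side consequence, assuming DecodeField now simply reinterprets the id as a native pointer:

    // The jfieldID is assumed to encode the native ArtField* directly, so
    // heap validation has to go through the declaring class instead.
    ArtField* f = soa.DecodeField(fid);
    CHECK(Runtime::Current()->GetHeap()->IsValidObjectAddress(f->GetDeclaringClass()));
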
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 5198769..87d1c4c 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -17,10 +17,10 @@
 #ifndef ART_RUNTIME_CLASS_LINKER_INL_H_
 #define ART_RUNTIME_CLASS_LINKER_INL_H_
 
+#include "art_field.h"
 #include "class_linker.h"
 #include "gc_root-inl.h"
 #include "gc/heap-inl.h"
-#include "mirror/art_field.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/iftable.h"
@@ -88,7 +88,7 @@
   return resolved_type;
 }
 
-inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, mirror::ArtField* referrer) {
+inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtField* referrer) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
   mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
   mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
@@ -133,15 +133,19 @@
   return resolved_method;
 }
 
-inline mirror::ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
-                                                       mirror::Class* field_declaring_class) {
-  return field_declaring_class->GetDexCache()->GetResolvedField(field_idx);
+inline ArtField* ClassLinker::GetResolvedField(uint32_t field_idx, mirror::DexCache* dex_cache) {
+  return dex_cache->GetResolvedField(field_idx, image_pointer_size_);
 }
 
-inline mirror::ArtField* ClassLinker::ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
+inline ArtField* ClassLinker::GetResolvedField(
+    uint32_t field_idx, mirror::Class* field_declaring_class) {
+  return GetResolvedField(field_idx, field_declaring_class->GetDexCache());
+}
+
+inline ArtField* ClassLinker::ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
                                                    bool is_static) {
   mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  mirror::ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
+  ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
   if (UNLIKELY(resolved_field == NULL)) {
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
@@ -187,17 +191,6 @@
                              ifcount * mirror::IfTable::kMax));
 }
 
-inline mirror::ObjectArray<mirror::ArtField>* ClassLinker::AllocArtFieldArray(Thread* self,
-                                                                              size_t length) {
-  gc::Heap* const heap = Runtime::Current()->GetHeap();
-  // Can't have movable field arrays for mark compact since we need these arrays to always be valid
-  // so that we can do Object::VisitReferences in the case where the fields don't fit in the
-  // reference offsets word.
-  return mirror::ObjectArray<mirror::ArtField>::Alloc(
-      self, GetClassRoot(kJavaLangReflectArtFieldArrayClass), length,
-      kMoveFieldArrays ? heap->GetCurrentAllocator() : heap->GetCurrentNonMovingAllocator());
-}
-
 inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(!class_roots_.IsNull());
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 33d75d2..935c401 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -25,6 +25,7 @@
 #include <utility>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "base/casts.h"
 #include "base/logging.h"
 #include "base/scoped_flock.h"
@@ -46,11 +47,11 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "leb128.h"
+#include "linear_alloc.h"
 #include "oat.h"
 #include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "object_lock.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -77,6 +78,8 @@
 
 namespace art {
 
+static constexpr bool kSanityCheckObjects = kIsDebugBuild;
+
 static void ThrowNoClassDefFoundError(const char* fmt, ...)
     __attribute__((__format__(__printf__, 1, 2)))
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -186,7 +189,7 @@
 template<int n>
 static void ShuffleForward(size_t* current_field_idx,
                            MemberOffset* field_offset,
-                           std::deque<mirror::ArtField*>* grouped_and_sorted_fields,
+                           std::deque<ArtField*>* grouped_and_sorted_fields,
                            FieldGaps* gaps)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(current_field_idx != nullptr);
@@ -196,7 +199,7 @@
 
   DCHECK(IsPowerOfTwo(n));
   while (!grouped_and_sorted_fields->empty()) {
-    mirror::ArtField* field = grouped_and_sorted_fields->front();
+    ArtField* field = grouped_and_sorted_fields->front();
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     if (Primitive::ComponentSize(type) < n) {
       break;
@@ -357,6 +360,13 @@
   mirror::IntArray::SetArrayClass(int_array_class.Get());
   SetClassRoot(kIntArrayClass, int_array_class.Get());
 
+  // Create long array type for AllocDexCache (done in AppendToBootClassPath).
+  Handle<mirror::Class> long_array_class(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::Array::ClassSize())));
+  long_array_class->SetComponentType(GetClassRoot(kPrimitiveLong));
+  mirror::LongArray::SetArrayClass(long_array_class.Get());
+  SetClassRoot(kLongArrayClass, long_array_class.Get());
+
   // now that these are registered, we can use AllocClass() and AllocObjectArray
 
   // Set up DexCache. This cannot be done later since AppendToBootClassPath calls AllocDexCache.
@@ -366,15 +376,8 @@
   java_lang_DexCache->SetObjectSize(mirror::DexCache::InstanceSize());
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusResolved, self);
 
-  // Constructor, Field, Method, and AbstractMethod are necessary so
+  // Constructor, Method, and AbstractMethod are necessary so
   // that FindClass can link members.
-  Handle<mirror::Class> java_lang_reflect_ArtField(hs.NewHandle(
-      AllocClass(self, java_lang_Class.Get(), mirror::ArtField::ClassSize())));
-  CHECK(java_lang_reflect_ArtField.Get() != nullptr);
-  java_lang_reflect_ArtField->SetObjectSize(mirror::ArtField::InstanceSize());
-  SetClassRoot(kJavaLangReflectArtField, java_lang_reflect_ArtField.Get());
-  mirror::Class::SetStatus(java_lang_reflect_ArtField, mirror::Class::kStatusResolved, self);
-  mirror::ArtField::SetClass(java_lang_reflect_ArtField.Get());
 
   Handle<mirror::Class> java_lang_reflect_ArtMethod(hs.NewHandle(
     AllocClass(self, java_lang_Class.Get(), mirror::ArtMethod::ClassSize())));
@@ -398,12 +401,6 @@
   object_array_art_method->SetComponentType(java_lang_reflect_ArtMethod.Get());
   SetClassRoot(kJavaLangReflectArtMethodArrayClass, object_array_art_method.Get());
 
-  Handle<mirror::Class> object_array_art_field(hs.NewHandle(
-      AllocClass(self, java_lang_Class.Get(),
-                 mirror::ObjectArray<mirror::ArtField>::ClassSize())));
-  object_array_art_field->SetComponentType(java_lang_reflect_ArtField.Get());
-  SetClassRoot(kJavaLangReflectArtFieldArrayClass, object_array_art_field.Get());
-
   // Setup boot_class_path_ and register class_path now that we can use AllocObjectArray to create
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
   // these roots.
@@ -471,8 +468,8 @@
   mirror::Class* found_int_array_class = FindSystemClass(self, "[I");
   CHECK_EQ(int_array_class.Get(), found_int_array_class);
 
-  SetClassRoot(kLongArrayClass, FindSystemClass(self, "[J"));
-  mirror::LongArray::SetArrayClass(GetClassRoot(kLongArrayClass));
+  mirror::Class* found_long_array_class = FindSystemClass(self, "[J");
+  CHECK_EQ(long_array_class.Get(), found_long_array_class);
 
   SetClassRoot(kFloatArrayClass, FindSystemClass(self, "[F"));
   mirror::FloatArray::SetArrayClass(GetClassRoot(kFloatArrayClass));
@@ -513,10 +510,6 @@
   mirror::Class* Art_method_class = FindSystemClass(self, "Ljava/lang/reflect/ArtMethod;");
   CHECK_EQ(java_lang_reflect_ArtMethod.Get(), Art_method_class);
 
-  mirror::Class::SetStatus(java_lang_reflect_ArtField, mirror::Class::kStatusNotReady, self);
-  mirror::Class* Art_field_class = FindSystemClass(self, "Ljava/lang/reflect/ArtField;");
-  CHECK_EQ(java_lang_reflect_ArtField.Get(), Art_field_class);
-
   mirror::Class* String_array_class =
       FindSystemClass(self, GetClassRootDescriptor(kJavaLangStringArrayClass));
   CHECK_EQ(object_array_string.Get(), String_array_class);
@@ -525,10 +518,6 @@
       FindSystemClass(self, GetClassRootDescriptor(kJavaLangReflectArtMethodArrayClass));
   CHECK_EQ(object_array_art_method.Get(), Art_method_array_class);
 
-  mirror::Class* Art_field_array_class =
-      FindSystemClass(self, GetClassRootDescriptor(kJavaLangReflectArtFieldArrayClass));
-  CHECK_EQ(object_array_art_field.Get(), Art_field_array_class);
-
   // End of special init trickery, subsequent classes may be loaded via FindSystemClass.
 
   // Create java.lang.reflect.Proxy root.
@@ -624,23 +613,23 @@
   mirror::Class* java_lang_ref_FinalizerReference =
       FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
 
-  mirror::ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
+  ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
   CHECK_STREQ(pendingNext->GetName(), "pendingNext");
   CHECK_STREQ(pendingNext->GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
-  mirror::ArtField* queue = java_lang_ref_Reference->GetInstanceField(1);
+  ArtField* queue = java_lang_ref_Reference->GetInstanceField(1);
   CHECK_STREQ(queue->GetName(), "queue");
   CHECK_STREQ(queue->GetTypeDescriptor(), "Ljava/lang/ref/ReferenceQueue;");
 
-  mirror::ArtField* queueNext = java_lang_ref_Reference->GetInstanceField(2);
+  ArtField* queueNext = java_lang_ref_Reference->GetInstanceField(2);
   CHECK_STREQ(queueNext->GetName(), "queueNext");
   CHECK_STREQ(queueNext->GetTypeDescriptor(), "Ljava/lang/ref/Reference;");
 
-  mirror::ArtField* referent = java_lang_ref_Reference->GetInstanceField(3);
+  ArtField* referent = java_lang_ref_Reference->GetInstanceField(3);
   CHECK_STREQ(referent->GetName(), "referent");
   CHECK_STREQ(referent->GetTypeDescriptor(), "Ljava/lang/Object;");
 
-  mirror::ArtField* zombie = java_lang_ref_FinalizerReference->GetInstanceField(2);
+  ArtField* zombie = java_lang_ref_FinalizerReference->GetInstanceField(2);
   CHECK_STREQ(zombie->GetName(), "zombie");
   CHECK_STREQ(zombie->GetTypeDescriptor(), "Ljava/lang/Object;");
 
@@ -802,6 +791,23 @@
   }
 }
 
+void SanityCheckObjectsCallback(mirror::Object* obj, void* arg ATTRIBUTE_UNUSED)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
+  CHECK(obj->GetClass() != nullptr) << "Null class " << obj;
+  CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj;
+  if (obj->IsClass()) {
+    auto klass = obj->AsClass();
+    ArtField* fields[2] = { klass->GetSFields(), klass->GetIFields() };
+    size_t num_fields[2] = { klass->NumStaticFields(), klass->NumInstanceFields() };
+    for (size_t i = 0; i < 2; ++i) {
+      for (size_t j = 0; j < num_fields[i]; ++j) {
+        CHECK_EQ(fields[i][j].GetDeclaringClass(), klass);
+      }
+    }
+  }
+}
+
 void ClassLinker::InitFromImage() {
   VLOG(startup) << "ClassLinker::InitFromImage entering";
   CHECK(!init_done_);
@@ -882,6 +888,18 @@
   if (!runtime->IsAotCompiler() && runtime->GetInstrumentation()->InterpretOnly()) {
     heap->VisitObjects(InitFromImageInterpretOnlyCallback, this);
   }
+  if (kSanityCheckObjects) {
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      auto* dex_cache = dex_caches->Get(i);
+      for (size_t j = 0; j < dex_cache->NumResolvedFields(); ++j) {
+        auto* field = dex_cache->GetResolvedField(j, image_pointer_size_);
+        if (field != nullptr) {
+          CHECK(field->GetDeclaringClass()->GetClass() != nullptr);
+        }
+      }
+    }
+    heap->VisitObjects(SanityCheckObjectsCallback, nullptr);
+  }
 
   // reinit class_roots_
   mirror::Class::SetClassClass(class_roots->Get(kJavaLangClass));
@@ -894,7 +912,6 @@
   mirror::Field::SetClass(GetClassRoot(kJavaLangReflectField));
   mirror::Field::SetArrayClass(GetClassRoot(kJavaLangReflectFieldArrayClass));
   mirror::Reference::SetClass(GetClassRoot(kJavaLangRefReference));
-  mirror::ArtField::SetClass(GetClassRoot(kJavaLangReflectArtField));
   mirror::BooleanArray::SetArrayClass(GetClassRoot(kBooleanArrayClass));
   mirror::ByteArray::SetArrayClass(GetClassRoot(kByteArrayClass));
   mirror::CharArray::SetArrayClass(GetClassRoot(kCharArrayClass));
@@ -913,17 +930,22 @@
 
 void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
+      visitor, RootInfo(kRootStickyClass));
   if ((flags & kVisitRootFlagAllRoots) != 0) {
-    BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootStickyClass));
     for (GcRoot<mirror::Class>& root : class_table_) {
       buffered_visitor.VisitRoot(root);
+      root.Read()->VisitFieldRoots(buffered_visitor);
     }
+    // PreZygote classes can't move, so we won't need to update the fields' declaring classes.
     for (GcRoot<mirror::Class>& root : pre_zygote_class_table_) {
       buffered_visitor.VisitRoot(root);
+      root.Read()->VisitFieldRoots(buffered_visitor);
     }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
       mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
+      old_ref->VisitFieldRoots(buffered_visitor);
       root.VisitRoot(visitor, RootInfo(kRootStickyClass));
       mirror::Class* new_ref = root.Read<kWithoutReadBarrier>();
       if (UNLIKELY(new_ref != old_ref)) {
@@ -936,6 +958,7 @@
       }
     }
   }
+  buffered_visitor.Flush();  // Flush before clearing new_class_roots_.
   if ((flags & kVisitRootFlagClearRootLog) != 0) {
     new_class_roots_.clear();
   }
@@ -1076,7 +1099,6 @@
   mirror::Class::ResetClass();
   mirror::String::ResetClass();
   mirror::Reference::ResetClass();
-  mirror::ArtField::ResetClass();
   mirror::ArtMethod::ResetClass();
   mirror::Field::ResetClass();
   mirror::Field::ResetArrayClass();
@@ -1094,7 +1116,7 @@
 }
 
 mirror::DexCache* ClassLinker::AllocDexCache(Thread* self, const DexFile& dex_file) {
-  gc::Heap* heap = Runtime::Current()->GetHeap();
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   StackHandleScope<16> hs(self);
   Handle<mirror::Class> dex_cache_class(hs.NewHandle(GetClassRoot(kJavaLangDexCache)));
   Handle<mirror::DexCache> dex_cache(
@@ -1124,8 +1146,12 @@
   if (methods.Get() == nullptr) {
     return nullptr;
   }
-  Handle<mirror::ObjectArray<mirror::ArtField>>
-      fields(hs.NewHandle(AllocArtFieldArray(self, dex_file.NumFieldIds())));
+  Handle<mirror::Array> fields;
+  if (image_pointer_size_ == 8) {
+    fields = hs.NewHandle<mirror::Array>(mirror::LongArray::Alloc(self, dex_file.NumFieldIds()));
+  } else {
+    fields = hs.NewHandle<mirror::Array>(mirror::IntArray::Alloc(self, dex_file.NumFieldIds()));
+  }
   if (fields.Get() == nullptr) {
     return nullptr;
   }
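Resolved fields in the dex cache are now raw ArtField* values stored in a primitive array whose element width matches image_pointer_size_, hence LongArray on 64-bit images and IntArray on 32-bit. A read-side sketch using the accessor introduced in class_linker-inl.h above:

    // The pointer size selects how the raw ArtField* is unpacked from the
    // backing Long/IntArray; nullptr means the field is not yet resolved.
    ArtField* f = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
    if (f == nullptr) {
      f = ResolveField(field_idx, referrer, is_static);  // Slow path.
    }
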
@@ -1153,11 +1179,6 @@
   return AllocClass(self, GetClassRoot(kJavaLangClass), class_size);
 }
 
-mirror::ArtField* ClassLinker::AllocArtField(Thread* self) {
-  return down_cast<mirror::ArtField*>(
-      GetClassRoot(kJavaLangReflectArtField)->AllocNonMovableObject(self));
-}
-
 mirror::ArtMethod* ClassLinker::AllocArtMethod(Thread* self) {
   return down_cast<mirror::ArtMethod*>(
       GetClassRoot(kJavaLangReflectArtMethod)->AllocNonMovableObject(self));
@@ -1272,16 +1293,13 @@
     StackHandleScope<3> hs(self);
     // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
     // We need to get the DexPathList and loop through it.
-    Handle<mirror::ArtField> cookie_field =
-        hs.NewHandle(soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie));
-    Handle<mirror::ArtField> dex_file_field =
-        hs.NewHandle(
-            soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile));
+    ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+    ArtField* const dex_file_field =
+        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
     mirror::Object* dex_path_list =
         soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
         GetObject(class_loader.Get());
-    if (dex_path_list != nullptr && dex_file_field.Get() != nullptr &&
-        cookie_field.Get() != nullptr) {
+    if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
       // DexPathList has an array dexElements of Elements[] which each contain a dex file.
       mirror::Object* dex_elements_obj =
           soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
@@ -1432,8 +1450,6 @@
       klass.Assign(GetClassRoot(kJavaLangRefReference));
     } else if (strcmp(descriptor, "Ljava/lang/DexCache;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangDexCache));
-    } else if (strcmp(descriptor, "Ljava/lang/reflect/ArtField;") == 0) {
-      klass.Assign(GetClassRoot(kJavaLangReflectArtField));
     } else if (strcmp(descriptor, "Ljava/lang/reflect/ArtMethod;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangReflectArtMethod));
     }
@@ -1451,16 +1467,10 @@
     return nullptr;
   }
   klass->SetDexCache(FindDexCache(dex_file));
-  LoadClass(self, dex_file, dex_class_def, klass, class_loader.Get());
+
+  SetupClass(dex_file, dex_class_def, klass, class_loader.Get());
+
   ObjectLock<mirror::Class> lock(self, klass);
-  if (self->IsExceptionPending()) {
-    // An exception occured during load, set status to erroneous while holding klass' lock in case
-    // notification is necessary.
-    if (!klass->IsErroneous()) {
-      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
-    }
-    return nullptr;
-  }
   klass->SetClinitThreadId(self->GetTid());
 
   // Add the newly loaded class to the loaded classes table.
@@ -1471,6 +1481,20 @@
     return EnsureResolved(self, descriptor, existing);
   }
 
+  // Load the fields and other things after we are inserted in the table. This is so that we don't
+  // end up allocating unfree-able linear alloc resources and then lose the insertion race. The
+  // other reason is that the field roots are only visited from the class table, so the class must
+  // be inserted before we allocate / fill in these fields.
+  LoadClass(self, dex_file, dex_class_def, klass);
+  if (self->IsExceptionPending()) {
+    // An exception occurred during load, set status to erroneous while holding klass' lock in case
+    // notification is necessary.
+    if (!klass->IsErroneous()) {
+      mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
+    }
+    return nullptr;
+  }
+
   // Finish loading (if necessary) by finding parents
   CHECK(!klass->IsLoaded());
   if (!LoadSuperAndInterfaces(klass, dex_file)) {
@@ -1844,12 +1868,8 @@
   }
 }
 
-
-
-void ClassLinker::LoadClass(Thread* self, const DexFile& dex_file,
-                            const DexFile::ClassDef& dex_class_def,
-                            Handle<mirror::Class> klass,
-                            mirror::ClassLoader* class_loader) {
+void ClassLinker::SetupClass(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
+                             Handle<mirror::Class> klass, mirror::ClassLoader* class_loader) {
   CHECK(klass.Get() != nullptr);
   CHECK(klass->GetDexCache() != nullptr);
   CHECK_EQ(mirror::Class::kStatusNotReady, klass->GetStatus());
@@ -1867,13 +1887,15 @@
   klass->SetDexClassDefIndex(dex_file.GetIndexForClassDef(dex_class_def));
   klass->SetDexTypeIndex(dex_class_def.class_idx_);
   CHECK(klass->GetDexCacheStrings() != nullptr);
+}
 
+void ClassLinker::LoadClass(Thread* self, const DexFile& dex_file,
+                            const DexFile::ClassDef& dex_class_def,
+                            Handle<mirror::Class> klass) {
   const uint8_t* class_data = dex_file.GetClassData(dex_class_def);
   if (class_data == nullptr) {
     return;  // no fields or methods - for example a marker interface
   }
-
-
   bool has_oat_class = false;
   if (Runtime::Current()->IsStarted() && !Runtime::Current()->IsAotCompiler()) {
     OatFile::OatClass oat_class = FindOatClass(dex_file, klass->GetDexClassDefIndex(),
@@ -1887,51 +1909,41 @@
   }
 }
 
+ArtField* ClassLinker::AllocArtFieldArray(Thread* self, size_t length) {
+  auto* const la = Runtime::Current()->GetLinearAlloc();
+  auto* ptr = reinterpret_cast<ArtField*>(la->AllocArray<ArtField>(self, length));
+  CHECK(ptr != nullptr);
+  std::uninitialized_fill_n(ptr, length, ArtField());
+  return ptr;
+}
+
 void ClassLinker::LoadClassMembers(Thread* self, const DexFile& dex_file,
                                    const uint8_t* class_data,
                                    Handle<mirror::Class> klass,
                                    const OatFile::OatClass* oat_class) {
-  // Load fields.
+  // Load static fields.
   ClassDataItemIterator it(dex_file, class_data);
-  if (it.NumStaticFields() != 0) {
-    mirror::ObjectArray<mirror::ArtField>* statics = AllocArtFieldArray(self, it.NumStaticFields());
-    if (UNLIKELY(statics == nullptr)) {
-      CHECK(self->IsExceptionPending());  // OOME.
-      return;
-    }
-    klass->SetSFields(statics);
-  }
-  if (it.NumInstanceFields() != 0) {
-    mirror::ObjectArray<mirror::ArtField>* fields =
-        AllocArtFieldArray(self, it.NumInstanceFields());
-    if (UNLIKELY(fields == nullptr)) {
-      CHECK(self->IsExceptionPending());  // OOME.
-      return;
-    }
-    klass->SetIFields(fields);
-  }
+  const size_t num_sfields = it.NumStaticFields();
+  ArtField* sfields = num_sfields != 0 ? AllocArtFieldArray(self, num_sfields) : nullptr;
   for (size_t i = 0; it.HasNextStaticField(); i++, it.Next()) {
+    CHECK_LT(i, num_sfields);
     self->AllowThreadSuspension();
-    StackHandleScope<1> hs(self);
-    Handle<mirror::ArtField> sfield(hs.NewHandle(AllocArtField(self)));
-    if (UNLIKELY(sfield.Get() == nullptr)) {
-      CHECK(self->IsExceptionPending());  // OOME.
-      return;
-    }
-    klass->SetStaticField(i, sfield.Get());
-    LoadField(dex_file, it, klass, sfield);
+    LoadField(it, klass, &sfields[i]);
   }
+  klass->SetSFields(sfields);
+  klass->SetNumStaticFields(num_sfields);
+  DCHECK_EQ(klass->NumStaticFields(), num_sfields);
+  // Load instance fields.
+  const size_t num_ifields = it.NumInstanceFields();
+  ArtField* ifields = num_ifields != 0 ? AllocArtFieldArray(self, num_ifields) : nullptr;
   for (size_t i = 0; it.HasNextInstanceField(); i++, it.Next()) {
+    CHECK_LT(i, num_ifields);
     self->AllowThreadSuspension();
-    StackHandleScope<1> hs(self);
-    Handle<mirror::ArtField> ifield(hs.NewHandle(AllocArtField(self)));
-    if (UNLIKELY(ifield.Get() == nullptr)) {
-      CHECK(self->IsExceptionPending());  // OOME.
-      return;
-    }
-    klass->SetInstanceField(i, ifield.Get());
-    LoadField(dex_file, it, klass, ifield);
+    LoadField(it, klass, &ifields[i]);
   }
+  klass->SetIFields(ifields);
+  klass->SetNumInstanceFields(num_ifields);
+  DCHECK_EQ(klass->NumInstanceFields(), num_ifields);
 
   // Load methods.
   if (it.NumDirectMethods() != 0) {
@@ -1994,10 +2006,9 @@
   DCHECK(!it.HasNext());
 }
 
-void ClassLinker::LoadField(const DexFile& /*dex_file*/, const ClassDataItemIterator& it,
-                            Handle<mirror::Class> klass,
-                            Handle<mirror::ArtField> dst) {
-  uint32_t field_idx = it.GetMemberIndex();
+void ClassLinker::LoadField(const ClassDataItemIterator& it, Handle<mirror::Class> klass,
+                            ArtField* dst) {
+  const uint32_t field_idx = it.GetMemberIndex();
   dst->SetDexFieldIndex(field_idx);
   dst->SetDeclaringClass(klass.Get());
   dst->SetAccessFlags(it.GetFieldAccessFlags());
@@ -2281,13 +2292,12 @@
     } else if (strcmp(descriptor,
                       GetClassRootDescriptor(kJavaLangReflectArtMethodArrayClass)) == 0) {
       new_class.Assign(GetClassRoot(kJavaLangReflectArtMethodArrayClass));
-    } else if (strcmp(descriptor,
-                      GetClassRootDescriptor(kJavaLangReflectArtFieldArrayClass)) == 0) {
-      new_class.Assign(GetClassRoot(kJavaLangReflectArtFieldArrayClass));
     } else if (strcmp(descriptor, "[C") == 0) {
       new_class.Assign(GetClassRoot(kCharArrayClass));
     } else if (strcmp(descriptor, "[I") == 0) {
       new_class.Assign(GetClassRoot(kIntArrayClass));
+    } else if (strcmp(descriptor, "[J") == 0) {
+      new_class.Assign(GetClassRoot(kLongArrayClass));
     }
   }
   if (new_class.Get() == nullptr) {
@@ -2918,32 +2928,20 @@
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
 
   // Instance fields are inherited, but we add a couple of static fields...
-  {
-    mirror::ObjectArray<mirror::ArtField>* sfields = AllocArtFieldArray(self, 2);
-    if (UNLIKELY(sfields == nullptr)) {
-      CHECK(self->IsExceptionPending());  // OOME.
-      return nullptr;
-    }
-    klass->SetSFields(sfields);
-  }
+  const size_t num_fields = 2;
+  ArtField* sfields = AllocArtFieldArray(self, num_fields);
+  klass->SetSFields(sfields);
+  klass->SetNumStaticFields(num_fields);
+
   // 1. Create a static field 'interfaces' that holds the _declared_ interfaces implemented by
   // our proxy, so Class.getInterfaces doesn't return the flattened set.
-  Handle<mirror::ArtField> interfaces_sfield(hs.NewHandle(AllocArtField(self)));
-  if (UNLIKELY(interfaces_sfield.Get() == nullptr)) {
-    CHECK(self->IsExceptionPending());  // OOME.
-    return nullptr;
-  }
-  klass->SetStaticField(0, interfaces_sfield.Get());
+  ArtField* interfaces_sfield = &sfields[0];
   interfaces_sfield->SetDexFieldIndex(0);
   interfaces_sfield->SetDeclaringClass(klass.Get());
   interfaces_sfield->SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
+
   // 2. Create a static field 'throws' that holds exceptions thrown by our methods.
-  Handle<mirror::ArtField> throws_sfield(hs.NewHandle(AllocArtField(self)));
-  if (UNLIKELY(throws_sfield.Get() == nullptr)) {
-    CHECK(self->IsExceptionPending());  // OOME.
-    return nullptr;
-  }
-  klass->SetStaticField(1, throws_sfield.Get());
+  ArtField* throws_sfield = &sfields[1];
   throws_sfield->SetDexFieldIndex(1);
   throws_sfield->SetDeclaringClass(klass.Get());
   throws_sfield->SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
@@ -3331,11 +3329,11 @@
     // Eagerly fill in static fields so that we don't have to do as many expensive
     // Class::FindStaticField in ResolveField.
     for (size_t i = 0; i < num_static_fields; ++i) {
-      mirror::ArtField* field = klass->GetStaticField(i);
+      ArtField* field = klass->GetStaticField(i);
       const uint32_t field_idx = field->GetDexFieldIndex();
-      mirror::ArtField* resolved_field = dex_cache->GetResolvedField(field_idx);
+      ArtField* resolved_field = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
       if (resolved_field == nullptr) {
-        dex_cache->SetResolvedField(field_idx, field);
+        dex_cache->SetResolvedField(field_idx, field, image_pointer_size_);
       } else {
         DCHECK_EQ(field, resolved_field);
       }
@@ -3349,9 +3347,8 @@
       DCHECK(field_it.HasNextStaticField());
       CHECK(can_init_statics);
       for ( ; value_it.HasNext(); value_it.Next(), field_it.Next()) {
-        StackHandleScope<1> hs2(self);
-        Handle<mirror::ArtField> field(hs2.NewHandle(
-            ResolveField(dex_file, field_it.GetMemberIndex(), dex_cache, class_loader, true)));
+        ArtField* field = ResolveField(
+            dex_file, field_it.GetMemberIndex(), dex_cache, class_loader, true);
         if (Runtime::Current()->IsActiveTransaction()) {
           value_it.ReadValueToField<true>(field);
         } else {
@@ -3585,21 +3582,17 @@
 }
 
 void ClassLinker::FixupTemporaryDeclaringClass(mirror::Class* temp_class, mirror::Class* new_class) {
-  mirror::ObjectArray<mirror::ArtField>* fields = new_class->GetIFields();
-  if (fields != nullptr) {
-    for (int index = 0; index < fields->GetLength(); index ++) {
-      if (fields->Get(index)->GetDeclaringClass() == temp_class) {
-        fields->Get(index)->SetDeclaringClass(new_class);
-      }
+  ArtField* fields = new_class->GetIFields();
+  for (size_t i = 0, count = new_class->NumInstanceFields(); i < count; i++) {
+    if (fields[i].GetDeclaringClass() == temp_class) {
+      fields[i].SetDeclaringClass(new_class);
     }
   }
 
   fields = new_class->GetSFields();
-  if (fields != nullptr) {
-    for (int index = 0; index < fields->GetLength(); index ++) {
-      if (fields->Get(index)->GetDeclaringClass() == temp_class) {
-        fields->Get(index)->SetDeclaringClass(new_class);
-      }
+  for (size_t i = 0, count = new_class->NumStaticFields(); i < count; i++) {
+    if (fields[i].GetDeclaringClass() == temp_class) {
+      fields[i].SetDeclaringClass(new_class);
     }
   }
 
@@ -4566,7 +4559,7 @@
   explicit LinkFieldsComparator() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   }
   // No thread safety analysis as will be called from STL. Checked lock held in constructor.
-  bool operator()(mirror::ArtField* field1, mirror::ArtField* field2)
+  bool operator()(ArtField* field1, ArtField* field2)
       NO_THREAD_SAFETY_ANALYSIS {
     // First come reference fields, then 64-bit, then 32-bit, and then 16-bit, then finally 8-bit.
     Primitive::Type type1 = field1->GetTypeAsPrimitiveType();
@@ -4599,11 +4592,8 @@
 bool ClassLinker::LinkFields(Thread* self, Handle<mirror::Class> klass, bool is_static,
                              size_t* class_size) {
   self->AllowThreadSuspension();
-  size_t num_fields =
-      is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
-
-  mirror::ObjectArray<mirror::ArtField>* fields =
-      is_static ? klass->GetSFields() : klass->GetIFields();
+  const size_t num_fields = is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
+  ArtField* const fields = is_static ? klass->GetSFields() : klass->GetIFields();
 
   // Initialize field_offset
   MemberOffset field_offset(0);
@@ -4622,13 +4612,11 @@
 
   // We want a relatively stable order so that adding new fields
   // minimizes disruption of the C++ versions of classes such as Class and Method.
-  std::deque<mirror::ArtField*> grouped_and_sorted_fields;
+  std::deque<ArtField*> grouped_and_sorted_fields;
   const char* old_no_suspend_cause = self->StartAssertNoThreadSuspension(
       "Naked ArtField references in deque");
   for (size_t i = 0; i < num_fields; i++) {
-    mirror::ArtField* f = fields->Get(i);
-    CHECK(f != nullptr) << PrettyClass(klass.Get());
-    grouped_and_sorted_fields.push_back(f);
+    grouped_and_sorted_fields.push_back(&fields[i]);
   }
   std::sort(grouped_and_sorted_fields.begin(), grouped_and_sorted_fields.end(),
             LinkFieldsComparator());
@@ -4639,7 +4627,7 @@
   FieldGaps gaps;
 
   for (; current_field < num_fields; current_field++) {
-    mirror::ArtField* field = grouped_and_sorted_fields.front();
+    ArtField* field = grouped_and_sorted_fields.front();
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     bool isPrimitive = type != Primitive::kPrimNot;
     if (isPrimitive) {
@@ -4673,7 +4661,7 @@
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
     CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
-    CHECK_STREQ(fields->Get(num_fields - 1)->GetName(), "referent") << PrettyClass(klass.Get());
+    CHECK_STREQ(fields[num_fields - 1].GetName(), "referent") << PrettyClass(klass.Get());
     --num_reference_fields;
   }
 
@@ -4712,16 +4700,12 @@
                                     sizeof(mirror::HeapReference<mirror::Object>));
     MemberOffset current_ref_offset = start_ref_offset;
     for (size_t i = 0; i < num_fields; i++) {
-      mirror::ArtField* field = fields->Get(i);
-      if ((false)) {  // enable to debug field layout
-        LOG(INFO) << "LinkFields: " << (is_static ? "static" : "instance")
-                    << " class=" << PrettyClass(klass.Get())
-                    << " field=" << PrettyField(field)
-                    << " offset="
-                    << field->GetField32(mirror::ArtField::OffsetOffset());
-      }
+      ArtField* field = &fields[i];
+      VLOG(class_linker) << "LinkFields: " << (is_static ? "static" : "instance")
+          << " class=" << PrettyClass(klass.Get()) << " field=" << PrettyField(field) << " offset="
+          << field->GetOffset();
       if (i != 0) {
-        mirror::ArtField* prev_field = fields->Get(i - 1u);
+        ArtField* const prev_field = &fields[i - 1];
         // NOTE: The field names can be the same. This is not possible in the Java language
         // but it's valid Java/dex bytecode and for example proguard can generate such bytecode.
         CHECK_LE(strcmp(prev_field->GetName(), field->GetName()), 0);
@@ -4993,12 +4977,11 @@
   }
 }
 
-mirror::ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
-                                            Handle<mirror::DexCache> dex_cache,
-                                            Handle<mirror::ClassLoader> class_loader,
-                                            bool is_static) {
+ArtField* ClassLinker::ResolveField(const DexFile& dex_file, uint32_t field_idx,
+                                    Handle<mirror::DexCache> dex_cache,
+                                    Handle<mirror::ClassLoader> class_loader, bool is_static) {
   DCHECK(dex_cache.Get() != nullptr);
-  mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
+  ArtField* resolved = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
   if (resolved != nullptr) {
     return resolved;
   }
@@ -5031,16 +5014,15 @@
       return nullptr;
     }
   }
-  dex_cache->SetResolvedField(field_idx, resolved);
+  dex_cache->SetResolvedField(field_idx, resolved, image_pointer_size_);
   return resolved;
 }
 
-mirror::ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file,
-                                               uint32_t field_idx,
-                                               Handle<mirror::DexCache> dex_cache,
-                                               Handle<mirror::ClassLoader> class_loader) {
+ArtField* ClassLinker::ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
+                                       Handle<mirror::DexCache> dex_cache,
+                                       Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
-  mirror::ArtField* resolved = dex_cache->GetResolvedField(field_idx);
+  ArtField* resolved = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
   if (resolved != nullptr) {
     return resolved;
   }
@@ -5059,7 +5041,7 @@
       dex_file.GetTypeId(field_id.type_idx_).descriptor_idx_));
   resolved = mirror::Class::FindField(self, klass, name, type);
   if (resolved != nullptr) {
-    dex_cache->SetResolvedField(field_idx, resolved);
+    dex_cache->SetResolvedField(field_idx, resolved, image_pointer_size_);
   } else {
     ThrowNoSuchFieldError("", klass.Get(), type, name);
   }
@@ -5189,12 +5171,10 @@
     "Ljava/lang/String;",
     "Ljava/lang/DexCache;",
     "Ljava/lang/ref/Reference;",
-    "Ljava/lang/reflect/ArtField;",
     "Ljava/lang/reflect/ArtMethod;",
     "Ljava/lang/reflect/Field;",
     "Ljava/lang/reflect/Proxy;",
     "[Ljava/lang/String;",
-    "[Ljava/lang/reflect/ArtField;",
     "[Ljava/lang/reflect/ArtMethod;",
     "[Ljava/lang/reflect/Field;",
     "Ljava/lang/ClassLoader;",
@@ -5309,12 +5289,12 @@
   }
 
   // For now, create a libcore-level DexFile for each ART DexFile. This "explodes" multidex.
-  StackHandleScope<11> hs(self);
+  StackHandleScope<10> hs(self);
 
-  Handle<mirror::ArtField> h_dex_elements_field =
-      hs.NewHandle(soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements));
+  ArtField* dex_elements_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
 
-  mirror::Class* dex_elements_class = h_dex_elements_field->GetType<true>();
+  mirror::Class* dex_elements_class = dex_elements_field->GetType<true>();
   DCHECK(dex_elements_class != nullptr);
   DCHECK(dex_elements_class->IsArrayClass());
   Handle<mirror::ObjectArray<mirror::Object>> h_dex_elements(hs.NewHandle(
@@ -5322,14 +5302,12 @@
   Handle<mirror::Class> h_dex_element_class =
       hs.NewHandle(dex_elements_class->GetComponentType());
 
-  Handle<mirror::ArtField> h_element_file_field =
-      hs.NewHandle(
-          soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile));
-  DCHECK_EQ(h_dex_element_class.Get(), h_element_file_field->GetDeclaringClass());
+  ArtField* element_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+  DCHECK_EQ(h_dex_element_class.Get(), element_file_field->GetDeclaringClass());
 
-  Handle<mirror::ArtField> h_cookie_field =
-      hs.NewHandle(soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie));
-  DCHECK_EQ(h_cookie_field->GetDeclaringClass(), h_element_file_field->GetType<false>());
+  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
   // Fill the elements array.
   int32_t index = 0;
@@ -5341,13 +5319,13 @@
     h_long_array->Set(0, reinterpret_cast<intptr_t>(dex_file));
 
     Handle<mirror::Object> h_dex_file = hs2.NewHandle(
-        h_cookie_field->GetDeclaringClass()->AllocObject(self));
+        cookie_field->GetDeclaringClass()->AllocObject(self));
     DCHECK(h_dex_file.Get() != nullptr);
-    h_cookie_field->SetObject<false>(h_dex_file.Get(), h_long_array.Get());
+    cookie_field->SetObject<false>(h_dex_file.Get(), h_long_array.Get());
 
     Handle<mirror::Object> h_element = hs2.NewHandle(h_dex_element_class->AllocObject(self));
     DCHECK(h_element.Get() != nullptr);
-    h_element_file_field->SetObject<false>(h_element.Get(), h_dex_file.Get());
+    element_file_field->SetObject<false>(h_element.Get(), h_dex_file.Get());
 
     h_dex_elements->Set(index, h_element.Get());
     index++;
@@ -5356,10 +5334,10 @@
 
   // Create DexPathList.
   Handle<mirror::Object> h_dex_path_list = hs.NewHandle(
-      h_dex_elements_field->GetDeclaringClass()->AllocObject(self));
+      dex_elements_field->GetDeclaringClass()->AllocObject(self));
   DCHECK(h_dex_path_list.Get() != nullptr);
   // Set elements.
-  h_dex_elements_field->SetObject<false>(h_dex_path_list.Get(), h_dex_elements.Get());
+  dex_elements_field->SetObject<false>(h_dex_path_list.Get(), h_dex_elements.Get());
 
   // Create PathClassLoader.
   Handle<mirror::Class> h_path_class_class = hs.NewHandle(
@@ -5368,20 +5346,20 @@
       h_path_class_class->AllocObject(self));
   DCHECK(h_path_class_loader.Get() != nullptr);
   // Set DexPathList.
-  Handle<mirror::ArtField> h_path_list_field = hs.NewHandle(
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList));
-  DCHECK(h_path_list_field.Get() != nullptr);
-  h_path_list_field->SetObject<false>(h_path_class_loader.Get(), h_dex_path_list.Get());
+  ArtField* path_list_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+  DCHECK(path_list_field != nullptr);
+  path_list_field->SetObject<false>(h_path_class_loader.Get(), h_dex_path_list.Get());
 
   // Make a pretend boot-classpath.
   // TODO: Should we scan the image?
-  Handle<mirror::ArtField> h_parent_field = hs.NewHandle(
+  ArtField* const parent_field =
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
-                               "Ljava/lang/ClassLoader;"));
-  DCHECK(h_parent_field.Get() != nullptr);
+                               "Ljava/lang/ClassLoader;");
+  DCHECK(parent_field != nullptr);
   mirror::Object* boot_cl =
       soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
-  h_parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
+  parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
 
   // Make it a global ref and return.
   ScopedLocalRef<jobject> local_ref(
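
The core of this change is visible in AllocArtFieldArray above: fields move off the managed heap into LinearAlloc, so a class's fields become one contiguous native array instead of one GC-heap object per field. A minimal standalone sketch of that allocation pattern follows; Field, AllocFieldArray, and the use of malloc are illustrative stand-ins, not ART APIs.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>

// Stand-in for ART's ArtField: a plain object with no vtable, so an array of
// them can live in raw, non-moving native memory.
struct Field {
  uint32_t dex_field_index = 0;
  uint32_t access_flags = 0;
};

// Mirrors the shape of AllocArtFieldArray: take uninitialized storage from a
// bump allocator (malloc stands in for LinearAlloc) and default-construct
// every element in place with std::uninitialized_fill_n.
Field* AllocFieldArray(std::size_t length) {
  Field* ptr = static_cast<Field*>(std::malloc(length * sizeof(Field)));
  assert(ptr != nullptr);
  std::uninitialized_fill_n(ptr, length, Field());
  return ptr;
}

int main() {
  Field* sfields = AllocFieldArray(2);
  sfields[0].dex_field_index = 0;  // analogous to LoadField filling &sfields[i]
  sfields[1].dex_field_index = 1;
  std::free(sfields);
  return 0;
}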
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 577fec2..2427462 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -70,12 +70,10 @@
     kJavaLangString,
     kJavaLangDexCache,
     kJavaLangRefReference,
-    kJavaLangReflectArtField,
     kJavaLangReflectArtMethod,
     kJavaLangReflectField,
     kJavaLangReflectProxy,
     kJavaLangStringArrayClass,
-    kJavaLangReflectArtFieldArrayClass,
     kJavaLangReflectArtMethodArrayClass,
     kJavaLangReflectFieldArrayClass,
     kJavaLangClassLoader,
@@ -201,7 +199,7 @@
   mirror::Class* ResolveType(uint16_t type_idx, mirror::ArtMethod* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::Class* ResolveType(uint16_t type_idx, mirror::ArtField* referrer)
+  mirror::Class* ResolveType(uint16_t type_idx, ArtField* referrer)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a type with the given ID from the DexFile, storing the
@@ -232,10 +230,11 @@
                                    InvokeType type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtField* GetResolvedField(uint32_t field_idx, mirror::Class* field_declaring_class)
+  ArtField* GetResolvedField(uint32_t field_idx, mirror::Class* field_declaring_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtField* ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer,
-                                 bool is_static)
+  ArtField* GetResolvedField(uint32_t field_idx, mirror::DexCache* dex_cache)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtField* ResolveField(uint32_t field_idx, mirror::ArtMethod* referrer, bool is_static)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Resolve a field with a given ID from the DexFile, storing the
@@ -243,7 +242,7 @@
   // in ResolveType. What is unique is the is_static argument which is
   // used to determine if we are resolving a static or non-static
   // field.
-  mirror::ArtField* ResolveField(const DexFile& dex_file,
+  ArtField* ResolveField(const DexFile& dex_file,
                                  uint32_t field_idx,
                                  Handle<mirror::DexCache> dex_cache,
                                  Handle<mirror::ClassLoader> class_loader,
@@ -254,7 +253,7 @@
   // result in DexCache. The ClassLinker and ClassLoader are used as
   // in ResolveType. No is_static argument is provided so that Java
   // field resolution semantics are followed.
-  mirror::ArtField* ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
+  ArtField* ResolveFieldJLS(const DexFile& dex_file, uint32_t field_idx,
                                     Handle<mirror::DexCache> dex_cache,
                                     Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -354,7 +353,7 @@
   mirror::IfTable* AllocIfTable(Thread* self, size_t ifcount)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ObjectArray<mirror::ArtField>* AllocArtFieldArray(Thread* self, size_t length)
+  ArtField* AllocArtFieldArray(Thread* self, size_t length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::ObjectArray<mirror::StackTraceElement>* AllocStackTraceElementArray(Thread* self,
@@ -485,7 +484,6 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   mirror::DexCache* AllocDexCache(Thread* self, const DexFile& dex_file)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::ArtField* AllocArtField(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::Class* CreatePrimitiveClass(Thread* self, Primitive::Type type)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -507,15 +505,21 @@
   uint32_t SizeOfClassWithoutEmbeddedTables(const DexFile& dex_file,
                                             const DexFile::ClassDef& dex_class_def);
 
+  // Set up the class loader, class def index, and type index so that we can insert this class
+  // into the class table.
+  void SetupClass(const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
+                  Handle<mirror::Class> klass, mirror::ClassLoader* class_loader)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void LoadClass(Thread* self, const DexFile& dex_file, const DexFile::ClassDef& dex_class_def,
-                 Handle<mirror::Class> klass, mirror::ClassLoader* class_loader)
+                 Handle<mirror::Class> klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void LoadClassMembers(Thread* self, const DexFile& dex_file, const uint8_t* class_data,
                         Handle<mirror::Class> klass, const OatFile::OatClass* oat_class)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void LoadField(const DexFile& dex_file, const ClassDataItemIterator& it,
-                 Handle<mirror::Class> klass, Handle<mirror::ArtField> dst)
+  void LoadField(const ClassDataItemIterator& it, Handle<mirror::Class> klass,
+                 ArtField* dst)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::ArtMethod* LoadMethod(Thread* self, const DexFile& dex_file,
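
The SetupClass/LoadClass split declared above exists so that a class can be inserted into the class table before any field storage is allocated, both to avoid unfreeable LinearAlloc allocations for a class that loses the insertion race and so the table can report field roots. A toy sketch of that ordering, with all types and names as hypothetical stand-ins:

#include <cstdint>
#include <string>
#include <vector>

struct Klass {
  std::string descriptor;               // identity, filled in by the setup phase
  std::vector<uint32_t> field_indexes;  // storage, filled in by the load phase
};

std::vector<Klass*> g_class_table;      // stand-in for the ClassLinker table

// Phase 1 (SetupClass analogue): just enough identity to insert into the table.
void Setup(Klass* k, const std::string& descriptor) {
  k->descriptor = descriptor;
}

// Phase 2 (LoadClass analogue): runs only after insertion, so only the winning
// copy of the class ever allocates field storage.
void Load(Klass* k) {
  k->field_indexes = {0, 1, 2};
}

int main() {
  Klass k;
  Setup(&k, "LFoo;");
  g_class_table.push_back(&k);  // insertion (and any race) is resolved here
  Load(&k);
  return 0;
}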
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 3f6c5a0..a31a785 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -19,14 +19,13 @@
 #include <memory>
 #include <string>
 
+#include "art_field-inl.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
 #include "dex_file.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/heap.h"
 #include "mirror/accessible_object.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
@@ -173,10 +172,9 @@
         method->GetDeclaringClass()->GetDexCache()->GetResolvedTypes()));
   }
 
-  void AssertField(mirror::Class* klass, mirror::ArtField* field)
+  void AssertField(mirror::Class* klass, ArtField* field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     EXPECT_TRUE(field != nullptr);
-    EXPECT_TRUE(field->GetClass() != nullptr);
     EXPECT_EQ(klass, field->GetDeclaringClass());
     EXPECT_TRUE(field->GetName() != nullptr);
     EXPECT_TRUE(field->GetType<true>() != nullptr);
@@ -262,30 +260,27 @@
     }
 
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
-      mirror::ArtField* field = klass->GetInstanceField(i);
+      ArtField* field = klass->GetInstanceField(i);
       AssertField(klass.Get(), field);
       EXPECT_FALSE(field->IsStatic());
     }
 
     for (size_t i = 0; i < klass->NumStaticFields(); i++) {
-      mirror::ArtField* field = klass->GetStaticField(i);
+      ArtField* field = klass->GetStaticField(i);
       AssertField(klass.Get(), field);
       EXPECT_TRUE(field->IsStatic());
     }
 
     // Confirm that all instances field offsets are packed together at the start.
     EXPECT_GE(klass->NumInstanceFields(), klass->NumReferenceInstanceFields());
-    StackHandleScope<1> hs(Thread::Current());
-    MutableHandle<mirror::ArtField> fhandle = hs.NewHandle<mirror::ArtField>(nullptr);
     MemberOffset start_ref_offset = klass->GetFirstReferenceInstanceFieldOffset();
     MemberOffset end_ref_offset(start_ref_offset.Uint32Value() +
                                 klass->NumReferenceInstanceFields() *
                                     sizeof(mirror::HeapReference<mirror::Object>));
     MemberOffset current_ref_offset = start_ref_offset;
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
-      mirror::ArtField* field = klass->GetInstanceField(i);
-      fhandle.Assign(field);
-      mirror::Class* field_type = fhandle->GetType<true>();
+      ArtField* field = klass->GetInstanceField(i);
+      mirror::Class* field_type = field->GetType<true>();
       ASSERT_TRUE(field_type != nullptr);
       if (!field->IsPrimitiveType()) {
         ASSERT_TRUE(!field_type->IsPrimitive());
@@ -293,7 +288,7 @@
         if (current_ref_offset.Uint32Value() == end_ref_offset.Uint32Value()) {
           // While Reference.referent is not primitive, the ClassLinker
           // treats it as such so that the garbage collector won't scan it.
-          EXPECT_EQ(PrettyField(fhandle.Get()),
+          EXPECT_EQ(PrettyField(field),
                     "java.lang.Object java.lang.ref.Reference.referent");
         } else {
           current_ref_offset = MemberOffset(current_ref_offset.Uint32Value() +
@@ -425,7 +420,7 @@
     }
 
     for (size_t i = 0; i < offsets.size(); i++) {
-      mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
+      ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
       StringPiece field_name(field->GetName());
       if (field_name != offsets[i].java_name) {
         error = true;
@@ -434,7 +429,7 @@
     if (error) {
       for (size_t i = 0; i < offsets.size(); i++) {
         CheckOffset& offset = offsets[i];
-        mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
+        ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
         StringPiece field_name(field->GetName());
         if (field_name != offsets[i].java_name) {
           LOG(ERROR) << "JAVA FIELD ORDER MISMATCH NEXT LINE:";
@@ -448,7 +443,7 @@
 
     for (size_t i = 0; i < offsets.size(); i++) {
       CheckOffset& offset = offsets[i];
-      mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
+      ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
       if (field->GetOffset().Uint32Value() != offset.cpp_offset) {
         error = true;
       }
@@ -456,7 +451,7 @@
     if (error) {
       for (size_t i = 0; i < offsets.size(); i++) {
         CheckOffset& offset = offsets[i];
-        mirror::ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
+        ArtField* field = is_static ? klass->GetStaticField(i) : klass->GetInstanceField(i);
         if (field->GetOffset().Uint32Value() != offset.cpp_offset) {
           LOG(ERROR) << "OFFSET MISMATCH NEXT LINE:";
         }
@@ -486,15 +481,6 @@
   };
 };
 
-struct ArtFieldOffsets : public CheckOffsets<mirror::ArtField> {
-  ArtFieldOffsets() : CheckOffsets<mirror::ArtField>(false, "Ljava/lang/reflect/ArtField;") {
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, access_flags_),    "accessFlags"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, declaring_class_), "declaringClass"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, field_dex_idx_),   "fieldDexIndex"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, offset_),          "offset"));
-  };
-};
-
 struct ArtMethodOffsets : public CheckOffsets<mirror::ArtMethod> {
   ArtMethodOffsets() : CheckOffsets<mirror::ArtMethod>(false, "Ljava/lang/reflect/ArtMethod;") {
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, access_flags_),                   "accessFlags"));
@@ -522,8 +508,10 @@
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, ifields_),                       "iFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, iftable_),                       "ifTable"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, name_),                          "name"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, num_instance_fields_),           "numInstanceFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_instance_fields_), "numReferenceInstanceFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_static_fields_),   "numReferenceStaticFields"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, num_static_fields_),             "numStaticFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, object_size_),                   "objectSize"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, primitive_type_),                "primitiveType"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, reference_instance_offsets_),    "referenceInstanceOffsets"));
@@ -629,7 +617,6 @@
 TEST_F(ClassLinkerTest, ValidateFieldOrderOfJavaCppUnionClasses) {
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_TRUE(ObjectOffsets().Check());
-  EXPECT_TRUE(ArtFieldOffsets().Check());
   EXPECT_TRUE(ArtMethodOffsets().Check());
   EXPECT_TRUE(ClassOffsets().Check());
   EXPECT_TRUE(StringOffsets().Check());
@@ -844,21 +831,21 @@
   NullHandle<mirror::ClassLoader> class_loader;
   mirror::Class* c;
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Boolean;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Byte;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Character;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Double;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Float;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Integer;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Long;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/Short;", class_loader);
-  EXPECT_STREQ("value", c->GetIFields()->Get(0)->GetName());
+  EXPECT_STREQ("value", c->GetIFields()[0].GetName());
 }
 
 TEST_F(ClassLinkerTest, TwoClassLoadersOneClass) {
@@ -892,49 +879,47 @@
 
   EXPECT_EQ(9U, statics->NumStaticFields());
 
-  mirror::ArtField* s0 = mirror::Class::FindStaticField(soa.Self(), statics, "s0", "Z");
-  std::string temp;
-  EXPECT_STREQ(s0->GetClass()->GetDescriptor(&temp), "Ljava/lang/reflect/ArtField;");
+  ArtField* s0 = mirror::Class::FindStaticField(soa.Self(), statics, "s0", "Z");
   EXPECT_EQ(s0->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   EXPECT_EQ(true, s0->GetBoolean(statics.Get()));
   s0->SetBoolean<false>(statics.Get(), false);
 
-  mirror::ArtField* s1 = mirror::Class::FindStaticField(soa.Self(), statics, "s1", "B");
+  ArtField* s1 = mirror::Class::FindStaticField(soa.Self(), statics, "s1", "B");
   EXPECT_EQ(s1->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   EXPECT_EQ(5, s1->GetByte(statics.Get()));
   s1->SetByte<false>(statics.Get(), 6);
 
-  mirror::ArtField* s2 = mirror::Class::FindStaticField(soa.Self(), statics, "s2", "C");
+  ArtField* s2 = mirror::Class::FindStaticField(soa.Self(), statics, "s2", "C");
   EXPECT_EQ(s2->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   EXPECT_EQ('a', s2->GetChar(statics.Get()));
   s2->SetChar<false>(statics.Get(), 'b');
 
-  mirror::ArtField* s3 = mirror::Class::FindStaticField(soa.Self(), statics, "s3", "S");
+  ArtField* s3 = mirror::Class::FindStaticField(soa.Self(), statics, "s3", "S");
   EXPECT_EQ(s3->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   EXPECT_EQ(-536, s3->GetShort(statics.Get()));
   s3->SetShort<false>(statics.Get(), -535);
 
-  mirror::ArtField* s4 = mirror::Class::FindStaticField(soa.Self(), statics, "s4", "I");
+  ArtField* s4 = mirror::Class::FindStaticField(soa.Self(), statics, "s4", "I");
   EXPECT_EQ(s4->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   EXPECT_EQ(2000000000, s4->GetInt(statics.Get()));
   s4->SetInt<false>(statics.Get(), 2000000001);
 
-  mirror::ArtField* s5 = mirror::Class::FindStaticField(soa.Self(), statics, "s5", "J");
+  ArtField* s5 = mirror::Class::FindStaticField(soa.Self(), statics, "s5", "J");
   EXPECT_EQ(s5->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   EXPECT_EQ(0x1234567890abcdefLL, s5->GetLong(statics.Get()));
   s5->SetLong<false>(statics.Get(), INT64_C(0x34567890abcdef12));
 
-  mirror::ArtField* s6 = mirror::Class::FindStaticField(soa.Self(), statics, "s6", "F");
+  ArtField* s6 = mirror::Class::FindStaticField(soa.Self(), statics, "s6", "F");
   EXPECT_EQ(s6->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   EXPECT_DOUBLE_EQ(0.5, s6->GetFloat(statics.Get()));
   s6->SetFloat<false>(statics.Get(), 0.75);
 
-  mirror::ArtField* s7 = mirror::Class::FindStaticField(soa.Self(), statics, "s7", "D");
+  ArtField* s7 = mirror::Class::FindStaticField(soa.Self(), statics, "s7", "D");
   EXPECT_EQ(s7->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   EXPECT_DOUBLE_EQ(16777217.0, s7->GetDouble(statics.Get()));
   s7->SetDouble<false>(statics.Get(), 16777219);
 
-  mirror::ArtField* s8 = mirror::Class::FindStaticField(soa.Self(), statics, "s8",
+  ArtField* s8 = mirror::Class::FindStaticField(soa.Self(), statics, "s8",
                                                         "Ljava/lang/String;");
   EXPECT_EQ(s8->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
   EXPECT_TRUE(s8->GetObject(statics.Get())->AsString()->Equals("android"));
@@ -1006,13 +991,13 @@
   EXPECT_EQ(Aj1, A->FindVirtualMethodForVirtualOrInterface(Jj1));
   EXPECT_EQ(Aj2, A->FindVirtualMethodForVirtualOrInterface(Jj2));
 
-  mirror::ArtField* Afoo = mirror::Class::FindStaticField(soa.Self(), A, "foo",
+  ArtField* Afoo = mirror::Class::FindStaticField(soa.Self(), A, "foo",
                                                           "Ljava/lang/String;");
-  mirror::ArtField* Bfoo = mirror::Class::FindStaticField(soa.Self(), B, "foo",
+  ArtField* Bfoo = mirror::Class::FindStaticField(soa.Self(), B, "foo",
                                                           "Ljava/lang/String;");
-  mirror::ArtField* Jfoo = mirror::Class::FindStaticField(soa.Self(), J, "foo",
+  ArtField* Jfoo = mirror::Class::FindStaticField(soa.Self(), J, "foo",
                                                           "Ljava/lang/String;");
-  mirror::ArtField* Kfoo = mirror::Class::FindStaticField(soa.Self(), K, "foo",
+  ArtField* Kfoo = mirror::Class::FindStaticField(soa.Self(), K, "foo",
                                                           "Ljava/lang/String;");
   ASSERT_TRUE(Afoo != nullptr);
   EXPECT_EQ(Afoo, Bfoo);
@@ -1110,9 +1095,6 @@
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/DexCache;", class_loader);
   EXPECT_EQ(c->GetClassSize(), mirror::DexCache::ClassSize());
 
-  c = class_linker_->FindClass(soa.Self(), "Ljava/lang/reflect/ArtField;", class_loader);
-  EXPECT_EQ(c->GetClassSize(), mirror::ArtField::ClassSize());
-
   c = class_linker_->FindClass(soa.Self(), "Ljava/lang/reflect/ArtMethod;", class_loader);
   EXPECT_EQ(c->GetClassSize(), mirror::ArtMethod::ClassSize());
 }
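
The packed-reference-offset checks in this test rely on the field layout order that LinkFields establishes with LinkFieldsComparator: reference fields first, then primitives by decreasing size. A toy model of that ordering (the Field type and size encoding are illustrative, not ART's):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Field {
  bool is_reference;
  int size_bytes;  // 8, 4, 2, or 1 for primitives
  const char* name;
};

// References come first so their offsets pack together at the start of the
// object; primitives follow from widest to narrowest to minimize gaps.
bool FieldOrder(const Field& a, const Field& b) {
  if (a.is_reference != b.is_reference) {
    return a.is_reference;
  }
  return a.size_bytes > b.size_bytes;
}

int main() {
  std::vector<Field> fields = {
      {false, 1, "flag"}, {true, 4, "next"}, {false, 8, "id"}, {false, 2, "ch"}};
  std::stable_sort(fields.begin(), fields.end(), FieldOrder);
  for (const Field& f : fields) {
    std::printf("%s\n", f.name);  // prints: next id ch flag
  }
  return 0;
}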
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index d400010..60b7fa2 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -398,7 +398,7 @@
 
   ScopedObjectAccess soa(Thread::Current());
 
-  StackHandleScope<4> hs(Thread::Current());
+  StackHandleScope<2> hs(soa.Self());
   Handle<mirror::ClassLoader> class_loader = hs.NewHandle(
       soa.Decode<mirror::ClassLoader*>(jclass_loader));
 
@@ -409,16 +409,13 @@
 
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  Handle<mirror::ArtField> cookie_field =
-      hs.NewHandle(soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie));
-  Handle<mirror::ArtField> dex_file_field =
-      hs.NewHandle(
-          soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile));
+  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* dex_file_field =
+      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   mirror::Object* dex_path_list =
       soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
       GetObject(class_loader.Get());
-  if (dex_path_list != nullptr && dex_file_field.Get() != nullptr &&
-      cookie_field.Get() != nullptr) {
+  if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     mirror::Object* dex_elements_obj =
         soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
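
The Handle<mirror::ArtField> wrappers dropped above were only needed because a moving collector can relocate heap objects across suspension points; a handle reads through a slot the GC fixes up. Native ArtFields never move, so a raw pointer stays valid. A toy illustration of the indirection a handle buys (all names are stand-ins):

#include <cassert>

template <typename T>
class ToyHandle {  // reads through a slot that a moving "GC" can update
 public:
  explicit ToyHandle(T** slot) : slot_(slot) {}
  T* Get() const { return *slot_; }
 private:
  T** slot_;
};

struct MovableObject { int x; };

int main() {
  MovableObject a{1};
  MovableObject b = a;       // pretend the GC copied the object here
  MovableObject* root = &a;
  ToyHandle<MovableObject> h(&root);
  root = &b;                 // ...and fixed up the root slot
  assert(h.Get() == &b);     // the handle resolves to the new location
  // A non-moving native object needs none of this: a raw pointer suffices.
  return 0;
}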
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 36de221..407746f 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -18,6 +18,7 @@
 
 #include <sstream>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -160,7 +161,7 @@
   ThrowException("Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
-void ThrowIllegalAccessErrorField(mirror::Class* referrer, mirror::ArtField* accessed) {
+void ThrowIllegalAccessErrorField(mirror::Class* referrer, ArtField* accessed) {
   std::ostringstream msg;
   msg << "Field '" << PrettyField(accessed, false) << "' is inaccessible to class '"
       << PrettyDescriptor(referrer) << "'";
@@ -168,7 +169,7 @@
 }
 
 void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer,
-                                       mirror::ArtField* accessed) {
+                                       ArtField* accessed) {
   std::ostringstream msg;
   msg << "Final field '" << PrettyField(accessed, false) << "' cannot be written to by method '"
       << PrettyMethod(referrer) << "'";
@@ -226,7 +227,7 @@
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeErrorField(mirror::ArtField* resolved_field, bool is_static,
+void ThrowIncompatibleClassChangeErrorField(ArtField* resolved_field, bool is_static,
                                             mirror::ArtMethod* referrer) {
   std::ostringstream msg;
   msg << "Expected '" << PrettyField(resolved_field) << "' to be a "
@@ -314,7 +315,7 @@
 
 // NullPointerException
 
-void ThrowNullPointerExceptionForFieldAccess(mirror::ArtField* field, bool is_read) {
+void ThrowNullPointerExceptionForFieldAccess(ArtField* field, bool is_read) {
   std::ostringstream msg;
   msg << "Attempt to " << (is_read ? "read from" : "write to")
       << " field '" << PrettyField(field, true) << "' on a null object reference";
@@ -394,7 +395,7 @@
     case Instruction::IGET_BYTE:
     case Instruction::IGET_CHAR:
     case Instruction::IGET_SHORT: {
-      mirror::ArtField* field =
+      ArtField* field =
           Runtime::Current()->GetClassLinker()->ResolveField(instr->VRegC_22c(), method, false);
       ThrowNullPointerExceptionForFieldAccess(field, true /* read */);
       break;
@@ -408,7 +409,7 @@
     case Instruction::IGET_OBJECT_QUICK: {
       // Since we replaced the field index, we ask the verifier to tell us which
       // field is accessed at this location.
-      mirror::ArtField* field =
+      ArtField* field =
           verifier::MethodVerifier::FindAccessedFieldAtDexPc(method, throw_dex_pc);
       if (field != NULL) {
         // NPE with precise message.
@@ -426,7 +427,7 @@
     case Instruction::IPUT_BYTE:
     case Instruction::IPUT_CHAR:
     case Instruction::IPUT_SHORT: {
-      mirror::ArtField* field =
+      ArtField* field =
           Runtime::Current()->GetClassLinker()->ResolveField(instr->VRegC_22c(), method, false);
       ThrowNullPointerExceptionForFieldAccess(field, false /* write */);
       break;
@@ -440,7 +441,7 @@
     case Instruction::IPUT_OBJECT_QUICK: {
       // Since we replaced the field index, we ask the verifier to tell us which
       // field is accessed at this location.
-      mirror::ArtField* field =
+      ArtField* field =
           verifier::MethodVerifier::FindAccessedFieldAtDexPc(method, throw_dex_pc);
       if (field != NULL) {
         // NPE with precise message.
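
The Throw* helpers in this file all follow the same shape: compose the detail message with a std::ostringstream, then pass it to ThrowException. A standalone sketch of that pattern, printing instead of throwing (NullPointerMessage is a hypothetical helper; the message text matches ThrowNullPointerExceptionForFieldAccess above):

#include <iostream>
#include <sstream>
#include <string>

std::string NullPointerMessage(const std::string& pretty_field, bool is_read) {
  std::ostringstream msg;
  msg << "Attempt to " << (is_read ? "read from" : "write to")
      << " field '" << pretty_field << "' on a null object reference";
  return msg.str();
}

int main() {
  std::cout << NullPointerMessage("int Foo.count", true) << std::endl;
  return 0;
}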
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 9e749e3..df95cf9 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -22,11 +22,11 @@
 
 namespace art {
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
 }  // namespace mirror
+class ArtField;
 class Signature;
 class StringPiece;
 
@@ -81,10 +81,10 @@
 void ThrowIllegalAccessErrorMethod(mirror::Class* referrer, mirror::ArtMethod* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorField(mirror::Class* referrer, mirror::ArtField* accessed)
+void ThrowIllegalAccessErrorField(mirror::Class* referrer, ArtField* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer, mirror::ArtField* accessed)
+void ThrowIllegalAccessErrorFinalField(mirror::ArtMethod* referrer, ArtField* accessed)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIllegalAccessError(mirror::Class* referrer, const char* fmt, ...)
@@ -112,7 +112,7 @@
                                                                 mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeErrorField(mirror::ArtField* resolved_field, bool is_static,
+void ThrowIncompatibleClassChangeErrorField(ArtField* resolved_field, bool is_static,
                                             mirror::ArtMethod* referrer)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -160,7 +160,7 @@
 
 // NullPointerException
 
-void ThrowNullPointerExceptionForFieldAccess(mirror::ArtField* field,
+void ThrowNullPointerExceptionForFieldAccess(ArtField* field,
                                              bool is_read)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) COLD_ATTR;
 
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index cfcdf4c..c074b54 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -21,6 +21,7 @@
 #include <set>
 
 #include "arch/context.h"
+#include "art_field-inl.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
@@ -30,7 +31,6 @@
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
 #include "jdwp/object_registry.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -266,14 +266,14 @@
   }
 
   void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                 uint32_t dex_pc, mirror::ArtField* field)
+                 uint32_t dex_pc, ArtField* field)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     UNUSED(thread);
     Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
   }
 
   void FieldWritten(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object,
-                    mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                    mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field,
                     const JValue& field_value)
       OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Dbg::PostFieldModificationEvent(method, dex_pc, this_object, field, &field_value);
@@ -307,7 +307,6 @@
 // Runtime JDWP state.
 static JDWP::JdwpState* gJdwpState = nullptr;
 static bool gDebuggerConnected;  // debugger or DDMS is connected.
-static bool gDisposed;           // debugger called VirtualMachine.Dispose, so we should drop the connection.
 
 static bool gDdmThreadNotification = false;
 
@@ -319,6 +318,7 @@
 static Dbg::HpsgWhat gDdmNhsgWhat;
 
 bool Dbg::gDebuggerActive = false;
+bool Dbg::gDisposed = false;
 ObjectRegistry* Dbg::gRegistry = nullptr;
 
 // Recent allocation tracking.
@@ -551,7 +551,7 @@
     gJdwpState->PostVMDeath();
   }
   // Prevent the JDWP thread from processing JDWP incoming packets after we close the connection.
-  Disposed();
+  Dispose();
   delete gJdwpState;
   gJdwpState = nullptr;
   delete gRegistry;
@@ -599,14 +599,6 @@
   gDisposed = false;
 }
 
-void Dbg::Disposed() {
-  gDisposed = true;
-}
-
-bool Dbg::IsDisposed() {
-  return gDisposed;
-}
-
 bool Dbg::RequiresDeoptimization() {
   // We don't need deoptimization if everything runs with interpreter after
   // enabling -Xint mode.
@@ -1275,18 +1267,37 @@
   return JDWP::ERR_NONE;
 }
 
-JDWP::ObjectId Dbg::CreateString(const std::string& str) {
-  return gRegistry->Add(mirror::String::AllocFromModifiedUtf8(Thread::Current(), str.c_str()));
+JDWP::JdwpError Dbg::CreateString(const std::string& str, JDWP::ObjectId* new_string_id) {
+  Thread* self = Thread::Current();
+  mirror::String* new_string = mirror::String::AllocFromModifiedUtf8(self, str.c_str());
+  if (new_string == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    LOG(ERROR) << "Could not allocate string";
+    *new_string_id = 0;
+    return JDWP::ERR_OUT_OF_MEMORY;
+  }
+  *new_string_id = gRegistry->Add(new_string);
+  return JDWP::ERR_NONE;
 }
 
-JDWP::JdwpError Dbg::CreateObject(JDWP::RefTypeId class_id, JDWP::ObjectId* new_object) {
+JDWP::JdwpError Dbg::CreateObject(JDWP::RefTypeId class_id, JDWP::ObjectId* new_object_id) {
   JDWP::JdwpError error;
   mirror::Class* c = DecodeClass(class_id, &error);
   if (c == nullptr) {
-    *new_object = 0;
+    *new_object_id = 0;
     return error;
   }
-  *new_object = gRegistry->Add(c->AllocObject(Thread::Current()));
+  Thread* self = Thread::Current();
+  mirror::Object* new_object = c->AllocObject(self);
+  if (new_object == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    LOG(ERROR) << "Could not allocate object of type " << PrettyDescriptor(c);
+    *new_object_id = 0;
+    return JDWP::ERR_OUT_OF_MEMORY;
+  }
+  *new_object_id = gRegistry->Add(new_object);
   return JDWP::ERR_NONE;
 }
 
@@ -1294,21 +1305,30 @@
  * Used by Eclipse's "Display" view to evaluate "new byte[5]" to get "(byte[]) [0, 0, 0, 0, 0]".
  */
 JDWP::JdwpError Dbg::CreateArrayObject(JDWP::RefTypeId array_class_id, uint32_t length,
-                                       JDWP::ObjectId* new_array) {
+                                       JDWP::ObjectId* new_array_id) {
   JDWP::JdwpError error;
   mirror::Class* c = DecodeClass(array_class_id, &error);
   if (c == nullptr) {
-    *new_array = 0;
+    *new_array_id = 0;
     return error;
   }
-  *new_array = gRegistry->Add(mirror::Array::Alloc<true>(Thread::Current(), c, length,
-                                                         c->GetComponentSizeShift(),
-                                                         Runtime::Current()->GetHeap()->GetCurrentAllocator()));
+  Thread* self = Thread::Current();
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  mirror::Array* new_array = mirror::Array::Alloc<true>(self, c, length,
+                                                        c->GetComponentSizeShift(),
+                                                        heap->GetCurrentAllocator());
+  if (new_array == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    LOG(ERROR) << "Could not allocate array of type " << PrettyDescriptor(c);
+    *new_array_id = 0;
+    return JDWP::ERR_OUT_OF_MEMORY;
+  }
+  *new_array_id = gRegistry->Add(new_array);
   return JDWP::ERR_NONE;
 }
 
-JDWP::FieldId Dbg::ToFieldId(const mirror::ArtField* f) {
-  CHECK(!kMovingFields);
+JDWP::FieldId Dbg::ToFieldId(const ArtField* f) {
   return static_cast<JDWP::FieldId>(reinterpret_cast<uintptr_t>(f));
 }
 
@@ -1318,10 +1338,9 @@
   return static_cast<JDWP::MethodId>(reinterpret_cast<uintptr_t>(m));
 }
 
-static mirror::ArtField* FromFieldId(JDWP::FieldId fid)
+static ArtField* FromFieldId(JDWP::FieldId fid)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  CHECK(!kMovingFields);
-  return reinterpret_cast<mirror::ArtField*>(static_cast<uintptr_t>(fid));
+  return reinterpret_cast<ArtField*>(static_cast<uintptr_t>(fid));
 }
 
 static mirror::ArtMethod* FromMethodId(JDWP::MethodId mid)
@@ -1358,8 +1377,8 @@
 }
 
 bool Dbg::MatchField(JDWP::RefTypeId expected_type_id, JDWP::FieldId expected_field_id,
-                     mirror::ArtField* event_field) {
-  mirror::ArtField* expected_field = FromFieldId(expected_field_id);
+                     ArtField* event_field) {
+  ArtField* expected_field = FromFieldId(expected_field_id);
   if (expected_field != event_field) {
     return false;
   }
@@ -1396,7 +1415,7 @@
 }
 
 std::string Dbg::GetFieldName(JDWP::FieldId field_id) {
-  mirror::ArtField* f = FromFieldId(field_id);
+  ArtField* f = FromFieldId(field_id);
   if (f == nullptr) {
     return "NULL";
   }
@@ -1483,7 +1502,7 @@
   expandBufAdd4BE(pReply, instance_field_count + static_field_count);
 
   for (size_t i = 0; i < instance_field_count + static_field_count; ++i) {
-    mirror::ArtField* f = (i < instance_field_count) ? c->GetInstanceField(i) : c->GetStaticField(i - instance_field_count);
+    ArtField* f = (i < instance_field_count) ? c->GetInstanceField(i) : c->GetStaticField(i - instance_field_count);
     expandBufAddFieldId(pReply, ToFieldId(f));
     expandBufAddUtf8String(pReply, f->GetName());
     expandBufAddUtf8String(pReply, f->GetTypeDescriptor());
@@ -1653,7 +1672,7 @@
 
 void Dbg::OutputFieldValue(JDWP::FieldId field_id, const JValue* field_value,
                            JDWP::ExpandBuf* pReply) {
-  mirror::ArtField* f = FromFieldId(field_id);
+  ArtField* f = FromFieldId(field_id);
   JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor());
   OutputJValue(tag, field_value, pReply);
 }
@@ -1696,7 +1715,7 @@
   if ((!is_static && o == nullptr) || error != JDWP::ERR_NONE) {
     return JDWP::ERR_INVALID_OBJECT;
   }
-  mirror::ArtField* f = FromFieldId(field_id);
+  ArtField* f = FromFieldId(field_id);
 
   mirror::Class* receiver_class = c;
   if (receiver_class == nullptr && o != nullptr) {
@@ -1758,7 +1777,7 @@
   if ((!is_static && o == nullptr) || error != JDWP::ERR_NONE) {
     return JDWP::ERR_INVALID_OBJECT;
   }
-  mirror::ArtField* f = FromFieldId(field_id);
+  ArtField* f = FromFieldId(field_id);
 
   // The RI only enforces the static/non-static mismatch in one direction.
   // TODO: should we change the tests and check both?
@@ -1795,11 +1814,10 @@
     if (v != nullptr) {
       mirror::Class* field_type;
       {
-        StackHandleScope<3> hs(Thread::Current());
+        StackHandleScope<2> hs(Thread::Current());
         HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v));
-        HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
         HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
-        field_type = h_f->GetType<true>();
+        field_type = f->GetType<true>();
       }
       if (!field_type->IsAssignableFrom(v->GetClass())) {
         return JDWP::ERR_INVALID_OBJECT;
@@ -1876,7 +1894,7 @@
   // We still need to report the zombie threads' names, so we can't just call Thread::GetThreadName.
   mirror::Object* thread_object = gRegistry->Get<mirror::Object*>(thread_id, &error);
   CHECK(thread_object != nullptr) << error;
-  mirror::ArtField* java_lang_Thread_name_field =
+  ArtField* java_lang_Thread_name_field =
       soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
   mirror::String* s =
       reinterpret_cast<mirror::String*>(java_lang_Thread_name_field->GetObject(thread_object));
@@ -1904,7 +1922,7 @@
   } else if (error == JDWP::ERR_NONE) {
     mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_Thread);
     CHECK(c != nullptr);
-    mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
     CHECK(f != nullptr);
     mirror::Object* group = f->GetObject(thread_object);
     CHECK(group != nullptr);
@@ -1945,7 +1963,7 @@
     return error;
   }
   ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroupName");
-  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
   CHECK(f != nullptr);
   mirror::String* s = reinterpret_cast<mirror::String*>(f->GetObject(thread_group));
 
@@ -1964,7 +1982,7 @@
   mirror::Object* parent;
   {
     ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroupParent");
-    mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_parent);
+    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_parent);
     CHECK(f != nullptr);
     parent = f->GetObject(thread_group);
   }
@@ -1979,7 +1997,7 @@
   CHECK(thread_group != nullptr);
 
   // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
-  mirror::ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
   mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
   {
     // The "groups" field is declared as a java.util.List: check it really is
@@ -1991,8 +2009,8 @@
   }
 
   // Get the array and size out of the ArrayList<ThreadGroup>...
-  mirror::ArtField* array_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_array);
-  mirror::ArtField* size_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_size);
+  ArtField* array_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_array);
+  ArtField* size_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_size);
   mirror::ObjectArray<mirror::Object>* groups_array =
       array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
   const int32_t size = size_field->GetInt(groups_array_list);
@@ -2038,7 +2056,7 @@
 
 JDWP::ObjectId Dbg::GetSystemThreadGroupId() {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
+  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
   mirror::Object* group = f->GetObject(f->GetDeclaringClass());
   return gRegistry->Add(group);
 }
@@ -2132,7 +2150,7 @@
   if (desired_thread_group == nullptr) {
     return true;
   }
-  mirror::ArtField* thread_group_field = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+  ArtField* thread_group_field = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
   DCHECK(thread_group_field != nullptr);
   mirror::Object* group = thread_group_field->GetObject(peer);
   return (group == desired_thread_group);
@@ -2721,7 +2739,7 @@
 }
 
 void Dbg::PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc,
-                               mirror::Object* this_object, mirror::ArtField* f) {
+                               mirror::Object* this_object, ArtField* f) {
   if (!IsDebuggerActive()) {
     return;
   }
@@ -2734,7 +2752,7 @@
 }
 
 void Dbg::PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
-                                     mirror::Object* this_object, mirror::ArtField* f,
+                                     mirror::Object* this_object, ArtField* f,
                                      const JValue* field_value) {
   if (!IsDebuggerActive()) {
     return;
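
Note on the debugger.cc hunks above: they are mostly mechanical fallout from ArtField moving out of the managed heap into native memory. The one behavioral change is in the StackHandleScope hunk, where the field pointer no longer needs a GC handle across a suspension point. A minimal model of why, with stand-in types rather than ART's real declarations:

    // Sketch only, not ART code: with a moving collector, raw pointers into
    // the managed heap are invalid across any suspension point and must be
    // registered in a handle scope. Native metadata (the new ArtField) is
    // never moved, so a raw pointer stays valid even if the callee suspends.
    #include <cassert>

    struct ManagedObject { int payload; };    // may be relocated by a moving GC
    struct NativeField { unsigned offset; };  // native memory, never moved

    ManagedObject* ResolveFieldType(NativeField*) {  // may allocate and suspend
      static ManagedObject type{42};
      return &type;
    }

    int main() {
      NativeField f{8};
      // No HandleWrapper needed for 'f': nothing can move it while we suspend.
      ManagedObject* type = ResolveFieldType(&f);
      assert(type->payload == 42);
    }

This is exactly the shift from HandleWrapper<mirror::ArtField> to a plain ArtField* in the SetFieldValueImpl hunk.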
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 62eda62..c287121 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -37,13 +37,13 @@
 
 namespace art {
 namespace mirror {
-class ArtField;
 class ArtMethod;
 class Class;
 class Object;
 class Throwable;
 }  // namespace mirror
 class AllocRecord;
+class ArtField;
 class ObjectRegistry;
 class ScopedObjectAccessUnchecked;
 class StackVisitor;
@@ -239,7 +239,9 @@
   static void GoActive()
       LOCKS_EXCLUDED(Locks::breakpoint_lock_, Locks::deoptimization_lock_, Locks::mutator_lock_);
   static void Disconnected() LOCKS_EXCLUDED(Locks::deoptimization_lock_, Locks::mutator_lock_);
-  static void Disposed();
+  static void Dispose() {
+    gDisposed = true;
+  }
 
   // Returns true if we're actually debugging with a real debugger, false if it's
   // just DDMS (or nothing at all).
@@ -255,9 +257,12 @@
 
   // Returns true if a method has any breakpoints.
   static bool MethodHasAnyBreakpoints(mirror::ArtMethod* method)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::breakpoint_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(Locks::breakpoint_lock_);
 
-  static bool IsDisposed();
+  static bool IsDisposed() {
+    return gDisposed;
+  }
 
   /*
    * Time, in milliseconds, since the last debugger activity.  Does not
@@ -313,12 +318,12 @@
                                           JDWP::Request* request)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static JDWP::ObjectId CreateString(const std::string& str)
+  static JDWP::JdwpError CreateString(const std::string& str, JDWP::ObjectId* new_string_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  static JDWP::JdwpError CreateObject(JDWP::RefTypeId class_id, JDWP::ObjectId* new_object)
+  static JDWP::JdwpError CreateObject(JDWP::RefTypeId class_id, JDWP::ObjectId* new_object_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static JDWP::JdwpError CreateArrayObject(JDWP::RefTypeId array_class_id, uint32_t length,
-                                           JDWP::ObjectId* new_array)
+                                           JDWP::ObjectId* new_array_id)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   //
@@ -335,7 +340,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static bool MatchField(JDWP::RefTypeId expected_type_id, JDWP::FieldId expected_field_id,
-                         mirror::ArtField* event_field)
+                         ArtField* event_field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static bool MatchInstance(JDWP::ObjectId expected_instance_id, mirror::Object* event_instance)
@@ -520,10 +525,10 @@
     kMethodExit     = 0x08,
   };
   static void PostFieldAccessEvent(mirror::ArtMethod* m, int dex_pc, mirror::Object* this_object,
-                                   mirror::ArtField* f)
+                                   ArtField* f)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostFieldModificationEvent(mirror::ArtMethod* m, int dex_pc,
-                                         mirror::Object* this_object, mirror::ArtField* f,
+                                         mirror::Object* this_object, ArtField* f,
                                          const JValue* field_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void PostException(mirror::Throwable* exception)
@@ -701,7 +706,7 @@
   static JDWP::JdwpTypeTag GetTypeTag(mirror::Class* klass)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static JDWP::FieldId ToFieldId(const mirror::ArtField* f)
+  static JDWP::FieldId ToFieldId(const ArtField* f)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static void SetJdwpLocation(JDWP::JdwpLocation* location, mirror::ArtMethod* m, uint32_t dex_pc)
@@ -756,6 +761,10 @@
   // Indicates whether the debugger is making requests.
   static bool gDebuggerActive;
 
+  // Indicates whether we should drop the JDWP connection: either the runtime is shutting
+  // down or the debugger has called VirtualMachine.Dispose.
+  static bool gDisposed;
+
   // The registry mapping objects to JDWP ids.
   static ObjectRegistry* gRegistry;
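
Two API notes on the debugger.h hunk above: Dispose() and IsDisposed() become trivial inline accessors of the new gDisposed flag, and the object-creation helpers switch from returning an id to returning a JDWP error code with an out-parameter, so an allocation failure can be reported instead of handing back a bogus id. A hedged, self-contained sketch of that second convention (names are stand-ins, not ART's):

    #include <cstdint>
    #include <string>

    enum JdwpError { ERR_NONE, ERR_OUT_OF_MEMORY };
    using ObjectId = uint64_t;

    JdwpError CreateString(const std::string& str, ObjectId* new_string_id) {
      if (str.size() > (1u << 20)) {  // stand-in for a failed heap allocation
        return ERR_OUT_OF_MEMORY;
      }
      *new_string_id = 1;             // stand-in for gRegistry->Add(new_string)
      return ERR_NONE;
    }

    int main() {
      ObjectId id = 0;
      return CreateString("hello", &id) == ERR_NONE ? 0 : 1;
    }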
 
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 8685d8e..03a47a3 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -27,6 +27,7 @@
 #include <memory>
 #include <sstream>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "class_linker.h"
@@ -34,7 +35,6 @@
 #include "dex_file_verifier.h"
 #include "globals.h"
 #include "leb128.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/string.h"
 #include "os.h"
@@ -1210,7 +1210,7 @@
 }
 
 template<bool kTransactionActive>
-void EncodedStaticFieldValueIterator::ReadValueToField(Handle<mirror::ArtField> field) const {
+void EncodedStaticFieldValueIterator::ReadValueToField(ArtField* field) const {
   switch (type_) {
     case kBoolean: field->SetBoolean<kTransactionActive>(field->GetDeclaringClass(), jval_.z); break;
     case kByte:    field->SetByte<kTransactionActive>(field->GetDeclaringClass(), jval_.b); break;
@@ -1222,13 +1222,11 @@
     case kDouble:  field->SetDouble<kTransactionActive>(field->GetDeclaringClass(), jval_.d); break;
     case kNull:    field->SetObject<kTransactionActive>(field->GetDeclaringClass(), NULL); break;
     case kString: {
-      CHECK(!kMovingFields);
       mirror::String* resolved = linker_->ResolveString(dex_file_, jval_.i, *dex_cache_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
       break;
     }
     case kType: {
-      CHECK(!kMovingFields);
       mirror::Class* resolved = linker_->ResolveType(dex_file_, jval_.i, *dex_cache_,
                                                      *class_loader_);
       field->SetObject<kTransactionActive>(field->GetDeclaringClass(), resolved);
@@ -1237,8 +1235,8 @@
     default: UNIMPLEMENTED(FATAL) << ": type " << type_;
   }
 }
-template void EncodedStaticFieldValueIterator::ReadValueToField<true>(Handle<mirror::ArtField> field) const;
-template void EncodedStaticFieldValueIterator::ReadValueToField<false>(Handle<mirror::ArtField> field) const;
+template void EncodedStaticFieldValueIterator::ReadValueToField<true>(ArtField* field) const;
+template void EncodedStaticFieldValueIterator::ReadValueToField<false>(ArtField* field) const;
 
 CatchHandlerIterator::CatchHandlerIterator(const DexFile::CodeItem& code_item, uint32_t address) {
   handler_.address_ = -1;
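
The two `template void ... ReadValueToField<true/false>` lines retained at the end of the dex_file.cc hunk are explicit instantiations, now over ArtField* instead of Handle<mirror::ArtField>. Because the member template is defined in the .cc file, every specialization the linker may need must be named there. A standalone illustration of the mechanism:

    struct Iterator {
      template <bool kTransactionActive>
      void ReadValueToField(int* field) const;
    };

    template <bool kTransactionActive>
    void Iterator::ReadValueToField(int* field) const {
      *field = kTransactionActive ? 1 : 0;
    }

    // Emit both specializations into this translation unit for the linker.
    template void Iterator::ReadValueToField<true>(int*) const;
    template void Iterator::ReadValueToField<false>(int*) const;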
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 8e2d6c2..5bdd9b6 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -37,11 +37,11 @@
 // TODO: remove dependencies on mirror classes, primarily by moving
 // EncodedStaticFieldValueIterator to its own file.
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class ClassLoader;
   class DexCache;
 }  // namespace mirror
+class ArtField;
 class ClassLinker;
 class MemMap;
 class OatDexFile;
@@ -1298,7 +1298,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
-  void ReadValueToField(Handle<mirror::ArtField> field) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ReadValueToField(ArtField* field) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool HasNext() const { return pos_ < array_size_; }
 
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index bc5cf9b..411ec43 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -1630,8 +1630,10 @@
   return frame->CIE_pointer != 0;
 }
 
-static bool FixupEHFrame(off_t base_address_delta,
-                           uint8_t* eh_frame, size_t eh_frame_size) {
+template <typename Elf_SOff>
+static bool FixupEHFrame(Elf_SOff base_address_delta, uint8_t* eh_frame, size_t eh_frame_size) {
+  // TODO: Check the spec to see whether this is really data-dependent, or whether the ELF
+  //       file itself makes clear whether we should expect 32-bit or 64-bit entries.
   if (*(reinterpret_cast<uint32_t*>(eh_frame)) == 0xffffffff) {
     FDE64* last_frame = reinterpret_cast<FDE64*>(eh_frame + eh_frame_size);
     FDE64* frame = NextFDE(reinterpret_cast<FDE64*>(eh_frame));
@@ -1643,6 +1645,7 @@
     }
     return true;
   } else {
+    CHECK(IsInt<32>(base_address_delta));
     FDE32* last_frame = reinterpret_cast<FDE32*>(eh_frame + eh_frame_size);
     FDE32* frame = NextFDE(reinterpret_cast<FDE32*>(eh_frame));
     for (; frame < last_frame; frame = NextFDE(frame)) {
@@ -1772,7 +1775,9 @@
   uint8_t* current_instruction_;
 };
 
-static bool FixupDebugLine(off_t base_offset_delta, DebugLineInstructionIterator* iter) {
+template <typename Elf_SOff>
+static bool FixupDebugLine(Elf_SOff base_offset_delta, DebugLineInstructionIterator* iter) {
+  CHECK(IsInt<32>(base_offset_delta));
   for (; iter->GetInstruction(); iter->Next()) {
     if (iter->IsExtendedOpcode() && iter->GetOpcode() == dwarf::DW_LNE_set_address) {
       *reinterpret_cast<uint32_t*>(iter->GetArguments()) += base_offset_delta;
@@ -2044,7 +2049,9 @@
   DebugTag* current_tag_;
 };
 
-static bool FixupDebugInfo(off_t base_address_delta, DebugInfoIterator* iter) {
+template <typename Elf_SOff>
+static bool FixupDebugInfo(Elf_SOff base_address_delta, DebugInfoIterator* iter) {
+  CHECK(IsInt<32>(base_address_delta));
   do {
     if (iter->GetCurrentTag()->GetAttrSize(dwarf::DW_AT_low_pc) != sizeof(int32_t) ||
         iter->GetCurrentTag()->GetAttrSize(dwarf::DW_AT_high_pc) != sizeof(int32_t)) {
@@ -2066,7 +2073,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::FixupDebugSections(off_t base_address_delta) {
+    ::FixupDebugSections(typename std::make_signed<Elf_Off>::type base_address_delta) {
   const Elf_Shdr* debug_info = FindSectionByName(".debug_info");
   const Elf_Shdr* debug_abbrev = FindSectionByName(".debug_abbrev");
   const Elf_Shdr* eh_frame = FindSectionByName(".eh_frame");
@@ -2280,7 +2287,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::Fixup(uintptr_t base_address) {
+    ::Fixup(Elf_Addr base_address) {
   if (!FixupDynamic(base_address)) {
     LOG(WARNING) << "Failed to fixup .dynamic in " << file_->GetPath();
     return false;
@@ -2305,7 +2312,8 @@
     LOG(WARNING) << "Failed to fixup .rel.dyn in " << file_->GetPath();
     return false;
   }
-  if (!FixupDebugSections(base_address)) {
+  static_assert(sizeof(Elf_Off) >= sizeof(base_address), "Potentially losing precision.");
+  if (!FixupDebugSections(static_cast<Elf_Off>(base_address))) {
     LOG(WARNING) << "Failed to fixup debug sections in " << file_->GetPath();
     return false;
   }
@@ -2317,7 +2325,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::FixupDynamic(uintptr_t base_address) {
+    ::FixupDynamic(Elf_Addr base_address) {
   for (Elf_Word i = 0; i < GetDynamicNum(); i++) {
     Elf_Dyn& elf_dyn = GetDynamic(i);
     Elf_Word d_tag = elf_dyn.d_tag;
@@ -2341,7 +2349,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::FixupSectionHeaders(uintptr_t base_address) {
+    ::FixupSectionHeaders(Elf_Addr base_address) {
   for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
     Elf_Shdr* sh = GetSectionHeader(i);
     CHECK(sh != nullptr);
@@ -2365,7 +2373,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::FixupProgramHeaders(uintptr_t base_address) {
+    ::FixupProgramHeaders(Elf_Addr base_address) {
   // TODO: ELFObjectFile doesn't have give to Elf_Phdr, so we do that ourselves for now.
   for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
     Elf_Phdr* ph = GetProgramHeader(i);
@@ -2392,7 +2400,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::FixupSymbols(uintptr_t base_address, bool dynamic) {
+    ::FixupSymbols(Elf_Addr base_address, bool dynamic) {
   Elf_Word section_type = dynamic ? SHT_DYNSYM : SHT_SYMTAB;
   // TODO: Unfortunate ELFObjectFile has protected symbol access, so use ElfFile
   Elf_Shdr* symbol_section = FindSectionByType(section_type);
@@ -2422,7 +2430,7 @@
           typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
 bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
     Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
-    ::FixupRelocations(uintptr_t base_address) {
+    ::FixupRelocations(Elf_Addr base_address) {
   for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
     Elf_Shdr* sh = GetSectionHeader(i);
     CHECK(sh != nullptr);
@@ -2622,7 +2630,14 @@
     return elf_file->elf32_->Strip(error_msg);
 }
 
-bool ElfFile::Fixup(uintptr_t base_address) {
+bool ElfFile::Fixup(uint64_t base_address) {
+  if (elf64_.get() != nullptr) {
+    return elf64_->Fixup(static_cast<Elf64_Addr>(base_address));
+  } else {
+    DCHECK(elf32_.get() != nullptr);
+    CHECK(IsUint<32>(base_address)) << std::hex << base_address;
+    return elf32_->Fixup(static_cast<Elf32_Addr>(base_address));
+  }
-  DELEGATE_TO_IMPL(Fixup, base_address);
 }
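
The elf_file.cc changes above push concrete ELF types through the fixup paths: the public ElfFile::Fixup takes the widest address type (uint64_t) and dispatches to the 32- or 64-bit implementation, checking the range before narrowing. (With both branches returning, the old DELEGATE_TO_IMPL line becomes unreachable and goes away.) A self-contained model of the dispatch, with a plain assert standing in for ART's CHECK(IsUint<32>(...)):

    #include <cassert>
    #include <cstdint>

    using Elf32_Addr = uint32_t;
    using Elf64_Addr = uint64_t;

    bool Fixup32(Elf32_Addr) { return true; }
    bool Fixup64(Elf64_Addr) { return true; }

    bool Fixup(uint64_t base_address, bool is_64bit) {
      if (is_64bit) {
        return Fixup64(static_cast<Elf64_Addr>(base_address));
      }
      assert(base_address <= UINT32_MAX);  // stand-in for CHECK(IsUint<32>(...))
      return Fixup32(static_cast<Elf32_Addr>(base_address));
    }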
 
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index 41c54bc..286c2a6 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -78,9 +78,9 @@
 
   // Fixup an ELF file so that the oat header will be loaded at oat_begin.
   // Returns true on success, false on failure.
-  static bool Fixup(File* file, uintptr_t oat_data_begin);
+  static bool Fixup(File* file, uint64_t oat_data_begin);
 
-  bool Fixup(uintptr_t base_address);
+  bool Fixup(uint64_t base_address);
 
   bool Is64Bit() const {
     return elf64_.get() != nullptr;
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
index a70fa17..16d3857 100644
--- a/runtime/elf_file_impl.h
+++ b/runtime/elf_file_impl.h
@@ -19,6 +19,7 @@
 
 #include <map>
 #include <memory>
+#include <type_traits>
 #include <vector>
 
 // Explicitly include our own elf.h to avoid Linux and other dependencies.
@@ -102,13 +103,13 @@
   // executable is true at run time, false at compile time.
   bool Load(bool executable, std::string* error_msg);
 
-  bool Fixup(uintptr_t base_address);
-  bool FixupDynamic(uintptr_t base_address);
-  bool FixupSectionHeaders(uintptr_t base_address);
-  bool FixupProgramHeaders(uintptr_t base_address);
-  bool FixupSymbols(uintptr_t base_address, bool dynamic);
-  bool FixupRelocations(uintptr_t base_address);
-  bool FixupDebugSections(off_t base_address_delta);
+  bool Fixup(Elf_Addr base_address);
+  bool FixupDynamic(Elf_Addr base_address);
+  bool FixupSectionHeaders(Elf_Addr base_address);
+  bool FixupProgramHeaders(Elf_Addr base_address);
+  bool FixupSymbols(Elf_Addr base_address, bool dynamic);
+  bool FixupRelocations(Elf_Addr base_address);
+  bool FixupDebugSections(typename std::make_signed<Elf_Off>::type base_address_delta);
 
   bool Strip(std::string* error_msg);
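
FixupDebugSections above now takes `typename std::make_signed<Elf_Off>::type`: a fixup delta can be negative, and deriving the signed counterpart of the file-offset type keeps the parameter width matched to each template instantiation. A small sketch of what that declaration expands to, under assumed 32- and 64-bit offset types:

    #include <cstdint>
    #include <type_traits>

    template <typename Elf_Off>
    struct ElfFileImpl {
      using SOff = typename std::make_signed<Elf_Off>::type;
      bool FixupDebugSections(SOff base_address_delta) {
        return base_address_delta != 0;  // real code patches the DWARF sections
      }
    };

    static_assert(std::is_same<ElfFileImpl<uint32_t>::SOff, int32_t>::value, "width follows Elf_Off");
    static_assert(std::is_same<ElfFileImpl<uint64_t>::SOff, int64_t>::value, "width follows Elf_Off");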
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 8a13d34..cbfba12 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -252,7 +252,7 @@
 }
 
 template<FindFieldType type, bool access_check>
-inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+inline ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                            Thread* self, size_t expected_size) {
   bool is_primitive;
   bool is_set;
@@ -269,7 +269,7 @@
     default:                     is_primitive = true;  is_set = true;  is_static = true;  break;
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::ArtField* resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
+  ArtField* resolved_field = class_linker->ResolveField(field_idx, referrer, is_static);
   if (UNLIKELY(resolved_field == nullptr)) {
     DCHECK(self->IsExceptionPending());  // Throw exception and unwind.
     return nullptr;  // Failure.
@@ -324,7 +324,7 @@
 // Explicit template declarations of FindFieldFromCode for all field access types.
 #define EXPLICIT_FIND_FIELD_FROM_CODE_TEMPLATE_DECL(_type, _access_check) \
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) ALWAYS_INLINE \
-mirror::ArtField* FindFieldFromCode<_type, _access_check>(uint32_t field_idx, \
+ArtField* FindFieldFromCode<_type, _access_check>(uint32_t field_idx, \
                                                           mirror::ArtMethod* referrer, \
                                                           Thread* self, size_t expected_size) \
 
@@ -469,11 +469,11 @@
 #undef EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
-inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
+inline ArtField* FindFieldFast(uint32_t field_idx,
                                        mirror::ArtMethod* referrer,
                                        FindFieldType type, size_t expected_size) {
-  mirror::ArtField* resolved_field =
-      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx);
+  ArtField* resolved_field =
+      referrer->GetDeclaringClass()->GetDexCache()->GetResolvedField(field_idx, sizeof(void*));
   if (UNLIKELY(resolved_field == nullptr)) {
     return nullptr;
   }
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 70e2851..1d8df68 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -16,11 +16,11 @@
 
 #include "entrypoints/entrypoint_utils.h"
 
+#include "art_field-inl.h"
 #include "base/mutex.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 77eec46..8d419f8 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -30,14 +30,14 @@
 namespace art {
 
 namespace mirror {
-  class Class;
   class Array;
-  class ArtField;
   class ArtMethod;
+  class Class;
   class Object;
   class String;
 }  // namespace mirror
 
+class ArtField;
 class ScopedObjectAccessAlreadyRunnable;
 class Thread;
 
@@ -132,7 +132,7 @@
 };
 
 template<FindFieldType type, bool access_check>
-inline mirror::ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
+inline ArtField* FindFieldFromCode(uint32_t field_idx, mirror::ArtMethod* referrer,
                                            Thread* self, size_t expected_size)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -143,7 +143,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Fast path field resolution that can't initialize classes or throw exceptions.
-inline mirror::ArtField* FindFieldFast(uint32_t field_idx,
+inline ArtField* FindFieldFast(uint32_t field_idx,
                                        mirror::ArtMethod* referrer,
                                        FindFieldType type, size_t expected_size)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 22bf939..b5a7c09 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
+#include "art_field-inl.h"
 #include "callee_save_frame.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 
@@ -29,8 +29,7 @@
                                            Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr)) {
     return field->GetByte(field->GetDeclaringClass());
   }
@@ -45,8 +44,7 @@
                                                Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr)) {
     return field->GetBoolean(field->GetDeclaringClass());
   }
@@ -61,8 +59,7 @@
                                              Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
-                                          sizeof(int16_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr)) {
     return field->GetShort(field->GetDeclaringClass());
   }
@@ -78,8 +75,7 @@
                                              Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
-                                          sizeof(int16_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr)) {
     return field->GetChar(field->GetDeclaringClass());
   }
@@ -95,8 +91,7 @@
                                            Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
-                                          sizeof(int32_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int32_t));
   if (LIKELY(field != nullptr)) {
     return field->Get32(field->GetDeclaringClass());
   }
@@ -112,8 +107,7 @@
                                            Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead,
-                                          sizeof(int64_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveRead, sizeof(int64_t));
   if (LIKELY(field != nullptr)) {
     return field->Get64(field->GetDeclaringClass());
   }
@@ -129,8 +123,8 @@
                                                    Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
-                                          sizeof(mirror::HeapReference<mirror::Object>));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectRead,
+                                  sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     return field->GetObj(field->GetDeclaringClass());
   }
@@ -146,8 +140,7 @@
                                              mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetByte(obj);
   }
@@ -167,8 +160,7 @@
                                                  mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetBoolean(obj);
   }
@@ -187,8 +179,7 @@
                                                mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
-                                          sizeof(int16_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetShort(obj);
   }
@@ -208,8 +199,7 @@
                                                mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
-                                          sizeof(int16_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetChar(obj);
   }
@@ -229,8 +219,7 @@
                                              mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
-                                          sizeof(int32_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int32_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get32(obj);
   }
@@ -250,8 +239,7 @@
                                              mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead,
-                                          sizeof(int64_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveRead, sizeof(int64_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->Get64(obj);
   }
@@ -272,13 +260,13 @@
                                                      Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
-                                          sizeof(mirror::HeapReference<mirror::Object>));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectRead,
+                                  sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     return field->GetObj(obj);
   }
-  field = FindFieldFromCode<InstanceObjectRead, true>(field_idx, referrer, self,
-                                                      sizeof(mirror::HeapReference<mirror::Object>));
+  field = FindFieldFromCode<InstanceObjectRead, true>(
+      field_idx, referrer, self, sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     if (UNLIKELY(obj == nullptr)) {
       ThrowNullPointerExceptionForFieldAccess(field, true);
@@ -293,8 +281,7 @@
                                      mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int8_t));
   if (LIKELY(field != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
@@ -325,8 +312,7 @@
                                       mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
-                                          sizeof(int16_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int16_t));
   if (LIKELY(field != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
@@ -357,8 +343,7 @@
                                       mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
-                                          sizeof(int32_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int32_t));
   if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set32<false>(field->GetDeclaringClass(), new_value);
@@ -377,8 +362,7 @@
                                       uint64_t new_value, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite,
-                                          sizeof(int64_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
   if (LIKELY(field != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set64<false>(field->GetDeclaringClass(), new_value);
@@ -397,8 +381,8 @@
                                        mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
-                                          sizeof(mirror::HeapReference<mirror::Object>));
+  ArtField* field = FindFieldFast(field_idx, referrer, StaticObjectWrite,
+                                  sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr)) {
     if (LIKELY(!field->IsPrimitiveType())) {
       // Compiled code can't use transactional mode.
@@ -420,8 +404,7 @@
                                        mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
-                                          sizeof(int8_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int8_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
@@ -460,8 +443,7 @@
                                         mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
-                                          sizeof(int16_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int16_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     Primitive::Type type = field->GetTypeAsPrimitiveType();
     // Compiled code can't use transactional mode.
@@ -501,8 +483,7 @@
                                         mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
-                                          sizeof(int32_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int32_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set32<false>(obj, new_value);
@@ -530,8 +511,7 @@
                                         mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite,
-                                          sizeof(int64_t));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstancePrimitiveWrite, sizeof(int64_t));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->Set64<false>(obj, new_value);
@@ -556,8 +536,8 @@
                                          mirror::ArtMethod* referrer, Thread* self)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  mirror::ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
-                                          sizeof(mirror::HeapReference<mirror::Object>));
+  ArtField* field = FindFieldFast(field_idx, referrer, InstanceObjectWrite,
+                                  sizeof(mirror::HeapReference<mirror::Object>));
   if (LIKELY(field != nullptr && obj != nullptr)) {
     // Compiled code can't use transactional mode.
     field->SetObj<false>(obj, new_value);
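
All of these quick field entrypoints share the same two-tier shape, which the reflowed single-line calls above make easier to see: a lock-free fast path through the dex cache (FindFieldFast), then a slow path (FindFieldFromCode) that may resolve, initialize, and throw. In miniature, with stand-in functions rather than ART's:

    struct Field { int value; };

    static Field cache_entry{7};

    Field* FindFieldFastStub(unsigned idx) {  // never throws, may simply miss
      return idx == 0 ? &cache_entry : nullptr;
    }

    Field* FindFieldFromCodeStub(unsigned idx, bool* threw) {  // resolves, may "throw"
      if (idx < 8) {
        return &cache_entry;
      }
      *threw = true;  // stand-in for a pending exception on the thread
      return nullptr;
    }

    int GetStaticStub(unsigned idx, bool* threw) {
      if (Field* f = FindFieldFastStub(idx)) {       // lock-free dex-cache hit
        return f->value;
      }
      Field* f = FindFieldFromCodeStub(idx, threw);  // may resolve, initialize, throw
      return f != nullptr ? f->value : 0;            // 0 only with *threw set
    }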
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index c1276b5..e478d2a 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -68,7 +68,6 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-
 extern void JniMethodEndSynchronized(uint32_t saved_local_ref_cookie, jobject locked,
                                      Thread* self) {
   GoToRunnable(self);
@@ -76,38 +75,34 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
-                                                 Thread* self) {
-  GoToRunnable(self);
-  mirror::Object* o = self->DecodeJObject(result);  // Must decode before pop.
+// Common result handling for EndWithReference.
+static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
+                                                             uint32_t saved_local_ref_cookie,
+                                                             Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS {
+  // Must decode before pop. The 'result' may not be valid in case of an exception, though.
+  mirror::Object* o = self->IsExceptionPending() ? nullptr : self->DecodeJObject(result);
   PopLocalReferences(saved_local_ref_cookie, self);
   // Process result.
   if (UNLIKELY(self->GetJniEnv()->check_jni)) {
-    if (self->IsExceptionPending()) {
-      return NULL;
-    }
     CheckReferenceResult(o, self);
   }
   VerifyObject(o);
   return o;
 }
 
+extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
+                                                 Thread* self) {
+  GoToRunnable(self);
+  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
+}
+
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
                                                              uint32_t saved_local_ref_cookie,
                                                              jobject locked, Thread* self) {
   GoToRunnable(self);
-  UnlockJniSynchronizedMethod(locked, self);  // Must decode before pop.
-  mirror::Object* o = self->DecodeJObject(result);
-  PopLocalReferences(saved_local_ref_cookie, self);
-  // Process result.
-  if (UNLIKELY(self->GetJniEnv()->check_jni)) {
-    if (self->IsExceptionPending()) {
-      return NULL;
-    }
-    CheckReferenceResult(o, self);
-  }
-  VerifyObject(o);
-  return o;
+  UnlockJniSynchronizedMethod(locked, self);
+  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
 }
 
 }  // namespace art
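
The quick_jni_entrypoints.cc hunk deduplicates the two method-exit epilogues and, in passing, hardens the ordering: the returned jobject is decoded only when no exception is pending, while the local-reference frame is still always popped. A simplified stand-in for the shape of that refactor (not ART's real types):

    struct ThreadStub {
      bool exception_pending = false;
      void* Decode(void* ref) { return ref; }  // stand-in for DecodeJObject()
      void PopLocals() {}                      // stand-in for PopLocalReferences()
    };

    static void* EndWithReferenceCommon(void* result, ThreadStub* self) {
      // Decode only when no exception is pending: a pending exception can
      // leave 'result' stale, and it must not be touched.
      void* o = self->exception_pending ? nullptr : self->Decode(result);
      self->PopLocals();  // always balance the JNI local-reference frame
      return o;
    }

    void* EndWithReference(void* result, ThreadStub* self) {
      return EndWithReferenceCommon(result, self);
    }

    void* EndWithReferenceSynchronized(void* result, ThreadStub* self) {
      // Unlock the synchronized method's monitor first (elided here),
      // then share the common tail.
      return EndWithReferenceCommon(result, self);
    }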
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index a3fac58..cd3f910 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -28,7 +28,6 @@
 #include "gc/heap.h"
 #include "gc/space/space.h"
 #include "gc/space/image_space.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index b16a146..eeb385e 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -26,7 +26,6 @@
 #include "gc/collector/semi_space.h"
 #include "gc/heap.h"
 #include "gc/space/space.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index ad8d988..2da8325 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -16,12 +16,12 @@
 
 #include "space_bitmap-inl.h"
 
+#include "art_field-inl.h"
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
 #include "mem_map.h"
 #include "mirror/object-inl.h"
 #include "mirror/class.h"
-#include "mirror/art_field.h"
 #include "mirror/object_array.h"
 
 namespace art {
@@ -190,15 +190,13 @@
     WalkInstanceFields(visited, callback, obj, super, arg);
   }
   // Walk instance fields
-  mirror::ObjectArray<mirror::ArtField>* fields = klass->GetIFields();
-  if (fields != NULL) {
-    for (int32_t i = 0; i < fields->GetLength(); i++) {
-      mirror::ArtField* field = fields->Get(i);
-      if (!field->IsPrimitiveType()) {
-        mirror::Object* value = field->GetObj(obj);
-        if (value != NULL) {
-          WalkFieldsInOrder(visited, callback, value, arg);
-        }
+  auto* fields = klass->GetIFields();
+  for (size_t i = 0, count = klass->NumInstanceFields(); i < count; ++i) {
+    ArtField* field = &fields[i];
+    if (!field->IsPrimitiveType()) {
+      mirror::Object* value = field->GetObj(obj);
+      if (value != nullptr) {
+        WalkFieldsInOrder(visited, callback, value, arg);
       }
     }
   }
@@ -219,15 +217,13 @@
   WalkInstanceFields(visited, callback, obj, klass, arg);
   // Walk static fields of a Class
   if (obj->IsClass()) {
-    mirror::ObjectArray<mirror::ArtField>* fields = klass->GetSFields();
-    if (fields != NULL) {
-      for (int32_t i = 0; i < fields->GetLength(); i++) {
-        mirror::ArtField* field = fields->Get(i);
-        if (!field->IsPrimitiveType()) {
-          mirror::Object* value = field->GetObj(NULL);
-          if (value != NULL) {
-            WalkFieldsInOrder(visited, callback, value, arg);
-          }
+    auto* sfields = klass->GetSFields();
+    for (size_t i = 0, count = klass->NumStaticFields(); i < count; ++i) {
+      ArtField* field = &sfields[i];
+      if (!field->IsPrimitiveType()) {
+        mirror::Object* value = field->GetObj(nullptr);
+        if (value != nullptr) {
+          WalkFieldsInOrder(visited, callback, value, arg);
         }
       }
     }
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 19d4e1a..eabb1c2 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -16,12 +16,12 @@
 
 #include "concurrent_copying.h"
 
+#include "art_field-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
 #include "intern_table.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
@@ -1148,11 +1148,11 @@
     mirror::Object** root = roots[i];
     mirror::Object* ref = *root;
     if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-      return;
+      continue;
     }
     mirror::Object* to_ref = Mark(ref);
     if (to_ref == ref) {
-      return;
+      continue;
     }
     Atomic<mirror::Object*>* addr = reinterpret_cast<Atomic<mirror::Object*>*>(root);
     mirror::Object* expected_ref = ref;
@@ -1173,11 +1173,11 @@
     mirror::CompressedReference<mirror::Object>* root = roots[i];
     mirror::Object* ref = root->AsMirrorPtr();
     if (ref == nullptr || region_space_->IsInToSpace(ref)) {
-      return;
+      continue;
     }
     mirror::Object* to_ref = Mark(ref);
     if (to_ref == ref) {
-      return;
+      continue;
     }
     auto* addr = reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
     auto expected_ref = mirror::CompressedReference<mirror::Object>::FromMirrorPtr(ref);
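
The concurrent_copying.cc hunks fix a subtle root-visiting bug: inside the loop over the roots array, `return` bailed out of the whole visit after the first root that needed no work, leaving every later root unprocessed; `continue` only skips the current root. The pattern in miniature:

    #include <vector>

    void MarkAllRoots(std::vector<int*>* roots) {
      for (int* root : *roots) {
        if (root == nullptr) {
          continue;  // was 'return': that abandoned all remaining roots
        }
        *root += 1;  // stand-in for Mark(ref) and the CAS write-back
      }
    }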
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 8902df8..3c247cd 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -35,8 +35,6 @@
 #include "jni_internal.h"
 #include "mark_sweep-inl.h"
 #include "monitor.h"
-#include "mirror/art_field.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
diff --git a/runtime/gc/collector/mark_sweep-inl.h b/runtime/gc/collector/mark_sweep-inl.h
index 104ed36..4e3845e 100644
--- a/runtime/gc/collector/mark_sweep-inl.h
+++ b/runtime/gc/collector/mark_sweep-inl.h
@@ -17,10 +17,9 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_INL_H_
 #define ART_RUNTIME_GC_COLLECTOR_MARK_SWEEP_INL_H_
 
-#include "gc/collector/mark_sweep.h"
+#include "mark_sweep.h"
 
 #include "gc/heap.h"
-#include "mirror/art_field.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 79d1034..ed2e295 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -39,7 +39,6 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "mark_sweep-inl.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b9153c1..83da5a8 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -23,6 +23,7 @@
 #include <memory>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "base/allocator.h"
 #include "base/dumpable.h"
 #include "base/histogram-inl.h"
@@ -58,7 +59,6 @@
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
@@ -233,7 +233,7 @@
       CHECK_GT(oat_file_end_addr, image_space->End());
       requested_alloc_space_begin = AlignUp(oat_file_end_addr, kPageSize);
     } else {
-      LOG(WARNING) << "Could not create image space with image file '" << image_file_name << "'. "
+      LOG(ERROR) << "Could not create image space with image file '" << image_file_name << "'. "
                    << "Attempting to fall back to imageless running. Error was: " << error_msg;
     }
   }
@@ -482,7 +482,7 @@
                                       non_moving_space_->GetMemMap());
     if (!no_gap) {
       MemMap::DumpMaps(LOG(ERROR));
-      LOG(FATAL) << "There's a gap between the image space and the main space";
+      LOG(FATAL) << "There's a gap between the image space and the non-moving space";
     }
   }
   if (running_on_valgrind_) {
@@ -2708,12 +2708,12 @@
           // Print which field of the object is dead.
           if (!obj->IsObjectArray()) {
             mirror::Class* klass = is_static ? obj->AsClass() : obj->GetClass();
-            CHECK(klass != NULL);
-            mirror::ObjectArray<mirror::ArtField>* fields = is_static ? klass->GetSFields()
-                                                                      : klass->GetIFields();
-            CHECK(fields != NULL);
-            for (int32_t i = 0; i < fields->GetLength(); ++i) {
-              mirror::ArtField* cur = fields->Get(i);
+            CHECK(klass != nullptr);
+            auto* fields = is_static ? klass->GetSFields() : klass->GetIFields();
+            auto num_fields = is_static ? klass->NumStaticFields() : klass->NumInstanceFields();
+            CHECK_EQ(fields == nullptr, num_fields == 0u);
+            for (size_t i = 0; i < num_fields; ++i) {
+              ArtField* cur = &fields[i];
               if (cur->GetOffset().Int32Value() == offset.Int32Value()) {
                 LOG(ERROR) << (is_static ? "Static " : "") << "field in the live stack is "
                           << PrettyField(cur);
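
The heap.cc and space_bitmap.cc hunks show the new field-iteration idiom: GetIFields()/GetSFields() now return a native array, so code walks a base pointer plus NumInstanceFields()/NumStaticFields() instead of an ObjectArray with its own length and a per-element null check. Roughly, with stand-in types:

    #include <cstddef>

    struct ArtFieldStub { int offset; };

    struct KlassStub {
      ArtFieldStub* ifields;  // may be null exactly when num_ifields == 0
      size_t num_ifields;
    };

    int SumFieldOffsets(const KlassStub& klass) {
      int sum = 0;
      for (size_t i = 0; i < klass.num_ifields; ++i) {
        sum += klass.ifields[i].offset;  // &fields[i] is the per-field ArtField*
      }
      return sum;
    }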
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 1fb3252..e28e8d7 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -43,8 +43,9 @@
 Atomic<uint32_t> ImageSpace::bitmap_index_(0);
 
 ImageSpace::ImageSpace(const std::string& image_filename, const char* image_location,
-                       MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap)
-    : MemMapSpace(image_filename, mem_map, mem_map->Begin(), mem_map->End(), mem_map->End(),
+                       MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap,
+                       uint8_t* end)
+    : MemMapSpace(image_filename, mem_map, mem_map->Begin(), end, end,
                   kGcRetentionPolicyNeverCollect),
       image_location_(image_location) {
   DCHECK(live_bitmap != nullptr);
@@ -642,10 +643,10 @@
 void ImageSpace::VerifyImageAllocations() {
   uint8_t* current = Begin() + RoundUp(sizeof(ImageHeader), kObjectAlignment);
   while (current < End()) {
-    DCHECK_ALIGNED(current, kObjectAlignment);
-    mirror::Object* obj = reinterpret_cast<mirror::Object*>(current);
-    CHECK(live_bitmap_->Test(obj));
+    CHECK_ALIGNED(current, kObjectAlignment);
+    auto* obj = reinterpret_cast<mirror::Object*>(current);
     CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
+    CHECK(live_bitmap_->Test(obj)) << PrettyTypeOf(obj);
     if (kUseBakerOrBrooksReadBarrier) {
       obj->AssertReadBarrierPointer();
     }
@@ -675,7 +676,6 @@
     *error_msg = StringPrintf("Invalid image header in '%s'", image_filename);
     return nullptr;
   }
-
   // Check that the file is large enough.
   uint64_t image_file_size = static_cast<uint64_t>(file->GetLength());
   if (image_header.GetImageSize() > image_file_size) {
@@ -683,23 +683,18 @@
                               image_file_size, image_header.GetImageSize());
     return nullptr;
   }
-  if (image_header.GetBitmapOffset() + image_header.GetImageBitmapSize() != image_file_size) {
-    *error_msg = StringPrintf("Image file too small for image bitmap: %" PRIu64 " vs. %zu.",
-                              image_file_size,
-                              image_header.GetBitmapOffset() + image_header.GetImageBitmapSize());
+  auto end_of_bitmap = image_header.GetImageBitmapOffset() + image_header.GetImageBitmapSize();
+  if (end_of_bitmap != image_file_size) {
+    *error_msg = StringPrintf(
+        "Image file size does not equal end of bitmap: size=%" PRIu64 " vs. %zu.", image_file_size,
+        end_of_bitmap);
     return nullptr;
   }
 
   // Note: The image header is part of the image due to mmap page alignment required of offset.
-  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(image_header.GetImageBegin(),
-                                                 image_header.GetImageSize(),
-                                                 PROT_READ | PROT_WRITE,
-                                                 MAP_PRIVATE,
-                                                 file->Fd(),
-                                                 0,
-                                                 false,
-                                                 image_filename,
-                                                 error_msg));
+  std::unique_ptr<MemMap> map(MemMap::MapFileAtAddress(
+      image_header.GetImageBegin(), image_header.GetImageSize() + image_header.GetArtFieldsSize(),
+      PROT_READ | PROT_WRITE, MAP_PRIVATE, file->Fd(), 0, false, image_filename, error_msg));
   if (map.get() == NULL) {
     DCHECK(!error_msg->empty());
     return nullptr;
@@ -710,7 +705,7 @@
   std::unique_ptr<MemMap> image_map(
       MemMap::MapFileAtAddress(nullptr, image_header.GetImageBitmapSize(),
                                PROT_READ, MAP_PRIVATE,
-                               file->Fd(), image_header.GetBitmapOffset(),
+                               file->Fd(), image_header.GetImageBitmapOffset(),
                                false,
                                image_filename,
                                error_msg));
@@ -730,8 +725,9 @@
     return nullptr;
   }
 
+  uint8_t* const image_end = map->Begin() + image_header.GetImageSize();
   std::unique_ptr<ImageSpace> space(new ImageSpace(image_filename, image_location,
-                                             map.release(), bitmap.release()));
+                                                   map.release(), bitmap.release(), image_end));
 
   // VerifyImageAllocations() will be called later in Runtime::Init()
   // as some class roots like ArtMethod::java_lang_reflect_ArtMethod_
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index d7f8057..9ae2af4 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -145,7 +145,7 @@
   std::unique_ptr<accounting::ContinuousSpaceBitmap> live_bitmap_;
 
   ImageSpace(const std::string& name, const char* image_location,
-             MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap);
+             MemMap* mem_map, accounting::ContinuousSpaceBitmap* live_bitmap, uint8_t* end);
 
   // The OatFile associated with the image during early startup to
   // reserve space contiguous to the image. It is later released to
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 2f4da3f..0d3c93b 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -30,6 +30,9 @@
 template <size_t kBufferSize>
 class BufferedRootVisitor;
 
+// Dependent on pointer size so that we don't create stack frames that are too big on 64-bit.
+static const size_t kDefaultBufferedRootCount = 1024 / sizeof(void*);
+
 enum RootType {
   kRootUnknown = 0,
   kRootJNIGlobal,
diff --git a/runtime/globals.h b/runtime/globals.h
index ac8751c..4d7fd2e 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -71,8 +71,6 @@
 static constexpr bool kMoveFieldArrays = !kMarkCompactSupport;
 // True if we allow moving classes.
 static constexpr bool kMovingClasses = !kMarkCompactSupport;
-// True if we allow moving fields.
-static constexpr bool kMovingFields = false;
 // True if we allow moving methods.
 static constexpr bool kMovingMethods = false;
 
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index d6a6595..23af25d 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -37,6 +37,7 @@
 
 #include <set>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "base/unix_file/fd_file.h"
@@ -51,7 +52,6 @@
 #include "globals.h"
 #include "jdwp/jdwp.h"
 #include "jdwp/jdwp_priv.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -949,6 +949,10 @@
 }
 
 void Hprof::DumpHeapClass(mirror::Class* klass) {
+  if (!klass->IsLoaded() && !klass->IsErroneous()) {
+    // Class is allocated but not yet loaded: we cannot access its fields or super class.
+    return;
+  }
   size_t sFieldCount = klass->NumStaticFields();
   if (sFieldCount != 0) {
     int byteLength = sFieldCount * sizeof(JValue);  // TODO bogus; fields are packed
@@ -995,7 +999,7 @@
     __ AddClassStaticsId(klass);
 
     for (size_t i = 0; i < sFieldCount; ++i) {
-      mirror::ArtField* f = klass->GetStaticField(i);
+      ArtField* f = klass->GetStaticField(i);
 
       size_t size;
       HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
@@ -1034,7 +1038,7 @@
   int iFieldCount = klass->IsObjectClass() ? 0 : klass->NumInstanceFields();
   __ AddU2((uint16_t)iFieldCount);
   for (int i = 0; i < iFieldCount; ++i) {
-    mirror::ArtField* f = klass->GetInstanceField(i);
+    ArtField* f = klass->GetInstanceField(i);
     __ AddStringId(LookupStringId(f->GetName()));
     HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), nullptr);
     __ AddU1(t);
@@ -1098,7 +1102,7 @@
   while (!klass->IsObjectClass()) {
     int ifieldCount = klass->NumInstanceFields();
     for (int i = 0; i < ifieldCount; ++i) {
-      mirror::ArtField* f = klass->GetInstanceField(i);
+      ArtField* f = klass->GetInstanceField(i);
       size_t size;
       auto t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
       switch (t) {
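
The guard added at the top of DumpHeapClass above matters because a class that is merely allocated has no usable field arrays or super class yet. A stand-in sketch of the predicate it encodes (the assumption, matching the `&&` in the hunk, is that erroneous classes got far enough through loading to be dumpable):

    enum class ClassStatus { kAllocated, kLoaded, kErroneous };

    struct KlassStatusStub { ClassStatus status; };

    bool CanDumpHeapClass(const KlassStatusStub& k) {
      // Mirrors '!IsLoaded() && !IsErroneous() -> return': dumping proceeds
      // for loaded classes and for erroneous ones, never for bare allocations.
      return k.status == ClassStatus::kLoaded ||
             k.status == ClassStatus::kErroneous;
    }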
diff --git a/runtime/image.cc b/runtime/image.cc
index 3cb2580..2d8c1c4 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,10 +24,12 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '4', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '5', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
+                         uint32_t art_fields_offset,
+                         uint32_t art_fields_size,
                          uint32_t image_bitmap_offset,
                          uint32_t image_bitmap_size,
                          uint32_t image_roots,
@@ -39,6 +41,8 @@
                          bool compile_pic)
   : image_begin_(image_begin),
     image_size_(image_size),
+    art_fields_offset_(art_fields_offset),
+    art_fields_size_(art_fields_size),
     image_bitmap_offset_(image_bitmap_offset),
     image_bitmap_size_(image_bitmap_size),
     oat_checksum_(oat_checksum),
diff --git a/runtime/image.h b/runtime/image.h
index 3c527b8..613414a 100644
--- a/runtime/image.h
+++ b/runtime/image.h
@@ -32,6 +32,8 @@
 
   ImageHeader(uint32_t image_begin,
               uint32_t image_size_,
+              uint32_t art_fields_offset,
+              uint32_t art_fields_size,
               uint32_t image_bitmap_offset,
               uint32_t image_bitmap_size,
               uint32_t image_roots,
@@ -53,6 +55,14 @@
     return static_cast<uint32_t>(image_size_);
   }
 
+  size_t GetArtFieldsOffset() const {
+    return art_fields_offset_;
+  }
+
+  size_t GetArtFieldsSize() const {
+    return art_fields_size_;
+  }
+
   size_t GetImageBitmapOffset() const {
     return image_bitmap_offset_;
   }
@@ -89,10 +99,6 @@
     return patch_delta_;
   }
 
-  size_t GetBitmapOffset() const {
-    return RoundUp(image_size_, kPageSize);
-  }
-
   static std::string GetOatLocationFromImageLocation(const std::string& image) {
     std::string oat_filename = image;
     if (oat_filename.length() <= 3) {
@@ -140,6 +146,12 @@
   // Image size, not page aligned.
   uint32_t image_size_;
 
+  // ArtField array offset.
+  uint32_t art_fields_offset_;
+
+// ArtField section size in bytes.
+  uint32_t art_fields_size_;
+
   // Image bitmap offset in the file.
   uint32_t image_bitmap_offset_;
 
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index a3aa1de..5012965 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -64,7 +64,8 @@
 }
 
 IndirectReferenceTable::IndirectReferenceTable(size_t initialCount,
-                                               size_t maxCount, IndirectRefKind desiredKind)
+                                               size_t maxCount, IndirectRefKind desiredKind,
+                                               bool abort_on_error)
     : kind_(desiredKind),
       max_entries_(maxCount) {
   CHECK_GT(initialCount, 0U);
@@ -75,16 +76,28 @@
   const size_t table_bytes = maxCount * sizeof(IrtEntry);
   table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
                                             PROT_READ | PROT_WRITE, false, false, &error_str));
-  CHECK(table_mem_map_.get() != nullptr) << error_str;
-  CHECK_EQ(table_mem_map_->Size(), table_bytes);
+  if (abort_on_error) {
+    CHECK(table_mem_map_.get() != nullptr) << error_str;
+    CHECK_EQ(table_mem_map_->Size(), table_bytes);
+    CHECK(table_mem_map_->Begin() != nullptr);
+  } else if (table_mem_map_.get() == nullptr ||
+             table_mem_map_->Size() != table_bytes ||
+             table_mem_map_->Begin() == nullptr) {
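+    // Allocation failed: release the map and leave the table in the
+    // invalid state reported by IsValid().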
+    table_mem_map_.reset();
+    LOG(ERROR) << error_str;
+    return;
+  }
   table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin());
-  CHECK(table_ != nullptr);
   segment_state_.all = IRT_FIRST_SEGMENT;
 }
 
 IndirectReferenceTable::~IndirectReferenceTable() {
 }
 
+bool IndirectReferenceTable::IsValid() const {
+  return table_mem_map_.get() != nullptr;
+}
+
 IndirectRef IndirectReferenceTable::Add(uint32_t cookie, mirror::Object* obj) {
   IRTSegmentState prevState;
   prevState.all = cookie;
@@ -243,7 +256,7 @@
 }
 
 void IndirectReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
-  BufferedRootVisitor<128> root_visitor(visitor, root_info);
+  BufferedRootVisitor<kDefaultBufferedRootCount> root_visitor(visitor, root_info);
   for (auto ref : *this) {
     root_visitor.VisitRootIfNonNull(*ref);
   }
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 25b0281..0072184 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -258,10 +258,15 @@
 
 class IndirectReferenceTable {
  public:
-  IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind);
+  // WARNING: Constructing with abort_on_error = false may leave the object in a partially
+  //          initialized state. Use IsValid() to check.
+  IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind,
+                         bool abort_on_error = true);
 
   ~IndirectReferenceTable();
 
+  bool IsValid() const;
+
   /*
    * Add a new entry.  "obj" must be a valid non-NULL object reference.
    *
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index dea157a..f8c0e83 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -929,7 +929,7 @@
 
 void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
-                                         mirror::ArtField* field) const {
+                                         ArtField* field) const {
   if (HasFieldReadListeners()) {
     std::shared_ptr<std::list<InstrumentationListener*>> original(field_read_listeners_);
     for (InstrumentationListener* listener : *original.get()) {
@@ -940,7 +940,7 @@
 
 void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                                          mirror::ArtMethod* method, uint32_t dex_pc,
-                                         mirror::ArtField* field, const JValue& field_value) const {
+                                         ArtField* field, const JValue& field_value) const {
   if (HasFieldWriteListeners()) {
     std::shared_ptr<std::list<InstrumentationListener*>> original(field_write_listeners_);
     for (InstrumentationListener* listener : *original.get()) {
@@ -1082,7 +1082,7 @@
   if (IsDeoptimizedMethodsEmpty()) {
     return;
   }
-  BufferedRootVisitor<128> roots(visitor, RootInfo(kRootVMInternal));
+  BufferedRootVisitor<kDefaultBufferedRootCount> roots(visitor, RootInfo(kRootVMInternal));
   for (auto pair : deoptimized_methods_) {
     roots.VisitRoot(pair.second);
   }
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 77314c60..41821a6 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -30,12 +30,12 @@
 
 namespace art {
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
   class Throwable;
 }  // namespace mirror
+class ArtField;
 union JValue;
 class Thread;
 
@@ -82,11 +82,11 @@
 
   // Call-back for when we read from a field.
   virtual void FieldRead(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                         uint32_t dex_pc, mirror::ArtField* field) = 0;
+                         uint32_t dex_pc, ArtField* field) = 0;
 
   // Call-back for when we write into a field.
   virtual void FieldWritten(Thread* thread, mirror::Object* this_object, mirror::ArtMethod* method,
-                            uint32_t dex_pc, mirror::ArtField* field, const JValue& field_value) = 0;
+                            uint32_t dex_pc, ArtField* field, const JValue& field_value) = 0;
 
   // Call-back when an exception is caught.
   virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
@@ -301,7 +301,7 @@
   // Inform listeners that we read a field (only supported by the interpreter).
   void FieldReadEvent(Thread* thread, mirror::Object* this_object,
                       mirror::ArtMethod* method, uint32_t dex_pc,
-                      mirror::ArtField* field) const
+                      ArtField* field) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasFieldReadListeners())) {
       FieldReadEventImpl(thread, this_object, method, dex_pc, field);
@@ -311,7 +311,7 @@
   // Inform listeners that we write a field (only supported by the interpreter).
   void FieldWriteEvent(Thread* thread, mirror::Object* this_object,
                        mirror::ArtMethod* method, uint32_t dex_pc,
-                       mirror::ArtField* field, const JValue& field_value) const
+                       ArtField* field, const JValue& field_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (UNLIKELY(HasFieldWriteListeners())) {
       FieldWriteEventImpl(thread, this_object, method, dex_pc, field, field_value);
@@ -377,11 +377,11 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
                            mirror::ArtMethod* method, uint32_t dex_pc,
-                           mirror::ArtField* field) const
+                           ArtField* field) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
                            mirror::ArtMethod* method, uint32_t dex_pc,
-                           mirror::ArtField* field, const JValue& field_value) const
+                           ArtField* field, const JValue& field_value) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Read barrier-aware utility functions for accessing deoptimized_methods_
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 8e85435..1f1f9e8 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -336,7 +336,8 @@
 }
 
 void InternTable::Table::VisitRoots(RootVisitor* visitor) {
-  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootInternedString));
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
+      visitor, RootInfo(kRootInternedString));
   for (auto& intern : pre_zygote_table_) {
     buffered_visitor.VisitRoot(intern);
   }
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 375d644..3ae611b 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -281,11 +281,10 @@
         // object in the destructor.
         Class* field_class;
         {
-          StackHandleScope<3> hs(self);
-          HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
+          StackHandleScope<2> hs(self);
           HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
           HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-          field_class = h_f->GetType<true>();
+          field_class = f->GetType<true>();
         }
         if (!reg->VerifierInstanceOf(field_class)) {
           // This should never happen.
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 2f8bf55..0e0d56a 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -24,6 +24,7 @@
 #include <iostream>
 #include <sstream>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "class_linker-inl.h"
@@ -32,7 +33,6 @@
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "handle_scope-inl.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
@@ -41,7 +41,6 @@
 #include "thread.h"
 #include "well_known_classes.h"
 
-using ::art::mirror::ArtField;
 using ::art::mirror::ArtMethod;
 using ::art::mirror::Array;
 using ::art::mirror::BooleanArray;
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 9af8102..4fb634b 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -208,20 +208,22 @@
   // going the reflective Dex way.
   mirror::Class* klass = shadow_frame->GetVRegReference(arg_offset)->AsClass();
   mirror::String* name2 = shadow_frame->GetVRegReference(arg_offset + 1)->AsString();
-  mirror::ArtField* found = nullptr;
-  mirror::ObjectArray<mirror::ArtField>* fields = klass->GetIFields();
-  for (int32_t i = 0; i < fields->GetLength() && found == nullptr; ++i) {
-    mirror::ArtField* f = fields->Get(i);
+  ArtField* found = nullptr;
+  ArtField* fields = klass->GetIFields();
+  for (int32_t i = 0, count = klass->NumInstanceFields(); i < count; ++i) {
+    ArtField* f = &fields[i];
     if (name2->Equals(f->GetName())) {
       found = f;
+      break;
     }
   }
   if (found == nullptr) {
     fields = klass->GetSFields();
-    for (int32_t i = 0; i < fields->GetLength() && found == nullptr; ++i) {
-      mirror::ArtField* f = fields->Get(i);
+    for (int32_t i = 0, count = klass->NumStaticFields(); i < count; ++i) {
+      ArtField* f = &fields[i];
       if (name2->Equals(f->GetName())) {
         found = f;
+        break;
       }
     }
   }
@@ -779,6 +781,31 @@
   result->SetL(mirror::Array::CreateMultiArray(self, h_class, h_dimensions));
 }
 
+static void UnstartedJNIArrayCreateObjectArray(Thread* self,
+                                               mirror::ArtMethod* method ATTRIBUTE_UNUSED,
+                                               mirror::Object* receiver ATTRIBUTE_UNUSED,
+                                               uint32_t* args,
+                                               JValue* result)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  int32_t length = static_cast<int32_t>(args[1]);
+  if (length < 0) {
+    ThrowNegativeArraySizeException(length);
+    return;
+  }
+  mirror::Class* element_class = reinterpret_cast<mirror::Class*>(args[0])->AsClass();
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  mirror::Class* array_class = class_linker->FindArrayClass(self, &element_class);
+  if (UNLIKELY(array_class == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return;
+  }
+  DCHECK(array_class->IsObjectArrayClass());
+  mirror::Array* new_array = mirror::ObjectArray<mirror::Object>::Alloc(
+      self, array_class, length, runtime->GetHeap()->GetCurrentAllocator());
+  result->SetL(new_array);
+}
+
 static void UnstartedJNIThrowableNativeFillInStackTrace(Thread* self,
                                                         mirror::ArtMethod* method ATTRIBUTE_UNUSED,
                                                         mirror::Object* receiver ATTRIBUTE_UNUSED,
@@ -973,6 +1000,8 @@
           &UnstartedJNIStringFastIndexOf },
       { "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])",
           &UnstartedJNIArrayCreateMultiArray },
+      { "java.lang.Object java.lang.reflect.Array.createObjectArray(java.lang.Class, int)",
+          &UnstartedJNIArrayCreateObjectArray },
       { "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()",
           &UnstartedJNIThrowableNativeFillInStackTrace },
       { "int java.lang.System.identityHashCode(java.lang.Object)",
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index e16221c..a503b17 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -33,11 +33,11 @@
 
 namespace art {
 
+class ArtField;
 union JValue;
 class Thread;
 
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
@@ -207,7 +207,7 @@
    * "fieldValue" is non-null for field modification events only.
    * "is_modification" is true for field modification, false for field access.
    */
-  void PostFieldEvent(const EventLocation* pLoc, mirror::ArtField* field, mirror::Object* thisPtr,
+  void PostFieldEvent(const EventLocation* pLoc, ArtField* field, mirror::Object* thisPtr,
                       const JValue* fieldValue, bool is_modification)
       LOCKS_EXCLUDED(event_list_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -403,6 +403,14 @@
   // Used for VirtualMachine.Exit command handling.
   bool should_exit_;
   int exit_status_;
+
+  // Used to synchronize runtime shutdown with the JDWP command handler thread.
+  // When the runtime shuts down, it needs to stop the JDWP command handler thread by closing the
+  // JDWP connection. However, if the JDWP thread is processing a command, it needs to wait
+  // for the command to finish so we can send its reply before closing the connection.
+  Mutex shutdown_lock_ ACQUIRED_AFTER(event_list_lock_);
+  ConditionVariable shutdown_cond_ GUARDED_BY(shutdown_lock_);
+  bool processing_request_ GUARDED_BY(shutdown_lock_);
 };
 
 std::string DescribeField(const FieldId& field_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index c9a4483..ccf8bff 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -21,6 +21,7 @@
 #include <string.h>
 #include <unistd.h>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "debugger.h"
@@ -28,7 +29,6 @@
 #include "jdwp/jdwp_expand_buf.h"
 #include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
-#include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 
@@ -119,7 +119,7 @@
   mirror::Class*        locationClass;    /* ClassOnly */
   mirror::Class*        exceptionClass;   /* ExceptionOnly */
   bool                  caught;           /* ExceptionOnly */
-  mirror::ArtField*     field;            /* FieldOnly */
+  ArtField*             field;            /* FieldOnly */
   mirror::Object*       thisPtr;          /* InstanceOnly */
   /* nothing for StepOnly -- handled differently */
 };
@@ -914,7 +914,7 @@
   SendRequestAndPossiblySuspend(pReq, suspend_policy, thread_id);
 }
 
-void JdwpState::PostFieldEvent(const EventLocation* pLoc, mirror::ArtField* field,
+void JdwpState::PostFieldEvent(const EventLocation* pLoc, ArtField* field,
                                mirror::Object* this_object, const JValue* fieldValue,
                                bool is_modification) {
   DCHECK(pLoc != nullptr);
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index add1394..d0ca214 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -271,7 +271,7 @@
 
 static JdwpError VM_Dispose(JdwpState*, Request*, ExpandBuf*)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  Dbg::Disposed();
+  Dbg::Dispose();
   return ERR_NONE;
 }
 
@@ -315,11 +315,12 @@
 static JdwpError VM_CreateString(JdwpState*, Request* request, ExpandBuf* pReply)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string str(request->ReadUtf8String());
-  ObjectId stringId = Dbg::CreateString(str);
-  if (stringId == 0) {
-    return ERR_OUT_OF_MEMORY;
+  ObjectId string_id;
+  JdwpError status = Dbg::CreateString(str, &string_id);
+  if (status != ERR_NONE) {
+    return status;
   }
-  expandBufAddObjectId(pReply, stringId);
+  expandBufAddObjectId(pReply, string_id);
   return ERR_NONE;
 }
 
@@ -711,9 +712,6 @@
   if (status != ERR_NONE) {
     return status;
   }
-  if (object_id == 0) {
-    return ERR_OUT_OF_MEMORY;
-  }
   return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, true);
 }
 
@@ -730,9 +728,6 @@
   if (status != ERR_NONE) {
     return status;
   }
-  if (object_id == 0) {
-    return ERR_OUT_OF_MEMORY;
-  }
   expandBufAdd1(pReply, JT_ARRAY);
   expandBufAddObjectId(pReply, object_id);
   return ERR_NONE;
@@ -1657,6 +1652,7 @@
       if (result == ERR_NONE) {
         request->CheckConsumed();
       }
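+      // Command handlers must not leave a pending exception on the JDWP thread.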
+      self->AssertNoPendingException();
       break;
     }
   }
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index e2b88a5..5b30f0c 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -126,6 +126,7 @@
  */
 ssize_t JdwpNetStateBase::WritePacket(ExpandBuf* pReply, size_t length) {
   MutexLock mu(Thread::Current(), socket_lock_);
+  DCHECK(IsConnected()) << "Connection with debugger is closed";
   DCHECK_LE(length, expandBufGetLength(pReply));
   return TEMP_FAILURE_RETRY(write(clientSock, expandBufGetBuffer(pReply), length));
 }
@@ -140,6 +141,7 @@
 
 ssize_t JdwpNetStateBase::WriteBufferedPacketLocked(const std::vector<iovec>& iov) {
   socket_lock_.AssertHeld(Thread::Current());
+  DCHECK(IsConnected()) << "Connection with debugger is closed";
   return TEMP_FAILURE_RETRY(writev(clientSock, &iov[0], iov.size()));
 }
 
@@ -225,7 +227,10 @@
       jdwp_token_owner_thread_id_(0),
       ddm_is_active_(false),
       should_exit_(false),
-      exit_status_(0) {
+      exit_status_(0),
+      shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock),
+      shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_),
+      processing_request_(false) {
 }
 
 /*
@@ -338,10 +343,20 @@
 JdwpState::~JdwpState() {
   if (netState != nullptr) {
     /*
-     * Close down the network to inspire the thread to halt.
+     * Close down the network to inspire the thread to halt. If a request is being processed,
+     * we need to wait for it to finish first.
      */
-    VLOG(jdwp) << "JDWP shutting down net...";
-    netState->Shutdown();
+    {
+      Thread* self = Thread::Current();
+      MutexLock mu(self, shutdown_lock_);
+      while (processing_request_) {
+        VLOG(jdwp) << "JDWP command in progress: wait for it to finish ...";
+        shutdown_cond_.Wait(self);
+      }
+
+      VLOG(jdwp) << "JDWP shutting down net...";
+      netState->Shutdown();
+    }
 
     if (debug_thread_started_) {
       run = false;
@@ -369,7 +384,13 @@
 
 // Returns "false" if we encounter a connection-fatal error.
 bool JdwpState::HandlePacket() {
-  JdwpNetStateBase* netStateBase = reinterpret_cast<JdwpNetStateBase*>(netState);
+  Thread* const self = Thread::Current();
+  {
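+    // Flag the request as in flight so that runtime shutdown waits for the reply (see ~JdwpState).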
+    MutexLock mu(self, shutdown_lock_);
+    processing_request_ = true;
+  }
+  JdwpNetStateBase* netStateBase = netState;
+  CHECK(netStateBase != nullptr) << "Connection has been closed";
   JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_);
 
   ExpandBuf* pReply = expandBufAlloc();
@@ -388,6 +409,11 @@
   }
   expandBufFree(pReply);
   netStateBase->ConsumeBytes(request.GetLength());
+  {
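+    // The reply has been sent: wake up a shutdown waiting in ~JdwpState.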
+    MutexLock mu(self, shutdown_lock_);
+    processing_request_ = false;
+    shutdown_cond_.Broadcast(self);
+  }
   return true;
 }
 
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 425d2d3..9d5d74f 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -31,12 +31,12 @@
 
 namespace art {
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
   class Throwable;
 }  // namespace mirror
+class ArtField;
 union JValue;
 class Thread;
 
@@ -77,10 +77,10 @@
                             mirror::ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
   virtual void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
                          mirror::ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                         mirror::ArtField* /*field*/) OVERRIDE { }
+                         ArtField* /*field*/) OVERRIDE { }
   virtual void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
                             mirror::ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            mirror::ArtField* /*field*/, const JValue& /*field_value*/)
+                            ArtField* /*field*/, const JValue& /*field_value*/)
       OVERRIDE { }
   virtual void ExceptionCaught(Thread* /*thread*/,
                                mirror::Throwable* /*exception_object*/) OVERRIDE { }
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index b2d3835..84fc404 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -28,11 +28,29 @@
 
 static constexpr size_t kLocalsInitial = 64;  // Arbitrary.
 
+// Checking "locals" requires the mutator lock, but at creation time we're really only interested
+// in validity, which isn't changing. To avoid grabbing the mutator lock, factored out and tagged
+// with NO_THREAD_SAFETY_ANALYSIS.
+static bool CheckLocalsValid(JNIEnvExt* in) NO_THREAD_SAFETY_ANALYSIS {
+  if (in == nullptr) {
+    return false;
+  }
+  return in->locals.IsValid();
+}
+
+JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
+  std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in));
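+  // Creating "locals" can fail (e.g., its MemMap allocation), so only hand out valid envs.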
+  if (CheckLocalsValid(ret.get())) {
+    return ret.release();
+  }
+  return nullptr;
+}
+
 JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in)
     : self(self_in),
       vm(vm_in),
       local_ref_cookie(IRT_FIRST_SEGMENT),
-      locals(kLocalsInitial, kLocalsMax, kLocal),
+      locals(kLocalsInitial, kLocalsMax, kLocal, false),
       check_jni(false),
       critical(0),
       monitors("monitors", kMonitorsInitial, kMonitorsMax) {
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index af87cb4..29d912c 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -34,7 +34,8 @@
 static constexpr size_t kLocalsMax = 512;
 
 struct JNIEnvExt : public JNIEnv {
-  JNIEnvExt(Thread* self, JavaVMExt* vm);
+  static JNIEnvExt* Create(Thread* self, JavaVMExt* vm);
+
   ~JNIEnvExt();
 
   void DumpReferenceTables(std::ostream& os)
@@ -87,6 +88,11 @@
 
   // Used by -Xcheck:jni.
   const JNINativeInterface* unchecked_functions;
+
+ private:
+  // The constructor should not be called directly. It may leave the object in an erroneous state,
+  // and the result needs to be checked.
+  JNIEnvExt(Thread* self, JavaVMExt* vm);
 };
 
 // Used to save and restore the JNIEnvExt state when not going through code created by the JNI
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 9ec64d4..8a5461b 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -23,6 +23,7 @@
 #include <utility>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "atomic.h"
 #include "base/allocator.h"
 #include "base/logging.h"
@@ -37,7 +38,6 @@
 #include "interpreter/interpreter.h"
 #include "jni_env_ext.h"
 #include "java_vm_ext.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -184,7 +184,7 @@
   if (c.Get() == nullptr) {
     return nullptr;
   }
-  mirror::ArtField* field = nullptr;
+  ArtField* field = nullptr;
   mirror::Class* field_type;
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   if (sig[1] != '\0') {
@@ -379,7 +379,7 @@
   static jobject ToReflectedField(JNIEnv* env, jclass, jfieldID fid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    mirror::ArtField* f = soa.DecodeField(fid);
+    ArtField* f = soa.DecodeField(fid);
     return soa.AddLocalReference<jobject>(mirror::Field::CreateFromArtField(soa.Self(), f, true));
   }
 
@@ -1203,14 +1203,14 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(obj);
-    mirror::ArtField* f = soa.DecodeField(fid);
+    ArtField* f = soa.DecodeField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(o));
   }
 
   static jobject GetStaticObjectField(JNIEnv* env, jclass, jfieldID fid) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    mirror::ArtField* f = soa.DecodeField(fid);
+    ArtField* f = soa.DecodeField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(f->GetDeclaringClass()));
   }
 
@@ -1220,7 +1220,7 @@
     ScopedObjectAccess soa(env);
     mirror::Object* o = soa.Decode<mirror::Object*>(java_object);
     mirror::Object* v = soa.Decode<mirror::Object*>(java_value);
-    mirror::ArtField* f = soa.DecodeField(fid);
+    ArtField* f = soa.DecodeField(fid);
     f->SetObject<false>(o, v);
   }
 
@@ -1228,7 +1228,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid);
     ScopedObjectAccess soa(env);
     mirror::Object* v = soa.Decode<mirror::Object*>(java_value);
-    mirror::ArtField* f = soa.DecodeField(fid);
+    ArtField* f = soa.DecodeField(fid);
     f->SetObject<false>(f->GetDeclaringClass(), v);
   }
 
@@ -1237,13 +1237,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
   mirror::Object* o = soa.Decode<mirror::Object*>(instance); \
-  mirror::ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = soa.DecodeField(fid); \
   return f->Get ##fn (o)
 
 #define GET_STATIC_PRIMITIVE_FIELD(fn) \
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
-  mirror::ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = soa.DecodeField(fid); \
   return f->Get ##fn (f->GetDeclaringClass())
 
 #define SET_PRIMITIVE_FIELD(fn, instance, value) \
@@ -1251,13 +1251,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
   mirror::Object* o = soa.Decode<mirror::Object*>(instance); \
-  mirror::ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = soa.DecodeField(fid); \
   f->Set ##fn <false>(o, value)
 
 #define SET_STATIC_PRIMITIVE_FIELD(fn, value) \
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
-  mirror::ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = soa.DecodeField(fid); \
   f->Set ##fn <false>(f->GetDeclaringClass(), value)
 
   static jboolean GetBooleanField(JNIEnv* env, jobject obj, jfieldID fid) {
diff --git a/runtime/leb128.h b/runtime/leb128.h
index d36b690..2e27b8e 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -136,6 +136,19 @@
   dest->push_back(out);
 }
 
+// Overwrite encoded Leb128 with a new value. The new value must be less than
+// or equal to the old value to ensure that it fits the allocated space.
+static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
+  const uint8_t* old_end = dest;
+  uint32_t old_value = DecodeUnsignedLeb128(&old_end);
+  DCHECK_LE(value, old_value);
+  for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
+    // Use longer encoding than necessary to fill the allocated space.
+    end[-1] |= 0x80;
+    end[0] = 0;
+  }
+}
+
 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
   uint8_t out = value & 0x7f;
diff --git a/runtime/leb128_test.cc b/runtime/leb128_test.cc
index 5d157dc..87e13ff 100644
--- a/runtime/leb128_test.cc
+++ b/runtime/leb128_test.cc
@@ -252,6 +252,25 @@
   EXPECT_EQ(data_size, static_cast<size_t>(encoded_data_ptr - encoded_data));
 }
 
+TEST(Leb128Test, UnsignedUpdate) {
+  for (size_t i = 0; i < arraysize(uleb128_tests); ++i) {
+    for (size_t j = 0; j < arraysize(uleb128_tests); ++j) {
+      uint32_t old_value = uleb128_tests[i].decoded;
+      uint32_t new_value = uleb128_tests[j].decoded;
+      // We can only make the encoded value smaller.
+      if (new_value <= old_value) {
+        uint8_t encoded_data[5];
+        uint8_t* old_end = EncodeUnsignedLeb128(encoded_data, old_value);
+        UpdateUnsignedLeb128(encoded_data, new_value);
+        const uint8_t* new_end = encoded_data;
+        EXPECT_EQ(DecodeUnsignedLeb128(&new_end), new_value);
+        // Even if the new value needs fewer bytes, we should fill the space.
+        EXPECT_EQ(new_end, old_end);
+      }
+    }
+  }
+}
+
 TEST(Leb128Test, Speed) {
   std::unique_ptr<Histogram<uint64_t>> enc_hist(new Histogram<uint64_t>("Leb128EncodeSpeedTest", 5));
   std::unique_ptr<Histogram<uint64_t>> dec_hist(new Histogram<uint64_t>("Leb128DecodeSpeedTest", 5));
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
new file mode 100644
index 0000000..fe6bee6
--- /dev/null
+++ b/runtime/linear_alloc.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "linear_alloc.h"
+
+#include "thread-inl.h"
+
+namespace art {
+
+LinearAlloc::LinearAlloc(ArenaPool* pool) : lock_("linear alloc"), allocator_(pool) {
+}
+
+void* LinearAlloc::Alloc(Thread* self, size_t size) {
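+  // ArenaAllocator is not thread-safe, so serialize all allocations on lock_.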
+  MutexLock mu(self, lock_);
+  return allocator_.Alloc(size);
+}
+
+size_t LinearAlloc::GetUsedMemory() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return allocator_.BytesUsed();
+}
+
+}  // namespace art
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
new file mode 100644
index 0000000..6d8eda6
--- /dev/null
+++ b/runtime/linear_alloc.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_LINEAR_ALLOC_H_
+#define ART_RUNTIME_LINEAR_ALLOC_H_
+
+#include "base/arena_allocator.h"
+
+namespace art {
+
+class ArenaPool;
+
+// TODO: Support freeing if we add poor man's class unloading.
+class LinearAlloc {
+ public:
+  explicit LinearAlloc(ArenaPool* pool);
+
+  void* Alloc(Thread* self, size_t size);
+
+  // Allocate an array of structs of type T; constructors are not invoked.
+  template<class T>
+  T* AllocArray(Thread* self, size_t elements) {
+    return reinterpret_cast<T*>(Alloc(self, elements * sizeof(T)));
+  }
+
+  // Return the number of bytes used in the allocator.
+  size_t GetUsedMemory() const;
+
+ private:
+  mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  ArenaAllocator allocator_ GUARDED_BY(lock_);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_LINEAR_ALLOC_H_
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 588615f..edd2888 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -622,7 +622,7 @@
 }
 
 void MemMap::DumpMapsLocked(std::ostream& os) {
-  os << maps_;
+  os << *maps_;
 }
 
 bool MemMap::HasMemMap(MemMap* map) {
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 6452f31..8b3418d 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -203,7 +203,7 @@
 template<typename T>
 inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
   Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length,
-                                        ComponentSizeShiftWidth<sizeof(T)>(),
+                                        ComponentSizeShiftWidth(sizeof(T)),
                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
   return down_cast<PrimitiveArray<T>*>(raw_array);
 }
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 115fcf2..832ad68 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -54,7 +54,7 @@
   }
 
   void SetLength(int32_t length) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK_GE(length, 0);
+    DCHECK_GE(length, 0);
     // We use non transactional version since we can't undo this write. We also disable checking
     // since it would fail during a transaction.
     SetField32<false, false, kVerifyNone>(OFFSET_OF_OBJECT_MEMBER(Array, length_), length);
diff --git a/runtime/mirror/art_field.cc b/runtime/mirror/art_field.cc
deleted file mode 100644
index 83602d4..0000000
--- a/runtime/mirror/art_field.cc
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_field.h"
-
-#include "art_field-inl.h"
-#include "gc/accounting/card_table-inl.h"
-#include "object-inl.h"
-#include "object_array-inl.h"
-#include "runtime.h"
-#include "scoped_thread_state_change.h"
-#include "utils.h"
-#include "well_known_classes.h"
-
-namespace art {
-namespace mirror {
-
-// TODO: Get global references for these
-GcRoot<Class> ArtField::java_lang_reflect_ArtField_;
-
-void ArtField::SetClass(Class* java_lang_reflect_ArtField) {
-  CHECK(java_lang_reflect_ArtField_.IsNull());
-  CHECK(java_lang_reflect_ArtField != NULL);
-  java_lang_reflect_ArtField_ = GcRoot<Class>(java_lang_reflect_ArtField);
-}
-
-void ArtField::ResetClass() {
-  CHECK(!java_lang_reflect_ArtField_.IsNull());
-  java_lang_reflect_ArtField_ = GcRoot<Class>(nullptr);
-}
-
-void ArtField::SetOffset(MemberOffset num_bytes) {
-  DCHECK(GetDeclaringClass()->IsLoaded() || GetDeclaringClass()->IsErroneous());
-  if (kIsDebugBuild && Runtime::Current()->IsAotCompiler() &&
-      Runtime::Current()->IsCompilingBootImage()) {
-    Primitive::Type type = GetTypeAsPrimitiveType();
-    if (type == Primitive::kPrimDouble || type == Primitive::kPrimLong) {
-      DCHECK_ALIGNED(num_bytes.Uint32Value(), 8);
-    }
-  }
-  // Not called within a transaction.
-  SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtField, offset_), num_bytes.Uint32Value());
-}
-
-void ArtField::VisitRoots(RootVisitor* visitor) {
-  java_lang_reflect_ArtField_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
-}
-
-// TODO: we could speed up the search if fields are ordered by offsets.
-ArtField* ArtField::FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset) {
-  DCHECK(klass != nullptr);
-  ObjectArray<ArtField>* instance_fields = klass->GetIFields();
-  if (instance_fields != nullptr) {
-    for (int32_t i = 0, e = instance_fields->GetLength(); i < e; ++i) {
-      mirror::ArtField* field = instance_fields->GetWithoutChecks(i);
-      if (field->GetOffset().Uint32Value() == field_offset) {
-        return field;
-      }
-    }
-  }
-  // We did not find field in the class: look into superclass.
-  if (klass->GetSuperClass() != NULL) {
-    return FindInstanceFieldWithOffset(klass->GetSuperClass(), field_offset);
-  } else {
-    return nullptr;
-  }
-}
-
-}  // namespace mirror
-}  // namespace art
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
deleted file mode 100644
index 9d95cb9..0000000
--- a/runtime/mirror/art_field.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_MIRROR_ART_FIELD_H_
-#define ART_RUNTIME_MIRROR_ART_FIELD_H_
-
-#include <jni.h>
-
-#include "gc_root.h"
-#include "modifiers.h"
-#include "object.h"
-#include "object_callbacks.h"
-#include "primitive.h"
-#include "read_barrier_option.h"
-
-namespace art {
-
-struct ArtFieldOffsets;
-class DexFile;
-class ScopedObjectAccessAlreadyRunnable;
-
-namespace mirror {
-
-class DexCache;
-
-// C++ mirror of java.lang.reflect.ArtField
-class MANAGED ArtField FINAL : public Object {
- public:
-  // Size of java.lang.reflect.ArtField.class.
-  static uint32_t ClassSize();
-
-  // Size of an instance of java.lang.reflect.ArtField not including its value array.
-  static constexpr uint32_t InstanceSize() {
-    return sizeof(ArtField);
-  }
-
-  Class* GetDeclaringClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  void SetDeclaringClass(Class *new_declaring_class) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  uint32_t GetAccessFlags() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  void SetAccessFlags(uint32_t new_access_flags) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtField, access_flags_), new_access_flags);
-  }
-
-  bool IsPublic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return (GetAccessFlags() & kAccPublic) != 0;
-  }
-
-  bool IsStatic() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return (GetAccessFlags() & kAccStatic) != 0;
-  }
-
-  bool IsFinal() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return (GetAccessFlags() & kAccFinal) != 0;
-  }
-
-  uint32_t GetDexFieldIndex() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetField32(OFFSET_OF_OBJECT_MEMBER(ArtField, field_dex_idx_));
-  }
-
-  void SetDexFieldIndex(uint32_t new_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Not called within a transaction.
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtField, field_dex_idx_), new_idx);
-  }
-
-  // Offset to field within an Object.
-  MemberOffset GetOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  static MemberOffset OffsetOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(ArtField, offset_));
-  }
-
-  MemberOffset GetOffsetDuringLinking() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  void SetOffset(MemberOffset num_bytes) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // field access, null object for static fields
-  uint8_t GetBoolean(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetBoolean(Object* object, uint8_t z) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int8_t GetByte(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetByte(Object* object, int8_t b) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint16_t GetChar(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetChar(Object* object, uint16_t c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int16_t GetShort(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetShort(Object* object, int16_t s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int32_t GetInt(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetInt(Object* object, int32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  int64_t GetLong(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetLong(Object* object, int64_t j) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  float GetFloat(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetFloat(Object* object, float f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  double GetDouble(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetDouble(Object* object, double d) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* GetObject(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetObject(Object* object, Object* l) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Raw field accesses.
-  uint32_t Get32(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void Set32(Object* object, uint32_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  uint64_t Get64(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void Set64(Object* object, uint64_t new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  Object* GetObj(Object* object) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<bool kTransactionActive>
-  void SetObj(Object* object, Object* new_value) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  static Class* GetJavaLangReflectArtField()  SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(!java_lang_reflect_ArtField_.IsNull());
-    return java_lang_reflect_ArtField_.Read<kReadBarrierOption>();
-  }
-
-  static void SetClass(Class* java_lang_reflect_ArtField);
-  static void ResetClass();
-  static void VisitRoots(RootVisitor* visitor)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  bool IsVolatile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return (GetAccessFlags() & kAccVolatile) != 0;
-  }
-
-  // Returns an instance field with this offset in the given class or nullptr if not found.
-  static ArtField* FindInstanceFieldWithOffset(mirror::Class* klass, uint32_t field_offset)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  const char* GetName() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // Resolves / returns the name from the dex cache.
-  String* GetStringName(Thread* self, bool resolve) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  const char* GetTypeDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  Primitive::Type GetTypeAsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <bool kResolve>
-  Class* GetType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  const DexFile* GetDexFile() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
- private:
-  // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
-  // The class we are a part of
-  HeapReference<Class> declaring_class_;
-
-  uint32_t access_flags_;
-
-  // Dex cache index of field id
-  uint32_t field_dex_idx_;
-
-  // Offset of field within an instance or in the Class' static fields
-  uint32_t offset_;
-
-  static GcRoot<Class> java_lang_reflect_ArtField_;
-
-  friend struct art::ArtFieldOffsets;  // for verifying offset information
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ArtField);
-};
-
-}  // namespace mirror
-}  // namespace art
-
-#endif  // ART_RUNTIME_MIRROR_ART_FIELD_H_
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 0ccf5db..fb427dc 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -36,7 +36,7 @@
 namespace mirror {
 
 inline uint32_t ArtMethod::ClassSize() {
-  uint32_t vtable_entries = Object::kVTableLength + 8;
+  uint32_t vtable_entries = Object::kVTableLength + 7;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0);
 }
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index edbbb4a..92aea1f 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -53,7 +53,7 @@
 
 ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                           jobject jlr_method) {
-  mirror::ArtField* f =
+  ArtField* f =
       soa.DecodeField(WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod);
   mirror::ArtMethod* method = f->GetObject(soa.Decode<mirror::Object*>(jlr_method))->AsArtMethod();
   DCHECK(method != nullptr);
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index 22481ce..55b8068 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -183,6 +183,10 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_), new_method_index);
   }
 
+  static MemberOffset DexMethodIndexOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_);
+  }
+
   static MemberOffset MethodIndexOffset() {
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
   }
@@ -214,6 +218,8 @@
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_cache_resolved_types_);
   }
 
+  ALWAYS_INLINE ObjectArray<ArtMethod>* GetDexCacheResolvedMethods()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   ALWAYS_INLINE ArtMethod* GetDexCacheResolvedMethod(uint16_t method_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   ALWAYS_INLINE void SetDexCacheResolvedMethod(uint16_t method_idx, ArtMethod* new_method)
@@ -434,10 +440,6 @@
         EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
   }
 
-  static MemberOffset GetMethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
-  }
-
   // Is this a CalleSaveMethod or ResolutionMethod and therefore doesn't adhere to normal
   // conventions for a method of managed code. Returns false for Proxy methods.
   bool IsRuntimeMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -604,9 +606,6 @@
  private:
   ALWAYS_INLINE void CheckObjectSizeEqualsMirrorSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE ObjectArray<ArtMethod>* GetDexCacheResolvedMethods()
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   ALWAYS_INLINE ObjectArray<Class>* GetDexCacheResolvedTypes()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index c368dc6..5b72e5a 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -397,9 +397,9 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), new_iftable);
 }
 
-inline ObjectArray<ArtField>* Class::GetIFields() {
+inline ArtField* Class::GetIFields() {
   DCHECK(IsLoaded() || IsErroneous());
-  return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
+  return GetFieldPtr<ArtField*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
@@ -432,55 +432,46 @@
   return MemberOffset(base);
 }
 
-inline void Class::SetIFields(ObjectArray<ArtField>* new_ifields)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_)));
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields);
+inline void Class::SetIFields(ArtField* new_ifields) {
+  DCHECK(GetIFieldsUnchecked() == nullptr);
+  SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields);
 }
 
-inline ObjectArray<ArtField>* Class::GetSFields() {
+inline void Class::SetIFieldsUnchecked(ArtField* new_ifields) {
+  SetFieldPtr<false, true, kVerifyNone>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_), new_ifields);
+}
+
+inline ArtField* Class::GetSFieldsUnchecked() {
+  return GetFieldPtr<ArtField*>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
+}
+
+inline ArtField* Class::GetIFieldsUnchecked() {
+  return GetFieldPtr<ArtField*>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
+}
+
+inline ArtField* Class::GetSFields() {
   DCHECK(IsLoaded() || IsErroneous()) << GetStatus();
-  return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
+  return GetSFieldsUnchecked();
 }
 
-inline void Class::SetSFields(ObjectArray<ArtField>* new_sfields)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+inline void Class::SetSFields(ArtField* new_sfields) {
   DCHECK((IsRetired() && new_sfields == nullptr) ||
-         (NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_))));
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields);
+         GetFieldPtr<ArtField*>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_)) == nullptr);
+  SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields);
 }
 
-inline uint32_t Class::NumStaticFields() {
-  return (GetSFields() != NULL) ? GetSFields()->GetLength() : 0;
+inline void Class::SetSFieldsUnchecked(ArtField* new_sfields) {
+  SetFieldPtr<false, true, kVerifyNone>(OFFSET_OF_OBJECT_MEMBER(Class, sfields_), new_sfields);
 }
 
-
-inline ArtField* Class::GetStaticField(uint32_t i)  // TODO: uint16_t
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  return GetSFields()->GetWithoutChecks(i);
+inline ArtField* Class::GetStaticField(uint32_t i) {
+  DCHECK_LT(i, NumStaticFields());
+  return &GetSFields()[i];
 }
 
-inline void Class::SetStaticField(uint32_t i, ArtField* f)  // TODO: uint16_t
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* sfields= GetFieldObject<ObjectArray<ArtField>>(
-      OFFSET_OF_OBJECT_MEMBER(Class, sfields_));
-  sfields->Set<false>(i, f);
-}
-
-inline uint32_t Class::NumInstanceFields() {
-  return (GetIFields() != NULL) ? GetIFields()->GetLength() : 0;
-}
-
-inline ArtField* Class::GetInstanceField(uint32_t i) {  // TODO: uint16_t
-  DCHECK_NE(NumInstanceFields(), 0U);
-  return GetIFields()->GetWithoutChecks(i);
-}
-
-inline void Class::SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  ObjectArray<ArtField>* ifields= GetFieldObject<ObjectArray<ArtField>>(
-      OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
-  ifields->Set<false>(i, f);
+inline ArtField* Class::GetInstanceField(uint32_t i) {
+  DCHECK_LT(i, NumInstanceFields());
+  return &GetIFields()[i];
 }
 
 template<VerifyObjectFlags kVerifyFlags>
@@ -513,14 +504,12 @@
   DCHECK(IsIdxLoaded<kVerifyFlags>() || IsRetired<kVerifyFlags>() ||
          IsErroneous<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>() ||
          this == String::GetJavaLangString() ||
-         this == ArtField::GetJavaLangReflectArtField() ||
          this == ArtMethod::GetJavaLangReflectArtMethod())
       << "IsIdxLoaded=" << IsIdxLoaded<kVerifyFlags>()
       << " IsRetired=" << IsRetired<kVerifyFlags>()
       << " IsErroneous=" <<
           IsErroneous<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>()
       << " IsString=" << (this == String::GetJavaLangString())
-      << " IsArtField=" << (this == ArtField::GetJavaLangReflectArtField())
       << " IsArtMethod=" << (this == ArtMethod::GetJavaLangReflectArtMethod())
       << " descriptor=" << PrettyDescriptor(this);
   return GetField32<kVerifyFlags>(AccessFlagsOffset());
@@ -691,11 +680,6 @@
 }
 
 template<ReadBarrierOption kReadBarrierOption>
-inline bool Class::IsArtFieldClass() const {
-  return this == ArtField::GetJavaLangReflectArtField<kReadBarrierOption>();
-}
-
-template<ReadBarrierOption kReadBarrierOption>
 inline bool Class::IsArtMethodClass() const {
   return this == ArtMethod::GetJavaLangReflectArtMethod<kReadBarrierOption>();
 }
@@ -741,35 +725,35 @@
 inline ObjectArray<Class>* Class::GetInterfaces() {
   CHECK(IsProxyClass());
   // First static field.
-  DCHECK(GetSFields()->Get(0)->IsArtField());
-  DCHECK_STREQ(GetSFields()->Get(0)->GetName(), "interfaces");
-  MemberOffset field_offset = GetSFields()->Get(0)->GetOffset();
+  auto* field = GetStaticField(0);
+  DCHECK_STREQ(field->GetName(), "interfaces");
+  MemberOffset field_offset = field->GetOffset();
   return GetFieldObject<ObjectArray<Class>>(field_offset);
 }
 
 inline ObjectArray<ObjectArray<Class>>* Class::GetThrows() {
   CHECK(IsProxyClass());
   // Second static field.
-  DCHECK(GetSFields()->Get(1)->IsArtField());
-  DCHECK_STREQ(GetSFields()->Get(1)->GetName(), "throws");
-  MemberOffset field_offset = GetSFields()->Get(1)->GetOffset();
+  auto* field = GetStaticField(1);
+  DCHECK_STREQ(field->GetName(), "throws");
+  MemberOffset field_offset = field->GetOffset();
   return GetFieldObject<ObjectArray<ObjectArray<Class>>>(field_offset);
 }
 
 inline MemberOffset Class::GetDisableIntrinsicFlagOffset() {
   CHECK(IsReferenceClass());
   // First static field
-  DCHECK(GetSFields()->Get(0)->IsArtField());
-  DCHECK_STREQ(GetSFields()->Get(0)->GetName(), "disableIntrinsic");
-  return GetSFields()->Get(0)->GetOffset();
+  auto* field = GetStaticField(0);
+  DCHECK_STREQ(field->GetName(), "disableIntrinsic");
+  return field->GetOffset();
 }
 
 inline MemberOffset Class::GetSlowPathFlagOffset() {
   CHECK(IsReferenceClass());
   // Second static field
-  DCHECK(GetSFields()->Get(1)->IsArtField());
-  DCHECK_STREQ(GetSFields()->Get(1)->GetName(), "slowPathEnabled");
-  return GetSFields()->Get(1)->GetOffset();
+  auto* field = GetStaticField(1);
+  DCHECK_STREQ(field->GetName(), "slowPathEnabled");
+  return field->GetOffset();
 }
 
 inline bool Class::GetSlowPathEnabled() {
@@ -827,6 +811,24 @@
   return GetFieldObject<ObjectArray<String>>(DexCacheStringsOffset());
 }
 
+template<class Visitor>
+void mirror::Class::VisitFieldRoots(Visitor& visitor) {
+  ArtField* const sfields = GetSFieldsUnchecked();
+  for (size_t i = 0, count = NumStaticFields(); i < count; ++i) {
+    if (kIsDebugBuild && GetStatus() != kStatusRetired) {
+      CHECK_EQ(sfields[i].GetDeclaringClass(), this);
+    }
+    visitor.VisitRoot(sfields[i].DeclaringClassRoot().AddressWithoutBarrier());
+  }
+  ArtField* const ifields = GetIFieldsUnchecked();
+  for (size_t i = 0, count = NumInstanceFields(); i < count; ++i) {
+    if (kIsDebugBuild && GetStatus() != kStatusRetired) {
+      CHECK_EQ(ifields[i].GetDeclaringClass(), this);
+    }
+    visitor.VisitRoot(ifields[i].DeclaringClassRoot().AddressWithoutBarrier());
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
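The class-inl.h hunks above swap the ObjectArray<ArtField> indirection for flat native arrays indexed via NumStaticFields()/NumInstanceFields(), and VisitFieldRoots hands each field's declaring-class slot to the GC. A minimal standalone sketch of that layout and visit loop, using hypothetical stand-in types rather than the real mirror classes:

#include <cstddef>
#include <cstdio>

struct Class;

// Stand-in for ArtField: the declaring class is the only object reference
// a field carries, so it is the only root the visitor needs to see.
struct Field {
  Class* declaring_class;
  int offset;
};

struct Class {
  Field* sfields;                 // Flat array of fields, not an array of pointers.
  std::size_t num_static_fields;  // Counterpart of the new num_static_fields_.

  template <class Visitor>
  void VisitFieldRoots(Visitor&& visitor) {
    for (std::size_t i = 0; i < num_static_fields; ++i) {
      visitor(&sfields[i].declaring_class);
    }
  }
};

int main() {
  Field fields[2] = {{nullptr, 0}, {nullptr, 4}};
  Class klass{fields, 2};
  std::size_t visited = 0;
  klass.VisitFieldRoots([&](Class** root) { (void)root; ++visited; });
  std::printf("visited %zu field roots\n", visited);
}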
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index b82a58f..92493bc 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -35,6 +35,7 @@
 
 namespace art {
 
+class ArtField;
 struct ClassOffsets;
 template<class T> class Handle;
 template<class T> class Handle;
@@ -44,7 +45,6 @@
 
 namespace mirror {
 
-class ArtField;
 class ArtMethod;
 class ClassLoader;
 class DexCache;
@@ -420,9 +420,6 @@
   bool IsThrowableClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsArtFieldClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsArtMethodClass() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -823,17 +820,22 @@
   ALWAYS_INLINE void SetIfTable(IfTable* new_iftable) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Get instance fields of the class (See also GetSFields).
-  ObjectArray<ArtField>* GetIFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtField* GetIFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetIFields(ObjectArray<ArtField>* new_ifields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetIFields(ArtField* new_ifields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t NumInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Unchecked edition has no verification flags.
+  void SetIFieldsUnchecked(ArtField* new_ifields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  ArtField* GetInstanceField(uint32_t i)  // TODO: uint16_t
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  uint32_t NumInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_instance_fields_));
+  }
 
-  void SetInstanceField(uint32_t i, ArtField* f)  // TODO: uint16_t
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetNumInstanceFields(uint32_t num) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, num_instance_fields_), num);
+  }
+
+  ArtField* GetInstanceField(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the number of instance fields containing reference types.
   uint32_t NumReferenceInstanceFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -884,18 +886,24 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Gets the static fields of the class.
-  ObjectArray<ArtField>* GetSFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtField* GetSFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetSFields(ObjectArray<ArtField>* new_sfields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void SetSFields(ArtField* new_sfields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  uint32_t NumStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  // Unchecked edition has no verification flags.
+  void SetSFieldsUnchecked(ArtField* new_sfields) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  uint32_t NumStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, num_static_fields_));
+  }
+
+  void SetNumStaticFields(uint32_t num) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, num_static_fields_), num);
+  }
 
   // TODO: uint16_t
   ArtField* GetStaticField(uint32_t i) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // TODO: uint16_t
-  void SetStaticField(uint32_t i, ArtField* f) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Find a static or instance field using the JLS resolution order
   static ArtField* FindField(Thread* self, Handle<Class> klass, const StringPiece& name,
                              const StringPiece& type)
@@ -974,6 +982,10 @@
   static void VisitRoots(RootVisitor* visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Visit field roots.
+  template<class Visitor>
+  void VisitFieldRoots(Visitor& visitor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // When class is verified, set the kAccPreverified flag on each method.
   void SetPreverifiedFlagOnAllMethods() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -1079,6 +1091,10 @@
 
   void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Unchecked editions are for root visiting.
+  ArtField* GetSFieldsUnchecked() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtField* GetIFieldsUnchecked() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // defining class loader, or NULL for the "bootstrap" system loader
   HeapReference<ClassLoader> class_loader_;
 
@@ -1096,18 +1112,6 @@
   // static, private, and <init> methods
   HeapReference<ObjectArray<ArtMethod>> direct_methods_;
 
-  // instance fields
-  //
-  // These describe the layout of the contents of an Object.
-  // Note that only the fields directly declared by this class are
-  // listed in ifields; fields declared by a superclass are listed in
-  // the superclass's Class.ifields.
-  //
-  // All instance fields that refer to objects are guaranteed to be at
-  // the beginning of the field list.  num_reference_instance_fields_
-  // specifies the number of reference fields.
-  HeapReference<ObjectArray<ArtField>> ifields_;
-
   // The interface table (iftable_) contains pairs of an interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
   // pair for each interface we support directly, indirectly via superclass, or indirectly via a
@@ -1124,9 +1128,6 @@
   // Descriptor for the class such as "java.lang.Class" or "[C". Lazily initialized by ComputeName
   HeapReference<String> name_;
 
-  // Static fields
-  HeapReference<ObjectArray<ArtField>> sfields_;
-
   // The superclass, or NULL if this is java.lang.Object, an interface or primitive type.
   HeapReference<Class> super_class_;
 
@@ -1143,8 +1144,22 @@
   HeapReference<ObjectArray<ArtMethod>> vtable_;
 
   // Access flags; low 16 bits are defined by VM spec.
+  // Note: Shuffled back.
   uint32_t access_flags_;
 
+  // instance fields
+  //
+  // These describe the layout of the contents of an Object.
+  // Note that only the fields directly declared by this class are
+  // listed in ifields; fields declared by a superclass are listed in
+  // the superclass's Class.ifields.
+  //
+  // ArtField arrays are allocated as an array of fields, and not an array of field pointers.
+  uint64_t ifields_;
+
+  // Static fields
+  uint64_t sfields_;
+
   // Total size of the Class instance; used when allocating storage on gc heap.
   // See also object_size_.
   uint32_t class_size_;
@@ -1160,12 +1175,18 @@
   // TODO: really 16bits
   int32_t dex_type_idx_;
 
+  // Number of instance fields.
+  uint32_t num_instance_fields_;
+
   // Number of instance fields that are object refs.
   uint32_t num_reference_instance_fields_;
 
   // Number of static fields that are object refs.
   uint32_t num_reference_static_fields_;
 
+  // Number of static fields.
+  uint32_t num_static_fields_;
+
   // Total object size; used when allocating storage on gc heap.
   // (For interfaces and abstract classes this will be zero.)
   // See also class_size_.
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 288e88e..1cb437e 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -19,6 +19,7 @@
 
 #include "dex_cache.h"
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "mirror/class.h"
 #include "runtime.h"
@@ -27,7 +28,7 @@
 namespace mirror {
 
 inline uint32_t DexCache::ClassSize() {
-  uint32_t vtable_entries = Object::kVTableLength + 1;
+  uint32_t vtable_entries = Object::kVTableLength + 5;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0);
 }
 
@@ -35,12 +36,11 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   ArtMethod* method = GetResolvedMethods()->Get(method_idx);
   // Hide resolution trampoline methods from the caller
-  if (method != NULL && method->IsRuntimeMethod()) {
-    DCHECK(method == Runtime::Current()->GetResolutionMethod());
-    return NULL;
-  } else {
-    return method;
+  if (method != nullptr && method->IsRuntimeMethod()) {
+    DCHECK_EQ(method, Runtime::Current()->GetResolutionMethod());
+    return nullptr;
   }
+  return method;
 }
 
 inline void DexCache::SetResolvedType(uint32_t type_idx, Class* resolved) {
@@ -49,6 +49,34 @@
   GetResolvedTypes()->Set(type_idx, resolved);
 }
 
+inline ArtField* DexCache::GetResolvedField(uint32_t idx, size_t ptr_size) {
+  ArtField* field = nullptr;
+  if (ptr_size == 8) {
+    field = reinterpret_cast<ArtField*>(
+        static_cast<uintptr_t>(GetResolvedFields()->AsLongArray()->GetWithoutChecks(idx)));
+  } else {
+    DCHECK_EQ(ptr_size, 4u);
+    field = reinterpret_cast<ArtField*>(
+        static_cast<uintptr_t>(GetResolvedFields()->AsIntArray()->GetWithoutChecks(idx)));
+  }
+  if (field == nullptr || field->GetDeclaringClass()->IsErroneous()) {
+    return nullptr;
+  }
+  return field;
+}
+
+inline void DexCache::SetResolvedField(uint32_t idx, ArtField* field, size_t ptr_size) {
+  if (ptr_size == 8) {
+    GetResolvedFields()->AsLongArray()->Set(
+        idx, static_cast<uint64_t>(reinterpret_cast<uintptr_t>(field)));
+  } else {
+    DCHECK_EQ(ptr_size, 4u);
+    CHECK_LE(reinterpret_cast<uintptr_t>(field), 0xFFFFFFFF);
+    GetResolvedFields()->AsIntArray()->Set(
+        idx, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(field)));
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
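Because resolved_fields_ now holds raw ArtField pointers in a primitive array, the element width must track the image pointer size, which is what the ptr_size branching above encodes. A standalone sketch of the same split over 4- and 8-byte slots, with plain vectors standing in for the mirror int/long arrays:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

struct ArtField {};  // Hypothetical stand-in for the native ArtField.

// 64-bit targets store the pointer in the long array; 32-bit targets store
// it in the int array after checking that it fits, like the CHECK_LE above.
void SetResolvedField(std::vector<int64_t>& longs, std::vector<int32_t>& ints,
                      uint32_t idx, ArtField* field, size_t ptr_size) {
  uintptr_t value = reinterpret_cast<uintptr_t>(field);
  if (ptr_size == 8u) {
    longs[idx] = static_cast<int64_t>(value);
  } else {
    assert(ptr_size == 4u);
    assert(value <= 0xFFFFFFFFu);
    ints[idx] = static_cast<int32_t>(static_cast<uint32_t>(value));
  }
}

ArtField* GetResolvedField(const std::vector<int64_t>& longs,
                           const std::vector<int32_t>& ints,
                           uint32_t idx, size_t ptr_size) {
  uintptr_t raw = (ptr_size == 8u)
      ? static_cast<uintptr_t>(longs[idx])
      : static_cast<uintptr_t>(static_cast<uint32_t>(ints[idx]));
  return reinterpret_cast<ArtField*>(raw);
}

int main() {
  std::vector<int64_t> longs(4);
  std::vector<int32_t> ints(4);
  ArtField f;
  SetResolvedField(longs, ints, 1u, &f, sizeof(void*));
  assert(GetResolvedField(longs, ints, 1u, sizeof(void*)) == &f);
}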
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index d6c11e8..ade8bd2 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -36,7 +36,7 @@
                     ObjectArray<String>* strings,
                     ObjectArray<Class>* resolved_types,
                     ObjectArray<ArtMethod>* resolved_methods,
-                    ObjectArray<ArtField>* resolved_fields) {
+                    Array* resolved_fields) {
   CHECK(dex_file != nullptr);
   CHECK(location != nullptr);
   CHECK(strings != nullptr);
@@ -44,19 +44,18 @@
   CHECK(resolved_methods != nullptr);
   CHECK(resolved_fields != nullptr);
 
-  SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file);
+  SetDexFile(dex_file);
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
   SetFieldObject<false>(StringsOffset(), strings);
+  SetFieldObject<false>(ResolvedFieldsOffset(), resolved_fields);
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, resolved_types_), resolved_types);
   SetFieldObject<false>(ResolvedMethodsOffset(), resolved_methods);
-  SetFieldObject<false>(ResolvedFieldsOffset(), resolved_fields);
 
   Runtime* runtime = Runtime::Current();
   if (runtime->HasResolutionMethod()) {
     // Initialize the resolve methods array to contain trampolines for resolution.
     ArtMethod* trampoline = runtime->GetResolutionMethod();
-    size_t length = resolved_methods->GetLength();
-    for (size_t i = 0; i < length; i++) {
+    for (size_t i = 0, length = resolved_methods->GetLength(); i < length; i++) {
       resolved_methods->SetWithoutChecks<false>(i, trampoline);
     }
   }
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index c548c03..7e30b89 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -50,7 +50,7 @@
             ObjectArray<String>* strings,
             ObjectArray<Class>* types,
             ObjectArray<ArtMethod>* methods,
-            ObjectArray<ArtField>* fields)
+            Array* fields)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void Fixup(ArtMethod* trampoline) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -116,23 +116,16 @@
     GetResolvedMethods()->Set(method_idx, resolved);
   }
 
-  ArtField* GetResolvedField(uint32_t field_idx) ALWAYS_INLINE
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    ArtField* field = GetResolvedFields()->Get(field_idx);
-    if (UNLIKELY(field == nullptr || field->GetDeclaringClass()->IsErroneous())) {
-      return nullptr;
-    } else {
-      return field;
-    }
-  }
+  // Pointer-sized variant, used for patching.
+  ArtField* GetResolvedField(uint32_t idx, size_t ptr_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetResolvedField(uint32_t field_idx, ArtField* resolved) ALWAYS_INLINE
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    GetResolvedFields()->Set(field_idx, resolved);
-  }
+  // Pointer-sized variant, used for patching.
+  void SetResolvedField(uint32_t idx, ArtField* field, size_t ptr_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ObjectArray<String>* GetStrings() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject< ObjectArray<String>>(StringsOffset());
+    return GetFieldObject<ObjectArray<String>>(StringsOffset());
   }
 
   ObjectArray<Class>* GetResolvedTypes() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -145,9 +138,8 @@
     return GetFieldObject< ObjectArray<ArtMethod>>(ResolvedMethodsOffset());
   }
 
-  ObjectArray<ArtField>* GetResolvedFields() ALWAYS_INLINE
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldObject<ObjectArray<ArtField>>(ResolvedFieldsOffset());
+  Array* GetResolvedFields() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldObject<Array>(ResolvedFieldsOffset());
   }
 
   const DexFile* GetDexFile() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -162,7 +154,8 @@
  private:
   HeapReference<Object> dex_;
   HeapReference<String> location_;
-  HeapReference<ObjectArray<ArtField>> resolved_fields_;
+  // Either an int array or long array (64 bit).
+  HeapReference<Object> resolved_fields_;
   HeapReference<ObjectArray<ArtMethod>> resolved_methods_;
   HeapReference<ObjectArray<Class>> resolved_types_;
   HeapReference<ObjectArray<String>> strings_;
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 53e5534..1d6846b 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -47,7 +47,7 @@
   EXPECT_LE(0, dex_cache->GetStrings()->GetLength());
   EXPECT_LE(0, dex_cache->GetResolvedTypes()->GetLength());
   EXPECT_LE(0, dex_cache->GetResolvedMethods()->GetLength());
-  EXPECT_LE(0, dex_cache->GetResolvedFields()->GetLength());
+  EXPECT_LE(0u, dex_cache->NumResolvedFields());
 
   EXPECT_EQ(java_lang_dex_file_->NumStringIds(),
             static_cast<uint32_t>(dex_cache->GetStrings()->GetLength()));
@@ -55,8 +55,7 @@
             static_cast<uint32_t>(dex_cache->GetResolvedTypes()->GetLength()));
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(),
             static_cast<uint32_t>(dex_cache->GetResolvedMethods()->GetLength()));
-  EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),
-            static_cast<uint32_t>(dex_cache->GetResolvedFields()->GetLength()));
+  EXPECT_EQ(java_lang_dex_file_->NumFieldIds(), dex_cache->NumResolvedFields());
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 24ebc48..7db1811 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -27,9 +27,8 @@
 namespace mirror {
 
 template <bool kTransactionActive>
-inline mirror::Field* Field::CreateFromArtField(Thread* self, mirror::ArtField* field,
+inline mirror::Field* Field::CreateFromArtField(Thread* self, ArtField* field,
                                                 bool force_resolve) {
-  CHECK(!kMovingFields);
   // Try to resolve type before allocating since this is a thread suspension point.
   mirror::Class* type = field->GetType<true>();
 
@@ -57,13 +56,13 @@
     return nullptr;
   }
   auto dex_field_index = field->GetDexFieldIndex();
-  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index);
+  auto* resolved_field = field->GetDexCache()->GetResolvedField(dex_field_index, sizeof(void*));
   if (resolved_field != nullptr) {
     DCHECK_EQ(resolved_field, field);
   } else {
     // We rely on the field being resolved so that we can get back to the ArtField
     // (i.e. FromReflectedMethod).
-    field->GetDexCache()->SetResolvedField(dex_field_index, field);
+    field->GetDexCache()->SetResolvedField(dex_field_index, field, sizeof(void*));
   }
   ret->SetType<kTransactionActive>(type);
   ret->SetDeclaringClass<kTransactionActive>(field->GetDeclaringClass());
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index 82cc26e..70311bb 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -55,7 +55,7 @@
 
 ArtField* Field::GetArtField() {
   mirror::DexCache* const dex_cache = GetDeclaringClass()->GetDexCache();
-  mirror::ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex());
+  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), sizeof(void*));
   CHECK(art_field != nullptr);
   return art_field;
 }
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index cea06f5..9988f84 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -25,11 +25,11 @@
 
 namespace art {
 
+class ArtField;
 struct FieldOffsets;
 
 namespace mirror {
 
-class ArtField;
 class Class;
 class String;
 
@@ -93,10 +93,10 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Slow, try to use only for PrettyField and such.
-  mirror::ArtField* GetArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtField* GetArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   template <bool kTransactionActive = false>
-  static mirror::Field* CreateFromArtField(Thread* self, mirror::ArtField* field,
+  static mirror::Field* CreateFromArtField(Thread* self, ArtField* field,
                                            bool force_resolve)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index d690163..af0e856 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -225,18 +225,6 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
-inline bool Object::IsArtField() {
-  return GetClass<kVerifyFlags, kReadBarrierOption>()->
-      template IsArtFieldClass<kReadBarrierOption>();
-}
-
-template<VerifyObjectFlags kVerifyFlags>
-inline ArtField* Object::AsArtField() {
-  DCHECK(IsArtField<kVerifyFlags>());
-  return down_cast<ArtField*>(this);
-}
-
-template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsArtMethod() {
   return GetClass<kVerifyFlags, kReadBarrierOption>()->
       template IsArtMethodClass<kReadBarrierOption>();
@@ -318,8 +306,8 @@
 template<VerifyObjectFlags kVerifyFlags>
 inline IntArray* Object::AsIntArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  DCHECK(GetClass<kVerifyFlags>()->IsArrayClass());
-  DCHECK(GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitiveInt() ||
+  CHECK(GetClass<kVerifyFlags>()->IsArrayClass());
+  CHECK(GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitiveInt() ||
          GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitiveFloat());
   return down_cast<IntArray*>(this);
 }
@@ -327,8 +315,8 @@
 template<VerifyObjectFlags kVerifyFlags>
 inline LongArray* Object::AsLongArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  DCHECK(GetClass<kVerifyFlags>()->IsArrayClass());
-  DCHECK(GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitiveLong() ||
+  CHECK(GetClass<kVerifyFlags>()->IsArrayClass());
+  CHECK(GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitiveLong() ||
          GetClass<kNewFlags>()->template GetComponentType<kNewFlags>()->IsPrimitiveDouble());
   return down_cast<LongArray*>(this);
 }
@@ -403,7 +391,6 @@
   }
   DCHECK_GE(result, sizeof(Object))
       << " class=" << PrettyTypeOf(GetClass<kNewFlags, kReadBarrierOption>());
-  DCHECK(!(IsArtField<kNewFlags, kReadBarrierOption>()) || result == sizeof(ArtField));
   return result;
 }
 
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 57ac46f..04d0cd8 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -204,22 +204,19 @@
     return;
   }
   for (Class* cur = c; cur != NULL; cur = cur->GetSuperClass()) {
-    ObjectArray<ArtField>* fields = cur->GetIFields();
-    if (fields != NULL) {
-      size_t num_ifields = fields->GetLength();
-      for (size_t i = 0; i < num_ifields; ++i) {
-        StackHandleScope<1> hs(Thread::Current());
-        Handle<Object> h_object(hs.NewHandle(new_value));
-        ArtField* field = fields->Get(i);
-        if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
-          CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
-          // TODO: resolve the field type for moving GC.
-          mirror::Class* field_type = field->GetType<!kMovingCollector>();
-          if (field_type != nullptr) {
-            CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
-          }
-          return;
+    ArtField* fields = cur->GetIFields();
+    for (size_t i = 0, count = cur->NumInstanceFields(); i < count; ++i) {
+      StackHandleScope<1> hs(Thread::Current());
+      Handle<Object> h_object(hs.NewHandle(new_value));
+      ArtField* field = &fields[i];
+      if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
+        CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
+        // TODO: resolve the field type for moving GC.
+        mirror::Class* field_type = field->GetType<!kMovingCollector>();
+        if (field_type != nullptr) {
+          CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
         }
+        return;
       }
     }
   }
@@ -228,20 +225,17 @@
     return;
   }
   if (IsClass()) {
-    ObjectArray<ArtField>* fields = AsClass()->GetSFields();
-    if (fields != NULL) {
-      size_t num_sfields = fields->GetLength();
-      for (size_t i = 0; i < num_sfields; ++i) {
-        ArtField* field = fields->Get(i);
-        if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
-          CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
-          // TODO: resolve the field type for moving GC.
-          mirror::Class* field_type = field->GetType<!kMovingCollector>();
-          if (field_type != nullptr) {
-            CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
-          }
-          return;
+    ArtField* fields = AsClass()->GetSFields();
+    for (size_t i = 0, count = AsClass()->NumStaticFields(); i < count; ++i) {
+      ArtField* field = &fields[i];
+      if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
+        CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
+        // TODO: resolve the field type for moving GC.
+        mirror::Class* field_type = field->GetType<!kMovingCollector>();
+        if (field_type != nullptr) {
+          CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
         }
+        return;
       }
     }
   }
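With fields held in flat arrays plus explicit counts, the null-array guards above disappear and the offset lookup becomes a plain walk over each class in the superclass chain. A reduced sketch of that search with hypothetical types:

#include <cstddef>

struct Field { int offset; };

struct Class {
  Class* super_class;
  Field* ifields;
  std::size_t num_instance_fields;
};

// Walk this class and its superclasses until an instance field with the
// requested offset turns up, mirroring the loop in CheckFieldAssignment.
Field* FindInstanceFieldByOffset(Class* klass, int field_offset) {
  for (Class* cur = klass; cur != nullptr; cur = cur->super_class) {
    for (std::size_t i = 0; i < cur->num_instance_fields; ++i) {
      if (cur->ifields[i].offset == field_offset) {
        return &cur->ifields[i];
      }
    }
  }
  return nullptr;
}

int main() {
  Field base_fields[1] = {{8}};
  Class base{nullptr, base_fields, 1};
  Class derived{&base, nullptr, 0};
  return FindInstanceFieldByOffset(&derived, 8) == &base_fields[0] ? 0 : 1;
}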
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index e44a40a..343c9bc 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -24,6 +24,7 @@
 
 namespace art {
 
+class ArtField;
 class ImageWriter;
 class LockWord;
 class Monitor;
@@ -33,7 +34,6 @@
 
 namespace mirror {
 
-class ArtField;
 class ArtMethod;
 class Array;
 class Class;
@@ -194,12 +194,6 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ArtMethod* AsArtMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
-           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  bool IsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ArtField* AsArtField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsReferenceInstance() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index 80d5135..30bc1cd 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -22,7 +22,6 @@
 #include "array-inl.h"
 #include "base/stringprintf.h"
 #include "gc/heap.h"
-#include "mirror/art_field.h"
 #include "mirror/class.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
@@ -36,13 +35,13 @@
 inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
                                              int32_t length, gc::AllocatorType allocator_type) {
   Array* array = Array::Alloc<true>(self, object_array_class, length,
-                                    ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>(),
+                                    ComponentSizeShiftWidth(sizeof(HeapReference<Object>)),
                                     allocator_type);
   if (UNLIKELY(array == nullptr)) {
     return nullptr;
   } else {
     DCHECK_EQ(array->GetClass()->GetComponentSizeShift(),
-              ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>());
+              ComponentSizeShiftWidth(sizeof(HeapReference<Object>)));
     return array->AsObjectArray<T>();
   }
 }
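ComponentSizeShiftWidth above changes from a template instantiated per element size to an ordinary call taking the size as a runtime argument. A plausible constexpr equivalent of such a helper, shown only to illustrate the size-to-shift mapping (not the actual ART definition):

#include <cstddef>

// Maps an element size of 1, 2, 4, or 8 bytes to its log2 shift width.
constexpr std::size_t ComponentSizeShiftWidth(std::size_t component_size) {
  return component_size == 1u ? 0u
       : component_size == 2u ? 1u
       : component_size == 4u ? 2u
       : component_size == 8u ? 3u
       : 0u;
}

static_assert(ComponentSizeShiftWidth(sizeof(void*)) == (sizeof(void*) == 8u ? 3u : 2u),
              "pointer-sized elements shift by 2 or 3");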
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 1ce298d..747a008 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -722,14 +722,12 @@
                                      "Ljava/util/Comparator;") == NULL);
 
   // Right name and type.
-  Handle<ArtField> f1(hs.NewHandle(
-      c->FindDeclaredStaticField("CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;")));
-  Handle<ArtField> f2(hs.NewHandle(
-      mirror::Class::FindStaticField(soa.Self(), c, "CASE_INSENSITIVE_ORDER",
-                                     "Ljava/util/Comparator;")));
-  EXPECT_TRUE(f1.Get() != NULL);
-  EXPECT_TRUE(f2.Get() != NULL);
-  EXPECT_EQ(f1.Get(), f2.Get());
+  ArtField* f1 = c->FindDeclaredStaticField("CASE_INSENSITIVE_ORDER", "Ljava/util/Comparator;");
+  ArtField* f2 = mirror::Class::FindStaticField(soa.Self(), c, "CASE_INSENSITIVE_ORDER",
+                                                "Ljava/util/Comparator;");
+  EXPECT_TRUE(f1 != nullptr);
+  EXPECT_TRUE(f2 != nullptr);
+  EXPECT_EQ(f1, f2);
 
   // TODO: test static fields via superclasses.
   // TODO: test static fields via interfaces.
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index c182a4d..87ae64d 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -297,22 +297,15 @@
   return result;
 }
 
-// Java: dalvik.system.DexFile.UP_TO_DATE
-static const jbyte kUpToDate = 0;
-// Java: dalvik.system.DexFile.DEXOPT_NEEDED
-static const jbyte kPatchoatNeeded = 1;
-// Java: dalvik.system.DexFile.PATCHOAT_NEEDED
-static const jbyte kDexoptNeeded = 2;
-
-static jbyte IsDexOptNeededInternal(JNIEnv* env, const char* filename,
+static jint GetDexOptNeeded(JNIEnv* env, const char* filename,
     const char* pkgname, const char* instruction_set, const jboolean defer) {
 
   if ((filename == nullptr) || !OS::FileExists(filename)) {
-    LOG(ERROR) << "DexFile_isDexOptNeeded file '" << filename << "' does not exist";
+    LOG(ERROR) << "DexFile_getDexOptNeeded file '" << filename << "' does not exist";
     ScopedLocalRef<jclass> fnfe(env, env->FindClass("java/io/FileNotFoundException"));
     const char* message = (filename == nullptr) ? "<empty file name>" : filename;
     env->ThrowNew(fnfe.get(), message);
-    return kUpToDate;
+    return OatFileAssistant::kNoDexOptNeeded;
   }
 
   const InstructionSet target_instruction_set = GetInstructionSetFromString(instruction_set);
@@ -330,7 +323,7 @@
 
   // Always treat elements of the bootclasspath as up-to-date.
   if (oat_file_assistant.IsInBootClassPath()) {
-    return kUpToDate;
+    return OatFileAssistant::kNoDexOptNeeded;
   }
 
   // TODO: Checking the profile should probably be done in the GetStatus()
@@ -343,7 +336,7 @@
         if (!defer) {
           oat_file_assistant.CopyProfileFile();
         }
-        return kDexoptNeeded;
+        return OatFileAssistant::kDex2OatNeeded;
       } else if (oat_file_assistant.ProfileExists()
           && !oat_file_assistant.OldProfileExists()) {
         if (!defer) {
@@ -353,16 +346,10 @@
     }
   }
 
-  OatFileAssistant::Status status = oat_file_assistant.GetStatus();
-  switch (status) {
-    case OatFileAssistant::kUpToDate: return kUpToDate;
-    case OatFileAssistant::kNeedsRelocation: return kPatchoatNeeded;
-    case OatFileAssistant::kOutOfDate: return kDexoptNeeded;
-  }
-  UNREACHABLE();
+  return oat_file_assistant.GetDexOptNeeded();
 }
 
-static jbyte DexFile_isDexOptNeededInternal(JNIEnv* env, jclass, jstring javaFilename,
+static jint DexFile_getDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename,
     jstring javaPkgname, jstring javaInstructionSet, jboolean defer) {
   ScopedUtfChars filename(env, javaFilename);
   if (env->ExceptionCheck()) {
@@ -376,25 +363,25 @@
     return 0;
   }
 
-  return IsDexOptNeededInternal(env, filename.c_str(), pkgname.c_str(),
-                                instruction_set.c_str(), defer);
+  return GetDexOptNeeded(env, filename.c_str(), pkgname.c_str(),
+                         instruction_set.c_str(), defer);
 }
 
 // public API, NULL pkgname
 static jboolean DexFile_isDexOptNeeded(JNIEnv* env, jclass, jstring javaFilename) {
   const char* instruction_set = GetInstructionSetString(kRuntimeISA);
   ScopedUtfChars filename(env, javaFilename);
-  return kUpToDate != IsDexOptNeededInternal(env, filename.c_str(), nullptr /* pkgname */,
-                                             instruction_set, false /* defer */);
+  jint status = GetDexOptNeeded(env, filename.c_str(), nullptr /* pkgname */,
+                                instruction_set, false /* defer */);
+  return (status != OatFileAssistant::kNoDexOptNeeded) ? JNI_TRUE : JNI_FALSE;
 }
 
-
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexFile, closeDexFile, "(Ljava/lang/Object;)V"),
   NATIVE_METHOD(DexFile, defineClassNative, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/Object;)Ljava/lang/Class;"),
   NATIVE_METHOD(DexFile, getClassNameList, "(Ljava/lang/Object;)[Ljava/lang/String;"),
   NATIVE_METHOD(DexFile, isDexOptNeeded, "(Ljava/lang/String;)Z"),
-  NATIVE_METHOD(DexFile, isDexOptNeededInternal, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)B"),
+  NATIVE_METHOD(DexFile, getDexOptNeeded, "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)I"),
   NATIVE_METHOD(DexFile, openDexFileNative, "(Ljava/lang/String;Ljava/lang/String;I)Ljava/lang/Object;"),
 };
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 760038a..1a6adf8 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -313,8 +313,8 @@
 static void PreloadDexCachesResolveField(Handle<mirror::DexCache> dex_cache, uint32_t field_idx,
                                          bool is_static)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtField* field = dex_cache->GetResolvedField(field_idx);
-  if (field != NULL) {
+  ArtField* field = dex_cache->GetResolvedField(field_idx, sizeof(void*));
+  if (field != nullptr) {
     return;
   }
   const DexFile* dex_file = dex_cache->GetDexFile();
@@ -334,7 +334,7 @@
     return;
   }
   // LOG(INFO) << "VMRuntime.preloadDexCaches resolved field " << PrettyField(field);
-  dex_cache->SetResolvedField(field_idx, field);
+  dex_cache->SetResolvedField(field_idx, field, sizeof(void*));
 }
 
 // Based on ClassLinker::ResolveMethod.
@@ -437,7 +437,7 @@
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedFields(); j++) {
-      mirror::ArtField* field = dex_cache->GetResolvedField(j);
+      ArtField* field = linker->GetResolvedField(j, dex_cache);
       if (field != NULL) {
         filled->num_fields++;
       }
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index c893f0a..5ad18f8 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -16,12 +16,12 @@
 
 #include "java_lang_Class.h"
 
+#include "art_field-inl.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "jni_internal.h"
 #include "nth_caller_visitor.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/field-inl.h"
@@ -91,6 +91,18 @@
   return soa.AddLocalReference<jclass>(c.Get());
 }
 
+static jobject Class_findOverriddenMethodIfProxy(JNIEnv* env, jclass, jobject art_method) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::ArtMethod* method = soa.Decode<mirror::ArtMethod*>(art_method);
+  mirror::Class* declaring_klass = method->GetDeclaringClass();
+  if (!declaring_klass->IsProxyClass()) {
+    return art_method;
+  }
+  uint32_t dex_method_index = method->GetDexMethodIndex();
+  mirror::ArtMethod* overridden_method = method->GetDexCacheResolvedMethods()->Get(dex_method_index);
+  return soa.AddLocalReference<jobject>(overridden_method);
+}
+
 static jstring Class_getNameNative(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   StackHandleScope<1> hs(soa.Self());
@@ -107,33 +119,33 @@
 static mirror::ObjectArray<mirror::Field>* GetDeclaredFields(
     Thread* self, mirror::Class* klass, bool public_only, bool force_resolve)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  StackHandleScope<3> hs(self);
-  auto h_ifields = hs.NewHandle(klass->GetIFields());
-  auto h_sfields = hs.NewHandle(klass->GetSFields());
-  const int32_t num_ifields = h_ifields.Get() != nullptr ? h_ifields->GetLength() : 0;
-  const int32_t num_sfields = h_sfields.Get() != nullptr ? h_sfields->GetLength() : 0;
-  int32_t array_size = num_ifields + num_sfields;
+  StackHandleScope<1> hs(self);
+  auto* ifields = klass->GetIFields();
+  auto* sfields = klass->GetSFields();
+  const auto num_ifields = klass->NumInstanceFields();
+  const auto num_sfields = klass->NumStaticFields();
+  size_t array_size = num_ifields + num_sfields;
   if (public_only) {
+    // Let's subtract all the non-public fields.
-    for (int32_t i = 0; i < num_ifields; ++i) {
-      if (!h_ifields->GetWithoutChecks(i)->IsPublic()) {
+    for (size_t i = 0; i < num_ifields; ++i) {
+      if (!ifields[i].IsPublic()) {
         --array_size;
       }
     }
-    for (int32_t i = 0; i < num_sfields; ++i) {
-      if (!h_sfields->GetWithoutChecks(i)->IsPublic()) {
+    for (size_t i = 0; i < num_sfields; ++i) {
+      if (!sfields[i].IsPublic()) {
         --array_size;
       }
     }
   }
-  int32_t array_idx = 0;
+  size_t array_idx = 0;
   auto object_array = hs.NewHandle(mirror::ObjectArray<mirror::Field>::Alloc(
       self, mirror::Field::ArrayClass(), array_size));
   if (object_array.Get() == nullptr) {
     return nullptr;
   }
-  for (int32_t i = 0; i < num_ifields; ++i) {
-    auto* art_field = h_ifields->GetWithoutChecks(i);
+  for (size_t i = 0; i < num_ifields; ++i) {
+    auto* art_field = &ifields[i];
     if (!public_only || art_field->IsPublic()) {
       auto* field = mirror::Field::CreateFromArtField(self, art_field, force_resolve);
       if (field == nullptr) {
@@ -146,8 +158,8 @@
       object_array->SetWithoutChecks<false>(array_idx++, field);
     }
   }
-  for (int32_t i = 0; i < num_sfields; ++i) {
-    auto* art_field = h_sfields->GetWithoutChecks(i);
+  for (size_t i = 0; i < num_sfields; ++i) {
+    auto* art_field = &sfields[i];
     if (!public_only || art_field->IsPublic()) {
       auto* field = mirror::Field::CreateFromArtField(self, art_field, force_resolve);
       if (field == nullptr) {
@@ -185,17 +197,16 @@
 // Performs a binary search through an array of fields. TODO: Is this fast enough if we don't use
 // the dex cache for lookups? I think CompareModifiedUtf8ToUtf16AsCodePointValues should be fairly
 // fast.
-ALWAYS_INLINE static inline mirror::ArtField* FindFieldByName(
-    Thread* self ATTRIBUTE_UNUSED, mirror::String* name,
-    mirror::ObjectArray<mirror::ArtField>* fields)
+ALWAYS_INLINE static inline ArtField* FindFieldByName(
+    Thread* self ATTRIBUTE_UNUSED, mirror::String* name, ArtField* fields, size_t num_fields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  uint32_t low = 0;
-  uint32_t high = fields->GetLength();
+  size_t low = 0;
+  size_t high = num_fields;
   const uint16_t* const data = name->GetCharArray()->GetData() + name->GetOffset();
   const size_t length = name->GetLength();
   while (low < high) {
     auto mid = (low + high) / 2;
-    mirror::ArtField* const field = fields->GetWithoutChecks(mid);
+    ArtField* const field = &fields[mid];
     int result = CompareModifiedUtf8ToUtf16AsCodePointValues(field->GetName(), data, length);
     // Alternate approach, only a few % faster at the cost of more allocations.
     // int result = field->GetStringName(self, true)->CompareTo(name);
@@ -208,8 +219,8 @@
     }
   }
   if (kIsDebugBuild) {
-    for (int32_t i = 0; i < fields->GetLength(); ++i) {
-      CHECK_NE(fields->GetWithoutChecks(i)->GetName(), name->ToModifiedUtf8());
+    for (size_t i = 0; i < num_fields; ++i) {
+      CHECK_NE(fields[i].GetName(), name->ToModifiedUtf8());
     }
   }
   return nullptr;
@@ -219,18 +230,14 @@
     Thread* self, mirror::Class* c, mirror::String* name)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   auto* instance_fields = c->GetIFields();
-  if (instance_fields != nullptr) {
-    auto* art_field = FindFieldByName(self, name, instance_fields);
-    if (art_field != nullptr) {
-      return mirror::Field::CreateFromArtField(self, art_field, true);
-    }
+  auto* art_field = FindFieldByName(self, name, instance_fields, c->NumInstanceFields());
+  if (art_field != nullptr) {
+    return mirror::Field::CreateFromArtField(self, art_field, true);
   }
   auto* static_fields = c->GetSFields();
-  if (static_fields != nullptr) {
-    auto* art_field = FindFieldByName(self, name, static_fields);
-    if (art_field != nullptr) {
-      return mirror::Field::CreateFromArtField(self, art_field, true);
-    }
+  art_field = FindFieldByName(self, name, static_fields, c->NumStaticFields());
+  if (art_field != nullptr) {
+    return mirror::Field::CreateFromArtField(self, art_field, true);
   }
   return nullptr;
 }
@@ -264,6 +271,8 @@
 
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(Class, classForName, "!(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;"),
+  NATIVE_METHOD(Class, findOverriddenMethodIfProxy,
+                "!(Ljava/lang/reflect/ArtMethod;)Ljava/lang/reflect/ArtMethod;"),
   NATIVE_METHOD(Class, getNameNative, "!()Ljava/lang/String;"),
   NATIVE_METHOD(Class, getProxyInterfaces, "!()[Ljava/lang/Class;"),
   NATIVE_METHOD(Class, getDeclaredFields, "!()[Ljava/lang/reflect/Field;"),
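FindFieldByName above now binary-searches the contiguous, name-sorted ArtField array by plain index. A standalone sketch of the same search, with strcmp standing in for CompareModifiedUtf8ToUtf16AsCodePointValues and hypothetical types throughout:

#include <cstdio>
#include <cstring>

struct Field { const char* name; };

// Fields must be sorted by name, as the class's field arrays are.
// Returns nullptr when no field matches, like the hunk above.
Field* FindFieldByName(const char* name, Field* fields, std::size_t num_fields) {
  std::size_t low = 0;
  std::size_t high = num_fields;
  while (low < high) {
    std::size_t mid = low + (high - low) / 2;  // Avoids (low + high) overflow.
    int result = std::strcmp(fields[mid].name, name);
    if (result < 0) {
      low = mid + 1;
    } else if (result > 0) {
      high = mid;
    } else {
      return &fields[mid];
    }
  }
  return nullptr;
}

int main() {
  Field fields[] = {{"interfaces"}, {"slowPathEnabled"}, {"throws"}};
  Field* f = FindFieldByName("throws", fields, 3);
  std::printf("%s\n", f != nullptr ? f->name : "not found");
}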
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index 27eae46..8944270 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -48,8 +48,38 @@
                                       args);
 }
 
+static jobject DexCache_getResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedType(type_index));
+}
+
+static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedString(string_index));
+}
+
+static void DexCache_setResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index,
+                                     jobject type) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+  dex_cache->SetResolvedType(type_index, soa.Decode<mirror::Class*>(type));
+}
+
+static void DexCache_setResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index,
+                                       jobject string) {
+  ScopedFastNativeObjectAccess soa(env);
+  mirror::DexCache* dex_cache = soa.Decode<mirror::DexCache*>(javaDexCache);
+  dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String*>(string));
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(DexCache, getDexNative, "!()Lcom/android/dex/Dex;"),
+  NATIVE_METHOD(DexCache, getResolvedType, "!(I)Ljava/lang/Class;"),
+  NATIVE_METHOD(DexCache, getResolvedString, "!(I)Ljava/lang/String;"),
+  NATIVE_METHOD(DexCache, setResolvedType, "!(ILjava/lang/Class;)V"),
+  NATIVE_METHOD(DexCache, setResolvedString, "!(ILjava/lang/String;)V"),
 };
 
 void register_java_lang_DexCache(JNIEnv* env) {
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 1ffcbdf..eddd7de 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -61,10 +61,8 @@
     return NULL;
   }
   DCHECK(array_class->IsObjectArrayClass());
-  mirror::Array* new_array = mirror::Array::Alloc<true>(
-      soa.Self(), array_class, length,
-      ComponentSizeShiftWidth<sizeof(mirror::HeapReference<mirror::Object>)>(),
-      runtime->GetHeap()->GetCurrentAllocator());
+  mirror::Array* new_array = mirror::ObjectArray<mirror::Object>::Alloc(
+      soa.Self(), array_class, length, runtime->GetHeap()->GetCurrentAllocator());
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 765f548..5e1a4c5 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -57,8 +57,6 @@
   bool movable = true;
   if (!kMovingMethods && c->IsArtMethodClass()) {
     movable = false;
-  } else if (!kMovingFields && c->IsArtFieldClass()) {
-    movable = false;
   } else if (!kMovingClasses && c->IsClassClass()) {
     movable = false;
   }
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index d92f59b..e5c27b2 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -142,31 +142,31 @@
   return true;
 }
 
-OatFileAssistant::Status OatFileAssistant::GetStatus() {
+OatFileAssistant::DexOptNeeded OatFileAssistant::GetDexOptNeeded() {
   // TODO: If the profiling code is ever restored, it's worth considering
   // whether we should check to see if the profile is out of date here.
 
-  if (OdexFileIsOutOfDate()) {
-    // The DEX file is not pre-compiled.
-    // TODO: What if the oat file is not out of date? Could we relocate it
-    // from itself?
-    return OatFileIsUpToDate() ? kUpToDate : kOutOfDate;
-  } else {
-    // The DEX file is pre-compiled. If the oat file isn't up to date, we can
-    // patch the pre-compiled version rather than recompiling.
-    if (OatFileIsUpToDate() || OdexFileIsUpToDate()) {
-      return kUpToDate;
-    } else {
-      return kNeedsRelocation;
-    }
+  if (OatFileIsUpToDate() || OdexFileIsUpToDate()) {
+    return kNoDexOptNeeded;
   }
+
+  if (OdexFileNeedsRelocation()) {
+    return kPatchOatNeeded;
+  }
+
+  if (OatFileNeedsRelocation()) {
+    return kSelfPatchOatNeeded;
+  }
+
+  return kDex2OatNeeded;
 }
 
 bool OatFileAssistant::MakeUpToDate(std::string* error_msg) {
-  switch (GetStatus()) {
-    case kUpToDate: return true;
-    case kNeedsRelocation: return RelocateOatFile(error_msg);
-    case kOutOfDate: return GenerateOatFile(error_msg);
+  switch (GetDexOptNeeded()) {
+    case kNoDexOptNeeded: return true;
+    case kDex2OatNeeded: return GenerateOatFile(error_msg);
+    case kPatchOatNeeded: return RelocateOatFile(OdexFileName(), error_msg);
+    case kSelfPatchOatNeeded: return RelocateOatFile(OatFileName(), error_msg);
   }
   UNREACHABLE();
 }
@@ -269,14 +269,14 @@
   return GetOdexFile() != nullptr;
 }
 
-OatFileAssistant::Status OatFileAssistant::OdexFileStatus() {
+OatFileAssistant::OatStatus OatFileAssistant::OdexFileStatus() {
   if (OdexFileIsOutOfDate()) {
-    return kOutOfDate;
+    return kOatOutOfDate;
   }
   if (OdexFileIsUpToDate()) {
-    return kUpToDate;
+    return kOatUpToDate;
   }
-  return kNeedsRelocation;
+  return kOatNeedsRelocation;
 }
 
 bool OatFileAssistant::OdexFileIsOutOfDate() {
@@ -293,7 +293,7 @@
 }
 
 bool OatFileAssistant::OdexFileNeedsRelocation() {
-  return OdexFileStatus() == kNeedsRelocation;
+  return OdexFileStatus() == kOatNeedsRelocation;
 }
 
 bool OatFileAssistant::OdexFileIsUpToDate() {
@@ -338,14 +338,14 @@
   return GetOatFile() != nullptr;
 }
 
-OatFileAssistant::Status OatFileAssistant::OatFileStatus() {
+OatFileAssistant::OatStatus OatFileAssistant::OatFileStatus() {
   if (OatFileIsOutOfDate()) {
-    return kOutOfDate;
+    return kOatOutOfDate;
   }
   if (OatFileIsUpToDate()) {
-    return kUpToDate;
+    return kOatUpToDate;
   }
-  return kNeedsRelocation;
+  return kOatNeedsRelocation;
 }
 
 bool OatFileAssistant::OatFileIsOutOfDate() {
@@ -362,7 +362,7 @@
 }
 
 bool OatFileAssistant::OatFileNeedsRelocation() {
-  return OatFileStatus() == kNeedsRelocation;
+  return OatFileStatus() == kOatNeedsRelocation;
 }
 
 bool OatFileAssistant::OatFileIsUpToDate() {
@@ -378,17 +378,17 @@
   return cached_oat_file_is_up_to_date_;
 }
 
-OatFileAssistant::Status OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
+OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& file) {
   // TODO: This could cause GivenOatFileIsOutOfDate to be called twice, which
   // is more work than we need to do. If performance becomes a concern, and
   // this method is actually called, this should be fixed.
   if (GivenOatFileIsOutOfDate(file)) {
-    return kOutOfDate;
+    return kOatOutOfDate;
   }
   if (GivenOatFileIsUpToDate(file)) {
-    return kUpToDate;
+    return kOatUpToDate;
   }
-  return kNeedsRelocation;
+  return kOatNeedsRelocation;
 }
 
 bool OatFileAssistant::GivenOatFileIsOutOfDate(const OatFile& file) {
@@ -451,7 +451,7 @@
 }
 
 bool OatFileAssistant::GivenOatFileNeedsRelocation(const OatFile& file) {
-  return GivenOatFileStatus(file) == kNeedsRelocation;
+  return GivenOatFileStatus(file) == kOatNeedsRelocation;
 }
 
 bool OatFileAssistant::GivenOatFileIsUpToDate(const OatFile& file) {
@@ -592,16 +592,17 @@
   }
 }
 
-bool OatFileAssistant::RelocateOatFile(std::string* error_msg) {
+bool OatFileAssistant::RelocateOatFile(const std::string* input_file,
+                                       std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
-  if (OdexFileName() == nullptr) {
+  if (input_file == nullptr) {
     *error_msg = "Patching of oat file for dex location "
       + std::string(dex_location_)
-      + " not attempted because the odex file name could not be determined.";
+      + " not attempted because the input file name could not be determined.";
     return false;
   }
-  const std::string& odex_file_name = *OdexFileName();
+  const std::string& input_file_name = *input_file;
 
   if (OatFileName() == nullptr) {
     *error_msg = "Patching of oat file for dex location "
@@ -628,7 +629,7 @@
   std::vector<std::string> argv;
   argv.push_back(runtime->GetPatchoatExecutable());
   argv.push_back("--instruction-set=" + std::string(GetInstructionSetString(isa_)));
-  argv.push_back("--input-oat-file=" + odex_file_name);
+  argv.push_back("--input-oat-file=" + input_file_name);
   argv.push_back("--output-oat-file=" + oat_file_name);
   argv.push_back("--patched-image-location=" + image_info->location);
 
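The GetDexOptNeeded rewrite at the top of this file reduces the old three-state Status to a fixed priority over the odex and oat statuses: an up-to-date file wins, a relocatable odex beats a relocatable oat, and dex2oat is the fallback. A pure-function sketch of that decision table, using standalone copies of the enums introduced in oat_file_assistant.h below:

#include <cstdio>

enum OatStatus { kOatOutOfDate, kOatNeedsRelocation, kOatUpToDate };
enum DexOptNeeded {
  kNoDexOptNeeded = 0,
  kDex2OatNeeded = 1,
  kPatchOatNeeded = 2,
  kSelfPatchOatNeeded = 3,
};

// Same ordering as the hunk above, expressed over precomputed statuses.
DexOptNeeded GetDexOptNeeded(OatStatus odex_status, OatStatus oat_status) {
  if (oat_status == kOatUpToDate || odex_status == kOatUpToDate) {
    return kNoDexOptNeeded;
  }
  if (odex_status == kOatNeedsRelocation) {
    return kPatchOatNeeded;
  }
  if (oat_status == kOatNeedsRelocation) {
    return kSelfPatchOatNeeded;
  }
  return kDex2OatNeeded;
}

int main() {
  // A relocatable odex next to an out-of-date oat wants patchoat (2).
  std::printf("%d\n", GetDexOptNeeded(kOatNeedsRelocation, kOatOutOfDate));
}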
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index f2abcf9..9e7c2ef 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -43,20 +43,43 @@
 // be restored and tested, or removed.
 class OatFileAssistant {
  public:
-  enum Status {
-    // kOutOfDate - An oat file is said to be out of date if the file does not
-    // exist, or is out of date with respect to the dex file or boot image.
-    kOutOfDate,
+  enum DexOptNeeded {
+    // kNoDexOptNeeded - The code for this dex location is up to date and can
+    // be used as is.
+    // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0
+    kNoDexOptNeeded = 0,
 
-    // kNeedsRelocation - An oat file is said to need relocation if the code
-    // is up to date, but not yet properly relocated for address space layout
-    // randomization (ASLR). In this case, the oat file is neither "out of
-    // date" nor "up to date".
-    kNeedsRelocation,
+    // kDex2OatNeeded - In order to make the code for this dex location up to
+    // date, dex2oat must be run on the dex file.
+    // Matches Java: dalvik.system.DexFile.DEX2OAT_NEEDED = 1
+    kDex2OatNeeded = 1,
 
-    // kUpToDate - An oat file is said to be up to date if it is not out of
+    // kPatchOatNeeded - In order to make the code for this dex location up to
+    // date, patchoat must be run on the odex file.
+    // Matches Java: dalvik.system.DexFile.PATCHOAT_NEEDED = 2
+    kPatchOatNeeded = 2,
+
+    // kSelfPatchOatNeeded - In order to make the code for this dex location
+    // up to date, patchoat must be run on the oat file.
+    // Matches Java: dalvik.system.DexFile.SELF_PATCHOAT_NEEDED = 3
+    kSelfPatchOatNeeded = 3,
+  };
+
+  enum OatStatus {
+    // kOatOutOfDate - An oat file is said to be out of date if the file does
+    // not exist, or is out of date with respect to the dex file or boot
+    // image.
+    kOatOutOfDate,
+
+    // kOatNeedsRelocation - An oat file is said to need relocation if the
+    // code is up to date, but not yet properly relocated for address space
+    // layout randomization (ASLR). In this case, the oat file is neither
+    // "out of date" nor "up to date".
+    kOatNeedsRelocation,
+
+    // kOatUpToDate - An oat file is said to be up to date if it is not out of
     // date and has been properly relocated for the purposes of ASLR.
-    kUpToDate,
+    kOatUpToDate,
   };
 
   // Constructs an OatFileAssistant object to assist the oat file
@@ -67,7 +90,6 @@
   // Typically the dex_location is the absolute path to the original,
   // un-optimized dex file.
   //
-  //
   // Note: Currently the dex_location must have an extension.
   // TODO: Relax this restriction?
   //
@@ -121,8 +143,9 @@
   // file.
   bool Lock(std::string* error_msg);
 
-  // Returns the overall compilation status for the given dex location.
-  Status GetStatus();
+  // Return what action needs to be taken to produce up-to-date code for this
+  // dex location.
+  DexOptNeeded GetDexOptNeeded();
 
   // Attempts to generate or relocate the oat file as needed to make it up to
   // date.
@@ -164,7 +187,7 @@
   //    determined.
   const std::string* OdexFileName();
   bool OdexFileExists();
-  Status OdexFileStatus();
+  OatStatus OdexFileStatus();
   bool OdexFileIsOutOfDate();
   bool OdexFileNeedsRelocation();
   bool OdexFileIsUpToDate();
@@ -176,20 +199,18 @@
   // the dex location.
   //
   // Notes:
-  //  * To get the overall status of the compiled code for this dex_location,
-  //    use the GetStatus() method, not the OatFileStatus() method.
   //  * OatFileName may return null if the oat file name could not be
   //    determined.
   const std::string* OatFileName();
   bool OatFileExists();
-  Status OatFileStatus();
+  OatStatus OatFileStatus();
   bool OatFileIsOutOfDate();
   bool OatFileNeedsRelocation();
   bool OatFileIsUpToDate();
 
   // These methods return the status for a given opened oat file with respect
   // to the dex location.
-  Status GivenOatFileStatus(const OatFile& file);
+  OatStatus GivenOatFileStatus(const OatFile& file);
   bool GivenOatFileIsOutOfDate(const OatFile& file);
   bool GivenOatFileNeedsRelocation(const OatFile& file);
   bool GivenOatFileIsUpToDate(const OatFile& file);
@@ -216,7 +237,7 @@
   // Copy the current profile to the old profile location.
   void CopyProfileFile();
 
-  // Generates the oat file by relocation from the odex file.
+  // Generates the oat file by relocation from the named input file.
   // This does not check the current status before attempting to relocate the
   // oat file.
   // Returns true on success.
@@ -224,7 +245,7 @@
   //
   // If there is a failure, the value of error_msg will be set to a string
   // describing why there was failure. error_msg must not be nullptr.
-  bool RelocateOatFile(std::string* error_msg);
+  bool RelocateOatFile(const std::string* input_file, std::string* error_msg);
 
   // Generate the oat file from the dex file.
   // This does not check the current status before attempting to generate the
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index a198824..37c6353 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -25,11 +25,13 @@
 #include <backtrace/BacktraceMap.h>
 #include <gtest/gtest.h>
 
+#include "art_field-inl.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
 #include "mem_map.h"
 #include "os.h"
+#include "scoped_thread_state_change.h"
 #include "thread-inl.h"
 #include "utils.h"
 
@@ -267,42 +269,42 @@
 }
 
 // Case: We have a DEX file, but no OAT file for it.
-// Expect: The oat file status is kOutOfDate.
+// Expect: The status is kDex2OatNeeded.
 TEST_F(OatFileAssistantTest, DexNoOat) {
   std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
   EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
   EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OdexFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OdexFileStatus());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
   EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
   EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.OatFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatOutOfDate, oat_file_assistant.OatFileStatus());
 }
 
 // Case: We have no DEX file and no OAT file.
-// Expect: Status is out of date. Loading should fail, but not crash.
+// Expect: Status is kDex2OatNeeded. Loading should fail, but not crash.
 TEST_F(OatFileAssistantTest, NoDexNoOat) {
   std::string dex_location = GetScratchDir() + "/NoDexNoOat.jar";
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   EXPECT_EQ(nullptr, oat_file.get());
 }
 
 // Case: We have a DEX file and up-to-date OAT file for it.
-// Expect: The oat file status is kUpToDate.
+// Expect: The status is kNoDexOptNeeded.
 TEST_F(OatFileAssistantTest, OatUpToDate) {
   std::string dex_location = GetScratchDir() + "/OatUpToDate.jar";
   Copy(GetDexSrc1(), dex_location);
@@ -310,7 +312,7 @@
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -319,18 +321,20 @@
   EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
   EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
   EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
-  EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.OatFileStatus());
+  EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus());
 }
 
 // Case: We have a MultiDEX file and up-to-date OAT file for it.
-// Expect: The oat file status is kUpToDate.
+// Expect: The status is kNoDexOptNeeded and we load all dex files.
 TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) {
   std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar";
   Copy(GetMultiDexSrc1(), dex_location);
   GenerateOatForTest(dex_location.c_str());
 
-  // Verify we can load both dex files.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+
+  // Verify we can load both dex files.
   std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
   ASSERT_TRUE(oat_file.get() != nullptr);
   EXPECT_TRUE(oat_file->IsExecutable());
@@ -341,7 +345,7 @@
 
 // Case: We have a MultiDEX file and up-to-date OAT file for it with relative
 // encoded dex locations.
-// Expect: The oat file status is kUpToDate.
+// Expect: The status is kNoDexOptNeeded.
 TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) {
   std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar";
   std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat";
@@ -370,8 +374,8 @@
   EXPECT_EQ(2u, dex_files.size());
 }
 
-// Case: We have a DEX file and out of date OAT file.
-// Expect: The oat file status is kOutOfDate.
+// Case: We have a DEX file and out-of-date OAT file.
+// Expect: The status is kDex2OatNeeded.
 TEST_F(OatFileAssistantTest, OatOutOfDate) {
   std::string dex_location = GetScratchDir() + "/OatOutOfDate.jar";
 
@@ -382,7 +386,7 @@
   Copy(GetDexSrc2(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -394,7 +398,7 @@
 }
 
 // Case: We have a DEX file and an ODEX file, but no OAT file.
-// Expect: The oat file status is kNeedsRelocation.
+// Expect: The status is kPatchOatNeeded.
 TEST_F(OatFileAssistantTest, DexOdexNoOat) {
   std::string dex_location = GetScratchDir() + "/DexOdexNoOat.jar";
   std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
@@ -406,21 +410,20 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
   EXPECT_FALSE(oat_file_assistant.OdexFileIsOutOfDate());
   EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
   EXPECT_TRUE(oat_file_assistant.OdexFileNeedsRelocation());
-  EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.OdexFileNeedsRelocation());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OatFileIsOutOfDate());
   EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
 }
 
 // Case: We have a stripped DEX file and an ODEX file, but no OAT file.
-// Expect: The oat file status is kNeedsRelocation.
+// Expect: The status is kPatchOatNeeded.
 TEST_F(OatFileAssistantTest, StrippedDexOdexNoOat) {
   std::string dex_location = GetScratchDir() + "/StrippedDexOdexNoOat.jar";
   std::string odex_location = GetOdexDir() + "/StrippedDexOdexNoOat.odex";
@@ -435,7 +438,7 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -449,7 +452,7 @@
   std::string error_msg;
   ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
-  EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -468,8 +471,8 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
-// Case: We have a stripped DEX file, an ODEX file, and an out of date OAT file.
-// Expect: The oat file status is kNeedsRelocation.
+// Case: We have a stripped DEX file, an ODEX file, and an out-of-date OAT file.
+// Expect: The status is kPatchOatNeeded.
 TEST_F(OatFileAssistantTest, StrippedDexOdexOat) {
   std::string dex_location = GetScratchDir() + "/StrippedDexOdexOat.jar";
   std::string odex_location = GetOdexDir() + "/StrippedDexOdexOat.odex";
@@ -488,7 +491,7 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -503,7 +506,7 @@
   std::string error_msg;
   ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
 
-  EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -524,9 +527,59 @@
   EXPECT_EQ(1u, dex_files.size());
 }
 
+// Case: We have a DEX file, no ODEX file, and an OAT file that needs
+// relocation.
+// Expect: The status is kSelfPatchOatNeeded.
+TEST_F(OatFileAssistantTest, SelfRelocation) {
+  std::string dex_location = GetScratchDir() + "/SelfRelocation.jar";
+  std::string oat_location = GetOdexDir() + "/SelfRelocation.oat";
+
+  // Create the dex file and an unrelocated oat file at the oat location.
+  Copy(GetDexSrc1(), dex_location);
+  GenerateOdexForTest(dex_location, oat_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(),
+      oat_location.c_str(), kRuntimeISA, true);
+
+  EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_TRUE(oat_file_assistant.OatFileExists());
+  EXPECT_TRUE(oat_file_assistant.OatFileNeedsRelocation());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsUpToDate());
+
+  // Make the oat file up to date.
+  std::string error_msg;
+  ASSERT_TRUE(oat_file_assistant.MakeUpToDate(&error_msg)) << error_msg;
+
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
+
+  EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
+  EXPECT_FALSE(oat_file_assistant.OdexFileExists());
+  EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OdexFileNeedsRelocation());
+  EXPECT_FALSE(oat_file_assistant.OdexFileIsUpToDate());
+  EXPECT_TRUE(oat_file_assistant.OatFileExists());
+  EXPECT_FALSE(oat_file_assistant.OatFileIsOutOfDate());
+  EXPECT_FALSE(oat_file_assistant.OatFileNeedsRelocation());
+  EXPECT_TRUE(oat_file_assistant.OatFileIsUpToDate());
+
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_TRUE(oat_file.get() != nullptr);
+  EXPECT_TRUE(oat_file->IsExecutable());
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str());
+  EXPECT_EQ(1u, dex_files.size());
+}
+
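Annotation: kSelfPatchOatNeeded is the new case this test exercises: there is no odex file, but the oat file at the requested oat location exists and merely needs relocating, so it serves as its own patchoat input. That is also why RelocateOatFile now takes its input file explicitly. A hedged sketch of the input selection; the enum spellings mirror the patch, the helper itself is illustrative and not the real MakeUpToDate logic.

    #include <string>

    enum DexOptNeeded { kNoDexOptNeeded, kDex2OatNeeded,
                        kPatchOatNeeded, kSelfPatchOatNeeded };

    // Illustrative only: pick the file RelocateOatFile should read from.
    // odex_name/oat_name stand in for OdexFileName() and OatFileName().
    inline const std::string* RelocationInput(DexOptNeeded status,
                                              const std::string* odex_name,
                                              const std::string* oat_name) {
      if (status == kPatchOatNeeded) return odex_name;     // relocate odex -> oat
      if (status == kSelfPatchOatNeeded) return oat_name;  // relocate oat in place
      return nullptr;                                      // nothing to relocate
    }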
 // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and
 // OAT files both have patch delta of 0.
-// Expect: It shouldn't crash.
+// Expect: It shouldn't crash, and the status is kPatchOatNeeded.
 TEST_F(OatFileAssistantTest, OdexOatOverlap) {
   std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar";
   std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex";
@@ -544,7 +597,7 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(),
       oat_location.c_str(), kRuntimeISA, true);
 
-  EXPECT_EQ(OatFileAssistant::kNeedsRelocation, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -564,7 +617,7 @@
 }
 
 // Case: We have a DEX file and a PIC ODEX file, but no OAT file.
-// Expect: The oat file status is kUpToDate, because PIC needs no relocation.
+// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
 TEST_F(OatFileAssistantTest, DexPicOdexNoOat) {
   std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar";
   std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex";
@@ -576,7 +629,7 @@
   // Verify the status.
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kUpToDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_TRUE(oat_file_assistant.OdexFileExists());
@@ -661,7 +714,7 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -720,7 +773,7 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -737,7 +790,7 @@
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true);
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
   EXPECT_FALSE(oat_file_assistant.OatFileExists());
   EXPECT_TRUE(oat_file_assistant.OdexFileIsOutOfDate());
@@ -751,14 +804,14 @@
 }
 
 // Case: Non-standard extension for dex file.
-// Expect: The oat file status is kOutOfDate.
+// Expect: The status is kDex2OatNeeded.
 TEST_F(OatFileAssistantTest, LongDexExtension) {
   std::string dex_location = GetScratchDir() + "/LongDexExtension.jarx";
   Copy(GetDexSrc1(), dex_location);
 
   OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
 
-  EXPECT_EQ(OatFileAssistant::kOutOfDate, oat_file_assistant.GetStatus());
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, oat_file_assistant.GetDexOptNeeded());
 
   EXPECT_FALSE(oat_file_assistant.IsInBootClassPath());
   EXPECT_FALSE(oat_file_assistant.OdexFileExists());
@@ -895,6 +948,41 @@
         "/foo/bar/baz_noext", kArm, &odex_file, &error_msg));
 }
 
+// Verify the dexopt status values from dalvik.system.DexFile
+// match the OatFileAssistant::DexOptStatus values.
+TEST_F(OatFileAssistantTest, DexOptStatusValues) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  ClassLinker* linker = Runtime::Current()->GetClassLinker();
+  Handle<mirror::Class> dexfile(
+      hs.NewHandle(linker->FindSystemClass(soa.Self(), "Ldalvik/system/DexFile;")));
+  ASSERT_FALSE(dexfile.Get() == nullptr);
+  linker->EnsureInitialized(soa.Self(), dexfile, true, true);
+
+  ArtField* no_dexopt_needed = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "NO_DEXOPT_NEEDED", "I");
+  ASSERT_FALSE(no_dexopt_needed == nullptr);
+  EXPECT_EQ(no_dexopt_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, no_dexopt_needed->GetInt(dexfile.Get()));
+
+  ArtField* dex2oat_needed = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "DEX2OAT_NEEDED", "I");
+  ASSERT_FALSE(dex2oat_needed == nullptr);
+  EXPECT_EQ(dex2oat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kDex2OatNeeded, dex2oat_needed->GetInt(dexfile.Get()));
+
+  ArtField* patchoat_needed = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "PATCHOAT_NEEDED", "I");
+  ASSERT_FALSE(patchoat_needed == nullptr);
+  EXPECT_EQ(patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kPatchOatNeeded, patchoat_needed->GetInt(dexfile.Get()));
+
+  ArtField* self_patchoat_needed = mirror::Class::FindStaticField(
+      soa.Self(), dexfile, "SELF_PATCHOAT_NEEDED", "I");
+  ASSERT_FALSE(self_patchoat_needed == nullptr);
+  EXPECT_EQ(self_patchoat_needed->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
+  EXPECT_EQ(OatFileAssistant::kSelfPatchOatNeeded, self_patchoat_needed->GetInt(dexfile.Get()));
+}
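Annotation: the test above pins the C++ enum to the Java constants reflectively because the values cross the Java/C++ boundary and can only be compared at runtime. Within a single language the same pin would be a compile-time assert; the sketch below assumes the concrete values 0 through 3, which is exactly the assumption the reflective test exists to verify.

    // Hypothetical compile-time pin; the values 0..3 are assumed here,
    // not taken from the patch.
    enum DexOptNeeded {
      kNoDexOptNeeded = 0,
      kDex2OatNeeded = 1,
      kPatchOatNeeded = 2,
      kSelfPatchOatNeeded = 3
    };
    static_assert(kNoDexOptNeeded == 0, "must match DexFile.NO_DEXOPT_NEEDED");
    static_assert(kDex2OatNeeded == 1, "must match DexFile.DEX2OAT_NEEDED");
    static_assert(kPatchOatNeeded == 2, "must match DexFile.PATCHOAT_NEEDED");
    static_assert(kSelfPatchOatNeeded == 3, "must match DexFile.SELF_PATCHOAT_NEEDED");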
 
 // TODO: More Tests:
 //  * Test class linker falls back to unquickened dex for DexNoOat
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 32bfdaf..3818487 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -27,21 +27,11 @@
 static constexpr size_t kObjectReferenceSize = 4;
 
 
-template<size_t kComponentSize>
-size_t ComponentSizeShiftWidth() {
-  switch (kComponentSize) {
-    case 1:
-      return 0U;
-    case 2:
-      return 1U;
-    case 4:
-      return 2U;
-    case 8:
-      return 3U;
-    default:
-      LOG(FATAL) << "Unexpected component size : " << kComponentSize;
-      return 0U;
-  }
+constexpr size_t ComponentSizeShiftWidth(size_t component_size) {
+  return component_size == 1u ? 0u :
+      component_size == 2u ? 1u :
+          component_size == 4u ? 2u :
+              component_size == 8u ? 3u : 0u;
 }
 
 class Primitive {
@@ -95,7 +85,7 @@
       case kPrimFloat:   return 2;
       case kPrimLong:
       case kPrimDouble:  return 3;
-      case kPrimNot:     return ComponentSizeShiftWidth<kObjectReferenceSize>();
+      case kPrimNot:     return ComponentSizeShiftWidth(kObjectReferenceSize);
       default:
         LOG(FATAL) << "Invalid type " << static_cast<int>(type);
         return 0;
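Annotation: the rewrite above trades the template-parameterized switch for a C++11 constexpr function. C++11 limits a constexpr body to a single return statement, which forces the chained conditional, and LOG(FATAL) is not usable there, so an unexpected size now folds to 0 instead of aborting. The gain is that the function accepts runtime arguments and still constant-folds; a small self-contained check of the function as it appears in the patch:

    #include <cstddef>

    constexpr size_t ComponentSizeShiftWidth(size_t component_size) {
      return component_size == 1u ? 0u :
          component_size == 2u ? 1u :
              component_size == 4u ? 2u :
                  component_size == 8u ? 3u : 0u;
    }

    // Usable in compile-time contexts the old template also served...
    static_assert(ComponentSizeShiftWidth(8) == 3, "8-byte components shift by 3");
    // ...and it round-trips: size == 1 << shift for the supported sizes.
    static_assert((size_t{1} << ComponentSizeShiftWidth(4)) == 4, "round-trip");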
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index cb97049..a3156b4 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -17,8 +17,8 @@
 #include <jni.h>
 #include <vector>
 
+#include "art_field-inl.h"
 #include "common_compiler_test.h"
-#include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
@@ -165,14 +165,12 @@
   ASSERT_TRUE(proxyClass->IsProxyClass());
   ASSERT_TRUE(proxyClass->IsInitialized());
 
-  Handle<mirror::ObjectArray<mirror::ArtField>> instance_fields(
-      hs.NewHandle(proxyClass->GetIFields()));
-  EXPECT_TRUE(instance_fields.Get() == nullptr);
+  ArtField* instance_fields = proxyClass->GetIFields();
+  EXPECT_TRUE(instance_fields == nullptr);
 
-  Handle<mirror::ObjectArray<mirror::ArtField>> static_fields(
-      hs.NewHandle(proxyClass->GetSFields()));
-  ASSERT_TRUE(static_fields.Get() != nullptr);
-  ASSERT_EQ(2, static_fields->GetLength());
+  ArtField* static_fields = proxyClass->GetSFields();
+  ASSERT_TRUE(static_fields != nullptr);
+  ASSERT_EQ(2u, proxyClass->NumStaticFields());
 
   Handle<mirror::Class> interfacesFieldClass(
       hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Class;")));
@@ -182,21 +180,21 @@
   ASSERT_TRUE(throwsFieldClass.Get() != nullptr);
 
   // Test "Class[] interfaces" field.
-  MutableHandle<mirror::ArtField> fhandle = hs.NewHandle(static_fields->Get(0));
-  EXPECT_EQ("interfaces", std::string(fhandle->GetName()));
-  EXPECT_EQ("[Ljava/lang/Class;", std::string(fhandle->GetTypeDescriptor()));
-  EXPECT_EQ(interfacesFieldClass.Get(), fhandle->GetType<true>());
+  ArtField* field = &static_fields[0];
+  EXPECT_STREQ("interfaces", field->GetName());
+  EXPECT_STREQ("[Ljava/lang/Class;", field->GetTypeDescriptor());
+  EXPECT_EQ(interfacesFieldClass.Get(), field->GetType<true>());
   std::string temp;
-  EXPECT_EQ("L$Proxy1234;", std::string(fhandle->GetDeclaringClass()->GetDescriptor(&temp)));
-  EXPECT_FALSE(fhandle->IsPrimitiveType());
+  EXPECT_STREQ("L$Proxy1234;", field->GetDeclaringClass()->GetDescriptor(&temp));
+  EXPECT_FALSE(field->IsPrimitiveType());
 
   // Test "Class[][] throws" field.
-  fhandle.Assign(static_fields->Get(1));
-  EXPECT_EQ("throws", std::string(fhandle->GetName()));
-  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fhandle->GetTypeDescriptor()));
-  EXPECT_EQ(throwsFieldClass.Get(), fhandle->GetType<true>());
-  EXPECT_EQ("L$Proxy1234;", std::string(fhandle->GetDeclaringClass()->GetDescriptor(&temp)));
-  EXPECT_FALSE(fhandle->IsPrimitiveType());
+  field = &static_fields[1];
+  EXPECT_STREQ("throws", field->GetName());
+  EXPECT_STREQ("[[Ljava/lang/Class;", field->GetTypeDescriptor());
+  EXPECT_EQ(throwsFieldClass.Get(), field->GetType<true>());
+  EXPECT_STREQ("L$Proxy1234;", field->GetDeclaringClass()->GetDescriptor(&temp));
+  EXPECT_FALSE(field->IsPrimitiveType());
 }
 
 }  // namespace art
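Annotation: the pattern in this test recurs across the patch: GetIFields()/GetSFields() now return a raw ArtField array in native memory rather than a managed ObjectArray<mirror::ArtField>, so the Handle/MutableHandle plumbing disappears and fields are reached by plain indexing alongside NumInstanceFields()/NumStaticFields(). A reduced sketch of the access pattern, with a stand-in ArtField:

    #include <cstddef>

    // Stand-in only; the real ArtField lives in art/runtime/art_field.h.
    struct ArtField { const char* name; };

    // Native-heap field arrays need no handle scope: the GC never moves
    // them, so bare pointers stay valid across suspension points.
    inline const char* FieldNameAt(ArtField* fields, size_t count, size_t i) {
      return (fields != nullptr && i < count) ? fields[i].name : nullptr;
    }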
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index 44e2844..efaa0ac 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -15,18 +15,14 @@
  */
 
 #include "inline_method_analyser.h"
+
+#include "art_field-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
 #include "dex_instruction-inl.h"
-#include "mirror/art_field.h"
-#include "mirror/art_field-inl.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
-#include "mirror/class.h"
 #include "mirror/class-inl.h"
-#include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
-#include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 
 /*
@@ -331,7 +327,7 @@
   mirror::DexCache* dex_cache = verifier->GetDexCache();
   uint32_t method_idx = verifier->GetMethodReference().dex_method_index;
   mirror::ArtMethod* method = dex_cache->GetResolvedMethod(method_idx);
-  mirror::ArtField* field = dex_cache->GetResolvedField(field_idx);
+  ArtField* field = Runtime::Current()->GetClassLinker()->GetResolvedField(field_idx, dex_cache);
   if (method == nullptr || field == nullptr || field->IsStatic()) {
     return false;
   }
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index 471b37c..52d83a2 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -30,7 +30,6 @@
 namespace art {
 
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class Object;
   template<typename MirrorType> class HeapReference;
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index ac36447..beba64f 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -238,7 +238,7 @@
 }
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
-  BufferedRootVisitor<128> buffered_visitor(visitor, root_info);
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(visitor, root_info);
   for (GcRoot<mirror::Object>& root : entries_) {
     buffered_visitor.VisitRoot(root);
   }
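Annotation: this call site (and the matching one in thread.cc below) replaces the literal 128 with kDefaultBufferedRootCount. The visitor batches roots into a fixed-size array and hands them to the underlying visitor in one call, amortizing the per-root virtual dispatch. A reduced sketch of the buffering pattern; the real BufferedRootVisitor lives in gc_root.h and this stand-in only illustrates the mechanism:

    #include <cstddef>

    constexpr size_t kDefaultBufferedRootCount = 128;  // assumed to match the old literal

    template <typename Root, size_t kCapacity>
    class BufferedVisitorSketch {
     public:
      using FlushFn = void (*)(Root* roots, size_t count);
      explicit BufferedVisitorSketch(FlushFn flush) : flush_(flush) {}
      ~BufferedVisitorSketch() { Flush(); }  // deliver any remainder on scope exit
      void Visit(Root root) {
        buffer_[size_++] = root;
        if (size_ == kCapacity) Flush();  // hand off a full batch at once
      }
      void Flush() {
        if (size_ != 0) { flush_(buffer_, size_); size_ = 0; }
      }
     private:
      Root buffer_[kCapacity];
      size_t size_ = 0;
      FlushFn flush_;
    };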
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 4e94de4..3e1315c 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -16,16 +16,15 @@
 
 #include "reflection-inl.h"
 
+#include "art_field-inl.h"
 #include "class_linker.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "jni_internal.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_array-inl.h"
-#include "mirror/object_array.h"
 #include "nth_caller_visitor.h"
 #include "scoped_thread_state_change.h"
 #include "stack.h"
@@ -238,13 +237,13 @@
 
 #define DO_FIRST_ARG(match_descriptor, get_fn, append) { \
           if (LIKELY(arg != nullptr && arg->GetClass<>()->DescriptorEquals(match_descriptor))) { \
-            mirror::ArtField* primitive_field = arg->GetClass()->GetIFields()->Get(0); \
+            ArtField* primitive_field = arg->GetClass()->GetInstanceField(0); \
             append(primitive_field-> get_fn(arg));
 
 #define DO_ARG(match_descriptor, get_fn, append) \
           } else if (LIKELY(arg != nullptr && \
                             arg->GetClass<>()->DescriptorEquals(match_descriptor))) { \
-            mirror::ArtField* primitive_field = arg->GetClass()->GetIFields()->Get(0); \
+            ArtField* primitive_field = arg->GetClass()->GetInstanceField(0); \
             append(primitive_field-> get_fn(arg));
 
 #define DO_FAIL(expected) \
@@ -692,7 +691,7 @@
   return result.GetL();
 }
 
-static std::string UnboxingFailureKind(mirror::ArtField* f)
+static std::string UnboxingFailureKind(ArtField* f)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (f != nullptr) {
     return "field " + PrettyField(f, false);
@@ -701,7 +700,7 @@
 }
 
 static bool UnboxPrimitive(mirror::Object* o,
-                           mirror::Class* dst_class, mirror::ArtField* f,
+                           mirror::Class* dst_class, ArtField* f,
                            JValue* unboxed_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   bool unbox_for_result = (f == nullptr);
@@ -742,8 +741,8 @@
   JValue boxed_value;
   mirror::Class* klass = o->GetClass();
   mirror::Class* src_class = nullptr;
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::ArtField* primitive_field = o->GetClass()->GetIFields()->Get(0);
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* primitive_field = &klass->GetIFields()[0];
   if (klass->DescriptorEquals("Ljava/lang/Boolean;")) {
     src_class = class_linker->FindPrimitiveClass('Z');
     boxed_value.SetZ(primitive_field->GetBoolean(o));
@@ -782,7 +781,7 @@
                                boxed_value, unboxed_value);
 }
 
-bool UnboxPrimitiveForField(mirror::Object* o, mirror::Class* dst_class, mirror::ArtField* f,
+bool UnboxPrimitiveForField(mirror::Object* o, mirror::Class* dst_class, ArtField* f,
                             JValue* unboxed_value) {
   DCHECK(f != nullptr);
   return UnboxPrimitive(o, dst_class, f, unboxed_value);
diff --git a/runtime/reflection.h b/runtime/reflection.h
index ff970e5..c2d406a 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -23,18 +23,18 @@
 
 namespace art {
 namespace mirror {
-  class ArtField;
   class ArtMethod;
   class Class;
   class Object;
 }  // namespace mirror
+class ArtField;
 union JValue;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 
 mirror::Object* BoxPrimitive(Primitive::Type src_class, const JValue& value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-bool UnboxPrimitiveForField(mirror::Object* o, mirror::Class* dst_class, mirror::ArtField* f,
+bool UnboxPrimitiveForField(mirror::Object* o, mirror::Class* dst_class, ArtField* f,
                             JValue* unboxed_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 bool UnboxPrimitiveForResult(mirror::Object* o, mirror::Class* dst_class, JValue* unboxed_value)
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 1cd0a96..66d38ce 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -48,6 +48,7 @@
 #include "arch/x86/registers_x86.h"
 #include "arch/x86_64/quick_method_frame_info_x86_64.h"
 #include "arch/x86_64/registers_x86_64.h"
+#include "art_field-inl.h"
 #include "asm_support.h"
 #include "atomic.h"
 #include "base/arena_allocator.h"
@@ -70,8 +71,8 @@
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jni_internal.h"
+#include "linear_alloc.h"
 #include "mirror/array.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -257,7 +258,9 @@
     VLOG(jit) << "Deleting jit";
     jit_.reset(nullptr);
   }
+  linear_alloc_.reset();
   arena_pool_.reset();
+  low_4gb_arena_pool_.reset();
 
   // Shutdown the fault manager if it was initialized.
   fault_manager.Shutdown();
@@ -441,7 +444,7 @@
       hs.NewHandle(soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_Thread)));
   CHECK(cl->EnsureInitialized(soa.Self(), thread_class, true, true));
 
-  mirror::ArtField* contextClassLoader =
+  ArtField* contextClassLoader =
       thread_class->FindDeclaredInstanceField("contextClassLoader", "Ljava/lang/ClassLoader;");
   CHECK(contextClassLoader != NULL);
 
@@ -862,7 +865,14 @@
   // Use MemMap arena pool for jit, malloc otherwise. Malloc arenas are faster to allocate but
   // can't be trimmed as easily.
   const bool use_malloc = !use_jit;
-  arena_pool_.reset(new ArenaPool(use_malloc));
+  arena_pool_.reset(new ArenaPool(use_malloc, false));
+  if (IsCompiler() && Is64BitInstructionSet(kRuntimeISA)) {
+    // Low 4GB, no malloc; see the comment on low_4gb_arena_pool_ in runtime.h.
+    low_4gb_arena_pool_.reset(new ArenaPool(false, true));
+    linear_alloc_.reset(new LinearAlloc(low_4gb_arena_pool_.get()));
+  } else {
+    linear_alloc_.reset(new LinearAlloc(arena_pool_.get()));
+  }
 
   BlockSignals();
   InitPlatformSignalHandlers();
@@ -1294,7 +1304,6 @@
 void Runtime::VisitConstantRoots(RootVisitor* visitor) {
   // Visit the classes held as static in mirror classes, these can be visited concurrently and only
   // need to be visited once per GC since they never change.
-  mirror::ArtField::VisitRoots(visitor);
   mirror::ArtMethod::VisitRoots(visitor);
   mirror::Class::VisitRoots(visitor);
   mirror::Reference::VisitRoots(visitor);
diff --git a/runtime/runtime.h b/runtime/runtime.h
index baa4d18..d95640d 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -42,6 +42,7 @@
 
 class ArenaPool;
 class CompilerCallbacks;
+class LinearAlloc;
 
 namespace gc {
   class Heap;
@@ -549,6 +550,9 @@
   const ArenaPool* GetArenaPool() const {
     return arena_pool_.get();
   }
+  LinearAlloc* GetLinearAlloc() {
+    return linear_alloc_.get();
+  }
 
   jit::JitOptions* GetJITOptions() {
     return jit_options_.get();
@@ -618,6 +622,13 @@
   gc::Heap* heap_;
 
   std::unique_ptr<ArenaPool> arena_pool_;
+  // Special low 4GB pool for the compiler linear alloc. ArtFields must sit in the low 4GB
+  // when a 64-bit compiler builds a 32-bit image: the image stores field arrays as int
+  // arrays, so any field pointers resolved into the image must fit in 32 bits.
+  std::unique_ptr<ArenaPool> low_4gb_arena_pool_;
+
+  // Shared linear alloc for now.
+  std::unique_ptr<LinearAlloc> linear_alloc_;
 
   // The number of spins that are done before thread suspension is used to forcibly inflate.
   size_t max_spins_before_thin_lock_inflation_;
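Annotation: the constraint described on low_4gb_arena_pool_ above is a round-trip property: a 32-bit image stores field pointers in int arrays, so every ArtField a 64-bit compiler allocates must have an address that survives narrowing to 32 bits, and the low-4GB pool guarantees that by construction. An illustrative check, not ART code:

    #include <cstdint>

    // True iff the pointer survives the int-slot narrowing a 32-bit image
    // performs when storing field pointers.
    inline bool FitsInImageFieldSlot(const void* art_field) {
      uintptr_t wide = reinterpret_cast<uintptr_t>(art_field);
      return wide == static_cast<uintptr_t>(static_cast<uint32_t>(wide));
    }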
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index adf3480..11b7df6 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -20,7 +20,7 @@
 #include "base/casts.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext-inl.h"
-#include "mirror/art_field.h"
+#include "art_field.h"
 #include "read_barrier.h"
 #include "thread-inl.h"
 #include "verify_object.h"
@@ -148,20 +148,16 @@
     return down_cast<T>(Self()->DecodeJObject(obj));
   }
 
-  mirror::ArtField* DecodeField(jfieldID fid) const
+  ArtField* DecodeField(jfieldID fid) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-    CHECK(!kMovingFields);
-    mirror::ArtField* field = reinterpret_cast<mirror::ArtField*>(fid);
-    return ReadBarrier::BarrierForRoot<mirror::ArtField, kWithReadBarrier>(&field);
+    return reinterpret_cast<ArtField*>(fid);
   }
 
-  jfieldID EncodeField(mirror::ArtField* field) const
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  jfieldID EncodeField(ArtField* field) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     Locks::mutator_lock_->AssertSharedHeld(Self());
     DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-    CHECK(!kMovingFields);
     return reinterpret_cast<jfieldID>(field);
   }
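Annotation: DecodeField and EncodeField collapse to pointer casts because ArtField no longer lives on the moving managed heap: a jfieldID can simply be the field's address, and the old CHECK(!kMovingFields) plus the read-barrier-for-root dance become unnecessary. A reduced sketch of the round-trip, with stand-in types:

    struct ArtField {};                  // stand-in for art::ArtField
    typedef struct _jfieldID* jfieldID;  // opaque JNI handle, as in jni.h

    // Valid only because ArtField objects are native allocations the GC
    // never relocates; a moving field would invalidate outstanding IDs.
    inline jfieldID EncodeFieldSketch(ArtField* f) {
      return reinterpret_cast<jfieldID>(f);
    }
    inline ArtField* DecodeFieldSketch(jfieldID fid) {
      return reinterpret_cast<ArtField*>(fid);
    }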
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 79d2b13..58b272b 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -32,11 +32,11 @@
 #include <sstream>
 
 #include "arch/context.h"
+#include "art_field-inl.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "base/to_str.h"
 #include "class_linker-inl.h"
-#include "class_linker.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
@@ -47,10 +47,8 @@
 #include "gc/heap.h"
 #include "gc/space/space.h"
 #include "handle_scope-inl.h"
-#include "handle_scope.h"
 #include "indirect_reference_table-inl.h"
 #include "jni_internal.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/class-inl.h"
@@ -173,7 +171,7 @@
     self->tlsPtr_.jpeer = nullptr;
     self->SetThreadName(self->GetThreadName(soa)->ToModifiedUtf8().c_str());
 
-    mirror::ArtField* priorityField = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority);
+    ArtField* priorityField = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority);
     self->SetNativePriority(priorityField->GetInt(self->tlsPtr_.opeer));
     Dbg::PostThreadStart(self);
 
@@ -190,7 +188,7 @@
 
 Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
-  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
+  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
@@ -377,7 +375,11 @@
 
   tls32_.thin_lock_thread_id = thread_list->AllocThreadId(this);
 
-  tlsPtr_.jni_env = new JNIEnvExt(this, java_vm);
+  tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm);
+  if (tlsPtr_.jni_env == nullptr) {
+    return false;
+  }
+
   thread_list->Register(this);
   return true;
 }
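Annotation: replacing `new JNIEnvExt(...)` with JNIEnvExt::Create lets Thread::Init propagate construction failure instead of proceeding with a half-built JNIEnv. A sketch of the checked-create pattern this relies on; the failure condition named here is illustrative, not the real one:

    // Fallible factory: construction that can fail returns null rather
    // than CHECK-crashing, and the caller bails out cleanly.
    class EnvSketch {
     public:
      static EnvSketch* Create(bool tables_allocated /* illustrative */) {
        if (!tables_allocated) {
          return nullptr;  // caller sees the failure, as Thread::Init now does
        }
        return new EnvSketch();
      }
     private:
      EnvSketch() = default;
    };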
@@ -585,7 +587,7 @@
 }
 
 mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
-  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
   return (tlsPtr_.opeer != nullptr) ? reinterpret_cast<mirror::String*>(f->GetObject(tlsPtr_.opeer)) : nullptr;
 }
 
@@ -790,7 +792,7 @@
         soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(thread->tlsPtr_.opeer);
 
     if (thread_group != nullptr) {
-      mirror::ArtField* group_name_field =
+      ArtField* group_name_field =
           soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
       mirror::String* group_name_string =
           reinterpret_cast<mirror::String*>(group_name_field->GetObject(thread_group));
@@ -1385,7 +1387,8 @@
 }
 
 void Thread::HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id) {
-  BufferedRootVisitor<128> buffered_visitor(visitor, RootInfo(kRootNativeStack, thread_id));
+  BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
+      visitor, RootInfo(kRootNativeStack, thread_id));
   for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
     for (size_t j = 0, count = cur->NumberOfReferences(); j < count; ++j) {
       buffered_visitor.VisitRootIfNonNull(cur->GetHandle(j).GetReference());
@@ -2303,8 +2306,8 @@
       mapper.VisitShadowFrame(shadow_frame);
     }
   }
-  if (tlsPtr_.method_verifier != nullptr) {
-    tlsPtr_.method_verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
+  for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) {
+    verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
   }
   // Visit roots on this thread's stack
   Context* context = GetLongJumpContext();
@@ -2428,14 +2431,14 @@
   tlsPtr_.debug_invoke_req = nullptr;
 }
 
-void Thread::SetVerifier(verifier::MethodVerifier* verifier) {
-  CHECK(tlsPtr_.method_verifier == nullptr);
+void Thread::PushVerifier(verifier::MethodVerifier* verifier) {
+  verifier->link_ = tlsPtr_.method_verifier;
   tlsPtr_.method_verifier = verifier;
 }
 
-void Thread::ClearVerifier(verifier::MethodVerifier* verifier) {
+void Thread::PopVerifier(verifier::MethodVerifier* verifier) {
   CHECK_EQ(tlsPtr_.method_verifier, verifier);
-  tlsPtr_.method_verifier = nullptr;
+  tlsPtr_.method_verifier = verifier->link_;
 }
 
 }  // namespace art
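Annotation: the verifier hooks change from a single slot (SetVerifier CHECKed that no verifier was already registered) to a stack: verification can now nest, so each thread keeps an intrusive singly linked list threaded through MethodVerifier::link_, and VisitRoots walks the whole chain instead of one element. A reduced sketch of the push/pop discipline:

    // Reduced sketch; link_ is the intrusive pointer the patch reads in
    // Thread::VisitRoots and Push/PopVerifier.
    struct VerifierSketch {
      VerifierSketch* link_ = nullptr;
    };

    struct ThreadSketch {
      VerifierSketch* top_verifier = nullptr;
      void Push(VerifierSketch* v) {
        v->link_ = top_verifier;  // O(1), no allocation
        top_verifier = v;
      }
      void Pop(VerifierSketch* v) {
        // Pops must mirror pushes exactly; thread.cc enforces this with CHECK_EQ.
        top_verifier = v->link_;
      }
    };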
diff --git a/runtime/thread.h b/runtime/thread.h
index f89e46b..b095e22 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -895,8 +895,8 @@
     return tls32_.suspended_at_suspend_check;
   }
 
-  void SetVerifier(verifier::MethodVerifier* verifier);
-  void ClearVerifier(verifier::MethodVerifier* verifier);
+  void PushVerifier(verifier::MethodVerifier* verifier);
+  void PopVerifier(verifier::MethodVerifier* verifier);
 
  private:
   explicit Thread(bool daemon);
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 5375dc0..7326865 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -582,7 +582,7 @@
 }
 
 void Trace::FieldRead(Thread* thread, mirror::Object* this_object,
-                       mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field)
+                       mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   UNUSED(thread, this_object, method, dex_pc, field);
   // We're not registered to listen to this kind of event, so complain.
@@ -590,7 +590,7 @@
 }
 
 void Trace::FieldWritten(Thread* thread, mirror::Object* this_object,
-                          mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                          mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field,
                           const JValue& field_value)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   UNUSED(thread, this_object, method, dex_pc, field, field_value);
diff --git a/runtime/trace.h b/runtime/trace.h
index 80f926f..d8bd428 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -33,10 +33,10 @@
 namespace art {
 
 namespace mirror {
-  class ArtField;
   class ArtMethod;
 }  // namespace mirror
 
+class ArtField;
 class Thread;
 
 enum TracingMode {
@@ -99,10 +99,10 @@
                   mirror::ArtMethod* method, uint32_t new_dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
   void FieldRead(Thread* thread, mirror::Object* this_object,
-                 mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field)
+                 mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
   void FieldWritten(Thread* thread, mirror::Object* this_object,
-                    mirror::ArtMethod* method, uint32_t dex_pc, mirror::ArtField* field,
+                    mirror::ArtMethod* method, uint32_t dex_pc, ArtField* field,
                     const JValue& field_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE;
   void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 24ecf6b..9792eca 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -16,9 +16,9 @@
 
 #include "transaction.h"
 
+#include "art_field-inl.h"
 #include "common_runtime_test.h"
 #include "mirror/array-inl.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -181,47 +181,47 @@
   ASSERT_FALSE(soa.Self()->IsExceptionPending());
 
   // Lookup fields.
-  mirror::ArtField* booleanField = h_klass->FindDeclaredStaticField("booleanField", "Z");
+  ArtField* booleanField = h_klass->FindDeclaredStaticField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
   ASSERT_EQ(booleanField->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   ASSERT_EQ(booleanField->GetBoolean(h_klass.Get()), false);
 
-  mirror::ArtField* byteField = h_klass->FindDeclaredStaticField("byteField", "B");
+  ArtField* byteField = h_klass->FindDeclaredStaticField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
   ASSERT_EQ(byteField->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   ASSERT_EQ(byteField->GetByte(h_klass.Get()), 0);
 
-  mirror::ArtField* charField = h_klass->FindDeclaredStaticField("charField", "C");
+  ArtField* charField = h_klass->FindDeclaredStaticField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
   ASSERT_EQ(charField->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   ASSERT_EQ(charField->GetChar(h_klass.Get()), 0u);
 
-  mirror::ArtField* shortField = h_klass->FindDeclaredStaticField("shortField", "S");
+  ArtField* shortField = h_klass->FindDeclaredStaticField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
   ASSERT_EQ(shortField->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   ASSERT_EQ(shortField->GetShort(h_klass.Get()), 0);
 
-  mirror::ArtField* intField = h_klass->FindDeclaredStaticField("intField", "I");
+  ArtField* intField = h_klass->FindDeclaredStaticField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
   ASSERT_EQ(intField->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   ASSERT_EQ(intField->GetInt(h_klass.Get()), 0);
 
-  mirror::ArtField* longField = h_klass->FindDeclaredStaticField("longField", "J");
+  ArtField* longField = h_klass->FindDeclaredStaticField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
   ASSERT_EQ(longField->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   ASSERT_EQ(longField->GetLong(h_klass.Get()), static_cast<int64_t>(0));
 
-  mirror::ArtField* floatField = h_klass->FindDeclaredStaticField("floatField", "F");
+  ArtField* floatField = h_klass->FindDeclaredStaticField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
   ASSERT_EQ(floatField->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   ASSERT_FLOAT_EQ(floatField->GetFloat(h_klass.Get()), static_cast<float>(0.0f));
 
-  mirror::ArtField* doubleField = h_klass->FindDeclaredStaticField("doubleField", "D");
+  ArtField* doubleField = h_klass->FindDeclaredStaticField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
   ASSERT_EQ(doubleField->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   ASSERT_DOUBLE_EQ(doubleField->GetDouble(h_klass.Get()), static_cast<double>(0.0));
 
-  mirror::ArtField* objectField = h_klass->FindDeclaredStaticField("objectField",
+  ArtField* objectField = h_klass->FindDeclaredStaticField("objectField",
                                                                    "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
   ASSERT_EQ(objectField->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
@@ -283,47 +283,47 @@
   ASSERT_TRUE(h_instance.Get() != nullptr);
 
   // Lookup fields.
-  mirror::ArtField* booleanField = h_klass->FindDeclaredInstanceField("booleanField", "Z");
+  ArtField* booleanField = h_klass->FindDeclaredInstanceField("booleanField", "Z");
   ASSERT_TRUE(booleanField != nullptr);
   ASSERT_EQ(booleanField->GetTypeAsPrimitiveType(), Primitive::kPrimBoolean);
   ASSERT_EQ(booleanField->GetBoolean(h_instance.Get()), false);
 
-  mirror::ArtField* byteField = h_klass->FindDeclaredInstanceField("byteField", "B");
+  ArtField* byteField = h_klass->FindDeclaredInstanceField("byteField", "B");
   ASSERT_TRUE(byteField != nullptr);
   ASSERT_EQ(byteField->GetTypeAsPrimitiveType(), Primitive::kPrimByte);
   ASSERT_EQ(byteField->GetByte(h_instance.Get()), 0);
 
-  mirror::ArtField* charField = h_klass->FindDeclaredInstanceField("charField", "C");
+  ArtField* charField = h_klass->FindDeclaredInstanceField("charField", "C");
   ASSERT_TRUE(charField != nullptr);
   ASSERT_EQ(charField->GetTypeAsPrimitiveType(), Primitive::kPrimChar);
   ASSERT_EQ(charField->GetChar(h_instance.Get()), 0u);
 
-  mirror::ArtField* shortField = h_klass->FindDeclaredInstanceField("shortField", "S");
+  ArtField* shortField = h_klass->FindDeclaredInstanceField("shortField", "S");
   ASSERT_TRUE(shortField != nullptr);
   ASSERT_EQ(shortField->GetTypeAsPrimitiveType(), Primitive::kPrimShort);
   ASSERT_EQ(shortField->GetShort(h_instance.Get()), 0);
 
-  mirror::ArtField* intField = h_klass->FindDeclaredInstanceField("intField", "I");
+  ArtField* intField = h_klass->FindDeclaredInstanceField("intField", "I");
   ASSERT_TRUE(intField != nullptr);
   ASSERT_EQ(intField->GetTypeAsPrimitiveType(), Primitive::kPrimInt);
   ASSERT_EQ(intField->GetInt(h_instance.Get()), 0);
 
-  mirror::ArtField* longField = h_klass->FindDeclaredInstanceField("longField", "J");
+  ArtField* longField = h_klass->FindDeclaredInstanceField("longField", "J");
   ASSERT_TRUE(longField != nullptr);
   ASSERT_EQ(longField->GetTypeAsPrimitiveType(), Primitive::kPrimLong);
   ASSERT_EQ(longField->GetLong(h_instance.Get()), static_cast<int64_t>(0));
 
-  mirror::ArtField* floatField = h_klass->FindDeclaredInstanceField("floatField", "F");
+  ArtField* floatField = h_klass->FindDeclaredInstanceField("floatField", "F");
   ASSERT_TRUE(floatField != nullptr);
   ASSERT_EQ(floatField->GetTypeAsPrimitiveType(), Primitive::kPrimFloat);
   ASSERT_FLOAT_EQ(floatField->GetFloat(h_instance.Get()), static_cast<float>(0.0f));
 
-  mirror::ArtField* doubleField = h_klass->FindDeclaredInstanceField("doubleField", "D");
+  ArtField* doubleField = h_klass->FindDeclaredInstanceField("doubleField", "D");
   ASSERT_TRUE(doubleField != nullptr);
   ASSERT_EQ(doubleField->GetTypeAsPrimitiveType(), Primitive::kPrimDouble);
   ASSERT_DOUBLE_EQ(doubleField->GetDouble(h_instance.Get()), static_cast<double>(0.0));
 
-  mirror::ArtField* objectField = h_klass->FindDeclaredInstanceField("objectField",
+  ArtField* objectField = h_klass->FindDeclaredInstanceField("objectField",
                                                                         "Ljava/lang/Object;");
   ASSERT_TRUE(objectField != nullptr);
   ASSERT_EQ(objectField->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
@@ -381,63 +381,63 @@
   ASSERT_FALSE(soa.Self()->IsExceptionPending());
 
   // Lookup fields.
-  mirror::ArtField* booleanArrayField = h_klass->FindDeclaredStaticField("booleanArrayField", "[Z");
+  ArtField* booleanArrayField = h_klass->FindDeclaredStaticField("booleanArrayField", "[Z");
   ASSERT_TRUE(booleanArrayField != nullptr);
   mirror::BooleanArray* booleanArray = booleanArrayField->GetObject(h_klass.Get())->AsBooleanArray();
   ASSERT_TRUE(booleanArray != nullptr);
   ASSERT_EQ(booleanArray->GetLength(), 1);
   ASSERT_EQ(booleanArray->GetWithoutChecks(0), false);
 
-  mirror::ArtField* byteArrayField = h_klass->FindDeclaredStaticField("byteArrayField", "[B");
+  ArtField* byteArrayField = h_klass->FindDeclaredStaticField("byteArrayField", "[B");
   ASSERT_TRUE(byteArrayField != nullptr);
   mirror::ByteArray* byteArray = byteArrayField->GetObject(h_klass.Get())->AsByteArray();
   ASSERT_TRUE(byteArray != nullptr);
   ASSERT_EQ(byteArray->GetLength(), 1);
   ASSERT_EQ(byteArray->GetWithoutChecks(0), 0);
 
-  mirror::ArtField* charArrayField = h_klass->FindDeclaredStaticField("charArrayField", "[C");
+  ArtField* charArrayField = h_klass->FindDeclaredStaticField("charArrayField", "[C");
   ASSERT_TRUE(charArrayField != nullptr);
   mirror::CharArray* charArray = charArrayField->GetObject(h_klass.Get())->AsCharArray();
   ASSERT_TRUE(charArray != nullptr);
   ASSERT_EQ(charArray->GetLength(), 1);
   ASSERT_EQ(charArray->GetWithoutChecks(0), 0u);
 
-  mirror::ArtField* shortArrayField = h_klass->FindDeclaredStaticField("shortArrayField", "[S");
+  ArtField* shortArrayField = h_klass->FindDeclaredStaticField("shortArrayField", "[S");
   ASSERT_TRUE(shortArrayField != nullptr);
   mirror::ShortArray* shortArray = shortArrayField->GetObject(h_klass.Get())->AsShortArray();
   ASSERT_TRUE(shortArray != nullptr);
   ASSERT_EQ(shortArray->GetLength(), 1);
   ASSERT_EQ(shortArray->GetWithoutChecks(0), 0);
 
-  mirror::ArtField* intArrayField = h_klass->FindDeclaredStaticField("intArrayField", "[I");
+  ArtField* intArrayField = h_klass->FindDeclaredStaticField("intArrayField", "[I");
   ASSERT_TRUE(intArrayField != nullptr);
   mirror::IntArray* intArray = intArrayField->GetObject(h_klass.Get())->AsIntArray();
   ASSERT_TRUE(intArray != nullptr);
   ASSERT_EQ(intArray->GetLength(), 1);
   ASSERT_EQ(intArray->GetWithoutChecks(0), 0);
 
-  mirror::ArtField* longArrayField = h_klass->FindDeclaredStaticField("longArrayField", "[J");
+  ArtField* longArrayField = h_klass->FindDeclaredStaticField("longArrayField", "[J");
   ASSERT_TRUE(longArrayField != nullptr);
   mirror::LongArray* longArray = longArrayField->GetObject(h_klass.Get())->AsLongArray();
   ASSERT_TRUE(longArray != nullptr);
   ASSERT_EQ(longArray->GetLength(), 1);
   ASSERT_EQ(longArray->GetWithoutChecks(0), static_cast<int64_t>(0));
 
-  mirror::ArtField* floatArrayField = h_klass->FindDeclaredStaticField("floatArrayField", "[F");
+  ArtField* floatArrayField = h_klass->FindDeclaredStaticField("floatArrayField", "[F");
   ASSERT_TRUE(floatArrayField != nullptr);
   mirror::FloatArray* floatArray = floatArrayField->GetObject(h_klass.Get())->AsFloatArray();
   ASSERT_TRUE(floatArray != nullptr);
   ASSERT_EQ(floatArray->GetLength(), 1);
   ASSERT_FLOAT_EQ(floatArray->GetWithoutChecks(0), static_cast<float>(0.0f));
 
-  mirror::ArtField* doubleArrayField = h_klass->FindDeclaredStaticField("doubleArrayField", "[D");
+  ArtField* doubleArrayField = h_klass->FindDeclaredStaticField("doubleArrayField", "[D");
   ASSERT_TRUE(doubleArrayField != nullptr);
   mirror::DoubleArray* doubleArray = doubleArrayField->GetObject(h_klass.Get())->AsDoubleArray();
   ASSERT_TRUE(doubleArray != nullptr);
   ASSERT_EQ(doubleArray->GetLength(), 1);
   ASSERT_DOUBLE_EQ(doubleArray->GetWithoutChecks(0), static_cast<double>(0.0f));
 
-  mirror::ArtField* objectArrayField = h_klass->FindDeclaredStaticField("objectArrayField",
+  ArtField* objectArrayField = h_klass->FindDeclaredStaticField("objectArrayField",
                                                                            "[Ljava/lang/Object;");
   ASSERT_TRUE(objectArrayField != nullptr);
   mirror::ObjectArray<mirror::Object>* objectArray =
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8a23ff7..f13da8b 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -25,10 +25,10 @@
 #include <unistd.h>
 #include <memory>
 
+#include "art_field-inl.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -364,7 +364,7 @@
   return result;
 }
 
-std::string PrettyField(mirror::ArtField* f, bool with_type) {
+std::string PrettyField(ArtField* f, bool with_type) {
   if (f == NULL) {
     return "null";
   }
diff --git a/runtime/utils.h b/runtime/utils.h
index e6a6b1d..6dee5fe 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -33,10 +33,10 @@
 
 namespace art {
 
+class ArtField;
 class DexFile;
 
 namespace mirror {
-class ArtField;
 class ArtMethod;
 class Class;
 class Object;
@@ -343,7 +343,7 @@
 
 // Returns a human-readable signature for 'f'. Something like "a.b.C.f" or
 // "int a.b.C.f" (depending on the value of 'with_type').
-std::string PrettyField(mirror::ArtField* f, bool with_type = true)
+std::string PrettyField(ArtField* f, bool with_type = true)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type = true);
 
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index 833427e..623a3ec 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -146,7 +146,7 @@
   mirror::Class* java_lang_String = class_linker_->FindSystemClass(soa.Self(),
                                                                    "Ljava/lang/String;");
 
-  mirror::ArtField* f;
+  ArtField* f;
   f = java_lang_String->FindDeclaredInstanceField("count", "I");
   EXPECT_EQ("int java.lang.String.count", PrettyField(f));
   EXPECT_EQ("java.lang.String.count", PrettyField(f, false));
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index d0f8468..065df05 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -18,6 +18,7 @@
 
 #include <iostream>
 
+#include "art_field-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
 #include "class_linker.h"
@@ -30,7 +31,6 @@
 #include "indenter.h"
 #include "intern_table.h"
 #include "leb128.h"
-#include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
@@ -395,12 +395,12 @@
       has_virtual_or_interface_invokes_(false),
       verify_to_dump_(verify_to_dump),
       allow_thread_suspension_(allow_thread_suspension) {
-  self->SetVerifier(this);
+  self->PushVerifier(this);
   DCHECK(class_def != nullptr);
 }
 
 MethodVerifier::~MethodVerifier() {
-  Thread::Current()->ClearVerifier(this);
+  Thread::Current()->PopVerifier(this);
   STLDeleteElements(&failure_messages_);
 }
 
@@ -451,7 +451,7 @@
   Verify();
 }
 
-mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m,
+ArtField* MethodVerifier::FindAccessedFieldAtDexPc(mirror::ArtMethod* m,
                                                            uint32_t dex_pc) {
   Thread* self = Thread::Current();
   StackHandleScope<3> hs(self);
@@ -464,7 +464,7 @@
   return verifier.FindAccessedFieldAtDexPc(dex_pc);
 }
 
-mirror::ArtField* MethodVerifier::FindAccessedFieldAtDexPc(uint32_t dex_pc) {
+ArtField* MethodVerifier::FindAccessedFieldAtDexPc(uint32_t dex_pc) {
   CHECK(code_item_ != nullptr);  // This only makes sense for methods with code.
 
   // Strictly speaking, we ought to be able to get away with doing a subset of the full method
@@ -3788,7 +3788,7 @@
   }
 }
 
-mirror::ArtField* MethodVerifier::GetStaticField(int field_idx) {
+ArtField* MethodVerifier::GetStaticField(int field_idx) {
   const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
   // Check access to class
   const RegType& klass_type = ResolveClassAndCheckAccess(field_id.class_idx_);
@@ -3802,8 +3802,8 @@
     return nullptr;  // Can't resolve Class so no more to do here, will do checking at runtime.
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, dex_cache_,
-                                                          class_loader_);
+  ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, dex_cache_,
+                                                  class_loader_);
   if (field == nullptr) {
     VLOG(verifier) << "Unable to resolve static field " << field_idx << " ("
               << dex_file_->GetFieldName(field_id) << ") in "
@@ -3823,7 +3823,7 @@
   return field;
 }
 
-mirror::ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
+ArtField* MethodVerifier::GetInstanceField(const RegType& obj_type, int field_idx) {
   const DexFile::FieldId& field_id = dex_file_->GetFieldId(field_idx);
   // Check access to class
   const RegType& klass_type = ResolveClassAndCheckAccess(field_id.class_idx_);
@@ -3837,8 +3837,8 @@
     return nullptr;  // Can't resolve Class so no more to do here
   }
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, dex_cache_,
-                                                          class_loader_);
+  ArtField* field = class_linker->ResolveFieldJLS(*dex_file_, field_idx, dex_cache_,
+                                                  class_loader_);
   if (field == nullptr) {
     VLOG(verifier) << "Unable to resolve instance field " << field_idx << " ("
               << dex_file_->GetFieldName(field_id) << ") in "
@@ -3894,7 +3894,7 @@
 void MethodVerifier::VerifyISFieldAccess(const Instruction* inst, const RegType& insn_type,
                                          bool is_primitive, bool is_static) {
   uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  mirror::ArtField* field;
+  ArtField* field;
   if (is_static) {
     field = GetStaticField(field_idx);
   } else {
@@ -3914,12 +3914,8 @@
       }
     }
 
-    mirror::Class* field_type_class;
-    {
-      StackHandleScope<1> hs(self_);
-      HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
-      field_type_class = can_load_classes_ ? h_field->GetType<true>() : h_field->GetType<false>();
-    }
+    mirror::Class* field_type_class =
+        can_load_classes_ ? field->GetType<true>() : field->GetType<false>();
     if (field_type_class != nullptr) {
       field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
                                          field_type_class->CannotBeAssignedFromOtherTypes());
@@ -3988,7 +3984,7 @@
   }
 }
 
-mirror::ArtField* MethodVerifier::GetQuickFieldAccess(const Instruction* inst,
+ArtField* MethodVerifier::GetQuickFieldAccess(const Instruction* inst,
                                                       RegisterLine* reg_line) {
   DCHECK(IsInstructionIGetQuickOrIPutQuick(inst->Opcode())) << inst->Opcode();
   const RegType& object_type = reg_line->GetRegisterType(this, inst->VRegB_22c());
@@ -3997,8 +3993,7 @@
     return nullptr;
   }
   uint32_t field_offset = static_cast<uint32_t>(inst->VRegC_22c());
-  mirror::ArtField* const f = mirror::ArtField::FindInstanceFieldWithOffset(object_type.GetClass(),
-                                                                            field_offset);
+  ArtField* const f = ArtField::FindInstanceFieldWithOffset(object_type.GetClass(), field_offset);
   DCHECK_EQ(f->GetOffset().Uint32Value(), field_offset);
   if (f == nullptr) {
     VLOG(verifier) << "Failed to find instance field at offset '" << field_offset
@@ -4012,7 +4007,7 @@
                                             bool is_primitive) {
   DCHECK(Runtime::Current()->IsStarted() || verify_to_dump_);
 
-  mirror::ArtField* field = GetQuickFieldAccess(inst, work_line_.get());
+  ArtField* field = GetQuickFieldAccess(inst, work_line_.get());
   if (field == nullptr) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Cannot infer field from " << inst->Name();
     return;
@@ -4030,12 +4025,8 @@
   // Get the field type.
   const RegType* field_type;
   {
-    mirror::Class* field_type_class;
-    {
-      StackHandleScope<1> hs(Thread::Current());
-      HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
-      field_type_class = can_load_classes_ ? h_field->GetType<true>() : h_field->GetType<false>();
-    }
+    mirror::Class* field_type_class =
+        can_load_classes_ ? field->GetType<true>() : field->GetType<false>();
 
     if (field_type_class != nullptr) {
       field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index c813634..cd414c2 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -31,6 +31,7 @@
 
 class Instruction;
 struct ReferenceMap2Visitor;
+class Thread;
 
 namespace verifier {
 
@@ -190,7 +191,7 @@
 
   // Returns the accessed field corresponding to the quick instruction's field
   // offset at 'dex_pc' in method 'm'.
-  static mirror::ArtField* FindAccessedFieldAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc)
+  static ArtField* FindAccessedFieldAtDexPc(mirror::ArtMethod* m, uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Returns the invoked method corresponding to the quick instruction's vtable
@@ -249,7 +250,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   // Returns the accessed field of a quick field access (iget/iput-quick) or nullptr
   // if it cannot be found.
-  mirror::ArtField* GetQuickFieldAccess(const Instruction* inst, RegisterLine* reg_line)
+  ArtField* GetQuickFieldAccess(const Instruction* inst, RegisterLine* reg_line)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Is the method being verified a constructor?
@@ -300,7 +301,7 @@
 
   void FindLocksAtDexPc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  mirror::ArtField* FindAccessedFieldAtDexPc(uint32_t dex_pc)
+  ArtField* FindAccessedFieldAtDexPc(uint32_t dex_pc)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::ArtMethod* FindInvokedMethodAtDexPc(uint32_t dex_pc)
@@ -524,11 +525,11 @@
                   bool is_primitive) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Lookup instance field and fail for resolution violations
-  mirror::ArtField* GetInstanceField(const RegType& obj_type, int field_idx)
+  ArtField* GetInstanceField(const RegType& obj_type, int field_idx)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Lookup static field and fail for resolution violations
-  mirror::ArtField* GetStaticField(int field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  ArtField* GetStaticField(int field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Perform verification of an iget/sget/iput/sput instruction.
   enum class FieldAccessType {  // private
@@ -738,6 +739,10 @@
   // FindLocksAtDexPC, resulting in deadlocks.
   const bool allow_thread_suspension_;
 
+  // Link for the linked list of method verifier roots, which is managed by Thread.
+  MethodVerifier* link_;
+
+  friend class art::Thread;
   DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
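
Note on the link_ field added above: each live MethodVerifier is threaded onto an intrusive per-thread list (hence the friend class art::Thread), so the runtime can walk all active verifiers, for example to visit their GC roots, without allocating a separate container. A minimal Java sketch of the pattern, with hypothetical names rather than ART's actual C++ API:

    // Hypothetical sketch of an intrusive verifier-root list.
    final class Verifier {
      Verifier link;  // next verifier registered on the same thread
    }

    final class ThreadRoots {
      private Verifier head;  // most recently pushed verifier

      void push(Verifier v) { v.link = head; head = v; }          // O(1), no allocation
      void pop(Verifier v)  { assert head == v; head = v.link; }  // strict LIFO

      void visitAll(java.util.function.Consumer<Verifier> fn) {
        for (Verifier v = head; v != null; v = v.link) {
          fn.accept(v);
        }
      }
    }

Because registration is strictly LIFO and per-thread, the list needs no locking when traversed from its owning thread.
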
diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java
index 3d0f074..708f61f 100644
--- a/test/004-UnsafeTest/src/Main.java
+++ b/test/004-UnsafeTest/src/Main.java
@@ -104,6 +104,16 @@
     if (!unsafe.compareAndSwapInt(t, intOffset, 0, 1)) {
         System.out.println("Unexpectedly not succeeding compareAndSwap...");
     }
+
+    if (unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+        System.out.println("Unexpectedly succeeding compareAndSwapLong...");
+    }
+    if (!unsafe.compareAndSwapLong(t, longOffset, longValue, 0)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+    }
+    if (!unsafe.compareAndSwapLong(t, longOffset, 0, 1)) {
+        System.out.println("Unexpectedly not succeeding compareAndSwapLong...");
+    }
   }
 
   private static class TestClass {
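
The three new checks pin down compare-and-swap semantics on a long field: the first CAS must fail because the field still holds longValue rather than 0, the second must succeed and store 0, and the third then succeeds in swapping 0 for 1. The same three-step behavior can be reproduced standalone with AtomicLong (obtaining sun.misc.Unsafe requires reflection, so this sketch uses the public equivalent; the initial constant is arbitrary):

    import java.util.concurrent.atomic.AtomicLong;

    public class CasDemo {
      public static void main(String[] args) {
        AtomicLong value = new AtomicLong(12345678901L);           // stands in for longValue
        System.out.println(value.compareAndSet(0, 1));             // false: value != 0
        System.out.println(value.compareAndSet(12345678901L, 0));  // true: expected matches
        System.out.println(value.compareAndSet(0, 1));             // true: value is now 0
      }
    }
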
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index f0fe934..6a6227c 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -379,7 +379,7 @@
      */
     static int lit16Test(int x) {
 
-        int[] results = new int[8];
+        int[] results = new int[10];
 
         /* try to generate op-int/lit16" instructions */
         results[0] = x + 1000;
@@ -390,6 +390,9 @@
         results[5] = x & 1000;
         results[6] = x | -1000;
         results[7] = x ^ -1000;
+        /* use a 16-bit constant that has its MSB (bit-15) set */
+        results[8] = x / 32769;
+        results[9] = x / -32769;
 
         if (results[0] != 78777) { return 1; }
         if (results[1] != -76777) { return 2; }
@@ -399,6 +402,8 @@
         if (results[5] != 960) { return 6; }
         if (results[6] != -39) { return 7; }
         if (results[7] != -76855) { return 8; }
+        if (results[8] != 2) { return 9; }
+        if (results[9] != -2) { return 10; }
         return 0;
     }
 
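
The divisor 32769 (0x8001) has bit 15 set, so a divide-by-constant code path that mistakenly treats it as a signed 16-bit value would see -32767 and flip the sign of the quotient; the two new cases guard against exactly that. Working the arithmetic for the test's input (results[0] == 78777 implies x == 77777):

    class Lit16Demo {
      public static void main(String[] args) {
        // 2 * 32769 = 65538 <= 77777 < 3 * 32769 = 98307, so the quotient is 2.
        System.out.println(77777 / 32769);   // prints 2
        // Java integer division truncates toward zero.
        System.out.println(77777 / -32769);  // prints -2
      }
    }
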
diff --git a/test/407-arrays/src/Main.java b/test/407-arrays/src/Main.java
index d5c5604..4b833be 100644
--- a/test/407-arrays/src/Main.java
+++ b/test/407-arrays/src/Main.java
@@ -19,9 +19,9 @@
 public class Main extends TestCase {
   public static void main(String[] args) {
     $opt$testReads(new boolean[1], new byte[1], new char[1], new short[1],
-                   new int[1], new Object[1], new long[1], 0);
+                   new int[1], new Object[1], new long[1], new float[1], new double[1], 0);
     $opt$testWrites(new boolean[2], new byte[2], new char[2], new short[2],
-                    new int[2], new Object[2], new long[2], 1);
+                    new int[2], new Object[2], new long[2], new float[2], new double[2], 1);
     ensureThrows(new boolean[2], 2);
     ensureThrows(new boolean[2], 4);
     ensureThrows(new boolean[2], -1);
@@ -30,7 +30,8 @@
   }
 
   static void $opt$testReads(boolean[] bools, byte[] bytes, char[] chars, short[] shorts,
-                             int[] ints, Object[] objects, long[] longs, int index) {
+                             int[] ints, Object[] objects, long[] longs, float[] floats,
+                             double[] doubles, int index) {
     assertEquals(false, bools[0]);
     assertEquals(false, bools[index]);
 
@@ -51,10 +52,17 @@
 
     assertEquals(0, longs[0]);
     assertEquals(0, longs[index]);
+
+    assertEquals(0, floats[0]);
+    assertEquals(0, floats[index]);
+
+    assertEquals(0, doubles[0]);
+    assertEquals(0, doubles[index]);
   }
 
   static void $opt$testWrites(boolean[] bools, byte[] bytes, char[] chars, short[] shorts,
-                              int[] ints, Object[] objects, long[] longs, int index) {
+                              int[] ints, Object[] objects, long[] longs, float[] floats,
+                              double[] doubles, int index) {
     bools[0] = true;
     assertEquals(true, bools[0]);
     bools[index] = true;
@@ -99,6 +107,18 @@
     // on 32bits. So we call out a long helper to ensure this method gets
     // optimized.
     $opt$testLongWrites(longs, index);
+
+    float f = 1.0F;
+    floats[0] = f / (1 | 0);
+    assertEquals(f, floats[0]);
+    floats[index] = f / (1 | 0);
+    assertEquals(f, floats[index]);
+
+    double d = 1.0;
+    doubles[0] = d / (1 | 0);
+    assertEquals(d, doubles[0]);
+    doubles[index] = d / (1 | 0);
+    assertEquals(d, doubles[index]);
   }
 
   public static void $opt$testLongWrites(long[] longs, int index) {
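
A note on the f / (1 | 0) idiom above: javac folds (1 | 0) to the constant 1, but f / 1 is not a constant expression (f is a non-final local), so the bytecode keeps a real floating-point division. The value stored into the array is therefore produced by an FP arithmetic instruction rather than a plain constant, which appears to be the intent here. A hypothetical throwaway class to confirm the division survives compilation:

    class DivTrick {
      static float f(float x) {
        // javap -c shows roughly: fload_0, fconst_1, fdiv, freturn
        return x / (1 | 0);
      }
    }
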
diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java
index ef6428d..3cbcebb 100644
--- a/test/458-checker-instruction-simplification/src/Main.java
+++ b/test/458-checker-instruction-simplification/src/Main.java
@@ -40,8 +40,10 @@
 
   // CHECK-START: long Main.Add0(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-NOT:                      Add
   // CHECK-DAG:                      Return [ [[Arg]] ]
+  //
+  // CHECK-START: long Main.Add0(long) instruction_simplifier (after)
+  // CHECK-NOT:                      Add
 
   public static long Add0(long arg) {
     return 0 + arg;
@@ -55,9 +57,11 @@
 
   // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]     ParameterValue
-  // CHECK-NOT:                      And
   // CHECK-DAG:                      Return [ [[Arg]] ]
 
+  // CHECK-START: int Main.AndAllOnes(int) instruction_simplifier (after)
+  // CHECK-NOT:                      And
+
   public static int AndAllOnes(int arg) {
     return arg & -1;
   }
@@ -70,9 +74,11 @@
 
   // CHECK-START: long Main.Div1(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-NOT:                      Div
   // CHECK-DAG:                      Return [ [[Arg]] ]
 
+  // CHECK-START: long Main.Div1(long) instruction_simplifier (after)
+  // CHECK-NOT:                      Div
+
   public static long Div1(long arg) {
     return arg / 1;
   }
@@ -86,9 +92,11 @@
   // CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
   // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg]] ]
-  // CHECK-NOT:                       Div
   // CHECK-DAG:                       Return [ [[Neg]] ]
 
+  // CHECK-START: int Main.DivN1(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Div
+
   public static int DivN1(int arg) {
     return arg / -1;
   }
@@ -101,9 +109,11 @@
 
   // CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]     ParameterValue
-  // CHECK-NOT:                      Mul
   // CHECK-DAG:                      Return [ [[Arg]] ]
 
+  // CHECK-START: long Main.Mul1(long) instruction_simplifier (after)
+  // CHECK-NOT:                       Mul
+
   public static long Mul1(long arg) {
     return arg * 1;
   }
@@ -117,9 +127,11 @@
   // CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
   // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg]] ]
-  // CHECK-NOT:                       Mul
   // CHECK-DAG:                       Return [ [[Neg]] ]
 
+  // CHECK-START: int Main.MulN1(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Mul
+
   public static int MulN1(int arg) {
     return arg * -1;
   }
@@ -134,9 +146,11 @@
   // CHECK-DAG:     [[Arg:j\d+]]       ParameterValue
   // CHECK-DAG:     [[Const7:i\d+]]    IntConstant 7
   // CHECK-DAG:     [[Shl:j\d+]]       Shl [ [[Arg]] [[Const7]] ]
-  // CHECK-NOT:                        Mul
   // CHECK-DAG:                        Return [ [[Shl]] ]
 
+  // CHECK-START: long Main.MulPowerOfTwo128(long) instruction_simplifier (after)
+  // CHECK-NOT:                        Mul
+
   public static long MulPowerOfTwo128(long arg) {
     return arg * 128;
   }
@@ -149,9 +163,11 @@
 
   // CHECK-START: int Main.Or0(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-NOT:                       Or
   // CHECK-DAG:                       Return [ [[Arg]] ]
 
+  // CHECK-START: int Main.Or0(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Or
+
   public static int Or0(int arg) {
     return arg | 0;
   }
@@ -163,9 +179,11 @@
 
   // CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]       ParameterValue
-  // CHECK-NOT:                        Or
   // CHECK-DAG:                        Return [ [[Arg]] ]
 
+  // CHECK-START: long Main.OrSame(long) instruction_simplifier (after)
+  // CHECK-NOT:                        Or
+
   public static long OrSame(long arg) {
     return arg | arg;
   }
@@ -178,9 +196,11 @@
 
   // CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-NOT:                       Shl
   // CHECK-DAG:                       Return [ [[Arg]] ]
 
+  // CHECK-START: int Main.Shl0(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Shl
+
   public static int Shl0(int arg) {
     return arg << 0;
   }
@@ -193,9 +213,11 @@
 
   // CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-NOT:                       Shr
   // CHECK-DAG:                       Return [ [[Arg]] ]
 
+  // CHECK-START: long Main.Shr0(long) instruction_simplifier (after)
+  // CHECK-NOT:                       Shr
+
   public static long Shr0(long arg) {
     return arg >> 0;
   }
@@ -208,9 +230,11 @@
 
   // CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-NOT:                       Sub
   // CHECK-DAG:                       Return [ [[Arg]] ]
 
+  // CHECK-START: long Main.Sub0(long) instruction_simplifier (after)
+  // CHECK-NOT:                       Sub
+
   public static long Sub0(long arg) {
     return arg - 0;
   }
@@ -224,9 +248,11 @@
   // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
   // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg]] ]
-  // CHECK-NOT:                       Sub
   // CHECK-DAG:                       Return [ [[Neg]] ]
 
+  // CHECK-START: int Main.SubAliasNeg(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Sub
+
   public static int SubAliasNeg(int arg) {
     return 0 - arg;
   }
@@ -239,9 +265,11 @@
 
   // CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
-  // CHECK-NOT:                       UShr
   // CHECK-DAG:                       Return [ [[Arg]] ]
 
+  // CHECK-START: long Main.UShr0(long) instruction_simplifier (after)
+  // CHECK-NOT:                       UShr
+
   public static long UShr0(long arg) {
     return arg >>> 0;
   }
@@ -254,9 +282,11 @@
 
   // CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
-  // CHECK-NOT:                       Xor
   // CHECK-DAG:                       Return [ [[Arg]] ]
 
+  // CHECK-START: int Main.Xor0(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Xor
+
   public static int Xor0(int arg) {
     return arg ^ 0;
   }
@@ -270,13 +300,466 @@
   // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
   // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
   // CHECK-DAG:     [[Not:i\d+]]      Not [ [[Arg]] ]
-  // CHECK-NOT:                       Xor
   // CHECK-DAG:                       Return [ [[Not]] ]
 
+  // CHECK-START: int Main.XorAllOnes(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Xor
+
   public static int XorAllOnes(int arg) {
     return arg ^ -1;
   }
 
+  /**
+   * Test that an addition or subtraction with both inputs negated is
+   * optimized to use a single negation after the operation.
+   * The transformation tested is implemented in
+   * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
+   */
+
+  // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:                       Return [ [[Add]] ]
+
+  // CHECK-START: int Main.AddNegs1(int, int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-NOT:                       Neg
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Add]] ]
+  // CHECK-DAG:                       Return [ [[Neg]] ]
+
+  public static int AddNegs1(int arg1, int arg2) {
+    return -arg1 + -arg2;
+  }
+
+  /**
+   * This is similar to the test-case AddNegs1, but the negations have
+   * multiple uses.
+   * The transformation tested is implemented in
+   * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
+   * The current code will not apply that optimization here: the
+   * transformations do not consider other uses of their inputs, and since
+   * they cannot tell how those other uses behave, they do not take the risk
+   * of increasing register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
+  // CHECK-DAG:     [[Add1:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:     [[Add2:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
+  // CHECK-START: int Main.AddNegs2(int, int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Arg2]] ]
+  // CHECK-DAG:     [[Add1:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:     [[Add2:i\d+]]     Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-NOT:                       Neg
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
+  public static int AddNegs2(int arg1, int arg2) {
+    int temp1 = -arg1;
+    int temp2 = -arg2;
+    return (temp1 + temp2) | (temp1 + temp2);
+  }
+
+  /**
+   * This follows test-cases AddNegs1 and AddNegs2.
+   * The transformation tested is implemented in
+   * `InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop`.
+   * The optimization must not be applied when it would move an additional
+   * instruction into the loop.
+   */
+
+  // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (before)
+  // -------------- Arguments and initial negation operations.
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg1:j\d+]]     Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Neg2:j\d+]]     Neg [ [[Arg2]] ]
+  // CHECK:                           Goto
+  // -------------- Loop
+  // CHECK:                           SuspendCheck
+  // CHECK:         [[Add:j\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK:                           Goto
+
+  // CHECK-START: long Main.AddNegs3(long, long) instruction_simplifier (after)
+  // -------------- Arguments and initial negation operations.
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg1:j\d+]]     Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Neg2:j\d+]]     Neg [ [[Arg2]] ]
+  // CHECK:                           Goto
+  // -------------- Loop
+  // CHECK:                           SuspendCheck
+  // CHECK:         [[Add:j\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-NOT:                       Neg
+  // CHECK:                           Goto
+
+  public static long AddNegs3(long arg1, long arg2) {
+    long res = 0;
+    long n_arg1 = -arg1;
+    long n_arg2 = -arg2;
+    for (long i = 0; i < 1; i++) {
+      res += n_arg1 + n_arg2 + i;
+    }
+    return res;
+  }
+
+  /**
+   * Test the simplification of an addition with a negated argument into a
+   * subtraction.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
+   */
+
+  // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Add:j\d+]]      Add [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG:                       Return [ [[Add]] ]
+
+  // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Sub:j\d+]]      Sub [ [[Arg2]] [[Arg1]] ]
+  // CHECK-DAG:                       Return [ [[Sub]] ]
+
+  // CHECK-START: long Main.AddNeg1(long, long) instruction_simplifier (after)
+  // CHECK-NOT:                       Neg
+  // CHECK-NOT:                       Add
+
+  public static long AddNeg1(long arg1, long arg2) {
+    return -arg1 + arg2;
+  }
+
+  /**
+   * This is similar to the test-case AddNeg1, but the negation has two uses.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitAdd`.
+   * The current code will not apply that optimization here: the
+   * transformations do not consider other uses of their inputs, and since
+   * they cannot tell how those other uses behave, they do not take the risk
+   * of increasing register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg2]] ]
+  // CHECK-DAG:     [[Add1:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG:     [[Add2:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG:     [[Res:j\d+]]      Or [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG:                       Return [ [[Res]] ]
+
+  // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg2]] ]
+  // CHECK-DAG:     [[Add1:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG:     [[Add2:j\d+]]     Add [ [[Arg1]] [[Neg]] ]
+  // CHECK-DAG:     [[Res:j\d+]]      Or [ [[Add1]] [[Add2]] ]
+  // CHECK-DAG:                       Return [ [[Res]] ]
+
+  // CHECK-START: long Main.AddNeg2(long, long) instruction_simplifier (after)
+  // CHECK-NOT:                       Sub
+
+  public static long AddNeg2(long arg1, long arg2) {
+    long temp = -arg2;
+    return (arg1 + temp) | (arg1 + temp);
+  }
+
+  /**
+   * Test simplification of the `-(-var)` pattern.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+   */
+
+  // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
+  // CHECK-DAG:     [[Neg1:j\d+]]     Neg [ [[Arg]] ]
+  // CHECK-DAG:     [[Neg2:j\d+]]     Neg [ [[Neg1]] ]
+  // CHECK-DAG:                       Return [ [[Neg2]] ]
+
+  // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
+  // CHECK-DAG:                       Return [ [[Arg]] ]
+
+  // CHECK-START: long Main.NegNeg1(long) instruction_simplifier (after)
+  // CHECK-NOT:                       Neg
+
+  public static long NegNeg1(long arg) {
+    return -(-arg);
+  }
+
+  /**
+   * Test 'multi-step' simplification, where a first transformation yields a
+   * new simplification possibility for the current instruction.
+   * The transformations tested are implemented in `InstructionSimplifierVisitor::VisitNeg`
+   * and in `InstructionSimplifierVisitor::VisitAdd`.
+   */
+
+  // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Arg]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Neg1]] ]
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:                       Return [ [[Add]] ]
+
+  // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
+  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg]] [[Arg]] ]
+  // CHECK-DAG:                       Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.NegNeg2(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Neg
+  // CHECK-NOT:                       Add
+
+  public static int NegNeg2(int arg) {
+    int temp = -arg;
+    return temp + -temp;
+  }
+
+  /**
+   * Test another 'multi-step' simplification, where a first transformation
+   * yields a new simplification possibility for the current instruction.
+   * The transformations tested are implemented in `InstructionSimplifierVisitor::VisitNeg`
+   * and in `InstructionSimplifierVisitor::VisitSub`.
+   */
+
+  // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
+  // CHECK-DAG:     [[Const0:j\d+]]   LongConstant 0
+  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg]] ]
+  // CHECK-DAG:     [[Sub:j\d+]]      Sub [ [[Const0]] [[Neg]] ]
+  // CHECK-DAG:                       Return [ [[Sub]] ]
+
+  // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
+  // CHECK-DAG:                       Return [ [[Arg]] ]
+
+  // CHECK-START: long Main.NegNeg3(long) instruction_simplifier (after)
+  // CHECK-NOT:                       Neg
+  // CHECK-NOT:                       Sub
+
+  public static long NegNeg3(long arg) {
+    return 0 - -arg;
+  }
+
+  /**
+   * Test that a negated subtraction is simplified to a subtraction with its
+   * arguments reversed.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+   */
+
+  // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Sub]] ]
+  // CHECK-DAG:                       Return [ [[Neg]] ]
+
+  // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg2]] [[Arg1]] ]
+  // CHECK-DAG:                       Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.NegSub1(int, int) instruction_simplifier (after)
+  // CHECK-NOT:                       Neg
+
+  public static int NegSub1(int arg1, int arg2) {
+    return -(arg1 - arg2);
+  }
+
+  /**
+   * This is similar to the test-case NegSub1, but the subtraction has
+   * multiple uses.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNeg`.
+   * The current code will not apply that optimization here: the
+   * transformations do not consider other uses of their inputs, and since
+   * they cannot tell how those other uses behave, they do not take the risk
+   * of increasing register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Sub]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Sub]] ]
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
+  // CHECK-START: int Main.NegSub2(int, int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG:     [[Neg1:i\d+]]     Neg [ [[Sub]] ]
+  // CHECK-DAG:     [[Neg2:i\d+]]     Neg [ [[Sub]] ]
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Neg1]] [[Neg2]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
+  public static int NegSub2(int arg1, int arg2) {
+    int temp = arg1 - arg2;
+    return -temp | -temp;
+  }
+
+  /**
+   * Test simplification of the `~~var` pattern.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitNot`.
+   */
+
+  // CHECK-START: long Main.NotNot1(long) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
+  // CHECK-DAG:     [[ConstF1:j\d+]]  LongConstant -1
+  // CHECK-DAG:     [[Xor1:j\d+]]     Xor [ [[Arg]] [[ConstF1]] ]
+  // CHECK-DAG:     [[Xor2:j\d+]]     Xor [ [[Xor1]] [[ConstF1]] ]
+  // CHECK-DAG:                       Return [ [[Xor2]] ]
+
+  // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg:j\d+]]      ParameterValue
+  // CHECK-DAG:                       Return [ [[Arg]] ]
+
+  // CHECK-START: long Main.NotNot1(long) instruction_simplifier (after)
+  // CHECK-NOT:                       Xor
+
+  public static long NotNot1(long arg) {
+    return ~~arg;
+  }
+
+  // CHECK-START: int Main.NotNot2(int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
+  // CHECK-DAG:     [[ConstF1:i\d+]]  IntConstant -1
+  // CHECK-DAG:     [[Xor1:i\d+]]     Xor [ [[Arg]] [[ConstF1]] ]
+  // CHECK-DAG:     [[Xor2:i\d+]]     Xor [ [[Xor1]] [[ConstF1]] ]
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Xor1]] [[Xor2]] ]
+  // CHECK-DAG:                       Return [ [[Add]] ]
+
+  // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg:i\d+]]      ParameterValue
+  // CHECK-DAG:     [[Not:i\d+]]      Not [ [[Arg]] ]
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Not]] [[Arg]] ]
+  // CHECK-DAG:                       Return [ [[Add]] ]
+
+  // CHECK-START: int Main.NotNot2(int) instruction_simplifier (after)
+  // CHECK-NOT:                       Xor
+
+  public static int NotNot2(int arg) {
+    int temp = ~arg;
+    return temp + ~temp;
+  }
+
+  /**
+   * Test the simplification of a subtraction with a negated argument.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   */
+
+  // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Sub:i\d+]]      Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG:                       Return [ [[Sub]] ]
+
+  // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Add:i\d+]]      Add [ [[Arg1]] [[Arg2]] ]
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Add]] ]
+  // CHECK-DAG:                       Return [ [[Neg]] ]
+
+  // CHECK-START: int Main.SubNeg1(int, int) instruction_simplifier (after)
+  // CHECK-NOT:                       Sub
+
+  public static int SubNeg1(int arg1, int arg2) {
+    return -arg1 - arg2;
+  }
+
+  /**
+   * This is similar to the test-case SubNeg1, but the negation has
+   * multiple uses.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   * The current code will not apply that optimization here: the
+   * transformations do not consider other uses of their inputs, and since
+   * they cannot tell how those other uses behave, they do not take the risk
+   * of increasing register pressure by creating or extending live ranges.
+   */
+
+  // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (before)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Sub1:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG:     [[Sub2:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Sub1]] [[Sub2]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
+  // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  // CHECK-DAG:     [[Arg1:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:i\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:i\d+]]      Neg [ [[Arg1]] ]
+  // CHECK-DAG:     [[Sub1:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG:     [[Sub2:i\d+]]     Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-DAG:     [[Or:i\d+]]       Or [ [[Sub1]] [[Sub2]] ]
+  // CHECK-DAG:                       Return [ [[Or]] ]
+
+  // CHECK-START: int Main.SubNeg2(int, int) instruction_simplifier (after)
+  // CHECK-NOT:                       Add
+
+  public static int SubNeg2(int arg1, int arg2) {
+    int temp = -arg1;
+    return (temp - arg2) | (temp - arg2);
+  }
+
+  /**
+   * This follows test-cases SubNeg1 and SubNeg2.
+   * The transformation tested is implemented in `InstructionSimplifierVisitor::VisitSub`.
+   * The optimization must not be applied when it would move an additional
+   * instruction into the loop.
+   */
+
+  // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (before)
+  // -------------- Arguments and initial negation operation.
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg1]] ]
+  // CHECK:                           Goto
+  // -------------- Loop
+  // CHECK:                           SuspendCheck
+  // CHECK:         [[Sub:j\d+]]      Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK:                           Goto
+
+  // CHECK-START: long Main.SubNeg3(long, long) instruction_simplifier (after)
+  // -------------- Arguments and initial negation operation.
+  // CHECK-DAG:     [[Arg1:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Arg2:j\d+]]     ParameterValue
+  // CHECK-DAG:     [[Neg:j\d+]]      Neg [ [[Arg1]] ]
+  // CHECK:                           Goto
+  // -------------- Loop
+  // CHECK:                           SuspendCheck
+  // CHECK:         [[Sub:j\d+]]      Sub [ [[Neg]] [[Arg2]] ]
+  // CHECK-NOT:                       Neg
+  // CHECK:                           Goto
+
+  public static long SubNeg3(long arg1, long arg2) {
+    long res = 0;
+    long temp = -arg1;
+    for (long i = 0; i < 1; i++) {
+      res += temp - arg2 - i;
+    }
+    return res;
+  }
+
   public static void main(String[] args) {
     int arg = 123456;
 
@@ -296,5 +779,20 @@
     assertLongEquals(UShr0(arg), arg);
     assertIntEquals(Xor0(arg), arg);
     assertIntEquals(XorAllOnes(arg), ~arg);
+    assertIntEquals(AddNegs1(arg, arg + 1), -(arg + arg + 1));
+    assertIntEquals(AddNegs2(arg, arg + 1), -(arg + arg + 1));
+    assertLongEquals(AddNegs3(arg, arg + 1), -(2 * arg + 1));
+    assertLongEquals(AddNeg1(arg, arg + 1), 1);
+    assertLongEquals(AddNeg2(arg, arg + 1), -1);
+    assertLongEquals(NegNeg1(arg), arg);
+    assertIntEquals(NegNeg2(arg), 0);
+    assertLongEquals(NegNeg3(arg), arg);
+    assertIntEquals(NegSub1(arg, arg + 1), 1);
+    assertIntEquals(NegSub2(arg, arg + 1), 1);
+    assertLongEquals(NotNot1(arg), arg);
+    assertIntEquals(NotNot2(arg), -1);
+    assertIntEquals(SubNeg1(arg, arg + 1), -(arg + arg + 1));
+    assertIntEquals(SubNeg2(arg, arg + 1), -(arg + arg + 1));
+    assertLongEquals(SubNeg3(arg, arg + 1), -(2 * arg + 1));
   }
 }
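
Two notes on the checker changes above. Each CHECK-NOT now lives in its own CHECK-START group, presumably because CHECK-DAG lines match in any order, so a CHECK-NOT interleaved among them would constrain an ill-defined region; a separate group makes the negative assertion unambiguous. Separately, the negation-rewriting transformations are only sound because Java integer arithmetic wraps: in two's complement, -a + -b == -(a + b), -a - b == -(a + b), and -(a - b) == b - a hold for all inputs, including overflowing ones. A small sketch spot-checking the identities at the overflow boundaries:

    class NegIdentities {
      public static void main(String[] args) {
        int[] samples = {0, 1, -1, 42, Integer.MIN_VALUE, Integer.MAX_VALUE};
        for (int a : samples) {
          for (int b : samples) {
            // Identities behind TryMoveNegOnInputsAfterBinop and VisitSub:
            check(-a + -b == -(a + b));
            check(-a - b == -(a + b));
            // Identity behind VisitNeg applied to a Sub:
            check(-(a - b) == b - a);
          }
        }
        System.out.println("all identities hold");
      }

      static void check(boolean cond) {
        if (!cond) throw new AssertionError();
      }
    }
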
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index fab153b..8b4a9a4 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -80,6 +80,7 @@
         Object o248, Object o249, Object o250, Object o251, Object o252, Object o253);
 
     native void withoutImplementation();
+    native Object withoutImplementationRefReturn();
 
     native static void stackArgsIntsFirst(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
         int i8, int i9, int i10, float f1, float f2, float f3, float f4, float f5, float f6,
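
The new withoutImplementationRefReturn declaration extends the existing no-implementation test to a reference return type: invoking a native method with no registered implementation throws UnsatisfiedLinkError, and the object-returning variant additionally exercises the reference-return path. A sketch of the assertion a caller would make (the harness around it is assumed):

    try {
      new MyClassNatives().withoutImplementationRefReturn();
      throw new AssertionError("expected UnsatisfiedLinkError");
    } catch (UnsatisfiedLinkError expected) {
      // No native implementation is bound for this method.
    }
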
diff --git a/tools/analyze-init-failures.py b/tools/analyze-init-failures.py
index cca05e1..60c7dc5 100755
--- a/tools/analyze-init-failures.py
+++ b/tools/analyze-init-failures.py
@@ -25,7 +25,7 @@
 
 
 _CLASS_RE = re.compile(r'^L(.*);$')
-_ERROR_LINE_RE = re.compile(r'^java.lang.InternalError: (.*)')
+_ERROR_LINE_RE = re.compile(r'^dalvik\.system\.TransactionAbortError: (.*)')
 _STACK_LINE_RE = re.compile(r'^\s*at\s[^\s]*\s([^\s]*)')
 
 def Confused(filename, line_number, line):
diff --git a/tools/dexfuzz/Android.mk b/tools/dexfuzz/Android.mk
index 1e4b4f5..1580bc3 100644
--- a/tools/dexfuzz/Android.mk
+++ b/tools/dexfuzz/Android.mk
@@ -31,7 +31,10 @@
 LOCAL_MODULE_CLASS := EXECUTABLES
 LOCAL_MODULE := dexfuzz
 include $(BUILD_SYSTEM)/base_rules.mk
-$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/dexfuzz $(ACP) $(HOST_CORE_IMG_OUTS)
+$(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/dexfuzz $(ACP)
 	@echo "Copy: $(PRIVATE_MODULE) ($@)"
 	$(copy-file-to-new-target)
 	$(hide) chmod 755 $@
+
+# --- dexfuzz script with core image dependencies ----------------
+fuzzer: $(LOCAL_BUILT_MODULE) $(HOST_CORE_IMG_OUTS)
diff --git a/tools/dexfuzz/src/dexfuzz/executors/Device.java b/tools/dexfuzz/src/dexfuzz/executors/Device.java
index 736aaad..4a53957 100644
--- a/tools/dexfuzz/src/dexfuzz/executors/Device.java
+++ b/tools/dexfuzz/src/dexfuzz/executors/Device.java
@@ -17,6 +17,7 @@
 package dexfuzz.executors;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.Map;
 
 import dexfuzz.ExecutionResult;
@@ -67,6 +68,10 @@
     return envVars.get(key);
   }
 
+  private String getHostCoreImagePath() {
+    return androidHostOut + "/framework/core.art";
+  }
+
   private void setup() {
     programPushed = false;
 
@@ -74,6 +79,13 @@
     androidProductOut = checkForEnvVar(envVars, "ANDROID_PRODUCT_OUT");
     androidHostOut = checkForEnvVar(envVars, "ANDROID_HOST_OUT");
 
+    if (Options.executeOnHost) {
+      File coreImage = new File(getHostCoreImagePath());
+      if (!coreImage.exists()) {
+        Log.errorAndQuit("Host core image not found at " + coreImage.getPath()
+            + ". Did you forget to build it?");
+      }
+    }
     if (!isHost) {
       // Create temporary consumers for the initial test.
       StreamConsumer outputConsumer = new StreamConsumer();
@@ -144,7 +156,7 @@
    * Get any extra flags required to execute ART on the host.
    */
   public String getHostExecutionFlags() {
-    return String.format("-Xnorelocate -Ximage:%s/framework/core.art", androidHostOut);
+    return String.format("-Xnorelocate -Ximage:%s", getHostCoreImagePath());
   }
 
   public String getAndroidHostOut() {
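
The refactor above centralizes the host core image path in getHostCoreImagePath(), so the new fail-fast check in setup() and the -Ximage flag built by getHostExecutionFlags() can no longer drift apart. For illustration, assuming ANDROID_HOST_OUT is /out/host/linux-x86:

    String androidHostOut = "/out/host/linux-x86";  // assumed environment value
    String coreArt = androidHostOut + "/framework/core.art";
    String flags = String.format("-Xnorelocate -Ximage:%s", coreArt);
    // flags == "-Xnorelocate -Ximage:/out/host/linux-x86/framework/core.art"
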