Merge "ART: Fix SelectKind to work with nullptr"
diff --git a/Android.mk b/Android.mk
index 1edd543..9a813fd 100644
--- a/Android.mk
+++ b/Android.mk
@@ -329,9 +329,9 @@
 .PHONY: oat-target-$(1)
 oat-target-$(1): $$(OUT_OAT_FILE)
 
-$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD_DEPENDENCY)
+$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OAT_DEPENDENCY)
 	@mkdir -p $$(dir $$@)
-	$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
+	$(DEX2OAT) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
 		--boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \
 		--dex-location=/$(1) --oat-file=$$@ \
 		--instruction-set=$(DEX2OAT_TARGET_ARCH) \
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 386128e..55a4821 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -122,7 +122,11 @@
 endif
 
 # Clang on the target. Target builds use GCC by default.
-ART_TARGET_CLANG :=
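+# Follow USE_CLANG_PLATFORM_BUILD when it is set; otherwise default to false (GCC).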
+ifneq ($(USE_CLANG_PLATFORM_BUILD),)
+ART_TARGET_CLANG := $(USE_CLANG_PLATFORM_BUILD)
+else
+ART_TARGET_CLANG := false
+endif
 ART_TARGET_CLANG_arm :=
 ART_TARGET_CLANG_arm64 :=
 ART_TARGET_CLANG_mips :=
@@ -168,7 +172,7 @@
 ART_TARGET_CLANG_CFLAGS_x86 :=
 ART_TARGET_CLANG_CFLAGS_x86_64 :=
 
-# these are necessary for Clang ARM64 ART builds
+# These are necessary for Clang ARM64 ART builds. TODO: remove.
 ART_TARGET_CLANG_CFLAGS_arm64  += \
   -Wno-implicit-exception-spec-mismatch \
   -DNVALGRIND \
@@ -236,6 +240,14 @@
 ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MIN_DELTA=$(LIBART_IMG_TARGET_MIN_BASE_ADDRESS_DELTA)
 ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MAX_DELTA=$(LIBART_IMG_TARGET_MAX_BASE_ADDRESS_DELTA)
 
+# Colorize clang compiler warnings.
+ifeq ($(ART_HOST_CLANG),true)
+  ART_HOST_CFLAGS += -fcolor-diagnostics
+endif
+ifeq ($(ART_TARGET_CLANG),true)
+  ART_TARGET_CFLAGS += -fcolor-diagnostics
+endif
+
 ART_TARGET_LDFLAGS :=
 ifeq ($(TARGET_CPU_SMP),true)
   ART_TARGET_CFLAGS += -DANDROID_SMP=1
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 1c462eb..6ef451f 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -28,10 +28,10 @@
 # NB depending on HOST_CORE_DEX_LOCATIONS so we are sure to have the dex files in frameworks for
 # run-test --no-image
 define create-core-oat-host-rules
-$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_LOCATIONS) $$(DEX2OATD_DEPENDENCY)
+$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_LOCATIONS) $$(DEX2OAT_DEPENDENCY)
 	@echo "host dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
-	$$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
+	$$(hide) $$(DEX2OAT) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
 	  --image-classes=$$(PRELOADED_CLASSES) $$(addprefix --dex-file=,$$(HOST_CORE_DEX_FILES)) \
 	  $$(addprefix --dex-location=,$$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)HOST_CORE_OAT_OUT) \
 	  --oat-location=$$($(1)HOST_CORE_OAT) --image=$$($(1)HOST_CORE_IMG_OUT) \
@@ -51,10 +51,10 @@
 endif
 
 define create-core-oat-target-rules
-$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
+$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OAT_DEPENDENCY)
 	@echo "target dex2oat: $$@ ($$?)"
 	@mkdir -p $$(dir $$@)
-	$$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
+	$$(hide) $$(DEX2OAT) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
 	  --image-classes=$$(PRELOADED_CLASSES) $$(addprefix --dex-file=,$$(TARGET_CORE_DEX_FILES)) \
 	  $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)TARGET_CORE_OAT_OUT) \
 	  --oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) \
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 8b5e6d5..edc5bd0 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -100,6 +100,7 @@
 	optimizing/nodes.cc \
 	optimizing/optimizing_compiler.cc \
 	optimizing/parallel_move_resolver.cc \
+	optimizing/prepare_for_register_allocation.cc \
 	optimizing/register_allocator.cc \
 	optimizing/ssa_builder.cc \
 	optimizing/ssa_liveness_analysis.cc \
@@ -125,8 +126,6 @@
 	utils/scoped_arena_allocator.cc \
 	buffered_output_stream.cc \
 	compiler.cc \
-	elf_fixup.cc \
-	elf_stripper.cc \
 	elf_writer.cc \
 	elf_writer_quick.cc \
 	file_output_stream.cc \
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index eb0806b..e411164 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -1463,14 +1463,12 @@
       // Intentional fall-through.
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE:
-      if ((mir->optimization_flags & MIR_INLINED) == 0) {
-        // Make ref args aliasing.
-        for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) {
-          uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]);
-          non_aliasing_refs_.erase(reg);
-        }
-        HandleInvokeOrClInitOrAcquireOp(mir);
+      // Make ref args aliasing.
+      for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) {
+        uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]);
+        non_aliasing_refs_.erase(reg);
       }
+      HandleInvokeOrClInitOrAcquireOp(mir);
       break;
 
     case Instruction::MOVE_RESULT:
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 246ae44..51b6709 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -1163,7 +1163,7 @@
     if (!MIR::DecodedInstruction::IsPseudoMirOp(mir->dalvikInsn.opcode)) {
       int flags = mir->dalvikInsn.FlagsOf();
 
-      if ((flags & Instruction::kInvoke) != 0 && (mir->optimization_flags & MIR_INLINED) == 0) {
+      if ((flags & Instruction::kInvoke) != 0) {
         attributes_ &= ~METHOD_IS_LEAF;
       }
     }
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index dda9e77..276b886 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -178,7 +178,7 @@
                                  BasicBlock* orig_block, BasicBlock** immed_pred_block_p) {
   DCHECK_GT(code_offset, orig_block->start_offset);
   MIR* insn = orig_block->first_mir_insn;
-  MIR* prev = NULL;
+  MIR* prev = NULL;  // Will be set to the instruction preceding the split point.
   while (insn) {
     if (insn->offset == code_offset) break;
     prev = insn;
@@ -187,6 +187,10 @@
   if (insn == NULL) {
     LOG(FATAL) << "Break split failed";
   }
+  // At this point, insn is the instruction at which we want to split, i.e. insn will
+  // become the first instruction of the "bottom" block, and prev will be the last
+  // instruction of the "top" block.
+
   BasicBlock* bottom_block = CreateNewBB(kDalvikByteCode);
 
   bottom_block->start_offset = code_offset;
@@ -259,7 +263,10 @@
   DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
          !MIR::DecodedInstruction::IsPseudoMirOp(insn->dalvikInsn.opcode));
   DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id);
+  // Scan the "bottom" instructions, remapping them to the
+  // newly created "bottom" block.
   MIR* p = insn;
+  p->bb = bottom_block->id;
   dex_pc_to_block_map_[p->offset] = bottom_block->id;
   while (p != bottom_block->last_mir_insn) {
     p = p->next;
@@ -273,7 +280,11 @@
      * the first in a BasicBlock, we can't hit it here.
      */
     if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
-      DCHECK_EQ(dex_pc_to_block_map_[p->offset], orig_block->id);
+      BasicBlockId mapped_id = dex_pc_to_block_map_[p->offset];
+      // At first glance the instructions should all be mapped to orig_block.
+      // However, multiple instructions may correspond to the same dex pc, so an earlier
+      // instruction may already have moved the mapping for that dex pc to bottom_block.
+      DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id));
       dex_pc_to_block_map_[p->offset] = bottom_block->id;
     }
   }
@@ -568,8 +579,6 @@
                                       const uint16_t* code_ptr, const uint16_t* code_end) {
   bool in_try_block = try_block_addr->IsBitSet(cur_offset);
   bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
-  bool build_all_edges =
-      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
 
   /* In try block */
   if (in_try_block) {
@@ -605,6 +614,8 @@
     }
     in_try_block = (cur_block->successor_block_list_type != kNotUsed);
   }
+  bool build_all_edges =
+      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
   if (!in_try_block && build_all_edges) {
     BasicBlock* eh_block = CreateNewBB(kExceptionHandling);
     cur_block->taken = eh_block->id;
@@ -1537,7 +1548,8 @@
     return GetSSAName(ssa_reg);
   }
   if (IsConst(reg_location_[ssa_reg])) {
-    if (!singles_only && reg_location_[ssa_reg].wide) {
+    if (!singles_only && reg_location_[ssa_reg].wide &&
+        !reg_location_[ssa_reg].high_word) {
       return StringPrintf("v%d_%d#0x%" PRIx64, SRegToVReg(ssa_reg), GetSSASubscript(ssa_reg),
                           ConstantValueWide(reg_location_[ssa_reg]));
     } else {
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 1a4b23e..bf09446 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -845,7 +845,7 @@
   RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
   RegLocation rl_new_value;
   if (!is_long) {
-    rl_new_value = LoadValue(rl_src_new_value);
+    rl_new_value = LoadValue(rl_src_new_value, LocToRegClass(rl_src_new_value));
   } else if (load_early) {
     rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
   }
@@ -868,7 +868,7 @@
 
   RegLocation rl_expected;
   if (!is_long) {
-    rl_expected = LoadValue(rl_src_expected);
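+    // Load the expected value into the same register class as the new value.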
+    rl_expected = LoadValue(rl_src_expected, LocToRegClass(rl_src_new_value));
   } else if (load_early) {
     rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
   } else {
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index ab71921..a87b06a 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -117,6 +117,7 @@
 #define IS_SIGNED_IMM14(value) IS_SIGNED_IMM(14, value)
 #define IS_SIGNED_IMM19(value) IS_SIGNED_IMM(19, value)
 #define IS_SIGNED_IMM21(value) IS_SIGNED_IMM(21, value)
+#define IS_SIGNED_IMM26(value) IS_SIGNED_IMM(26, value)
 
 // Quick macro used to define the registers.
 #define A64_REGISTER_CODE_LIST(R) \
@@ -240,6 +241,7 @@
   kA64B2ct,          // b.cond [01010100] imm_19[23-5] [0] cond[3-0].
   kA64Blr1x,         // blr [1101011000111111000000] rn[9-5] [00000].
   kA64Br1x,          // br  [1101011000011111000000] rn[9-5] [00000].
+  kA64Bl1t,          // bl  [100101] imm26[25-0].
   kA64Brk1d,         // brk [11010100001] imm_16[20-5] [00000].
   kA64B1t,           // b   [00010100] offset_26[25-0].
   kA64Cbnz2rt,       // cbnz[00110101] imm_19[23-5] rt[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index b1cf279..7c663a9 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -155,6 +155,10 @@
                  kFmtRegX, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH,
                  "br", "!0x", kFixupNone),
+    ENCODING_MAP(kA64Bl1t, NO_VARIANTS(0x94000000),
+                 kFmtBitBlt, 25, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR | NEEDS_FIXUP,
+                 "bl", "!0T", kFixupLabel),
     ENCODING_MAP(kA64Brk1d, NO_VARIANTS(0xd4200000),
                  kFmtBitBlt, 20, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
@@ -873,7 +877,7 @@
               ((target_lir->flags.generation == lir->flags.generation) ? 0 : offset_adjustment);
           int32_t delta = target - pc;
           DCHECK_EQ(delta & 0x3, 0);
-          if (!IS_SIGNED_IMM19(delta >> 2)) {
+          if (!IS_SIGNED_IMM26(delta >> 2)) {
             LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
           }
           lir->operands[0] = delta >> 2;
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 6081f28..e8de876 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -21,6 +21,8 @@
 #include "dex/quick/mir_to_lir-inl.h"
 #include "gc/accounting/card_table.h"
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "mirror/art_method.h"
+#include "mirror/object_array-inl.h"
 
 namespace art {
 
@@ -433,4 +435,117 @@
   NewLIR0(kA64Ret);
 }
 
+static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
+  // Always emit relative calls.
+  return true;
+}
+
+/*
+ * Bit of a hack here - in the absence of a real scheduling pass,
+ * emit the next instruction in static & direct invoke sequences.
+ */
+static int Arm64NextSDCallInsn(CompilationUnit* cu, CallInfo* info,
+                               int state, const MethodReference& target_method,
+                               uint32_t unused,
+                               uintptr_t direct_code, uintptr_t direct_method,
+                               InvokeType type) {
+  Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
+  if (direct_code != 0 && direct_method != 0) {
+    switch (state) {
+    case 0:  // Get the current Method* [sets kArg0]
+      if (direct_code != static_cast<uintptr_t>(-1)) {
+        cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+      } else if (Arm64UseRelativeCall(cu, target_method)) {
+        // Defer to linker patch.
+      } else {
+        cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+      }
+      if (direct_method != static_cast<uintptr_t>(-1)) {
+        cg->LoadConstant(cg->TargetReg(kArg0, kRef), direct_method);
+      } else {
+        cg->LoadMethodAddress(target_method, type, kArg0);
+      }
+      break;
+    default:
+      return -1;
+    }
+  } else {
+    RegStorage arg0_ref = cg->TargetReg(kArg0, kRef);
+    switch (state) {
+    case 0:  // Get the current Method* [sets kArg0]
+      // TUNING: we can save a reg copy if Method* has been promoted.
+      cg->LoadCurrMethodDirect(arg0_ref);
+      break;
+    case 1:  // Get method->dex_cache_resolved_methods_
+      cg->LoadRefDisp(arg0_ref,
+                      mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value(),
+                      arg0_ref,
+                      kNotVolatile);
+      // Set up direct code if known.
+      if (direct_code != 0) {
+        if (direct_code != static_cast<uintptr_t>(-1)) {
+          cg->LoadConstant(cg->TargetPtrReg(kInvokeTgt), direct_code);
+        } else if (Arm64UseRelativeCall(cu, target_method)) {
+          // Defer to linker patch.
+        } else {
+          CHECK_LT(target_method.dex_method_index, target_method.dex_file->NumMethodIds());
+          cg->LoadCodeAddress(target_method, type, kInvokeTgt);
+        }
+      }
+      break;
+    case 2:  // Grab target method*
+      CHECK_EQ(cu->dex_file, target_method.dex_file);
+      cg->LoadRefDisp(arg0_ref,
+                      mirror::ObjectArray<mirror::Object>::OffsetOfElement(
+                          target_method.dex_method_index).Int32Value(),
+                      arg0_ref,
+                      kNotVolatile);
+      break;
+    case 3:  // Grab the code from the method*
+      if (direct_code == 0) {
+        // kInvokeTgt := arg0_ref->entrypoint
+        cg->LoadWordDisp(arg0_ref,
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
+                         cg->TargetPtrReg(kInvokeTgt));
+      }
+      break;
+    default:
+      return -1;
+    }
+  }
+  return state + 1;
+}
+
+NextCallInsn Arm64Mir2Lir::GetNextSDCallInsn() {
+  return Arm64NextSDCallInsn;
+}
+
+LIR* Arm64Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
+  // For ARM64, just generate a relative BL instruction that will be filled in at 'link time'.
+  // If the target turns out to be too far, the linker will generate a thunk for dispatch.
+  int target_method_idx = target_method.dex_method_index;
+  const DexFile* target_dex_file = target_method.dex_file;
+
+  // Generate the call instruction and save index, dex_file, and type.
+  // NOTE: Method deduplication takes linker patches into account, so we can just pass 0
+  // as a placeholder for the offset.
+  LIR* call = RawLIR(current_dalvik_offset_, kA64Bl1t, 0,
+                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
+  AppendLIR(call);
+  call_method_insns_.push_back(call);
+  return call;
+}
+
+LIR* Arm64Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
+  LIR* call_insn;
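+  // Fast-path direct/static calls with no known direct code pointer use a relative BL that the
+  // linker patches; everything else goes through an indirect branch via kInvokeTgt.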
+  if (method_info.FastPath() && Arm64UseRelativeCall(cu_, method_info.GetTargetMethod()) &&
+      (method_info.GetSharpType() == kDirect || method_info.GetSharpType() == kStatic) &&
+      method_info.DirectCode() == static_cast<uintptr_t>(-1)) {
+    call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
+  } else {
+    call_insn = OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
+  }
+  return call_insn;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 55cc938..93d9b34 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -259,6 +259,28 @@
 
   size_t GetInstructionOffset(LIR* lir) OVERRIDE;
 
+  NextCallInsn GetNextSDCallInsn() OVERRIDE;
+
+  /*
+   * @brief Generate a relative call to the method that will be patched at link time.
+   * @param target_method The MethodReference of the method to be invoked.
+   * @param type How the method will be invoked.
+   * @returns Call instruction
+   */
+  LIR* CallWithLinkerFixup(const MethodReference& target_method, InvokeType type);
+
+  /*
+   * @brief Generate the actual call insn based on the method info.
+   * @param method_info the lowering info for the method call.
+   * @returns Call instruction
+   */
+  virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info) OVERRIDE;
+
+  /*
+   * @brief Handle ARM64-specific literals.
+   */
+  void InstallLiteralPools() OVERRIDE;
+
   LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
 
  private:
@@ -396,6 +418,8 @@
 
   InToRegStorageMapping in_to_reg_storage_mapping_;
   static const A64EncodingMap EncodingMap[kA64Last];
+
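+  // Relative call sites (kA64Bl1t) that require linker patches; processed in InstallLiteralPools().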
+  ArenaVector<LIR*> call_method_insns_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 88123e1..97f3994 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -925,7 +925,7 @@
 
 LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target);
+  return RawLIR(current_dalvik_offset_, kA64Ldr2rp, As32BitReg(reg).GetReg(), 0, 0, 0, 0, target);
 }
 
 LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index 0462530..ba47883 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -585,7 +585,8 @@
 }
 
 Arm64Mir2Lir::Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
-    : Mir2Lir(cu, mir_graph, arena) {
+    : Mir2Lir(cu, mir_graph, arena),
+      call_method_insns_(arena->Adapter()) {
   // Sanity check - make sure encoding map lines up.
   for (int i = 0; i < kA64Last; i++) {
     if (UNWIDE(Arm64Mir2Lir::EncodingMap[i].opcode) != i) {
@@ -1201,4 +1202,21 @@
   return call_state;
 }
 
+void Arm64Mir2Lir::InstallLiteralPools() {
+  // PC-relative calls to methods.
+  patches_.reserve(call_method_insns_.size());
+  for (LIR* p : call_method_insns_) {
+    DCHECK_EQ(p->opcode, kA64Bl1t);
+    uint32_t target_method_idx = p->operands[1];
+    const DexFile* target_dex_file =
+        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));
+
+    patches_.push_back(LinkerPatch::RelativeCodePatch(p->offset,
+                                                      target_dex_file, target_method_idx));
+  }
+
+  // And do the normal processing.
+  Mir2Lir::InstallLiteralPools();
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index f305017..e18116e 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -447,15 +447,16 @@
   buf.push_back((data >> 24) & 0xff);
 }
 
-// Push 8 bytes on 64-bit target systems; 4 on 32-bit target systems.
-static void PushPointer(std::vector<uint8_t>&buf, const void* pointer, bool target64) {
-  uint64_t data = reinterpret_cast<uintptr_t>(pointer);
-  if (target64) {
-    Push32(buf, data & 0xFFFFFFFF);
-    Push32(buf, (data >> 32) & 0xFFFFFFFF);
-  } else {
-    Push32(buf, static_cast<uint32_t>(data));
-  }
+/**
+ * @brief Push a compressed reference which needs patching at link/patchoat-time.
+ * @details This needs to be kept consistent with the code which actually does the patching in
+ *   oat_writer.cc and in the patchoat tool.
+ */
+static void PushUnpatchedReference(std::vector<uint8_t>&buf) {
+  // Note that we can safely initialize the patches to zero. The code deduplication mechanism takes
+  // the patches into account when determining whether two pieces of code are functionally
+  // equivalent.
+  Push32(buf, UINT32_C(0));
 }
 
 static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) {
@@ -481,9 +482,7 @@
         reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
     patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(),
                                               target_dex_file, target_method_idx));
-    const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
-    // unique value based on target to ensure code deduplication works
-    PushPointer(code_buffer_, &target_method_id, cu_->target64);
+    PushUnpatchedReference(code_buffer_);
     data_lir = NEXT_LIR(data_lir);
   }
   data_lir = method_literal_list_;
@@ -493,9 +492,7 @@
         reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
     patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(),
                                                 target_dex_file, target_method_idx));
-    const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
-    // unique value based on target to ensure code deduplication works
-    PushPointer(code_buffer_, &target_method_id, cu_->target64);
+    PushUnpatchedReference(code_buffer_);
     data_lir = NEXT_LIR(data_lir);
   }
   // Push class literals.
@@ -506,9 +503,7 @@
       reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
     patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(),
                                               class_dex_file, target_type_idx));
-    const DexFile::TypeId& target_method_id = class_dex_file->GetTypeId(target_type_idx);
-    // unique value based on target to ensure code deduplication works
-    PushPointer(code_buffer_, &target_method_id, cu_->target64);
+    PushUnpatchedReference(code_buffer_);
     data_lir = NEXT_LIR(data_lir);
   }
 }
@@ -772,7 +767,9 @@
 /* Determine the offset of each literal field */
 int Mir2Lir::AssignLiteralOffset(CodeOffset offset) {
   offset = AssignLiteralOffsetCommon(literal_list_, offset);
-  unsigned int ptr_size = GetInstructionSetPointerSize(cu_->instruction_set);
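+  // Code/method/class literals are now fixed 32-bit references patched at link/patchoat time,
+  // so reserve 4 bytes per literal regardless of the target pointer size.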
+  constexpr unsigned int ptr_size = sizeof(uint32_t);
+  COMPILE_ASSERT(ptr_size >= sizeof(mirror::HeapReference<mirror::Object>),
+                 ptr_size_cannot_hold_a_heap_reference);
   offset = AssignLiteralPointerOffsetCommon(code_literal_list_, offset, ptr_size);
   offset = AssignLiteralPointerOffsetCommon(method_literal_list_, offset, ptr_size);
   offset = AssignLiteralPointerOffsetCommon(class_literal_list_, offset, ptr_size);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 2523380..0f1d765 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -566,7 +566,6 @@
       break;
   }
   if (result) {
-    invoke->optimization_flags |= MIR_INLINED;
     // If the invoke has not been eliminated yet, check now whether we should do it.
     // This is done so that dataflow analysis does not get tripped up seeing nop invoke.
     if (static_cast<int>(invoke->dalvikInsn.opcode) != kMirOpNop) {
@@ -583,7 +582,6 @@
       }
     }
     if (move_result != nullptr) {
-      move_result->optimization_flags |= MIR_INLINED;
       move_result->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
     }
   }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 9f7a881..3f7ecfe 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -214,9 +214,8 @@
 void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1,
                                   RegLocation rl_src2, LIR* taken,
                                   LIR* fall_through) {
-  DCHECK(!rl_src1.fp);
-  DCHECK(!rl_src2.fp);
   ConditionCode cond;
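+  // If either operand is a reference, compare in reference registers; otherwise use core registers.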
+  RegisterClass reg_class = (rl_src1.ref || rl_src2.ref) ? kRefReg : kCoreReg;
   switch (opcode) {
     case Instruction::IF_EQ:
       cond = kCondEq;
@@ -249,7 +248,7 @@
     cond = FlipComparisonOrder(cond);
   }
 
-  rl_src1 = LoadValue(rl_src1);
+  rl_src1 = LoadValue(rl_src1, reg_class);
   // Is this really an immediate comparison?
   if (rl_src2.is_const) {
     // If it's already live in a register or not easily materialized, just keep going
@@ -273,15 +272,15 @@
     }
   }
 
-  rl_src2 = LoadValue(rl_src2);
+  rl_src2 = LoadValue(rl_src2, reg_class);
   OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken);
 }
 
 void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken,
                                       LIR* fall_through) {
   ConditionCode cond;
-  DCHECK(!rl_src.fp);
-  rl_src = LoadValue(rl_src);
+  RegisterClass reg_class = rl_src.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, reg_class);
   switch (opcode) {
     case Instruction::IF_EQZ:
       cond = kCondEq;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 67a75cb..408c73d 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -495,7 +495,8 @@
                           uintptr_t direct_code, uintptr_t direct_method,
                           InvokeType type) {
   DCHECK(cu->instruction_set != kX86 && cu->instruction_set != kX86_64 &&
-         cu->instruction_set != kThumb2 && cu->instruction_set != kArm);
+         cu->instruction_set != kThumb2 && cu->instruction_set != kArm &&
+         cu->instruction_set != kArm64);
   Mir2Lir* cg = static_cast<Mir2Lir*>(cu->cg.get());
   if (direct_code != 0 && direct_method != 0) {
     switch (state) {
@@ -1643,7 +1644,7 @@
       FreeTemp(rl_temp_offset);
     }
   } else {
-    rl_value = LoadValue(rl_src_value);
+    rl_value = LoadValue(rl_src_value, LocToRegClass(rl_src_value));
     if (rl_value.ref) {
       StoreRefIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0);
     } else {
@@ -1666,16 +1667,6 @@
 }
 
 void Mir2Lir::GenInvoke(CallInfo* info) {
-  if ((info->opt_flags & MIR_INLINED) != 0) {
-    // Already inlined but we may still need the null check.
-    if (info->type != kStatic &&
-        ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 ||
-         (info->opt_flags & MIR_IGNORE_NULL_CHECK) == 0))  {
-      RegLocation rl_obj = LoadValue(info->args[0], kRefReg);
-      GenNullCheck(rl_obj.reg);
-    }
-    return;
-  }
   DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
   if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file)
       ->GenIntrinsic(this, info)) {
@@ -1761,7 +1752,8 @@
 
 LIR* Mir2Lir::GenCallInsn(const MirMethodLoweringInfo& method_info) {
   DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64 &&
-         cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm);
+         cu_->instruction_set != kThumb2 && cu_->instruction_set != kArm &&
+         cu_->instruction_set != kArm64);
   return OpReg(kOpBlx, TargetPtrReg(kInvokeTgt));
 }
 
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index e5798fd..39b40a0 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -166,10 +166,6 @@
   return rl_src;
 }
 
-RegLocation Mir2Lir::LoadValue(RegLocation rl_src) {
-  return LoadValue(rl_src, LocToRegClass(rl_src));
-}
-
 void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) {
   /*
    * Sanity checking - should never try to store to the same
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6942c0f..0ac1299 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -500,17 +500,11 @@
       break;
 
     case Instruction::MOVE_RESULT_WIDE:
-      if ((opt_flags & MIR_INLINED) != 0) {
-        break;  // Nop - combined w/ previous invoke.
-      }
       StoreValueWide(rl_dest, GetReturnWide(LocToRegClass(rl_dest)));
       break;
 
     case Instruction::MOVE_RESULT:
     case Instruction::MOVE_RESULT_OBJECT:
-      if ((opt_flags & MIR_INLINED) != 0) {
-        break;  // Nop - combined w/ previous invoke.
-      }
       StoreValue(rl_dest, GetReturn(LocToRegClass(rl_dest)));
       break;
 
@@ -867,7 +861,7 @@
 
     case Instruction::INVOKE_STATIC_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, true));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         // If the invocation is not inlined, we can assume there is already a
         // suspend check at the return site
         mir_graph_->AppendGenSuspendTestList(bb);
@@ -875,59 +869,59 @@
       break;
     case Instruction::INVOKE_STATIC:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kStatic, false));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
 
     case Instruction::INVOKE_DIRECT:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, false));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
     case Instruction::INVOKE_DIRECT_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kDirect, true));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
 
     case Instruction::INVOKE_VIRTUAL:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, false));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
     case Instruction::INVOKE_VIRTUAL_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kVirtual, true));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
 
     case Instruction::INVOKE_SUPER:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, false));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
     case Instruction::INVOKE_SUPER_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kSuper, true));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
 
     case Instruction::INVOKE_INTERFACE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, false));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
     case Instruction::INVOKE_INTERFACE_RANGE:
       GenInvoke(mir_graph_->NewMemCallInfo(bb, mir, kInterface, true));
-      if (!kLeafOptimization && (opt_flags & MIR_INLINED) == 0) {
+      if (!kLeafOptimization) {
         mir_graph_->AppendGenSuspendTestList(bb);
       }
       break;
@@ -1241,6 +1235,7 @@
       // Combine check and work halves of throwing instruction.
       MIR* work_half = mir->meta.throw_insn;
       mir->dalvikInsn.opcode = work_half->dalvikInsn.opcode;
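+      // Copy the work half's optimization flags so the combined instruction keeps them.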
+      mir->optimization_flags = work_half->optimization_flags;
       mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
       opcode = work_half->dalvikInsn.opcode;
       SSARepresentation* ssa_rep = work_half->ssa_rep;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 67a8c0f..3de4c56 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -1008,8 +1008,6 @@
     }
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
     virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind);
-    // Same as above, but derive the target register class from the location record.
-    virtual RegLocation LoadValue(RegLocation rl_src);
     // Load Dalvik value with 64-bit memory storage.
     virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind);
     // Load Dalvik value with 32-bit memory storage.  If compressed object reference, decompress.
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4357657..8638204 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1155,7 +1155,7 @@
     LockTemp(rs_r0);
 
     RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
-    RegLocation rl_new_value = LoadValue(rl_src_new_value);
+    RegLocation rl_new_value = LoadValue(rl_src_new_value, LocToRegClass(rl_src_new_value));
 
     if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
       // Mark card for object assuming new value is stored.
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 30384ec..6898b50 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -1113,20 +1113,18 @@
   }
 
   uint32_t index = mir->dalvikInsn.vB;
-  if (!(mir->optimization_flags & MIR_INLINED)) {
-    DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
-    DexFileMethodInliner* method_inliner =
-      cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
-    InlineMethod method;
-    if (method_inliner->IsIntrinsic(index, &method)) {
-      switch (method.opcode) {
-        case kIntrinsicAbsDouble:
-        case kIntrinsicMinMaxDouble:
-          store_method_addr_ = true;
-          break;
-        default:
-          break;
-      }
+  DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr);
+  DexFileMethodInliner* method_inliner =
+    cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
+  InlineMethod method;
+  if (method_inliner->IsIntrinsic(index, &method)) {
+    switch (method.opcode) {
+      case kIntrinsicAbsDouble:
+      case kIntrinsicMinMaxDouble:
+        store_method_addr_ = true;
+        break;
+      default:
+        break;
     }
   }
 }
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 3be2478..74ee038 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -1094,7 +1094,8 @@
     elf_header_.e_ident[EI_MAG1]       = ELFMAG1;
     elf_header_.e_ident[EI_MAG2]       = ELFMAG2;
     elf_header_.e_ident[EI_MAG3]       = ELFMAG3;
-    elf_header_.e_ident[EI_CLASS]      = ELFCLASS32;
+    elf_header_.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
+                                         ? ELFCLASS32 : ELFCLASS64;
     elf_header_.e_ident[EI_DATA]       = ELFDATA2LSB;
     elf_header_.e_ident[EI_VERSION]    = EV_CURRENT;
     elf_header_.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc
deleted file mode 100644
index 0d34879..0000000
--- a/compiler/elf_fixup.cc
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "elf_fixup.h"
-
-#include <inttypes.h>
-#include <memory>
-
-#include "base/logging.h"
-#include "base/stringprintf.h"
-#include "elf_file.h"
-#include "elf_writer.h"
-
-namespace art {
-
-static const bool DEBUG_FIXUP = false;
-
-bool ElfFixup::Fixup(File* file, uintptr_t oat_data_begin) {
-  std::string error_msg;
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
-  CHECK(elf_file.get() != nullptr) << error_msg;
-
-  // Lookup "oatdata" symbol address.
-  Elf32_Addr oatdata_address = ElfWriter::GetOatDataAddress(elf_file.get());
-  Elf32_Off base_address = oat_data_begin - oatdata_address;
-
-  if (!FixupDynamic(*elf_file.get(), base_address)) {
-    LOG(WARNING) << "Failed to fixup .dynamic in " << file->GetPath();
-    return false;
-  }
-  if (!FixupSectionHeaders(*elf_file.get(), base_address)) {
-    LOG(WARNING) << "Failed to fixup section headers in " << file->GetPath();
-    return false;
-  }
-  if (!FixupProgramHeaders(*elf_file.get(), base_address)) {
-    LOG(WARNING) << "Failed to fixup program headers in " << file->GetPath();
-    return false;
-  }
-  if (!FixupSymbols(*elf_file.get(), base_address, true)) {
-    LOG(WARNING) << "Failed to fixup .dynsym in " << file->GetPath();
-    return false;
-  }
-  if (!FixupSymbols(*elf_file.get(), base_address, false)) {
-    LOG(WARNING) << "Failed to fixup .symtab in " << file->GetPath();
-    return false;
-  }
-  if (!FixupRelocations(*elf_file.get(), base_address)) {
-    LOG(WARNING) << "Failed to fixup .rel.dyn in " << file->GetPath();
-    return false;
-  }
-  if (!elf_file->FixupDebugSections(base_address)) {
-    LOG(WARNING) << "Failed to fixup debug sections in " << file->GetPath();
-    return false;
-  }
-  return true;
-}
-
-
-bool ElfFixup::FixupDynamic(ElfFile& elf_file, uintptr_t base_address) {
-  for (Elf32_Word i = 0; i < elf_file.GetDynamicNum(); i++) {
-    Elf32_Dyn& elf_dyn = elf_file.GetDynamic(i);
-    Elf32_Word d_tag = elf_dyn.d_tag;
-    if (IsDynamicSectionPointer(d_tag, elf_file.GetHeader().e_machine)) {
-      uint32_t d_ptr = elf_dyn.d_un.d_ptr;
-      if (DEBUG_FIXUP) {
-        LOG(INFO) << StringPrintf("In %s moving Elf32_Dyn[%d] from 0x%08x to 0x%08" PRIxPTR,
-                                  elf_file.GetFile().GetPath().c_str(), i,
-                                  d_ptr, d_ptr + base_address);
-      }
-      d_ptr += base_address;
-      elf_dyn.d_un.d_ptr = d_ptr;
-    }
-  }
-  return true;
-}
-
-bool ElfFixup::FixupSectionHeaders(ElfFile& elf_file, uintptr_t base_address) {
-  for (Elf32_Word i = 0; i < elf_file.GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* sh = elf_file.GetSectionHeader(i);
-    CHECK(sh != nullptr);
-    // 0 implies that the section will not exist in the memory of the process
-    if (sh->sh_addr == 0) {
-      continue;
-    }
-    if (DEBUG_FIXUP) {
-      LOG(INFO) << StringPrintf("In %s moving Elf32_Shdr[%d] from 0x%08x to 0x%08" PRIxPTR,
-                                elf_file.GetFile().GetPath().c_str(), i,
-                                sh->sh_addr, sh->sh_addr + base_address);
-    }
-    sh->sh_addr += base_address;
-  }
-  return true;
-}
-
-bool ElfFixup::FixupProgramHeaders(ElfFile& elf_file, uintptr_t base_address) {
-  // TODO: ELFObjectFile doesn't have give to Elf32_Phdr, so we do that ourselves for now.
-  for (Elf32_Word i = 0; i < elf_file.GetProgramHeaderNum(); i++) {
-    Elf32_Phdr* ph = elf_file.GetProgramHeader(i);
-    CHECK(ph != nullptr);
-    CHECK_EQ(ph->p_vaddr, ph->p_paddr) << elf_file.GetFile().GetPath() << " i=" << i;
-    CHECK((ph->p_align == 0) || (0 == ((ph->p_vaddr - ph->p_offset) & (ph->p_align - 1))))
-            << elf_file.GetFile().GetPath() << " i=" << i;
-    if (DEBUG_FIXUP) {
-      LOG(INFO) << StringPrintf("In %s moving Elf32_Phdr[%d] from 0x%08x to 0x%08" PRIxPTR,
-                                elf_file.GetFile().GetPath().c_str(), i,
-                                ph->p_vaddr, ph->p_vaddr + base_address);
-    }
-    ph->p_vaddr += base_address;
-    ph->p_paddr += base_address;
-    CHECK((ph->p_align == 0) || (0 == ((ph->p_vaddr - ph->p_offset) & (ph->p_align - 1))))
-            << elf_file.GetFile().GetPath() << " i=" << i;
-  }
-  return true;
-}
-
-bool ElfFixup::FixupSymbols(ElfFile& elf_file, uintptr_t base_address, bool dynamic) {
-  Elf32_Word section_type = dynamic ? SHT_DYNSYM : SHT_SYMTAB;
-  // TODO: Unfortunate ELFObjectFile has protected symbol access, so use ElfFile
-  Elf32_Shdr* symbol_section = elf_file.FindSectionByType(section_type);
-  if (symbol_section == nullptr) {
-    // file is missing optional .symtab
-    CHECK(!dynamic) << elf_file.GetFile().GetPath();
-    return true;
-  }
-  for (uint32_t i = 0; i < elf_file.GetSymbolNum(*symbol_section); i++) {
-    Elf32_Sym* symbol = elf_file.GetSymbol(section_type, i);
-    CHECK(symbol != nullptr);
-    if (symbol->st_value != 0) {
-      if (DEBUG_FIXUP) {
-        LOG(INFO) << StringPrintf("In %s moving Elf32_Sym[%d] from 0x%08x to 0x%08" PRIxPTR,
-                                  elf_file.GetFile().GetPath().c_str(), i,
-                                  symbol->st_value, symbol->st_value + base_address);
-      }
-      symbol->st_value += base_address;
-    }
-  }
-  return true;
-}
-
-bool ElfFixup::FixupRelocations(ElfFile& elf_file, uintptr_t base_address) {
-  for (Elf32_Word i = 0; i < elf_file.GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* sh = elf_file.GetSectionHeader(i);
-    CHECK(sh != nullptr);
-    if (sh->sh_type == SHT_REL) {
-      for (uint32_t i = 0; i < elf_file.GetRelNum(*sh); i++) {
-        Elf32_Rel& rel = elf_file.GetRel(*sh, i);
-        if (DEBUG_FIXUP) {
-          LOG(INFO) << StringPrintf("In %s moving Elf32_Rel[%d] from 0x%08x to 0x%08" PRIxPTR,
-                                    elf_file.GetFile().GetPath().c_str(), i,
-                                    rel.r_offset, rel.r_offset + base_address);
-        }
-        rel.r_offset += base_address;
-      }
-    } else if (sh->sh_type == SHT_RELA) {
-      for (uint32_t i = 0; i < elf_file.GetRelaNum(*sh); i++) {
-        Elf32_Rela& rela = elf_file.GetRela(*sh, i);
-        if (DEBUG_FIXUP) {
-          LOG(INFO) << StringPrintf("In %s moving Elf32_Rela[%d] from 0x%08x to 0x%08" PRIxPTR,
-                                    elf_file.GetFile().GetPath().c_str(), i,
-                                    rela.r_offset, rela.r_offset + base_address);
-        }
-        rela.r_offset += base_address;
-      }
-    }
-  }
-  return true;
-}
-
-}  // namespace art
diff --git a/compiler/elf_fixup.h b/compiler/elf_fixup.h
deleted file mode 100644
index 1abf06b..0000000
--- a/compiler/elf_fixup.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_ELF_FIXUP_H_
-#define ART_COMPILER_ELF_FIXUP_H_
-
-#include <stdint.h>
-
-#include "base/macros.h"
-#include "os.h"
-
-namespace art {
-
-class ElfFile;
-
-class ElfFixup {
- public:
-  // Fixup an ELF file so that that oat header will be loaded at oat_begin.
-  // Returns true on success, false on failure.
-  static bool Fixup(File* file, uintptr_t oat_data_begin);
-
- private:
-  // Fixup .dynamic d_ptr values for the expected base_address.
-  static bool FixupDynamic(ElfFile& elf_file, uintptr_t base_address);
-
-  // Fixup Elf32_Shdr p_vaddr to load at the desired address.
-  static bool FixupSectionHeaders(ElfFile& elf_file, uintptr_t base_address);
-
-  // Fixup Elf32_Phdr p_vaddr to load at the desired address.
-  static bool FixupProgramHeaders(ElfFile& elf_file, uintptr_t base_address);
-
-  // Fixup symbol table
-  static bool FixupSymbols(ElfFile& elf_file, uintptr_t base_address, bool dynamic);
-
-  // Fixup dynamic relocations
-  static bool FixupRelocations(ElfFile& elf_file, uintptr_t base_address);
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfFixup);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_ELF_FIXUP_H_
diff --git a/compiler/elf_stripper.cc b/compiler/elf_stripper.cc
deleted file mode 100644
index 457d8a0..0000000
--- a/compiler/elf_stripper.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "elf_stripper.h"
-
-#include <unistd.h>
-#include <sys/types.h>
-#include <memory>
-#include <vector>
-
-#include "base/logging.h"
-#include "base/stringprintf.h"
-#include "elf_file.h"
-#include "elf_utils.h"
-#include "utils.h"
-
-namespace art {
-
-bool ElfStripper::Strip(File* file, std::string* error_msg) {
-  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
-  if (elf_file.get() == nullptr) {
-    return false;
-  }
-
-  // ELF files produced by MCLinker look roughly like this
-  //
-  // +------------+
-  // | Elf32_Ehdr | contains number of Elf32_Shdr and offset to first
-  // +------------+
-  // | Elf32_Phdr | program headers
-  // | Elf32_Phdr |
-  // | ...        |
-  // | Elf32_Phdr |
-  // +------------+
-  // | section    | mixture of needed and unneeded sections
-  // +------------+
-  // | section    |
-  // +------------+
-  // | ...        |
-  // +------------+
-  // | section    |
-  // +------------+
-  // | Elf32_Shdr | section headers
-  // | Elf32_Shdr |
-  // | ...        | contains offset to section start
-  // | Elf32_Shdr |
-  // +------------+
-  //
-  // To strip:
-  // - leave the Elf32_Ehdr and Elf32_Phdr values in place.
-  // - walk the sections making a new set of Elf32_Shdr section headers for what we want to keep
-  // - move the sections are keeping up to fill in gaps of sections we want to strip
-  // - write new Elf32_Shdr section headers to end of file, updating Elf32_Ehdr
-  // - truncate rest of file
-  //
-
-  std::vector<Elf32_Shdr> section_headers;
-  std::vector<Elf32_Word> section_headers_original_indexes;
-  section_headers.reserve(elf_file->GetSectionHeaderNum());
-
-
-  Elf32_Shdr* string_section = elf_file->GetSectionNameStringSection();
-  CHECK(string_section != nullptr);
-  for (Elf32_Word i = 0; i < elf_file->GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* sh = elf_file->GetSectionHeader(i);
-    CHECK(sh != nullptr);
-    const char* name = elf_file->GetString(*string_section, sh->sh_name);
-    if (name == nullptr) {
-      CHECK_EQ(0U, i);
-      section_headers.push_back(*sh);
-      section_headers_original_indexes.push_back(0);
-      continue;
-    }
-    if (StartsWith(name, ".debug")
-        || (strcmp(name, ".strtab") == 0)
-        || (strcmp(name, ".symtab") == 0)) {
-      continue;
-    }
-    section_headers.push_back(*sh);
-    section_headers_original_indexes.push_back(i);
-  }
-  CHECK_NE(0U, section_headers.size());
-  CHECK_EQ(section_headers.size(), section_headers_original_indexes.size());
-
-  // section 0 is the NULL section, sections start at offset of first section
-  CHECK(elf_file->GetSectionHeader(1) != nullptr);
-  Elf32_Off offset = elf_file->GetSectionHeader(1)->sh_offset;
-  for (size_t i = 1; i < section_headers.size(); i++) {
-    Elf32_Shdr& new_sh = section_headers[i];
-    Elf32_Shdr* old_sh = elf_file->GetSectionHeader(section_headers_original_indexes[i]);
-    CHECK(old_sh != nullptr);
-    CHECK_EQ(new_sh.sh_name, old_sh->sh_name);
-    if (old_sh->sh_addralign > 1) {
-      offset = RoundUp(offset, old_sh->sh_addralign);
-    }
-    if (old_sh->sh_offset == offset) {
-      // already in place
-      offset += old_sh->sh_size;
-      continue;
-    }
-    // shift section earlier
-    memmove(elf_file->Begin() + offset,
-            elf_file->Begin() + old_sh->sh_offset,
-            old_sh->sh_size);
-    new_sh.sh_offset = offset;
-    offset += old_sh->sh_size;
-  }
-
-  Elf32_Off shoff = offset;
-  size_t section_headers_size_in_bytes = section_headers.size() * sizeof(Elf32_Shdr);
-  memcpy(elf_file->Begin() + offset, &section_headers[0], section_headers_size_in_bytes);
-  offset += section_headers_size_in_bytes;
-
-  elf_file->GetHeader().e_shnum = section_headers.size();
-  elf_file->GetHeader().e_shoff = shoff;
-  int result = ftruncate(file->Fd(), offset);
-  if (result != 0) {
-    *error_msg = StringPrintf("Failed to truncate while stripping ELF file: '%s': %s",
-                              file->GetPath().c_str(), strerror(errno));
-    return false;
-  }
-  return true;
-}
-
-}  // namespace art
diff --git a/compiler/elf_stripper.h b/compiler/elf_stripper.h
deleted file mode 100644
index f1a1d46..0000000
--- a/compiler/elf_stripper.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_ELF_STRIPPER_H_
-#define ART_COMPILER_ELF_STRIPPER_H_
-
-#include <string>
-
-#include "base/macros.h"
-#include "os.h"
-
-namespace art {
-
-class ElfStripper {
- public:
-  // Strip an ELF file of unneeded debugging information.
-  // Returns true on success, false on failure.
-  static bool Strip(File* file, std::string* error_msg);
-
- private:
-  DISALLOW_IMPLICIT_CONSTRUCTORS(ElfStripper);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_ELF_STRIPPER_H_
diff --git a/compiler/elf_writer.cc b/compiler/elf_writer.cc
index 55ee18e..47402f3 100644
--- a/compiler/elf_writer.cc
+++ b/compiler/elf_writer.cc
@@ -30,8 +30,8 @@
 
 namespace art {
 
-uint32_t ElfWriter::GetOatDataAddress(ElfFile* elf_file) {
-  Elf32_Addr oatdata_address = elf_file->FindSymbolAddress(SHT_DYNSYM,
+uintptr_t ElfWriter::GetOatDataAddress(ElfFile* elf_file) {
+  uintptr_t oatdata_address = elf_file->FindSymbolAddress(SHT_DYNSYM,
                                                            "oatdata",
                                                            false);
   CHECK_NE(0U, oatdata_address);
@@ -51,4 +51,16 @@
   CHECK_NE(0U, oat_data_offset);
 }
 
+bool ElfWriter::Fixup(File* file, uintptr_t oat_data_begin) {
+  std::string error_msg;
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, &error_msg));
+  CHECK(elf_file.get() != nullptr) << error_msg;
+
+  // Lookup "oatdata" symbol address.
+  uintptr_t oatdata_address = ElfWriter::GetOatDataAddress(elf_file.get());
+  uintptr_t base_address = oat_data_begin - oatdata_address;
+
+  return elf_file->Fixup(base_address);
+}
+
 }  // namespace art
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 03b965a..033c1f8 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -42,7 +42,9 @@
                                    size_t& oat_data_offset);
 
   // Returns runtime oat_data runtime address for an opened ElfFile.
-  static uint32_t GetOatDataAddress(ElfFile* elf_file);
+  static uintptr_t GetOatDataAddress(ElfFile* elf_file);
+
+  static bool Fixup(File* file, uintptr_t oat_data_begin);
 
  protected:
   ElfWriter(const CompilerDriver& driver, File* elf_file)
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 2a37049..5834e8e 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -23,7 +23,7 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
-#include "elf_fixup.h"
+#include "elf_writer.h"
 #include "gc/space/image_space.h"
 #include "image_writer.h"
 #include "lock_word.h"
@@ -101,7 +101,7 @@
     bool success_image =
         writer.Write(image_file.GetFilename(), dup_oat->GetPath(), dup_oat->GetPath());
     ASSERT_TRUE(success_image);
-    bool success_fixup = ElfFixup::Fixup(dup_oat.get(), writer.GetOatDataBegin());
+    bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer.GetOatDataBegin());
     ASSERT_TRUE(success_fixup);
   }
 
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 1c8b8d5..6fff5f4 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -811,11 +811,12 @@
 }
 
 static OatHeader* GetOatHeaderFromElf(ElfFile* elf) {
-  Elf32_Shdr* data_sec = elf->FindSectionByName(".rodata");
-  if (data_sec == nullptr) {
+  uint64_t data_sec_offset;
+  bool has_data_sec = elf->GetSectionOffsetAndSize(".rodata", &data_sec_offset, nullptr);
+  if (!has_data_sec) {
     return nullptr;
   }
-  return reinterpret_cast<OatHeader*>(elf->Begin() + data_sec->sh_offset);
+  return reinterpret_cast<OatHeader*>(elf->Begin() + data_sec_offset);
 }
 
 void ImageWriter::SetOatChecksumFromElfFile(File* elf_file) {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index dd64368..e64d2ab 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -38,6 +38,7 @@
 #include "scoped_thread_state_change.h"
 #include "handle_scope-inl.h"
 #include "utils/arm/assembler_thumb2.h"
+#include "utils/arm64/assembler_arm64.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
@@ -117,10 +118,14 @@
   DISALLOW_COPY_AND_ASSIGN(X86RelativeCallPatcher);
 };
 
-class OatWriter::Thumb2RelativeCallPatcher FINAL : public RelativeCallPatcher {
+class OatWriter::ArmBaseRelativeCallPatcher : public RelativeCallPatcher {
  public:
-  explicit Thumb2RelativeCallPatcher(OatWriter* writer)
-      : writer_(writer), thunk_code_(CompileThunkCode()),
+  ArmBaseRelativeCallPatcher(OatWriter* writer,
+                             InstructionSet instruction_set, std::vector<uint8_t> thunk_code,
+                             uint32_t max_positive_displacement, uint32_t max_negative_displacement)
+      : writer_(writer), instruction_set_(instruction_set), thunk_code_(thunk_code),
+        max_positive_displacement_(max_positive_displacement),
+        max_negative_displacement_(max_negative_displacement),
         thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
   }
 
@@ -130,11 +135,11 @@
     // of code. To avoid any alignment discrepancies for the final chunk, we always align the
     // offset after reserving or writing any chunk.
     if (UNLIKELY(compiled_method == nullptr)) {
-      uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+      uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
       bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset);
       if (needs_thunk) {
         thunk_locations_.push_back(aligned_offset);
-        offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), kThumb2);
+        offset = CompiledMethod::AlignCode(aligned_offset + thunk_code_.size(), instruction_set_);
       }
       return offset;
     }
@@ -143,14 +148,14 @@
     uint32_t quick_code_offset = compiled_method->AlignCode(offset) + sizeof(OatQuickMethodHeader);
     uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
     if (!unprocessed_patches_.empty() &&
-        next_aligned_offset - unprocessed_patches_.front().second > kMaxPositiveDisplacement) {
+        next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
       bool needs_thunk = ReserveSpaceProcessPatches(next_aligned_offset);
       if (needs_thunk) {
         // A single thunk will cover all pending patches.
         unprocessed_patches_.clear();
         uint32_t thunk_location = compiled_method->AlignCode(offset);
         thunk_locations_.push_back(thunk_location);
-        offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), kThumb2);
+        offset = CompiledMethod::AlignCode(thunk_location + thunk_code_.size(), instruction_set_);
       }
     }
     for (const LinkerPatch& patch : compiled_method->GetPatches()) {
@@ -166,7 +171,7 @@
     if (current_thunk_to_write_ == thunk_locations_.size()) {
       return offset;
     }
-    uint32_t aligned_offset = CompiledMethod::AlignCode(offset, kThumb2);
+    uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
     if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
       ++current_thunk_to_write_;
       uint32_t aligned_code_delta = aligned_offset - offset;
@@ -179,7 +184,7 @@
       writer_->size_relative_call_thunks_ += thunk_code_.size();
       uint32_t thunk_end_offset = aligned_offset + thunk_code_.size();
       // Align after writing the chunk; see ReserveSpace() above.
-      offset = CompiledMethod::AlignCode(thunk_end_offset, kThumb2);
+      offset = CompiledMethod::AlignCode(thunk_end_offset, instruction_set_);
       aligned_code_delta = offset - thunk_end_offset;
       if (aligned_code_delta != 0u && !writer_->WriteCodeAlignment(out, aligned_code_delta)) {
         return 0u;
@@ -188,30 +193,88 @@
     return offset;
   }
 
-  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
-             uint32_t target_offset) OVERRIDE {
-    DCHECK_LE(literal_offset + 4u, code->size());
-    DCHECK_EQ(literal_offset & 1u, 0u);
-    DCHECK_EQ(patch_offset & 1u, 0u);
-    DCHECK_EQ(target_offset & 1u, 1u);  // Thumb2 mode bit.
+ protected:
+  uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset) {
     // Unsigned arithmetic with its well-defined overflow behavior is just fine here.
-    uint32_t displacement = target_offset - 1u - patch_offset;
+    uint32_t displacement = target_offset - patch_offset;
     // NOTE: With unsigned arithmetic we do mean to use && rather than || below.
-    if (displacement > kMaxPositiveDisplacement && displacement < -kMaxNegativeDisplacement) {
+    if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
       // Unwritten thunks have higher offsets, check if it's within range.
       DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
              thunk_locations_[current_thunk_to_write_] > patch_offset);
       if (current_thunk_to_write_ != thunk_locations_.size() &&
-          thunk_locations_[current_thunk_to_write_] - patch_offset < kMaxPositiveDisplacement) {
+          thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
         displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
       } else {
         // We must have a previous thunk then.
         DCHECK_NE(current_thunk_to_write_, 0u);
         DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
         displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
-        DCHECK(displacement >= -kMaxNegativeDisplacement);
+        DCHECK(displacement >= -max_negative_displacement_);
       }
     }
+    return displacement;
+  }
+
+ private:
+  bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
+    // Process as many patches as possible; stop only on unresolved targets or calls too far back.
+    while (!unprocessed_patches_.empty()) {
+      uint32_t patch_offset = unprocessed_patches_.front().second;
+      auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
+      if (it == writer_->method_offset_map_.end()) {
+        // If still unresolved, check if we have a thunk within range.
+        DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
+        if (thunk_locations_.empty() ||
+            patch_offset - thunk_locations_.back() > max_negative_displacement_) {
+          return next_aligned_offset - patch_offset > max_positive_displacement_;
+        }
+      } else if (it->second >= patch_offset) {
+        DCHECK_LE(it->second - patch_offset, max_positive_displacement_);
+      } else {
+        // When calling back, check if we have a thunk that's closer than the actual target.
+        uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
+            ? it->second
+            : thunk_locations_.back();
+        DCHECK_GT(patch_offset, target_offset);
+        if (patch_offset - target_offset > max_negative_displacement_) {
+          return true;
+        }
+      }
+      unprocessed_patches_.pop_front();
+    }
+    return false;
+  }
+
+  OatWriter* const writer_;
+  const InstructionSet instruction_set_;
+  const std::vector<uint8_t> thunk_code_;
+  const uint32_t max_positive_displacement_;
+  const uint32_t max_negative_displacement_;
+  std::vector<uint32_t> thunk_locations_;
+  size_t current_thunk_to_write_;
+
+  // ReserveSpace() tracks unprocessed patches.
+  typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
+  std::deque<UnprocessedPatch> unprocessed_patches_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativeCallPatcher);
+};
+
+class OatWriter::Thumb2RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+  explicit Thumb2RelativeCallPatcher(OatWriter* writer)
+      : ArmBaseRelativeCallPatcher(writer, kThumb2, CompileThunkCode(),
+                                   kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+  }
+
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    DCHECK_LE(literal_offset + 4u, code->size());
+    DCHECK_EQ(literal_offset & 1u, 0u);
+    DCHECK_EQ(patch_offset & 1u, 0u);
+    DCHECK_EQ(target_offset & 1u, 1u);  // Thumb2 mode bit.
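+    // Clear the Thumb mode bit before computing the displacement.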
+    uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
     displacement -= kPcDisplacement;  // The base PC is at the end of the 4-byte patch.
     DCHECK_EQ(displacement & 1u, 0u);
     DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u);  // 25-bit signed.
@@ -237,35 +300,6 @@
   }
 
  private:
-  bool ReserveSpaceProcessPatches(uint32_t next_aligned_offset) {
-    // Process as many patches as possible, stop only on unresolved targets or calls too far back.
-    while (!unprocessed_patches_.empty()) {
-      uint32_t patch_offset = unprocessed_patches_.front().second;
-      auto it = writer_->method_offset_map_.find(unprocessed_patches_.front().first);
-      if (it == writer_->method_offset_map_.end()) {
-        // If still unresolved, check if we have a thunk within range.
-        DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
-        if (thunk_locations_.empty() ||
-            patch_offset - thunk_locations_.back() > kMaxNegativeDisplacement) {
-          return next_aligned_offset - patch_offset > kMaxPositiveDisplacement;
-        }
-      } else if (it->second >= patch_offset) {
-        DCHECK_LE(it->second - patch_offset, kMaxPositiveDisplacement);
-      } else {
-        // When calling back, check if we have a thunk that's closer than the actual target.
-        uint32_t target_offset = (thunk_locations_.empty() || it->second > thunk_locations_.back())
-            ? it->second
-            : thunk_locations_.back();
-        DCHECK_GT(patch_offset, target_offset);
-        if (patch_offset - target_offset > kMaxNegativeDisplacement) {
-          return true;
-        }
-      }
-      unprocessed_patches_.pop_front();
-    }
-    return false;
-  }
-
   static std::vector<uint8_t> CompileThunkCode() {
     // The thunk just uses the entry point in the ArtMethod. This works even for calls
     // to the generic JNI and interpreter trampolines.
@@ -289,18 +323,60 @@
   static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
   static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
 
-  OatWriter* const writer_;
-  const std::vector<uint8_t> thunk_code_;
-  std::vector<uint32_t> thunk_locations_;
-  size_t current_thunk_to_write_;
-
-  // ReserveSpace() tracks unprocessed patches.
-  typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
-  std::deque<UnprocessedPatch> unprocessed_patches_;
-
   DISALLOW_COPY_AND_ASSIGN(Thumb2RelativeCallPatcher);
 };
 
+class OatWriter::Arm64RelativeCallPatcher FINAL : public ArmBaseRelativeCallPatcher {
+ public:
+  explicit Arm64RelativeCallPatcher(OatWriter* writer)
+      : ArmBaseRelativeCallPatcher(writer, kArm64, CompileThunkCode(),
+                                   kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+  }
+
+  void Patch(std::vector<uint8_t>* code, uint32_t literal_offset, uint32_t patch_offset,
+             uint32_t target_offset) OVERRIDE {
+    DCHECK_LE(literal_offset + 4u, code->size());
+    DCHECK_EQ(literal_offset & 3u, 0u);
+    DCHECK_EQ(patch_offset & 3u, 0u);
+    DCHECK_EQ(target_offset & 3u, 0u);
+    uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+    DCHECK_EQ(displacement & 3u, 0u);
+    DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u);  // 28-bit signed.
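+    // BL encodes a signed 26-bit word offset; place displacement / 4 in the low 26 bits.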
+    uint32_t value = (displacement & 0x0fffffffu) >> 2;
+    value |= 0x94000000;  // BL
+
+    uint8_t* addr = &(*code)[literal_offset];
+    // Check that we're just overwriting an existing BL.
+    DCHECK_EQ(addr[3] & 0xfc, 0x94);
+    // Write the new BL.
+    addr[0] = (value >> 0) & 0xff;
+    addr[1] = (value >> 8) & 0xff;
+    addr[2] = (value >> 16) & 0xff;
+    addr[3] = (value >> 24) & 0xff;
+  }
+
+ private:
+  static std::vector<uint8_t> CompileThunkCode() {
+    // The thunk just uses the entry point in the ArtMethod. This works even for calls
+    // to the generic JNI and interpreter trampolines.
+    arm64::Arm64Assembler assembler;
+    Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
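+    // Load the quick entry point from the ArtMethod in X0 into IP0 and branch to it.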
+    assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+    std::vector<uint8_t> thunk_code(assembler.CodeSize());
+    MemoryRegion code(thunk_code.data(), thunk_code.size());
+    assembler.FinalizeInstructions(code);
+    return thunk_code;
+  }
+
+  // Maximum positive and negative displacement measured from the patch location.
+  // (Signed 28-bit displacement with the lowest two bits 0 has range [-2^27, 2^27-4] measured
+  // from the ARM64 PC pointing to the BL.)
+  static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
+  static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
+
+  DISALLOW_COPY_AND_ASSIGN(Arm64RelativeCallPatcher);
+};
+
 #define DCHECK_OFFSET() \
   DCHECK_EQ(static_cast<off_t>(file_offset + relative_offset), out->Seek(0, kSeekCurrent)) \
     << "file_offset=" << file_offset << " relative_offset=" << relative_offset
@@ -373,7 +449,8 @@
       relative_call_patcher_.reset(new Thumb2RelativeCallPatcher(this));
       break;
     case kArm64:
-      // TODO: Implement relative calls for arm64.
+      relative_call_patcher_.reset(new Arm64RelativeCallPatcher(this));
+      break;
     default:
       relative_call_patcher_.reset(new NoRelativeCallPatcher);
       break;
@@ -868,8 +945,8 @@
     : OatDexMethodVisitor(writer, relative_offset),
       out_(out),
       file_offset_(file_offset),
-      self_(Thread::Current()),
-      old_no_thread_suspension_cause_(self_->StartAssertNoThreadSuspension("OatWriter patching")),
+      soa_(Thread::Current()),
+      no_thread_suspension_(soa_.Self(), "OatWriter patching"),
       class_linker_(Runtime::Current()->GetClassLinker()),
       dex_cache_(nullptr) {
     if (writer_->image_writer_ != nullptr) {
@@ -877,12 +954,9 @@
       CHECK(writer_->image_writer_->IsImageAddressSpaceReady());
       patched_code_.reserve(16 * KB);
     }
-    self_->TransitionFromSuspendedToRunnable();
   }
 
   ~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) {
-    self_->EndAssertNoThreadSuspension(old_no_thread_suspension_cause_);
-    self_->TransitionFromRunnableToSuspended(kNative);
   }
 
   bool StartClass(const DexFile* dex_file, size_t class_def_index)
@@ -997,9 +1071,9 @@
 
  private:
   OutputStream* const out_;
-  size_t const file_offset_;
-  Thread* const self_;
-  const char* const old_no_thread_suspension_cause_;  // TODO: Use ScopedAssertNoThreadSuspension.
+  const size_t file_offset_;
+  const ScopedObjectAccess soa_;
+  const ScopedAssertNoThreadSuspension no_thread_suspension_;
   ClassLinker* const class_linker_;
   mirror::DexCache* dex_cache_;
   std::vector<uint8_t> patched_code_;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 5545ba8..a1e61b9 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -330,7 +330,9 @@
   class RelativeCallPatcher;
   class NoRelativeCallPatcher;
   class X86RelativeCallPatcher;
+  class ArmBaseRelativeCallPatcher;
   class Thumb2RelativeCallPatcher;
+  class Arm64RelativeCallPatcher;
 
   std::unique_ptr<RelativeCallPatcher> relative_call_patcher_;
 
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 5015bd0..5bcc65b 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -102,28 +102,19 @@
 
   uint32_t pos = 1;
   for (int i = 0; i < number_of_parameters; i++) {
-    switch (shorty[pos++]) {
-      case 'F':
-      case 'D': {
-        return false;
-      }
-
-      default: {
-        // integer and reference parameters.
-        HParameterValue* parameter =
-            new (arena_) HParameterValue(parameter_index++, Primitive::GetType(shorty[pos - 1]));
-        entry_block_->AddInstruction(parameter);
-        HLocal* local = GetLocalAt(locals_index++);
-        // Store the parameter value in the local that the dex code will use
-        // to reference that parameter.
-        entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
-        if (parameter->GetType() == Primitive::kPrimLong) {
-          i++;
-          locals_index++;
-          parameter_index++;
-        }
-        break;
-      }
+    HParameterValue* parameter =
+        new (arena_) HParameterValue(parameter_index++, Primitive::GetType(shorty[pos++]));
+    entry_block_->AddInstruction(parameter);
+    HLocal* local = GetLocalAt(locals_index++);
+    // Store the parameter value in the local that the dex code will use
+    // to reference that parameter.
+    entry_block_->AddInstruction(new (arena_) HStoreLocal(local, parameter));
+    bool is_wide = (parameter->GetType() == Primitive::kPrimLong)
+        || (parameter->GetType() == Primitive::kPrimDouble);
+    if (is_wide) {
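+      // Longs and doubles occupy two vregs and two parameter slots, so skip the second half.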
+      i++;
+      locals_index++;
+      parameter_index++;
     }
   }
   return true;
@@ -402,10 +393,8 @@
   uint32_t argument_index = start_index;
   for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
     Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
-    if (!IsTypeSupported(type)) {
-      return false;
-    }
-    if (!is_range && type == Primitive::kPrimLong && args[i] + 1 != args[i + 1]) {
+    bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
+    if (!is_range && is_wide && args[i] + 1 != args[i + 1]) {
       LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
                    << " at " << dex_offset;
       // We do not implement non-sequential register pairs.
@@ -413,15 +402,11 @@
     }
     HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
     invoke->SetArgumentAt(argument_index, arg);
-    if (type == Primitive::kPrimLong) {
+    if (is_wide) {
       i++;
     }
   }
 
-  if (!IsTypeSupported(return_type)) {
-    return false;
-  }
-
   DCHECK_EQ(argument_index, number_of_arguments);
   current_block_->AddInstruction(invoke);
   return true;
@@ -648,17 +633,22 @@
     }
 
     case Instruction::RETURN: {
-      BuildReturn(instruction, Primitive::kPrimInt);
+      DCHECK_NE(return_type_, Primitive::kPrimNot);
+      DCHECK_NE(return_type_, Primitive::kPrimLong);
+      DCHECK_NE(return_type_, Primitive::kPrimDouble);
+      BuildReturn(instruction, return_type_);
       break;
     }
 
     case Instruction::RETURN_OBJECT: {
-      BuildReturn(instruction, Primitive::kPrimNot);
+      DCHECK(return_type_ == Primitive::kPrimNot);
+      BuildReturn(instruction, return_type_);
       break;
     }
 
     case Instruction::RETURN_WIDE: {
-      BuildReturn(instruction, Primitive::kPrimLong);
+      DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong);
+      BuildReturn(instruction, return_type_);
       break;
     }
 
@@ -698,6 +688,16 @@
       break;
     }
 
+    case Instruction::ADD_DOUBLE: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT: {
+      Binop_23x<HAdd>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
     case Instruction::SUB_INT: {
       Binop_23x<HSub>(instruction, Primitive::kPrimInt);
       break;
@@ -718,6 +718,16 @@
       break;
     }
 
+    case Instruction::ADD_DOUBLE_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
+    case Instruction::ADD_FLOAT_2ADDR: {
+      Binop_12x<HAdd>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
     case Instruction::SUB_INT_2ADDR: {
       Binop_12x<HSub>(instruction, Primitive::kPrimInt);
       break;
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index e143786..7d7b188 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_BUILDER_H_
 
 #include "dex_file.h"
+#include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "primitive.h"
@@ -32,9 +33,9 @@
 class HGraphBuilder : public ValueObject {
  public:
   HGraphBuilder(ArenaAllocator* arena,
-                DexCompilationUnit* dex_compilation_unit = nullptr,
-                const DexFile* dex_file = nullptr,
-                CompilerDriver* driver = nullptr)
+                DexCompilationUnit* dex_compilation_unit,
+                const DexFile* dex_file,
+                CompilerDriver* driver)
       : arena_(arena),
         branch_targets_(arena, 0),
         locals_(arena, 0),
@@ -46,7 +47,24 @@
         constant1_(nullptr),
         dex_file_(dex_file),
         dex_compilation_unit_(dex_compilation_unit),
-        compiler_driver_(driver) {}
+        compiler_driver_(driver),
+        return_type_(Primitive::GetType(dex_compilation_unit_->GetShorty()[0])) {}
+
+  // Only for unit testing.
+  HGraphBuilder(ArenaAllocator* arena, Primitive::Type return_type = Primitive::kPrimInt)
+      : arena_(arena),
+        branch_targets_(arena, 0),
+        locals_(arena, 0),
+        entry_block_(nullptr),
+        exit_block_(nullptr),
+        current_block_(nullptr),
+        graph_(nullptr),
+        constant0_(nullptr),
+        constant1_(nullptr),
+        dex_file_(nullptr),
+        dex_compilation_unit_(nullptr),
+        compiler_driver_(nullptr),
+        return_type_(return_type) {}
 
   HGraph* BuildGraph(const DexFile::CodeItem& code);
 
@@ -128,6 +146,7 @@
   const DexFile* const dex_file_;
   DexCompilationUnit* const dex_compilation_unit_;
   CompilerDriver* const compiler_driver_;
+  const Primitive::Type return_type_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphBuilder);
 };
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 2a9a7b3..e6fe067 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -189,6 +189,9 @@
       if (loc.GetPolicy() == Location::kRequiresRegister) {
         loc = Location::RegisterLocation(
             AllocateFreeRegister(input->GetType(), blocked_registers_));
+      } else if (loc.GetPolicy() == Location::kRequiresFpuRegister) {
+        loc = Location::FpuRegisterLocation(
+            AllocateFreeRegister(input->GetType(), blocked_registers_));
       } else {
         DCHECK_EQ(loc.GetPolicy(), Location::kAny);
         HLoadLocal* load = input->AsLoadLocal();
@@ -223,6 +226,10 @@
         result_location = Location::RegisterLocation(
             AllocateFreeRegister(instruction->GetType(), blocked_registers_));
         break;
+      case Location::kRequiresFpuRegister:
+        result_location = Location::FpuRegisterLocation(
+            AllocateFreeRegister(instruction->GetType(), blocked_registers_));
+        break;
       case Location::kSameAsFirstInput:
         result_location = locations->InAt(0);
         break;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b58f3b3..c7623fe 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -203,28 +203,42 @@
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
-template <typename T>
+template <typename C, typename F>
 class CallingConvention {
  public:
-  CallingConvention(const T* registers, int number_of_registers)
-      : registers_(registers), number_of_registers_(number_of_registers) {}
+  CallingConvention(const C* registers,
+                    size_t number_of_registers,
+                    const F* fpu_registers,
+                    size_t number_of_fpu_registers)
+      : registers_(registers),
+        number_of_registers_(number_of_registers),
+        fpu_registers_(fpu_registers),
+        number_of_fpu_registers_(number_of_fpu_registers) {}
 
   size_t GetNumberOfRegisters() const { return number_of_registers_; }
+  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
 
-  T GetRegisterAt(size_t index) const {
+  C GetRegisterAt(size_t index) const {
     DCHECK_LT(index, number_of_registers_);
     return registers_[index];
   }
 
-  uint8_t GetStackOffsetOf(size_t index) const {
+  F GetFpuRegisterAt(size_t index) const {
+    DCHECK_LT(index, number_of_fpu_registers_);
+    return fpu_registers_[index];
+  }
+
+  size_t GetStackOffsetOf(size_t index) const {
     // We still reserve the space for parameters passed by registers.
     // Add one for the method pointer.
     return (index + 1) * kVRegSize;
   }
 
  private:
-  const T* registers_;
+  const C* registers_;
   const size_t number_of_registers_;
+  const F* fpu_registers_;
+  const size_t number_of_fpu_registers_;
 
   DISALLOW_COPY_AND_ASSIGN(CallingConvention);
 };
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1876cb9..d116905 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -47,12 +47,16 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
+static constexpr DRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, DRegister> {
  public:
   InvokeRuntimeCallingConvention()
       : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -218,6 +222,10 @@
   return blocked_registers + kNumberOfAllocIds;
 }
 
+static bool* GetBlockedDRegisters(bool* blocked_registers) {
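+  // D register flags follow the core and S register entries in the blocked array.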
+  return blocked_registers + kNumberOfCoreRegisters + kNumberOfSRegisters;
+}
+
 ManagedRegister CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type,
                                                        bool* blocked_registers) const {
   switch (type) {
@@ -262,8 +270,10 @@
     }
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << type;
+    case Primitive::kPrimDouble: {
+      int reg = AllocateFreeRegisterInternal(
+          GetBlockedDRegisters(blocked_registers), kNumberOfDRegisters);
+      return ArmManagedRegister::FromDRegister(static_cast<DRegister>(reg));
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
@@ -274,6 +284,7 @@
 
 void CodeGeneratorARM::SetupBlockedRegisters(bool* blocked_registers) const {
   bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+  bool* blocked_fpu_registers = GetBlockedDRegisters(blocked_registers);
 
   // Don't allocate the dalvik style register pair passing.
   blocked_register_pairs[R1_R2] = true;
@@ -300,6 +311,15 @@
   blocked_registers[R8] = true;
   blocked_registers[R10] = true;
   blocked_registers[R11] = true;
+
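+  // D8-D15 are callee-saved in AAPCS; block them from allocation.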
+  blocked_fpu_registers[D8] = true;
+  blocked_fpu_registers[D9] = true;
+  blocked_fpu_registers[D10] = true;
+  blocked_fpu_registers[D11] = true;
+  blocked_fpu_registers[D12] = true;
+  blocked_fpu_registers[D13] = true;
+  blocked_fpu_registers[D14] = true;
+  blocked_fpu_registers[D15] = true;
 }
 
 size_t CodeGeneratorARM::GetNumberOfRegisters() const {
@@ -348,16 +368,14 @@
 Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
       break;
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented type " << load->GetType();
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
 
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -378,6 +396,7 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
     case Primitive::kPrimNot: {
       uint32_t index = gp_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -387,7 +406,8 @@
       }
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble: {
       uint32_t index = gp_index_;
       gp_index_ += 2;
       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
@@ -400,11 +420,6 @@
       }
     }
 
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented parameter type " << type;
-      break;
-
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
@@ -419,14 +434,32 @@
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
       __ Mov(destination.AsArm().AsCoreRegister(), source.AsArm().AsCoreRegister());
+    } else if (source.IsFpuRegister()) {
+      __ vmovrs(destination.AsArm().AsCoreRegister(),
+                source.AsArm().AsOverlappingDRegisterLow());
     } else {
       __ ldr(destination.AsArm().AsCoreRegister(), Address(SP, source.GetStackIndex()));
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ vmovsr(destination.AsArm().AsOverlappingDRegisterLow(),
+                source.AsArm().AsCoreRegister());
+    } else if (source.IsFpuRegister()) {
+      __ vmovs(destination.AsArm().AsOverlappingDRegisterLow(),
+               source.AsArm().AsOverlappingDRegisterLow());
+    } else {
+      __ vldrs(destination.AsArm().AsOverlappingDRegisterLow(),
+               Address(SP, source.GetStackIndex()));
+    }
   } else {
     DCHECK(destination.IsStackSlot());
     if (source.IsRegister()) {
       __ str(source.AsArm().AsCoreRegister(), Address(SP, destination.GetStackIndex()));
+    } else if (source.IsFpuRegister()) {
+      __ vstrs(source.AsArm().AsOverlappingDRegisterLow(),
+               Address(SP, destination.GetStackIndex()));
     } else {
+      DCHECK(source.IsStackSlot());
       __ ldr(IP, Address(SP, source.GetStackIndex()));
       __ str(IP, Address(SP, destination.GetStackIndex()));
     }
@@ -441,6 +474,8 @@
     if (source.IsRegister()) {
       __ Mov(destination.AsArm().AsRegisterPairLow(), source.AsArm().AsRegisterPairLow());
       __ Mov(destination.AsArm().AsRegisterPairHigh(), source.AsArm().AsRegisterPairHigh());
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else if (source.IsQuickParameter()) {
       uint32_t argument_index = source.GetQuickParameterIndex();
       InvokeDexCallingConvention calling_convention;
@@ -458,6 +493,12 @@
                           SP, source.GetStackIndex());
       }
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsDoubleStackSlot()) {
+      __ vldrd(destination.AsArm().AsDRegister(), Address(SP, source.GetStackIndex()));
+    } else {
+      LOG(FATAL) << "Unimplemented";
+    }
   } else if (destination.IsQuickParameter()) {
     InvokeDexCallingConvention calling_convention;
     uint32_t argument_index = destination.GetQuickParameterIndex();
@@ -465,6 +506,8 @@
       __ Mov(calling_convention.GetRegisterAt(argument_index), source.AsArm().AsRegisterPairLow());
       __ str(source.AsArm().AsRegisterPairHigh(),
              Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1)));
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ ldr(calling_convention.GetRegisterAt(argument_index), Address(SP, source.GetStackIndex()));
@@ -489,6 +532,8 @@
       __ ldr(R0,
              Address(SP, calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
       __ str(R0, Address(SP, destination.GetHighStackIndex(kArmWordSize)));
+    } else if (source.IsFpuRegister()) {
+      __ vstrd(source.AsArm().AsDRegister(), Address(SP, destination.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ ldr(IP, Address(SP, source.GetStackIndex()));
@@ -535,15 +580,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         Move32(location, Location::StackSlot(stack_slot));
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move64(location, Location::DoubleStackSlot(stack_slot));
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   } else {
     DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
@@ -554,15 +601,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimNot:
       case Primitive::kPrimInt:
+      case Primitive::kPrimFloat:
         Move32(location, locations->Out());
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move64(location, locations->Out());
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   }
 }
@@ -608,18 +657,14 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  HCondition* condition = cond->AsCondition();
-  if (condition->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::RequiresRegister());
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   }
 }
 
 void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  HCondition* condition = cond->AsCondition();
-  if (condition->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     // Condition has been materialized, compare the output to 0
     DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
     __ cmp(if_instr->GetLocations()->InAt(0).AsArm().AsCoreRegister(),
@@ -628,7 +673,7 @@
   } else {
     // Condition has not been materialized, use its inputs as the comparison and its
     // condition as the branch condition.
-    LocationSummary* locations = condition->GetLocations();
+    LocationSummary* locations = cond->GetLocations();
     if (locations->InAt(1).IsRegister()) {
       __ cmp(locations->InAt(0).AsArm().AsCoreRegister(),
              ShifterOperand(locations->InAt(1).AsArm().AsCoreRegister()));
@@ -645,7 +690,7 @@
       }
     }
     __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()),
-         ARMCondition(condition->GetCondition()));
+         ARMCondition(cond->AsCondition()->GetCondition()));
   }
 
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
@@ -657,8 +702,8 @@
 void LocationsBuilderARM::VisitCondition(HCondition* comp) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1)));
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(1, Location::RegisterOrConstant(comp->InputAt(1)), Location::kDiesAtEntry);
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -764,15 +809,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
   }
 }
 
@@ -816,10 +863,12 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(0, ArmCoreLocation(R0));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(
           0, Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
       break;
@@ -838,10 +887,12 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsArm().AsCoreRegister(), R0);
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsArm().AsRegisterPair(), R0_R1);
         break;
 
@@ -911,20 +962,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetOut(ArmCoreLocation(R0));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetOut(Location::RegisterLocation(ArmManagedRegister::FromRegisterPair(R0_R1)));
       break;
 
     case Primitive::kPrimVoid:
       break;
-
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
-      break;
   }
 }
 
@@ -960,21 +1008,23 @@
   switch (add->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
+      bool dies_at_entry = add->GetResultType() != Primitive::kPrimLong;
+      locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
+      locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)), dies_at_entry);
       locations->SetOut(Location::RequiresRegister());
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
@@ -1002,15 +1052,20 @@
              ShifterOperand(locations->InAt(1).AsArm().AsRegisterPairHigh()));
       break;
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat:
+      __ vadds(locations->Out().AsArm().AsOverlappingDRegisterLow(),
+               locations->InAt(0).AsArm().AsOverlappingDRegisterLow(),
+               locations->InAt(1).AsArm().AsOverlappingDRegisterLow());
+      break;
+
+    case Primitive::kPrimDouble:
+      __ vaddd(locations->Out().AsArm().AsDRegister(),
+               locations->InAt(0).AsArm().AsDRegister(),
+               locations->InAt(1).AsArm().AsDRegister());
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
@@ -1020,8 +1075,9 @@
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)));
+      bool dies_at_entry = sub->GetResultType() != Primitive::kPrimLong;
+      locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
+      locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1)), dies_at_entry);
       locations->SetOut(Location::RequiresRegister());
       break;
     }
@@ -1116,7 +1172,7 @@
 void LocationsBuilderARM::VisitNot(HNot* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1129,8 +1185,8 @@
 void LocationsBuilderARM::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1186,10 +1242,12 @@
 void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  bool is_object_type = instruction->GetFieldType() == Primitive::kPrimNot;
+  bool dies_at_entry = !is_object_type;
+  locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
+  locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry);
   // Temporary registers for the write barrier.
-  if (instruction->GetFieldType() == Primitive::kPrimNot) {
+  if (is_object_type) {
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -1246,7 +1304,7 @@
 void LocationsBuilderARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1307,8 +1365,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
@@ -1317,19 +1376,23 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  DCHECK(obj.Equals(locations->Out()));
 
   if (obj.IsRegister()) {
     __ cmp(obj.AsArm().AsCoreRegister(), ShifterOperand(0));
+    __ b(slow_path->GetEntryLabel(), EQ);
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    __ b(slow_path->GetEntryLabel());
   }
-  __ b(slow_path->GetEntryLabel(), EQ);
 }
 
 void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(
+      1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1439,9 +1502,10 @@
     locations->SetInAt(1, ArmCoreLocation(calling_convention.GetRegisterAt(1)));
     locations->SetInAt(2, ArmCoreLocation(calling_convention.GetRegisterAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    locations->SetInAt(2, Location::RequiresRegister());
+    locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+    locations->SetInAt(
+        1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
+    locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
   }
 }
 
@@ -1527,7 +1591,7 @@
 void LocationsBuilderARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1544,8 +1608,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -1610,13 +1675,12 @@
       new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
   codegen_->AddSlowPath(slow_path);
 
-  __ AddConstant(R4, R4, -1);
-  __ cmp(R4, ShifterOperand(0));
+  __ subs(R4, R4, ShifterOperand(1));
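+  // subs already updated the condition flags, so the branches below need no separate cmp.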
   if (successor == nullptr) {
-    __ b(slow_path->GetEntryLabel(), LE);
+    __ b(slow_path->GetEntryLabel(), EQ);
     __ Bind(slow_path->GetReturnLabel());
   } else {
-    __ b(codegen_->GetLabelOf(successor), GT);
+    __ b(codegen_->GetLabelOf(successor), NE);
     __ b(slow_path->GetEntryLabel());
   }
 }
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8c86b7a..b5de8ed 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -32,11 +32,16 @@
 static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static constexpr DRegister kParameterFpuRegisters[] = { };
+static constexpr size_t kParameterFpuRegistersLength = 0;
 
-class InvokeDexCallingConvention : public CallingConvention<Register> {
+class InvokeDexCallingConvention : public CallingConvention<Register, DRegister> {
  public:
   InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+      : CallingConvention(kParameterCoreRegisters,
+                          kParameterCoreRegistersLength,
+                          kParameterFpuRegisters,
+                          kParameterFpuRegistersLength) {}
 
   RegisterPair GetRegisterPairAt(size_t argument_index) {
     DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ea67dfd..328fc93 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -47,12 +47,16 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
+static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
   InvokeRuntimeCallingConvention()
       : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -193,6 +197,10 @@
   return blocked_registers + kNumberOfAllocIds;
 }
 
+static bool* GetBlockedXmmRegisters(bool* blocked_registers) {
+  return blocked_registers + kNumberOfCpuRegisters;
+}
+
 ManagedRegister CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type,
                                                        bool* blocked_registers) const {
   switch (type) {
@@ -238,8 +246,11 @@
     }
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << type;
+    case Primitive::kPrimDouble: {
+      XmmRegister reg = static_cast<XmmRegister>(AllocateFreeRegisterInternal(
+          GetBlockedXmmRegisters(blocked_registers), kNumberOfXmmRegisters));
+      return X86ManagedRegister::FromXmmRegister(reg);
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
@@ -316,16 +327,14 @@
 Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
       break;
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented type " << load->GetType();
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
 
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -346,6 +355,7 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
     case Primitive::kPrimNot: {
       uint32_t index = gp_index_++;
       if (index < calling_convention.GetNumberOfRegisters()) {
@@ -355,7 +365,8 @@
       }
     }
 
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble: {
       uint32_t index = gp_index_;
       gp_index_ += 2;
       if (index + 1 < calling_convention.GetNumberOfRegisters()) {
@@ -368,11 +379,6 @@
       }
     }
 
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented parameter type " << type;
-      break;
-
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unexpected parameter type " << type;
       break;
@@ -387,13 +393,27 @@
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
       __ movl(destination.AsX86().AsCpuRegister(), source.AsX86().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
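+      // movd moves the low 32 bits between a general-purpose register and an XMM register.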
+      __ movd(destination.AsX86().AsCpuRegister(), source.AsX86().AsXmmRegister());
     } else {
       DCHECK(source.IsStackSlot());
       __ movl(destination.AsX86().AsCpuRegister(), Address(ESP, source.GetStackIndex()));
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ movd(destination.AsX86().AsXmmRegister(), source.AsX86().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movaps(destination.AsX86().AsXmmRegister(), source.AsX86().AsXmmRegister());
+    } else {
+      DCHECK(source.IsStackSlot());
+      __ movss(destination.AsX86().AsXmmRegister(), Address(ESP, source.GetStackIndex()));
+    }
   } else {
+    DCHECK(destination.IsStackSlot());
     if (source.IsRegister()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsX86().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movss(Address(ESP, destination.GetStackIndex()), source.AsX86().AsXmmRegister());
     } else {
       DCHECK(source.IsStackSlot());
       __ pushl(Address(ESP, source.GetStackIndex()));
@@ -410,6 +430,8 @@
     if (source.IsRegister()) {
       __ movl(destination.AsX86().AsRegisterPairLow(), source.AsX86().AsRegisterPairLow());
       __ movl(destination.AsX86().AsRegisterPairHigh(), source.AsX86().AsRegisterPairHigh());
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else if (source.IsQuickParameter()) {
       uint32_t argument_index = source.GetQuickParameterIndex();
       InvokeDexCallingConvention calling_convention;
@@ -430,6 +452,8 @@
       __ movl(calling_convention.GetRegisterAt(argument_index), source.AsX86().AsRegisterPairLow());
       __ movl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)),
               source.AsX86().AsRegisterPairHigh());
+    } else if (source.IsFpuRegister()) {
+      LOG(FATAL) << "Unimplemented";
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movl(calling_convention.GetRegisterAt(argument_index),
@@ -437,6 +461,12 @@
       __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
       __ popl(Address(ESP, calling_convention.GetStackOffsetOf(argument_index + 1)));
     }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsDoubleStackSlot()) {
+      __ movsd(destination.AsX86().AsXmmRegister(), Address(ESP, source.GetStackIndex()));
+    } else {
+      LOG(FATAL) << "Unimplemented";
+    }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegister()) {
@@ -448,9 +478,10 @@
       uint32_t argument_index = source.GetQuickParameterIndex();
       __ movl(Address(ESP, destination.GetStackIndex()),
               calling_convention.GetRegisterAt(argument_index));
-      __ pushl(Address(ESP,
-          calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize()));
-      __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
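+      // The high word is already at the destination slot; just assert the offsets match.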
+      DCHECK_EQ(calling_convention.GetStackOffsetOf(argument_index + 1) + GetFrameSize(),
+                static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize)));
+    } else if (source.IsFpuRegister()) {
+      __ movsd(Address(ESP, destination.GetStackIndex()), source.AsX86().AsXmmRegister());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ pushl(Address(ESP, source.GetStackIndex()));
@@ -479,6 +510,7 @@
       __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
     }
   } else if (instruction->AsLoadLocal() != nullptr) {
+    int slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
     switch (instruction->GetType()) {
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
@@ -486,12 +518,13 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
-        Move32(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+      case Primitive::kPrimFloat:
+        Move32(location, Location::StackSlot(slot));
         break;
 
       case Primitive::kPrimLong:
-        Move64(location, Location::DoubleStackSlot(
-            GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+      case Primitive::kPrimDouble:
+        Move64(location, Location::DoubleStackSlot(slot));
         break;
 
       default:
@@ -506,15 +539,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         Move32(location, instruction->GetLocations()->Out());
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move64(location, instruction->GetLocations()->Out());
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   }
 }
@@ -560,22 +595,18 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  HCondition* condition = cond->AsCondition();
-  if (condition->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::Any());
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any(), Location::kDiesAtEntry);
   }
 }
 
 void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  HCondition* condition = cond->AsCondition();
-  if (condition->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     // Moves do not affect the eflags register, so if the condition is evaluated
     // just before the if, we don't need to evaluate it again.
-    if (!condition->IsBeforeWhenDisregardMoves(if_instr)) {
-      // Materialized condition, compare against 0
+    if (!cond->IsCondition() || !cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr)) {
+      // Materialized condition, compare against 0.
       Location lhs = if_instr->GetLocations()->InAt(0);
       if (lhs.IsRegister()) {
         __ cmpl(lhs.AsX86().AsCpuRegister(), Immediate(0));
@@ -585,8 +616,8 @@
     }
     __ j(kNotEqual,  codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   } else {
-    Location lhs = condition->GetLocations()->InAt(0);
-    Location rhs = condition->GetLocations()->InAt(1);
+    Location lhs = cond->GetLocations()->InAt(0);
+    Location rhs = cond->GetLocations()->InAt(1);
     // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
     if (rhs.IsRegister()) {
       __ cmpl(lhs.AsX86().AsCpuRegister(), rhs.AsX86().AsCpuRegister());
@@ -597,7 +628,7 @@
     } else {
       __ cmpl(lhs.AsX86().AsCpuRegister(), Address(ESP, rhs.GetStackIndex()));
     }
-    __ j(X86Condition(condition->GetCondition()),
+    __ j(X86Condition(cond->AsCondition()->GetCondition()),
          codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   }
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
@@ -631,15 +662,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+      LOG(FATAL) << "Unknown local type " << store->InputAt(1)->GetType();
   }
   store->SetLocations(locations);
 }
@@ -650,8 +683,8 @@
 void LocationsBuilderX86::VisitCondition(HCondition* comp) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry);
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -772,8 +805,14 @@
           0, Location::RegisterLocation(X86ManagedRegister::FromRegisterPair(EAX_EDX)));
       break;
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(
+          0, Location::FpuRegisterLocation(X86ManagedRegister::FromXmmRegister(XMM0)));
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+      LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
   }
 }
 
@@ -793,8 +832,13 @@
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86().AsRegisterPair(), EAX_EDX);
         break;
 
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86().AsXmmRegister(), XMM0);
+        break;
+
       default:
-        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+        LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
     }
   }
   codegen_->GenerateFrameExit();
@@ -865,7 +909,7 @@
 
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      locations->SetOut(Location::FpuRegisterLocation(X86ManagedRegister::FromXmmRegister(XMM0)));
       break;
   }
 
@@ -907,64 +951,74 @@
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+      break;
   }
 }
 
 void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
   LocationSummary* locations = add->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      DCHECK_EQ(locations->InAt(0).AsX86().AsCpuRegister(),
-                locations->Out().AsX86().AsCpuRegister());
-      if (locations->InAt(1).IsRegister()) {
-        __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
-                locations->InAt(1).AsX86().AsCpuRegister());
-      } else if (locations->InAt(1).IsConstant()) {
-        HConstant* instruction = locations->InAt(1).GetConstant();
+      DCHECK_EQ(first.AsX86().AsCpuRegister(), locations->Out().AsX86().AsCpuRegister());
+      if (second.IsRegister()) {
+        __ addl(first.AsX86().AsCpuRegister(), second.AsX86().AsCpuRegister());
+      } else if (second.IsConstant()) {
+        HConstant* instruction = second.GetConstant();
         Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ addl(locations->InAt(0).AsX86().AsCpuRegister(), imm);
+        __ addl(first.AsX86().AsCpuRegister(), imm);
       } else {
-        __ addl(locations->InAt(0).AsX86().AsCpuRegister(),
-                Address(ESP, locations->InAt(1).GetStackIndex()));
+        __ addl(first.AsX86().AsCpuRegister(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      DCHECK_EQ(locations->InAt(0).AsX86().AsRegisterPair(),
+      DCHECK_EQ(first.AsX86().AsRegisterPair(),
                 locations->Out().AsX86().AsRegisterPair());
-      if (locations->InAt(1).IsRegister()) {
-        __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
-                locations->InAt(1).AsX86().AsRegisterPairLow());
-        __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
-                locations->InAt(1).AsX86().AsRegisterPairHigh());
+      if (second.IsRegister()) {
+        __ addl(first.AsX86().AsRegisterPairLow(), second.AsX86().AsRegisterPairLow());
+        __ adcl(first.AsX86().AsRegisterPairHigh(), second.AsX86().AsRegisterPairHigh());
       } else {
-        __ addl(locations->InAt(0).AsX86().AsRegisterPairLow(),
-                Address(ESP, locations->InAt(1).GetStackIndex()));
-        __ adcl(locations->InAt(0).AsX86().AsRegisterPairHigh(),
-                Address(ESP, locations->InAt(1).GetHighStackIndex(kX86WordSize)));
+        __ addl(first.AsX86().AsRegisterPairLow(), Address(ESP, second.GetStackIndex()));
+        __ adcl(first.AsX86().AsRegisterPairHigh(),
+                Address(ESP, second.GetHighStackIndex(kX86WordSize)));
       }
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat: {
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsX86().AsXmmRegister(), second.AsX86().AsXmmRegister());
+      } else {
+        __ addss(first.AsX86().AsXmmRegister(), Address(ESP, second.GetStackIndex()));
+      }
       break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsX86().AsXmmRegister(), second.AsX86().AsXmmRegister());
+      } else {
+        __ addsd(first.AsX86().AsXmmRegister(), Address(ESP, second.GetStackIndex()));
+      }
+      break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
@@ -1094,8 +1148,8 @@
 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1159,14 +1213,20 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   Primitive::Type field_type = instruction->GetFieldType();
-  if (field_type == Primitive::kPrimBoolean || field_type == Primitive::kPrimByte) {
+  bool is_object_type = field_type == Primitive::kPrimNot;
+  bool is_byte_type = (field_type == Primitive::kPrimBoolean)
+      || (field_type == Primitive::kPrimByte);
+  // The register allocator does not support multiple
+  // inputs that die at entry with one in a specific register.
+  bool dies_at_entry = !is_object_type && !is_byte_type;
+  if (is_byte_type) {
     // Ensure the value is in a byte register.
-    locations->SetInAt(1, X86CpuLocation(EAX));
+    locations->SetInAt(1, X86CpuLocation(EAX), dies_at_entry);
   } else {
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry);
   }
   // Temporary registers for the write barrier.
-  if (field_type == Primitive::kPrimNot) {
+  if (is_object_type) {
     locations->AddTemp(Location::RequiresRegister());
     // Ensure the card is in a byte register.
     locations->AddTemp(X86CpuLocation(ECX));
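
For context on the "byte register" comments in this hunk: on 32-bit x86 only EAX, EBX, ECX and EDX expose 8-bit sub-registers (AL/BL/CL/DL), so a value written with movb must be allocated to one of them, which is why the value and the card-marking temp are pinned to EAX and ECX. A minimal standalone sketch of that rule (illustrative only, not part of the patch):

// Illustrative only -- mirrors the 32-bit x86 rule that movb can only take
// AL, CL, DL or BL as its register operand.
#include <cassert>

enum Register { EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 };

// A register has an 8-bit form exactly when its encoding is below 4.
static bool HasByteForm(Register reg) {
  return reg < ESP;
}

int main() {
  assert(HasByteForm(EAX));   // movb with AL is valid.
  assert(!HasByteForm(ESI));  // there is no 8-bit alias of ESI on 32-bit x86.
  return 0;
}
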
@@ -1238,7 +1298,7 @@
 void LocationsBuilderX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1300,8 +1360,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::Any());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
@@ -1310,13 +1371,16 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  DCHECK(obj.Equals(locations->Out()));
 
   if (obj.IsRegister()) {
     __ cmpl(obj.AsX86().AsCpuRegister(), Immediate(0));
-  } else {
-    DCHECK(locations->InAt(0).IsStackSlot());
+  } else if (obj.IsStackSlot()) {
     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    __ jmp(slow_path->GetEntryLabel());
+    return;
   }
   __ j(kEqual, slow_path->GetEntryLabel());
 }
@@ -1324,8 +1388,9 @@
 void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(
+      1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1433,13 +1498,21 @@
     locations->SetInAt(1, X86CpuLocation(calling_convention.GetRegisterAt(1)));
     locations->SetInAt(2, X86CpuLocation(calling_convention.GetRegisterAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (value_type == Primitive::kPrimBoolean || value_type == Primitive::kPrimByte) {
+    bool is_byte_type = (value_type == Primitive::kPrimBoolean)
+        || (value_type == Primitive::kPrimByte);
+    // We need the inputs to be different from the output in the case of a long operation.
+    // In case of a byte operation, the register allocator does not support multiple
+    // inputs that die at entry with one in a specific register.
+    bool dies_at_entry = value_type != Primitive::kPrimLong && !is_byte_type;
+    locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
+    locations->SetInAt(
+        1, Location::RegisterOrConstant(instruction->InputAt(1)), dies_at_entry);
+    if (is_byte_type) {
       // Ensure the value is in a byte register.
-      locations->SetInAt(2, X86CpuLocation(EAX));
+      locations->SetInAt(2, Location::ByteRegisterOrConstant(
+          X86ManagedRegister::FromCpuRegister(EAX), instruction->InputAt(2)), dies_at_entry);
     } else {
-      locations->SetInAt(2, Location::RequiresRegister());
+      locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)), dies_at_entry);
     }
   }
 }
@@ -1448,18 +1521,29 @@
   LocationSummary* locations = instruction->GetLocations();
   Register obj = locations->InAt(0).AsX86().AsCpuRegister();
   Location index = locations->InAt(1);
+  Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      ByteRegister value = locations->InAt(2).AsX86().AsByteRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ movb(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, offset), value.AsX86().AsByteRegister());
+        } else {
+          __ movb(Address(obj, offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset),
+                  value.AsX86().AsByteRegister());
+        } else {
+          __ movb(Address(obj, index.AsX86().AsCpuRegister(), TIMES_1, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1467,24 +1551,43 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register value = locations->InAt(2).AsX86().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ movw(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, offset), value.AsX86().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset),
+                  value.AsX86().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, index.AsX86().AsCpuRegister(), TIMES_2, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
 
     case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).AsX86().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movl(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, offset), value.AsX86().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset),
+                  value.AsX86().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_4, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1498,16 +1601,29 @@
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      X86ManagedRegister value = locations->InAt(2).AsX86();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movl(Address(obj, offset), value.AsRegisterPairLow());
-        __ movl(Address(obj, offset + kX86WordSize), value.AsRegisterPairHigh());
+        if (value.IsRegister()) {
+          __ movl(Address(obj, offset), value.AsX86().AsRegisterPairLow());
+          __ movl(Address(obj, offset + kX86WordSize), value.AsX86().AsRegisterPairHigh());
+        } else {
+          int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
+          __ movl(Address(obj, offset), Immediate(Low32Bits(val)));
+          __ movl(Address(obj, offset + kX86WordSize), Immediate(High32Bits(val)));
+        }
       } else {
-        __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset),
-                value.AsRegisterPairLow());
-        __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize),
-                value.AsRegisterPairHigh());
+        if (value.IsRegister()) {
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset),
+                  value.AsX86().AsRegisterPairLow());
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize),
+                  value.AsX86().AsRegisterPairHigh());
+        } else {
+          int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset),
+                  Immediate(Low32Bits(val)));
+          __ movl(Address(obj, index.AsX86().AsCpuRegister(), TIMES_8, data_offset + kX86WordSize),
+                  Immediate(High32Bits(val)));
+        }
       }
       break;
     }
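
The long case above stores a 64-bit constant as two 32-bit immediates: the low word at data_offset and the high word one kX86WordSize higher. A standalone sketch of the splitting helpers (illustrative; the real Low32Bits/High32Bits live in ART's utility headers):

// Illustrative only -- shows the intended low/high splitting of a 64-bit value.
#include <cassert>
#include <cstdint>

static uint32_t Low32Bits(int64_t value) {
  return static_cast<uint32_t>(value);
}

static uint32_t High32Bits(int64_t value) {
  return static_cast<uint32_t>(static_cast<uint64_t>(value) >> 32);
}

int main() {
  int64_t val = INT64_C(0x123456789ABCDEF0);
  // The 64-bit constant is written as two 32-bit immediates, low word first.
  assert(Low32Bits(val) == 0x9ABCDEF0u);
  assert(High32Bits(val) == 0x12345678u);
  return 0;
}
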
@@ -1523,7 +1639,7 @@
 
 void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
   instruction->SetLocations(locations);
 }
@@ -1541,8 +1657,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 23145bf..3e2ca90 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -32,11 +32,16 @@
 static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { ECX_EDX, EDX_EBX };
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static constexpr XmmRegister kParameterFpuRegisters[] = { };
+static constexpr size_t kParameterFpuRegistersLength = 0;
 
-class InvokeDexCallingConvention : public CallingConvention<Register> {
+class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
-  InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+  InvokeDexCallingConvention() : CallingConvention(
+      kParameterCoreRegisters,
+      kParameterCoreRegistersLength,
+      kParameterFpuRegisters,
+      kParameterFpuRegistersLength) {}
 
   RegisterPair GetRegisterPairAt(size_t argument_index) {
     DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 78c7d9d..5d04ca6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -51,12 +51,16 @@
 static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
+static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
-class InvokeRuntimeCallingConvention : public CallingConvention<Register> {
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeRuntimeCallingConvention()
       : CallingConvention(kRuntimeParameterCoreRegisters,
-                          kRuntimeParameterCoreRegistersLength) {}
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -219,8 +223,11 @@
     }
 
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented register type " << type;
+    case Primitive::kPrimDouble: {
+      size_t reg = AllocateFreeRegisterInternal(
+          blocked_registers + kNumberOfCpuRegisters, kNumberOfFloatRegisters);
+      return X86_64ManagedRegister::FromXmmRegister(static_cast<FloatRegister>(reg));
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
@@ -243,6 +250,12 @@
   blocked_registers[R13] = true;
   blocked_registers[R14] = true;
   blocked_registers[R15] = true;
+
+  bool* blocked_xmm_registers = blocked_registers + kNumberOfCpuRegisters;
+  blocked_xmm_registers[XMM12] = true;
+  blocked_xmm_registers[XMM13] = true;
+  blocked_xmm_registers[XMM14] = true;
+  blocked_xmm_registers[XMM15] = true;
 }
 
 void CodeGeneratorX86_64::GenerateFrameEntry() {
@@ -291,16 +304,14 @@
 Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
   switch (load->GetType()) {
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
       break;
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      return Location::StackSlot(GetStackSlot(load->GetLocal()));
-
     case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      LOG(FATAL) << "Unimplemented type " << load->GetType();
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
 
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -321,15 +332,36 @@
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
       __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movd(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsXmmRegister());
     } else if (source.IsStackSlot()) {
-      __ movl(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movl(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(destination.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(destination.AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ movd(destination.AsX86_64().AsXmmRegister(), source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movaps(destination.AsX86_64().AsXmmRegister(), source.AsX86_64().AsXmmRegister());
+    } else if (source.IsStackSlot()) {
+      __ movss(destination.AsX86_64().AsXmmRegister(),
+               Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(source.IsDoubleStackSlot());
+      __ movsd(destination.AsX86_64().AsXmmRegister(),
+               Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsStackSlot()) {
     if (source.IsRegister()) {
-      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+      __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
+               source.AsX86_64().AsXmmRegister());
     } else {
       DCHECK(source.IsStackSlot());
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -338,7 +370,11 @@
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegister()) {
-      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsX86_64().AsCpuRegister());
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
+    } else if (source.IsFpuRegister()) {
+      __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
+               source.AsX86_64().AsXmmRegister());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -373,15 +409,17 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
+      case Primitive::kPrimFloat:
         Move(location, Location::StackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
         break;
 
       case Primitive::kPrimLong:
+      case Primitive::kPrimDouble:
         Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented local type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected local type " << instruction->GetType();
     }
   } else {
     DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
@@ -393,11 +431,13 @@
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
       case Primitive::kPrimLong:
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
         Move(location, instruction->GetLocations()->Out());
         break;
 
       default:
-        LOG(FATAL) << "Unimplemented type " << instruction->GetType();
+        LOG(FATAL) << "Unexpected type " << instruction->GetType();
     }
   }
 }
@@ -443,21 +483,17 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  HCondition* condition = cond->AsCondition();
-  if (condition->NeedsMaterialization()) {
-    locations->SetInAt(0, Location::Any());
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::Any(), Location::kDiesAtEntry);
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  HCondition* condition = cond->AsCondition();
-  if (condition->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     // Moves do not affect the eflags register, so if the condition is evaluated
     // just before the if, we don't need to evaluate it again.
-    if (!condition->IsBeforeWhenDisregardMoves(if_instr)) {
+    if (!cond->IsCondition() || !cond->AsCondition()->IsBeforeWhenDisregardMoves(if_instr)) {
       // Materialized condition, compare against 0.
       Location lhs = if_instr->GetLocations()->InAt(0);
       if (lhs.IsRegister()) {
@@ -468,8 +504,8 @@
     }
     __ j(kNotEqual, codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   } else {
-    Location lhs = condition->GetLocations()->InAt(0);
-    Location rhs = condition->GetLocations()->InAt(1);
+    Location lhs = cond->GetLocations()->InAt(0);
+    Location rhs = cond->GetLocations()->InAt(1);
     if (rhs.IsRegister()) {
       __ cmpl(lhs.AsX86_64().AsCpuRegister(), rhs.AsX86_64().AsCpuRegister());
     } else if (rhs.IsConstant()) {
@@ -478,7 +514,7 @@
     } else {
       __ cmpl(lhs.AsX86_64().AsCpuRegister(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
     }
-    __ j(X86_64Condition(condition->GetCondition()),
+    __ j(X86_64Condition(cond->AsCondition()->GetCondition()),
          codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
   }
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
@@ -512,15 +548,17 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
+    case Primitive::kPrimFloat:
       locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
       locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
       break;
 
     default:
-      LOG(FATAL) << "Unimplemented local type " << store->InputAt(1)->GetType();
+      LOG(FATAL) << "Unexpected local type " << store->InputAt(1)->GetType();
   }
 }
 
@@ -530,8 +568,8 @@
 void LocationsBuilderX86_64::VisitCondition(HCondition* comp) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(comp, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(1, Location::Any(), Location::kDiesAtEntry);
   if (comp->NeedsMaterialization()) {
     locations->SetOut(Location::RequiresRegister());
   }
@@ -608,8 +646,8 @@
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(1, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -679,8 +717,14 @@
       locations->SetInAt(0, X86_64CpuLocation(RAX));
       break;
 
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0,
+          Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(XMM0)));
+      break;
+
     default:
-      LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+      LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
   }
 }
 
@@ -697,8 +741,14 @@
         DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsCpuRegister().AsRegister(), RAX);
         break;
 
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsX86_64().AsXmmRegister().AsFloatRegister(),
+                  XMM0);
+        break;
+
       default:
-        LOG(FATAL) << "Unimplemented return type " << ret->InputAt(0)->GetType();
+        LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
     }
   }
   codegen_->GenerateFrameExit();
@@ -734,10 +784,27 @@
       }
     }
 
-    case Primitive::kPrimDouble:
-    case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented parameter type " << type;
-      break;
+    case Primitive::kPrimFloat: {
+      uint32_t index = fp_index_++;
+      stack_index_++;
+      if (index < calling_convention.GetNumberOfFpuRegisters()) {
+        return Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(
+            calling_convention.GetFpuRegisterAt(index)));
+      } else {
+        return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
+      }
+    }
+
+    case Primitive::kPrimDouble: {
+      uint32_t index = fp_index_++;
+      stack_index_ += 2;
+      if (index < calling_convention.GetNumberOfFpuRegisters()) {
+        return Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(
+            calling_convention.GetFpuRegisterAt(index)));
+      } else {
+        return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
+      }
+    }
 
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unexpected parameter type " << type;
@@ -807,7 +874,8 @@
 
     case Primitive::kPrimDouble:
     case Primitive::kPrimFloat:
-      LOG(FATAL) << "Unimplemented return type " << invoke->GetType();
+      locations->SetOut(
+          Location::FpuRegisterLocation(X86_64ManagedRegister::FromXmmRegister(XMM0)));
       break;
   }
 }
@@ -845,6 +913,7 @@
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
+
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -852,52 +921,67 @@
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimDouble:
+    case Primitive::kPrimFloat: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
   LocationSummary* locations = add->GetLocations();
-  DCHECK_EQ(locations->InAt(0).AsX86_64().AsCpuRegister().AsRegister(),
-            locations->Out().AsX86_64().AsCpuRegister().AsRegister());
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  DCHECK(first.Equals(locations->Out()));
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (locations->InAt(1).IsRegister()) {
-        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
-                locations->InAt(1).AsX86_64().AsCpuRegister());
-      } else if (locations->InAt(1).IsConstant()) {
-        HConstant* instruction = locations->InAt(1).GetConstant();
+      if (second.IsRegister()) {
+        __ addl(first.AsX86_64().AsCpuRegister(), second.AsX86_64().AsCpuRegister());
+      } else if (second.IsConstant()) {
+        HConstant* instruction = second.GetConstant();
         Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(), imm);
+        __ addl(first.AsX86_64().AsCpuRegister(), imm);
       } else {
-        __ addl(locations->InAt(0).AsX86_64().AsCpuRegister(),
-                Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+        __ addl(first.AsX86_64().AsCpuRegister(),
+                Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
+
     case Primitive::kPrimLong: {
-      __ addq(locations->InAt(0).AsX86_64().AsCpuRegister(),
-              locations->InAt(1).AsX86_64().AsCpuRegister());
+      __ addq(first.AsX86_64().AsCpuRegister(), second.AsX86_64().AsCpuRegister());
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
+    case Primitive::kPrimFloat: {
+      if (second.IsFpuRegister()) {
+        __ addss(first.AsX86_64().AsXmmRegister(), second.AsX86_64().AsXmmRegister());
+      } else {
+        __ addss(first.AsX86_64().AsXmmRegister(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
       break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (second.IsFpuRegister()) {
+        __ addsd(first.AsX86_64().AsXmmRegister(), second.AsX86_64().AsXmmRegister());
+      } else {
+        __ addsd(first.AsX86_64().AsXmmRegister(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
+      }
+      break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented add type " << add->GetResultType();
+      LOG(FATAL) << "Unexpected add type " << add->GetResultType();
   }
 }
 
@@ -970,6 +1054,9 @@
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(X86_64CpuLocation(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(X86_64CpuLocation(calling_convention.GetRegisterAt(1)));
   locations->SetOut(X86_64CpuLocation(RAX));
 }
 
@@ -1031,10 +1118,13 @@
 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  // Temporary registers for the write barrier.
-  if (instruction->GetFieldType() == Primitive::kPrimNot) {
+  Primitive::Type field_type = instruction->GetFieldType();
+  bool is_object_type = field_type == Primitive::kPrimNot;
+  bool dies_at_entry = !is_object_type;
+  locations->SetInAt(0, Location::RequiresRegister(), dies_at_entry);
+  locations->SetInAt(1, Location::RequiresRegister(), dies_at_entry);
+  if (is_object_type) {
+    // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -1088,7 +1178,7 @@
 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1143,8 +1233,9 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::Any());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
@@ -1153,13 +1244,16 @@
 
   LocationSummary* locations = instruction->GetLocations();
   Location obj = locations->InAt(0);
-  DCHECK(obj.Equals(locations->Out()));
 
   if (obj.IsRegister()) {
     __ cmpl(obj.AsX86_64().AsCpuRegister(), Immediate(0));
-  } else {
-    DCHECK(locations->InAt(0).IsStackSlot());
+  } else if (obj.IsStackSlot()) {
     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
+    __ jmp(slow_path->GetEntryLabel());
+    return;
   }
   __ j(kEqual, slow_path->GetEntryLabel());
 }
@@ -1167,8 +1261,9 @@
 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+  locations->SetInAt(
+      1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1272,9 +1367,15 @@
     locations->SetInAt(1, X86_64CpuLocation(calling_convention.GetRegisterAt(1)));
     locations->SetInAt(2, X86_64CpuLocation(calling_convention.GetRegisterAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    locations->SetInAt(2, Location::RequiresRegister());
+    locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
+    locations->SetInAt(
+        1, Location::RegisterOrConstant(instruction->InputAt(1)), Location::kDiesAtEntry);
+    if (value_type == Primitive::kPrimLong) {
+      locations->SetInAt(2, Location::RequiresRegister(), Location::kDiesAtEntry);
+    } else {
+      locations->SetInAt(
+          2, Location::RegisterOrConstant(instruction->InputAt(2)), Location::kDiesAtEntry);
+    }
   }
 }
 
@@ -1282,18 +1383,28 @@
   LocationSummary* locations = instruction->GetLocations();
   CpuRegister obj = locations->InAt(0).AsX86_64().AsCpuRegister();
   Location index = locations->InAt(1);
+  Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ movb(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, offset), value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movb(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset), value);
+        if (value.IsRegister()) {
+          __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset),
+                  value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movb(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_1, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1301,24 +1412,42 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ movw(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, offset), value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset), value);
+        if (value.IsRegister()) {
+          __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset),
+                  value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movw(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_2, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
 
     case Primitive::kPrimInt: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ movl(Address(obj, offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, offset), value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       } else {
-        __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset), value);
+        if (value.IsRegister()) {
+          __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset),
+                  value.AsX86_64().AsCpuRegister());
+        } else {
+          __ movl(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_4, data_offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+        }
       }
       break;
     }
@@ -1332,12 +1461,14 @@
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister value = locations->InAt(2).AsX86_64().AsCpuRegister();
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ movq(Address(obj, offset), value);
+        DCHECK(value.IsRegister());
+        __ movq(Address(obj, offset), value.AsX86_64().AsCpuRegister());
       } else {
-        __ movq(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_8, data_offset), value);
+        DCHECK(value.IsRegister());
+        __ movq(Address(obj, index.AsX86_64().AsCpuRegister(), TIMES_8, data_offset),
+                value.AsX86_64().AsCpuRegister());
       }
       break;
     }
@@ -1354,7 +1485,7 @@
 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RequiresRegister(), Location::kDiesAtEntry);
   locations->SetOut(Location::RequiresRegister());
 }
 
@@ -1371,8 +1502,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // TODO: Have a normalization phase that makes this instruction never used.
-  locations->SetOut(Location::SameAsFirstInput());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index a299cf6..c81f785 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -28,13 +28,19 @@
 static constexpr size_t kX86_64WordSize = 8;
 
 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
+static constexpr FloatRegister kParameterFloatRegisters[] =
+    { XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 };
 
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
 
-class InvokeDexCallingConvention : public CallingConvention<Register> {
+class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
-  InvokeDexCallingConvention()
-      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+  InvokeDexCallingConvention() : CallingConvention(
+      kParameterCoreRegisters,
+      kParameterCoreRegistersLength,
+      kParameterFloatRegisters,
+      kParameterFloatRegistersLength) {}
 
  private:
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
@@ -42,13 +48,17 @@
 
 class InvokeDexCallingConventionVisitor {
  public:
-  InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {}
+  InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
 
   Location GetNextLocation(Primitive::Type type);
 
  private:
   InvokeDexCallingConvention calling_convention;
+  // The current index for cpu registers.
   uint32_t gp_index_;
+  // The current index for fpu registers.
+  uint32_t fp_index_;
+  // The current stack index.
   uint32_t stack_index_;
 
   DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
diff --git a/compiler/optimizing/constant_propagation_test.cc b/compiler/optimizing/constant_propagation_test.cc
index 5c8c709..342777a 100644
--- a/compiler/optimizing/constant_propagation_test.cc
+++ b/compiler/optimizing/constant_propagation_test.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <functional>
+
 #include "constant_propagation.h"
 #include "dead_code_elimination.h"
 #include "pretty_printer.h"
@@ -27,10 +29,12 @@
 static void TestCode(const uint16_t* data,
                      const std::string& expected_before,
                      const std::string& expected_after_cp,
-                     const std::string& expected_after_dce) {
+                     const std::string& expected_after_dce,
+                     std::function<void(HGraph*)> check_after_cp,
+                     Primitive::Type return_type = Primitive::kPrimInt) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = CreateCFG(&allocator, data);
+  HGraph* graph = CreateCFG(&allocator, data, return_type);
   ASSERT_NE(graph, nullptr);
 
   graph->BuildDominatorTree();
@@ -48,6 +52,8 @@
   std::string actual_after_cp = printer_after_cp.str();
   ASSERT_EQ(expected_after_cp, actual_after_cp);
 
+  check_after_cp(graph);
+
   DeadCodeElimination(graph).Run();
 
   StringPrettyPrinter printer_after_dce(graph);
@@ -100,6 +106,13 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the value of the computed constant.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst->IsIntConstant());
+    ASSERT_EQ(inst->AsIntConstant()->GetValue(), 3);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  3: IntConstant\n", removed },
@@ -107,7 +120,11 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp);
 }
 
 /**
@@ -165,6 +182,19 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the values of the computed constants.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst1->IsIntConstant());
+    ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 3);
+    HInstruction* inst2 = inst1->GetNext();
+    ASSERT_TRUE(inst2->IsIntConstant());
+    ASSERT_EQ(inst2->AsIntConstant()->GetValue(), 7);
+    HInstruction* inst3 = inst2->GetNext();
+    ASSERT_TRUE(inst3->IsIntConstant());
+    ASSERT_EQ(inst3->AsIntConstant()->GetValue(), 10);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  3: IntConstant\n",  removed },
@@ -176,7 +206,11 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp);
 }
 
 /**
@@ -218,6 +252,13 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the value of the computed constant.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst->IsIntConstant());
+    ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  3: IntConstant\n", removed },
@@ -225,7 +266,11 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp);
 }
 
 #define SIX_REGISTERS_CODE_ITEM(...)                                     \
@@ -272,6 +317,13 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the value of the computed constant.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst->IsLongConstant());
+    ASSERT_EQ(inst->AsLongConstant()->GetValue(), 3);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  6: LongConstant\n", removed },
@@ -279,7 +331,12 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp,
+           Primitive::kPrimLong);
 }
 
 /**
@@ -323,6 +380,13 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the value of the computed constant.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst->IsLongConstant());
+    ASSERT_EQ(inst->AsLongConstant()->GetValue(), 1);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  6: LongConstant\n", removed },
@@ -330,7 +394,12 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp,
+           Primitive::kPrimLong);
 }
 
 /**
@@ -370,26 +439,26 @@
 
   std::string expected_before =
     "BasicBlock 0, succ: 1\n"
-    "  3: IntConstant [9]\n"
-    "  5: IntConstant [9]\n"
-    "  13: IntConstant [14]\n"
-    "  18: IntConstant [19]\n"
-    "  24: IntConstant [25]\n"
+    "  3: IntConstant [9]\n"            // v0 <- 0
+    "  5: IntConstant [9]\n"            // v1 <- 1
+    "  13: IntConstant [14]\n"          // const 3
+    "  18: IntConstant [19]\n"          // const 2
+    "  24: IntConstant [25]\n"          // const 4
     "  30: SuspendCheck\n"
     "  31: Goto 1\n"
     "BasicBlock 1, pred: 0, succ: 3\n"
-    "  9: Add(3, 5) [19]\n"
-    "  11: Goto 3\n"
-    "BasicBlock 2, pred: 3, succ: 4\n"
-    "  14: Add(19, 13) [25]\n"
-    "  16: Goto 4\n"
-    "BasicBlock 3, pred: 1, succ: 2\n"
-    "  19: Add(9, 18) [14]\n"
+    "  9: Add(3, 5) [19]\n"             // v2 <- v0 + v1 = 0 + 1 = 1
+    "  11: Goto 3\n"                    // goto L2
+    "BasicBlock 2, pred: 3, succ: 4\n"  // L1:
+    "  14: Add(19, 13) [25]\n"          // v1 <- v0 + 3 = 3 + 3 = 6
+    "  16: Goto 4\n"                    // goto L3
+    "BasicBlock 3, pred: 1, succ: 2\n"  // L2:
+    "  19: Add(9, 18) [14]\n"           // v0 <- v2 + 2 = 1 + 2 = 3
     "  21: SuspendCheck\n"
-    "  22: Goto 2\n"
-    "BasicBlock 4, pred: 2, succ: 5\n"
-    "  25: Add(14, 24) [28]\n"
-    "  28: Return(25)\n"
+    "  22: Goto 2\n"                    // goto L1
+    "BasicBlock 4, pred: 2, succ: 5\n"  // L3:
+    "  25: Add(14, 24) [28]\n"          // v2 <- v1 + 4 = 6 + 4 = 10
+    "  28: Return(25)\n"                // return v2
     "BasicBlock 5, pred: 4\n"
     "  29: Exit\n";
 
@@ -408,6 +477,22 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the values of the computed constants.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst1 = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst1->IsIntConstant());
+    ASSERT_EQ(inst1->AsIntConstant()->GetValue(), 1);
+    HInstruction* inst2 = graph->GetBlock(2)->GetFirstInstruction();
+    ASSERT_TRUE(inst2->IsIntConstant());
+    ASSERT_EQ(inst2->AsIntConstant()->GetValue(), 6);
+    HInstruction* inst3 = graph->GetBlock(3)->GetFirstInstruction();
+    ASSERT_TRUE(inst3->IsIntConstant());
+    ASSERT_EQ(inst3->AsIntConstant()->GetValue(), 3);
+    HInstruction* inst4 = graph->GetBlock(4)->GetFirstInstruction();
+    ASSERT_TRUE(inst4->IsIntConstant());
+    ASSERT_EQ(inst4->AsIntConstant()->GetValue(), 10);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  3: IntConstant\n",     removed },
@@ -418,7 +503,11 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp);
 }
 
 
@@ -473,6 +562,13 @@
   };
   std::string expected_after_cp = Patch(expected_before, expected_cp_diff);
 
+  // Check the values of the computed constants.
+  auto check_after_cp = [](HGraph* graph) {
+    HInstruction* inst = graph->GetBlock(1)->GetFirstInstruction();
+    ASSERT_TRUE(inst->IsIntConstant());
+    ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1);
+  };
+
   // Expected difference after dead code elimination.
   diff_t expected_dce_diff = {
     { "  3: IntConstant [15, 22]\n", "  3: IntConstant [22]\n" },
@@ -481,7 +577,11 @@
   };
   std::string expected_after_dce = Patch(expected_after_cp, expected_dce_diff);
 
-  TestCode(data, expected_before, expected_after_cp, expected_after_dce);
+  TestCode(data,
+           expected_before,
+           expected_after_cp,
+           expected_after_dce,
+           check_after_cp);
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0fb4737..686a0b0 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -158,6 +158,7 @@
       }
     }
     output_ << ")";
+    output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
   }
 
   void VisitInstruction(HInstruction* instruction) {
@@ -191,6 +192,7 @@
           DumpLocation(locations->Out(), instruction->GetType());
         }
       }
+      output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
     }
   }
 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a0de73d..2d9e35c 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -38,4 +38,21 @@
   block->RemoveInstruction(check);
 }
 
+void InstructionSimplifier::VisitEqual(HEqual* equal) {
+  HInstruction* input1 = equal->InputAt(0);
+  HInstruction* input2 = equal->InputAt(1);
+  if (input1->GetType() == Primitive::kPrimBoolean && input2->IsIntConstant()) {
+    if (input2->AsIntConstant()->GetValue() == 1) {
+      // Replace (bool_value == 1) with bool_value
+      equal->ReplaceWith(equal->InputAt(0));
+      equal->GetBlock()->RemoveInstruction(equal);
+    } else {
+      // Replace (bool_value == 0) with !bool_value
+      DCHECK_EQ(input2->AsIntConstant()->GetValue(), 0);
+      equal->GetBlock()->ReplaceAndRemoveInstructionWith(
+          equal, new (GetGraph()->GetArena()) HNot(input1));
+    }
+  }
+}
+
 }  // namespace art
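
As a side note on the VisitEqual rewrite above: it only fires when one operand is boolean-typed and the other is the integer constant 1 or 0. A minimal standalone sketch of the intended semantics (plain C++, not ART code; the names are illustrative only):

    #include <cassert>

    // (b == 1) folds to b itself, (b == 0) folds to !b; once the operand is known
    // to be a 0/1 boolean value, no materialized comparison is needed.
    static bool SimplifiedEqual(bool b, int constant) {
      return constant == 1 ? b : !b;
    }

    int main() {
      for (bool b : {false, true}) {
        assert(SimplifiedEqual(b, 1) == (b == 1));
        assert(SimplifiedEqual(b, 0) == (b == 0));
      }
      return 0;
    }
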
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index b2f3f52..d74b624 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -32,6 +32,7 @@
 
  private:
   virtual void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
+  virtual void VisitEqual(HEqual* equal) OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 03f8625..8be4746 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -268,7 +268,7 @@
   range = interval->GetFirstRange();
   // Instruction is consumed by the if.
   ASSERT_EQ(14u, range->GetStart());
-  ASSERT_EQ(17u, range->GetEnd());
+  ASSERT_EQ(16u, range->GetEnd());
   ASSERT_TRUE(range->GetNext() == nullptr);
 }
 
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 1c36cdf..7b09241 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -25,13 +25,16 @@
       temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
       environment_(instruction->GetBlock()->GetGraph()->GetArena(),
                    instruction->EnvironmentSize()),
+      dies_at_entry_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
       call_kind_(call_kind),
       stack_mask_(nullptr),
       register_mask_(0),
       live_registers_() {
   inputs_.SetSize(instruction->InputCount());
+  dies_at_entry_.SetSize(instruction->InputCount());
   for (size_t i = 0; i < instruction->InputCount(); ++i) {
     inputs_.Put(i, Location());
+    dies_at_entry_.Put(i, false);
   }
   environment_.SetSize(instruction->EnvironmentSize());
   for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
@@ -52,4 +55,15 @@
       : Location::RequiresRegister();
 }
 
+Location Location::ByteRegisterOrConstant(ManagedRegister reg, HInstruction* instruction) {
+  return instruction->IsConstant()
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RegisterLocation(reg);
+}
+
+std::ostream& operator<<(std::ostream& os, const Location& location) {
+  os << location.DebugString();
+  return os;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index f358e05..5f85b6a 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -34,34 +34,44 @@
  */
 class Location : public ValueObject {
  public:
+  static constexpr bool kDiesAtEntry = true;
+
   enum Kind {
     kInvalid = 0,
     kConstant = 1,
-    kStackSlot = 2,  // Word size slot.
+    kStackSlot = 2,  // 32bit stack slot.
     kDoubleStackSlot = 3,  // 64bit stack slot.
-    kRegister = 4,
+
+    kRegister = 4,  // Core register.
+
+    // We do not use the value 5 because it conflicts with kLocationConstantMask.
+    kDoNotUse = 5,
+
+    kFpuRegister = 6,  // Floating point register.
+
     // On 32-bit architectures, quick can pass a long where the
     // low bits are in the last parameter register, and the high
     // bits are in a stack slot. The kQuickParameter kind is for
     // handling this special case.
-    kQuickParameter = 6,
+    kQuickParameter = 7,
 
     // Unallocated location represents a location that is not fixed and can be
     // allocated by a register allocator.  Each unallocated location has
     // a policy that specifies what kind of location is suitable. Payload
     // contains register allocation policy.
-    kUnallocated = 7,
+    kUnallocated = 8,
   };
 
   Location() : value_(kInvalid) {
-    // Verify that non-tagged location kinds do not interfere with kConstantTag.
-    COMPILE_ASSERT((kInvalid & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kUnallocated & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kStackSlot & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kDoubleStackSlot & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kRegister & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kQuickParameter & kLocationTagMask) != kConstant, TagError);
-    COMPILE_ASSERT((kConstant & kLocationTagMask) == kConstant, TagError);
+    // Verify that non-constant location kinds do not interfere with kConstant.
+    COMPILE_ASSERT((kInvalid & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kUnallocated & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kStackSlot & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kDoubleStackSlot & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kRegister & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kQuickParameter & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kFpuRegister & kLocationConstantMask) != kConstant, TagError);
+    COMPILE_ASSERT((kConstant & kLocationConstantMask) == kConstant, TagError);
 
     DCHECK(!IsValid());
   }
@@ -74,7 +84,7 @@
   }
 
   bool IsConstant() const {
-    return (value_ & kLocationTagMask) == kConstant;
+    return (value_ & kLocationConstantMask) == kConstant;
   }
 
   static Location ConstantLocation(HConstant* constant) {
@@ -84,7 +94,7 @@
 
   HConstant* GetConstant() const {
     DCHECK(IsConstant());
-    return reinterpret_cast<HConstant*>(value_ & ~kLocationTagMask);
+    return reinterpret_cast<HConstant*>(value_ & ~kLocationConstantMask);
   }
 
   bool IsValid() const {
@@ -105,12 +115,20 @@
     return Location(kRegister, reg.RegId());
   }
 
+  static Location FpuRegisterLocation(ManagedRegister reg) {
+    return Location(kFpuRegister, reg.RegId());
+  }
+
   bool IsRegister() const {
     return GetKind() == kRegister;
   }
 
+  bool IsFpuRegister() const {
+    return GetKind() == kFpuRegister;
+  }
+
   ManagedRegister reg() const {
-    DCHECK(IsRegister());
+    DCHECK(IsRegister() || IsFpuRegister());
     return static_cast<ManagedRegister>(GetPayload());
   }
 
@@ -190,7 +208,11 @@
       case kQuickParameter: return "Q";
       case kUnallocated: return "U";
       case kConstant: return "C";
+      case kFpuRegister: return "F";
+      case kDoNotUse:
+        LOG(FATAL) << "Should not use this location kind";
     }
+    UNREACHABLE();
     return "?";
   }
 
@@ -198,6 +220,7 @@
   enum Policy {
     kAny,
     kRequiresRegister,
+    kRequiresFpuRegister,
     kSameAsFirstInput,
   };
 
@@ -218,7 +241,12 @@
     return UnallocatedLocation(kRequiresRegister);
   }
 
+  static Location RequiresFpuRegister() {
+    return UnallocatedLocation(kRequiresFpuRegister);
+  }
+
   static Location RegisterOrConstant(HInstruction* instruction);
+  static Location ByteRegisterOrConstant(ManagedRegister reg, HInstruction* instruction);
 
   // The location of the first input to the instruction will be
   // used to replace this unallocated location.
@@ -239,7 +267,7 @@
   // Number of bits required to encode Kind value.
   static constexpr uint32_t kBitsForKind = 4;
   static constexpr uint32_t kBitsForPayload = kWordSize * kBitsPerByte - kBitsForKind;
-  static constexpr uword kLocationTagMask = 0x3;
+  static constexpr uword kLocationConstantMask = 0x3;
 
   explicit Location(uword value) : value_(value) {}
 
@@ -312,7 +340,8 @@
 
   LocationSummary(HInstruction* instruction, CallKind call_kind = kNoCall);
 
-  void SetInAt(uint32_t at, Location location) {
+  void SetInAt(uint32_t at, Location location, bool dies_at_entry = false) {
+    dies_at_entry_.Put(at, dies_at_entry);
     inputs_.Put(at, location);
   }
 
@@ -390,10 +419,12 @@
   bool InputOverlapsWithOutputOrTemp(uint32_t input, bool is_environment) const {
     if (is_environment) return true;
     Location location = Out();
-    // TODO: Add more policies.
     if (input == 0 && location.IsUnallocated() && location.GetPolicy() == Location::kSameAsFirstInput) {
       return false;
     }
+    if (dies_at_entry_.Get(input)) {
+      return false;
+    }
     return true;
   }
 
@@ -401,6 +432,7 @@
   GrowableArray<Location> inputs_;
   GrowableArray<Location> temps_;
   GrowableArray<Location> environment_;
+  GrowableArray<bool> dies_at_entry_;
   Location output_;
   const CallKind call_kind_;
 
@@ -416,6 +448,8 @@
   DISALLOW_COPY_AND_ASSIGN(LocationSummary);
 };
 
+std::ostream& operator<<(std::ostream& os, const Location& location);
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_LOCATIONS_H_
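
A note on the constant encoding behind the new kLocationConstantMask checks above: because the mask covers only the low two bits, a constant Location is simply the arena pointer to the HConstant with the kConstant tag OR'ed into it. A standalone sketch of that tagging scheme, assuming arena objects are at least 4-byte aligned (not ART code):

    #include <cassert>
    #include <cstdint>

    int main() {
      constexpr uintptr_t kConstantTag  = 1;    // mirrors Kind::kConstant
      constexpr uintptr_t kConstantMask = 0x3;  // mirrors kLocationConstantMask

      alignas(4) static int fake_constant = 42;  // stands in for an HConstant
      uintptr_t value = reinterpret_cast<uintptr_t>(&fake_constant) | kConstantTag;

      // IsConstant(): test the low two bits against the tag.
      assert((value & kConstantMask) == kConstantTag);
      // GetConstant(): strip the tag to recover the pointer.
      assert(*reinterpret_cast<int*>(value & ~kConstantMask) == 42);
      return 0;
    }
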
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 5c4ab8e..4cac319 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -308,6 +308,14 @@
   return false;
 }
 
+static void UpdateInputsUsers(HInstruction* instruction) {
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    instruction->InputAt(i)->AddUseAt(instruction, i);
+  }
+  // Environment should be created later.
+  DCHECK(!instruction->HasEnvironment());
+}
+
 void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor) {
   DCHECK(cursor->AsPhi() == nullptr);
   DCHECK(instruction->AsPhi() == nullptr);
@@ -325,6 +333,7 @@
   }
   instruction->SetBlock(this);
   instruction->SetId(GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(instruction);
 }
 
 void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial,
@@ -342,6 +351,7 @@
   DCHECK_EQ(instruction->GetId(), -1);
   instruction->SetBlock(block);
   instruction->SetId(block->GetGraph()->GetNextInstructionId());
+  UpdateInputsUsers(instruction);
   instruction_list->AddInstruction(instruction);
 }
 
@@ -421,9 +431,6 @@
     instruction->previous_ = last_instruction_;
     last_instruction_ = instruction;
   }
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
-    instruction->InputAt(i)->AddUseAt(instruction, i);
-  }
 }
 
 void HInstructionList::RemoveInstruction(HInstruction* instruction) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3d65366..fc5b06d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -93,6 +93,7 @@
 
   ArenaAllocator* GetArena() const { return arena_; }
   const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
+  HBasicBlock* GetBlock(size_t id) const { return blocks_.Get(id); }
 
   HBasicBlock* GetEntryBlock() const { return entry_block_; }
   HBasicBlock* GetExitBlock() const { return exit_block_; }
@@ -1143,8 +1144,12 @@
   HEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x == y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x == y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x == y ? 1 : 0;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x == y ? 1 : 0;
+  }
 
   DECLARE_INSTRUCTION(Equal);
 
@@ -1161,8 +1166,12 @@
   HNotEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x != y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x != y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x != y ? 1 : 0;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x != y ? 1 : 0;
+  }
 
   DECLARE_INSTRUCTION(NotEqual);
 
@@ -1179,8 +1188,12 @@
   HLessThan(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x < y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x < y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x < y ? 1 : 0;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x < y ? 1 : 0;
+  }
 
   DECLARE_INSTRUCTION(LessThan);
 
@@ -1197,8 +1210,12 @@
   HLessThanOrEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x <= y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x <= y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x <= y ? 1 : 0;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x <= y ? 1 : 0;
+  }
 
   DECLARE_INSTRUCTION(LessThanOrEqual);
 
@@ -1215,8 +1232,12 @@
   HGreaterThan(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x > y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x > y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x > y ? 1 : 0;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x > y ? 1 : 0;
+  }
 
   DECLARE_INSTRUCTION(GreaterThan);
 
@@ -1233,8 +1254,12 @@
   HGreaterThanOrEqual(HInstruction* first, HInstruction* second)
       : HCondition(first, second) {}
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x >= y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x >= y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x >= y ? 1 : 0;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x >= y ? 1 : 0;
+  }
 
   DECLARE_INSTRUCTION(GreaterThanOrEqual);
 
@@ -1257,13 +1282,13 @@
     DCHECK_EQ(type, second->GetType());
   }
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const {
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
     return
       x == y ? 0 :
       x > y ? 1 :
       -1;
   }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const {
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
     return
       x == y ? 0 :
       x > y ? 1 :
@@ -1490,8 +1515,12 @@
 
   virtual bool IsCommutative() { return true; }
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x + y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x + y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x + y;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x + y;
+  }
 
   DECLARE_INSTRUCTION(Add);
 
@@ -1506,8 +1535,12 @@
 
   virtual bool IsCommutative() { return false; }
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const { return x + y; }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x + y; }
+  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+    return x - y;
+  }
+  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+    return x - y;
+  }
 
   DECLARE_INSTRUCTION(Sub);
 
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index b75bacb..70dd8d7 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -63,4 +63,55 @@
   ASSERT_FALSE(parameter->HasUses());
 }
 
+/**
+ * Test that inserting an instruction in the graph updates user lists.
+ */
+TEST(Node, InsertInstruction) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  HInstruction* parameter2 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter1);
+  entry->AddInstruction(parameter2);
+  entry->AddInstruction(new (&allocator) HExit());
+
+  ASSERT_FALSE(parameter1->HasUses());
+  ASSERT_EQ(parameter1->NumberOfUses(), 0u);
+
+  HInstruction* to_insert = new (&allocator) HNullCheck(parameter1, 0);
+  entry->InsertInstructionBefore(to_insert, parameter2);
+
+  ASSERT_TRUE(parameter1->HasUses());
+  ASSERT_EQ(parameter1->NumberOfUses(), 1u);
+}
+
+/**
+ * Test that adding an instruction in the graph updates user lists.
+ */
+TEST(Node, AddInstruction) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+
+  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+
+  ASSERT_FALSE(parameter->HasUses());
+  ASSERT_EQ(parameter->NumberOfUses(), 0u);
+
+  HInstruction* to_add = new (&allocator) HNullCheck(parameter, 0);
+  entry->AddInstruction(to_add);
+
+  ASSERT_TRUE(parameter->HasUses());
+  ASSERT_EQ(parameter->NumberOfUses(), 1u);
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 65bdb18..3cf5a0b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -28,6 +28,7 @@
 #include "gvn.h"
 #include "instruction_simplifier.h"
 #include "nodes.h"
+#include "prepare_for_register_allocation.h"
 #include "register_allocator.h"
 #include "ssa_phi_elimination.h"
 #include "ssa_liveness_analysis.h"
@@ -265,6 +266,7 @@
     InstructionSimplifier(graph).Run();
     GlobalValueNumberer(graph->GetArena(), graph).Run();
     visualizer.DumpGraph(kGVNPassName);
+    PrepareForRegisterAllocation(graph).Run();
 
     SsaLivenessAnalysis liveness(*graph, codegen);
     liveness.Analyze();
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6dd53e5..5b693dd 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -68,8 +68,10 @@
 }
 
 // Create a control-flow graph from Dex instructions.
-inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data) {
-  HGraphBuilder builder(allocator);
+inline HGraph* CreateCFG(ArenaAllocator* allocator,
+                         const uint16_t* data,
+                         Primitive::Type return_type = Primitive::kPrimInt) {
+  HGraphBuilder builder(allocator, return_type);
   const DexFile::CodeItem* item =
     reinterpret_cast<const DexFile::CodeItem*>(data);
   HGraph* graph = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
new file mode 100644
index 0000000..bfbbab5
--- /dev/null
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "prepare_for_register_allocation.h"
+
+namespace art {
+
+void PrepareForRegisterAllocation::Run() {
+  // Order does not matter.
+  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    // No need to visit the phis.
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      it.Current()->Accept(this);
+    }
+  }
+}
+
+void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
+  check->ReplaceWith(check->InputAt(0));
+}
+
+void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
+  check->ReplaceWith(check->InputAt(0));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
new file mode 100644
index 0000000..37f2871
--- /dev/null
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
+#define ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
+
+#include "nodes.h"
+
+namespace art {
+
+/**
+ * A simplification pass over the graph before doing register allocation.
+ * For example, it redirects uses of null checks and bounds checks to their
+ * original objects, to avoid creating a live range for these checks.
+ */
+class PrepareForRegisterAllocation : public HGraphVisitor {
+ public:
+  explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphVisitor(graph) {}
+
+  void Run();
+
+ private:
+  virtual void VisitNullCheck(HNullCheck* check) OVERRIDE;
+  virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
+
+  DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_
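
What VisitNullCheck and VisitBoundsCheck do above is a plain use-list redirection: every user of the check is rewired to the checked value, so the check defines nothing that would need a register or a live range. A toy model of that redirection (standalone C++, not the ART use-list implementation):

    #include <cassert>
    #include <vector>

    struct Node {
      std::vector<Node**> use_sites;  // every field that currently points at this node
    };

    // Rough counterpart of HInstruction::ReplaceWith(): redirect all users to `replacement`.
    static void ReplaceWith(Node* node, Node* replacement) {
      for (Node** site : node->use_sites) {
        *site = replacement;
        replacement->use_sites.push_back(site);
      }
      node->use_sites.clear();
    }

    int main() {
      Node object, check;
      Node* field_get_input = &check;            // a field get reads through the null check
      check.use_sites.push_back(&field_get_input);

      ReplaceWith(&check, &object);              // what the pass does for the check
      assert(field_get_input == &object);        // the user now reads the object directly
      assert(check.use_sites.empty());           // the check has no users left
      return 0;
    }
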
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1d1d694..3ee1afe 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -150,8 +150,7 @@
     if (temp.IsRegister()) {
       BlockRegister(temp, position, position + 1, Primitive::kPrimInt);
     } else {
-      LiveInterval* interval =
-          LiveInterval::MakeTempInterval(allocator_, instruction, Primitive::kPrimInt);
+      LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
       temp_intervals_.Add(interval);
       interval->AddRange(position, position + 1);
       unhandled_core_intervals_.Add(interval);
@@ -486,12 +485,18 @@
     reg = current->GetRegister();
     DCHECK_NE(free_until[reg], 0u);
   } else {
-    // Pick the register that is free the longest.
-    for (size_t i = 0; i < number_of_registers_; ++i) {
-      if (IsBlocked(i)) continue;
-      if (reg == -1 || free_until[i] > free_until[reg]) {
-        reg = i;
-        if (free_until[i] == kMaxLifetimePosition) break;
+    int hint = current->FindFirstRegisterHint(free_until);
+    if (hint != kNoRegister) {
+      DCHECK(!IsBlocked(hint));
+      reg = hint;
+    } else {
+      // Pick the register that is free the longest.
+      for (size_t i = 0; i < number_of_registers_; ++i) {
+        if (IsBlocked(i)) continue;
+        if (reg == -1 || free_until[i] > free_until[reg]) {
+          reg = i;
+          if (free_until[i] == kMaxLifetimePosition) break;
+        }
       }
     }
   }
@@ -654,10 +659,6 @@
   }
 }
 
-static bool NeedTwoSpillSlot(Primitive::Type type) {
-  return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
-}
-
 void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
   LiveInterval* parent = interval->GetParent();
 
@@ -698,7 +699,7 @@
     }
   }
 
-  if (NeedTwoSpillSlot(parent->GetType())) {
+  if (parent->NeedsTwoSpillSlots()) {
     if (slot == spill_slots_.Size()) {
       // We need a new spill slot.
       spill_slots_.Add(end);
@@ -722,24 +723,6 @@
   parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
 }
 
-static Location ConvertToLocation(LiveInterval* interval) {
-  if (interval->HasRegister()) {
-    return Location::RegisterLocation(ManagedRegister(interval->GetRegister()));
-  } else {
-    HInstruction* defined_by = interval->GetParent()->GetDefinedBy();
-    if (defined_by->IsConstant()) {
-      return defined_by->GetLocations()->Out();
-    } else {
-      DCHECK(interval->GetParent()->HasSpillSlot());
-      if (NeedTwoSpillSlot(interval->GetType())) {
-        return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot());
-      } else {
-        return Location::StackSlot(interval->GetParent()->GetSpillSlot());
-      }
-    }
-  }
-}
-
 // We create a special marker for input moves to differentiate them from
 // moves created during resolution. They must be different instructions
 // because the input moves work on the assumption that the interval moves
@@ -795,7 +778,7 @@
     move = at->GetNext()->AsParallelMove();
     // This is a parallel move for connecting siblings in a same block. We need to
     // differentiate it with moves for connecting blocks, and input moves.
-    if (move == nullptr || move->GetLifetimePosition() != position) {
+    if (move == nullptr || IsInputMove(move) || move->GetLifetimePosition() > position) {
       move = new (allocator_) HParallelMove(allocator_);
       move->SetLifetimePosition(position);
       at->GetBlock()->InsertInstructionBefore(move, at->GetNext());
@@ -803,16 +786,11 @@
   } else {
     // Move must happen before the instruction.
     HInstruction* previous = at->GetPrevious();
-    if (previous != nullptr && previous->IsParallelMove()) {
+    if (previous != nullptr && previous->IsParallelMove() && IsInputMove(previous)) {
       // This is a parallel move for connecting siblings in a same block. We need to
-      // differentiate it with moves for connecting blocks, and input moves.
-      if (previous->GetLifetimePosition() != position) {
-        // If the previous instruction of the previous instruction is not a parallel
-        // move, we have to insert the new parallel move before the input or connecting
-        // block moves.
-        at = previous;
-        previous = previous->GetPrevious();
-      }
+      // differentiate it with input moves.
+      at = previous;
+      previous = previous->GetPrevious();
     }
     if (previous == nullptr
         || !previous->IsParallelMove()
@@ -830,6 +808,7 @@
       move = previous->AsParallelMove();
     }
   }
+  DCHECK_EQ(move->GetLifetimePosition(), position);
   move->AddMove(new (allocator_) MoveOperands(source, destination, instruction));
 }
 
@@ -906,7 +885,7 @@
     // We spill eagerly, so move must be at definition.
     InsertMoveAfter(interval->GetDefinedBy(),
                     Location::RegisterLocation(ManagedRegister(interval->GetRegister())),
-                    NeedTwoSpillSlot(interval->GetType())
+                    interval->NeedsTwoSpillSlots()
                         ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
                         : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
@@ -915,7 +894,7 @@
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
   do {
-    Location source = ConvertToLocation(current);
+    Location source = current->ToLocation();
 
     // Walk over all uses covered by this interval, and update the location
     // information.
@@ -940,7 +919,7 @@
     if (next_sibling != nullptr
         && next_sibling->HasRegister()
         && current->GetEnd() == next_sibling->GetStart()) {
-      Location destination = ConvertToLocation(next_sibling);
+      Location destination = next_sibling->ToLocation();
       InsertParallelMoveAt(current->GetEnd(), interval->GetDefinedBy(), source, destination);
     }
 
@@ -1030,27 +1009,17 @@
   if (from->GetSuccessors().Size() == 1) {
     InsertParallelMoveAtExitOf(from,
                                interval->GetParent()->GetDefinedBy(),
-                               ConvertToLocation(source),
-                               ConvertToLocation(destination));
+                               source->ToLocation(),
+                               destination->ToLocation());
   } else {
     DCHECK_EQ(to->GetPredecessors().Size(), 1u);
     InsertParallelMoveAtEntryOf(to,
                                 interval->GetParent()->GetDefinedBy(),
-                                ConvertToLocation(source),
-                                ConvertToLocation(destination));
+                                source->ToLocation(),
+                                destination->ToLocation());
   }
 }
 
-// Returns the location of `interval`, or siblings of `interval`, at `position`.
-static Location FindLocationAt(LiveInterval* interval, size_t position) {
-  LiveInterval* current = interval;
-  while (!current->Covers(position)) {
-    current = current->GetNextSibling();
-    DCHECK(current != nullptr);
-  }
-  return ConvertToLocation(current);
-}
-
 void RegisterAllocator::Resolve() {
   codegen_->ComputeFrameSize(
       spill_slots_.Size(), maximum_number_of_live_registers_, reserved_out_slots_);
@@ -1077,7 +1046,7 @@
       }
     }
 
-    Location source = ConvertToLocation(current);
+    Location source = current->ToLocation();
 
     if (location.IsUnallocated()) {
       if (location.GetPolicy() == Location::kSameAsFirstInput) {
@@ -1117,9 +1086,9 @@
         HBasicBlock* predecessor = current->GetPredecessors().Get(i);
         DCHECK_EQ(predecessor->GetSuccessors().Size(), 1u);
         HInstruction* input = phi->InputAt(i);
-        Location source = FindLocationAt(input->GetLiveInterval(),
-                                         predecessor->GetLastInstruction()->GetLifetimePosition());
-        Location destination = ConvertToLocation(phi->GetLiveInterval());
+        Location source = input->GetLiveInterval()->GetLocationAt(
+            predecessor->GetLifetimeEnd() - 1);
+        Location destination = phi->GetLiveInterval()->ToLocation();
         InsertParallelMoveAtExitOf(predecessor, nullptr, source, destination);
       }
     }
@@ -1130,11 +1099,12 @@
   size_t temp_index = 0;
   for (size_t i = 0; i < temp_intervals_.Size(); ++i) {
     LiveInterval* temp = temp_intervals_.Get(i);
-    if (temp->GetDefinedBy() != current) {
+    HInstruction* at = liveness_.GetTempUser(temp);
+    if (at != current) {
       temp_index = 0;
-      current = temp->GetDefinedBy();
+      current = at;
     }
-    LocationSummary* locations = current->GetLocations();
+    LocationSummary* locations = at->GetLocations();
     locations->SetTempAt(
         temp_index++, Location::RegisterLocation(ManagedRegister(temp->GetRegister())));
   }
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 535a768..b7d56e6 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -25,6 +25,7 @@
 #include "ssa_liveness_analysis.h"
 #include "ssa_phi_elimination.h"
 #include "utils/arena_allocator.h"
+#include "utils/managed_register.h"
 
 #include "gtest/gtest.h"
 
@@ -418,17 +419,17 @@
   // Add three temps holding the same register, and starting at different positions.
   // Put the one that should be picked in the middle of the inactive list to ensure
   // we do not depend on an order.
-  LiveInterval* interval = LiveInterval::MakeTempInterval(&allocator, nullptr, Primitive::kPrimInt);
+  LiveInterval* interval = LiveInterval::MakeTempInterval(&allocator, Primitive::kPrimInt);
   interval->SetRegister(0);
   interval->AddRange(40, 50);
   register_allocator.inactive_.Add(interval);
 
-  interval = LiveInterval::MakeTempInterval(&allocator, nullptr, Primitive::kPrimInt);
+  interval = LiveInterval::MakeTempInterval(&allocator, Primitive::kPrimInt);
   interval->SetRegister(0);
   interval->AddRange(20, 30);
   register_allocator.inactive_.Add(interval);
 
-  interval = LiveInterval::MakeTempInterval(&allocator, nullptr, Primitive::kPrimInt);
+  interval = LiveInterval::MakeTempInterval(&allocator, Primitive::kPrimInt);
   interval->SetRegister(0);
   interval->AddRange(60, 70);
   register_allocator.inactive_.Add(interval);
@@ -447,4 +448,250 @@
   ASSERT_EQ(20u, register_allocator.unhandled_->Get(0)->GetStart());
 }
 
+static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
+                                  HPhi** phi,
+                                  HInstruction** input1,
+                                  HInstruction** input2) {
+  HGraph* graph = new (allocator) HGraph(allocator);
+  HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+
+  HBasicBlock* block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+
+  HInstruction* test = new (allocator) HInstanceFieldGet(
+      parameter, Primitive::kPrimBoolean, MemberOffset(22));
+  block->AddInstruction(test);
+  block->AddInstruction(new (allocator) HIf(test));
+  HBasicBlock* then = new (allocator) HBasicBlock(graph);
+  HBasicBlock* else_ = new (allocator) HBasicBlock(graph);
+  HBasicBlock* join = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(then);
+  graph->AddBlock(else_);
+  graph->AddBlock(join);
+
+  block->AddSuccessor(then);
+  block->AddSuccessor(else_);
+  then->AddSuccessor(join);
+  else_->AddSuccessor(join);
+  then->AddInstruction(new (allocator) HGoto());
+  else_->AddInstruction(new (allocator) HGoto());
+
+  *phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
+  join->AddPhi(*phi);
+  *input1 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42));
+  *input2 = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42));
+  then->AddInstruction(*input1);
+  else_->AddInstruction(*input2);
+  join->AddInstruction(new (allocator) HExit());
+  (*phi)->AddInput(*input1);
+  (*phi)->AddInput(*input2);
+
+  graph->BuildDominatorTree();
+  graph->FindNaturalLoops();
+  return graph;
+}
+
+TEST(RegisterAllocatorTest, PhiHint) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HPhi *phi;
+  HInstruction *input1, *input2;
+
+  {
+    HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    // Check that the register allocator is deterministic.
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0);
+    ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 0);
+    ASSERT_EQ(phi->GetLiveInterval()->GetRegister(), 0);
+  }
+
+  {
+    HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    // Set the phi to a specific register, and check that the inputs get allocated
+    // the same register.
+    phi->GetLocations()->SetOut(Location::RegisterLocation(ManagedRegister(2)));
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(phi->GetLiveInterval()->GetRegister(), 2);
+  }
+
+  {
+    HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    // Set input1 to a specific register, and check that the phi and other input get allocated
+    // the same register.
+    input1->GetLocations()->SetOut(Location::RegisterLocation(ManagedRegister(2)));
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(phi->GetLiveInterval()->GetRegister(), 2);
+  }
+
+  {
+    HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    // Set input2 to a specific register, and check that the phi and other input get allocated
+    // the same register.
+    input2->GetLocations()->SetOut(Location::RegisterLocation(ManagedRegister(2)));
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(input2->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(phi->GetLiveInterval()->GetRegister(), 2);
+  }
+}
+
+static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
+                                HInstruction** field,
+                                HInstruction** ret) {
+  HGraph* graph = new (allocator) HGraph(allocator);
+  HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+  entry->AddInstruction(parameter);
+
+  HBasicBlock* block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+
+  *field = new (allocator) HInstanceFieldGet(parameter, Primitive::kPrimInt, MemberOffset(42));
+  block->AddInstruction(*field);
+  *ret = new (allocator) HReturn(*field);
+  block->AddInstruction(*ret);
+
+  HBasicBlock* exit = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(exit);
+  block->AddSuccessor(exit);
+  exit->AddInstruction(new (allocator) HExit());
+  return graph;
+}
+
+TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HInstruction *field, *ret;
+
+  {
+    HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    // Sanity check that in normal conditions, the register should be hinted to 0 (EAX).
+    ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 0);
+  }
+
+  {
+    HGraph* graph = BuildFieldReturn(&allocator, &field, &ret);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    // Check that the field gets put in the register expected by its use.
+    ret->GetLocations()->SetInAt(0, Location::RegisterLocation(ManagedRegister(2)));
+
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2);
+  }
+}
+
+static HGraph* BuildTwoAdds(ArenaAllocator* allocator,
+                            HInstruction** first_add,
+                            HInstruction** second_add) {
+  HGraph* graph = new (allocator) HGraph(allocator);
+  HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(entry);
+  graph->SetEntryBlock(entry);
+  HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimInt);
+  HInstruction* constant1 = new (allocator) HIntConstant(0);
+  HInstruction* constant2 = new (allocator) HIntConstant(0);
+  entry->AddInstruction(parameter);
+  entry->AddInstruction(constant1);
+  entry->AddInstruction(constant2);
+
+  HBasicBlock* block = new (allocator) HBasicBlock(graph);
+  graph->AddBlock(block);
+  entry->AddSuccessor(block);
+
+  *first_add = new (allocator) HAdd(Primitive::kPrimInt, parameter, constant1);
+  block->AddInstruction(*first_add);
+  *second_add = new (allocator) HAdd(Primitive::kPrimInt, *first_add, constant2);
+  block->AddInstruction(*second_add);
+
+  block->AddInstruction(new (allocator) HExit());
+  return graph;
+}
+
+TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HInstruction *first_add, *second_add;
+
+  {
+    HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    // Sanity check that in normal conditions, the registers are the same.
+    ASSERT_EQ(first_add->GetLiveInterval()->GetRegister(), 1);
+    ASSERT_EQ(second_add->GetLiveInterval()->GetRegister(), 1);
+  }
+
+  {
+    HGraph* graph = BuildTwoAdds(&allocator, &first_add, &second_add);
+    x86::CodeGeneratorX86 codegen(graph);
+    SsaLivenessAnalysis liveness(*graph, &codegen);
+    liveness.Analyze();
+
+    // Check that both adds get the same register.
+    first_add->InputAt(0)->GetLocations()->SetOut(Location::RegisterLocation(ManagedRegister(2)));
+    ASSERT_EQ(first_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
+    ASSERT_EQ(second_add->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput);
+
+    RegisterAllocator register_allocator(&allocator, &codegen, liveness);
+    register_allocator.AllocateRegisters();
+
+    ASSERT_EQ(first_add->GetLiveInterval()->GetRegister(), 2);
+    ASSERT_EQ(second_add->GetLiveInterval()->GetRegister(), 2);
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index cd13d81..1de90b4 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -297,4 +297,136 @@
   return live_in->UnionIfNotIn(live_out, kill);
 }
 
+int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
+  if (GetParent() == this && defined_by_ != nullptr) {
+    // This is the first interval for the instruction. Try to find
+    // a register based on its definition.
+    DCHECK_EQ(defined_by_->GetLiveInterval(), this);
+    int hint = FindHintAtDefinition();
+    if (hint != kNoRegister && free_until[hint] > GetStart()) {
+      return hint;
+    }
+  }
+
+  UsePosition* use = first_use_;
+  size_t start = GetStart();
+  size_t end = GetEnd();
+  while (use != nullptr && use->GetPosition() <= end) {
+    size_t use_position = use->GetPosition();
+    if (use_position >= start && !use->GetIsEnvironment()) {
+      HInstruction* user = use->GetUser();
+      size_t input_index = use->GetInputIndex();
+      if (user->IsPhi()) {
+        // If the phi has a register, try to use the same.
+        Location phi_location = user->GetLiveInterval()->ToLocation();
+        if (phi_location.IsRegister() && free_until[phi_location.reg().RegId()] >= use_position) {
+          return phi_location.reg().RegId();
+        }
+        const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
+        // If the instruction dies at the phi assignment, we can try having the
+        // same register.
+        if (end == predecessors.Get(input_index)->GetLifetimeEnd()) {
+          for (size_t i = 0, e = user->InputCount(); i < e; ++i) {
+            if (i == input_index) {
+              continue;
+            }
+            HInstruction* input = user->InputAt(i);
+            Location location = input->GetLiveInterval()->GetLocationAt(
+                predecessors.Get(i)->GetLifetimeEnd() - 1);
+            if (location.IsRegister() && free_until[location.reg().RegId()] >= use_position) {
+              return location.reg().RegId();
+            }
+          }
+        }
+      } else {
+        // If the instruction is expected in a register, try to use it.
+        LocationSummary* locations = user->GetLocations();
+        Location expected = locations->InAt(use->GetInputIndex());
+        // We use the user's lifetime position - 1 (and not `use_position`) because the
+        // register is blocked at the beginning of the user.
+        size_t position = user->GetLifetimePosition() - 1;
+        if (expected.IsRegister() && free_until[expected.reg().RegId()] >= position) {
+          return expected.reg().RegId();
+        }
+      }
+    }
+    use = use->GetNext();
+  }
+
+  return kNoRegister;
+}
+
+int LiveInterval::FindHintAtDefinition() const {
+  if (defined_by_->IsPhi()) {
+    // Try to use the same register as one of the inputs.
+    const GrowableArray<HBasicBlock*>& predecessors = defined_by_->GetBlock()->GetPredecessors();
+    for (size_t i = 0, e = defined_by_->InputCount(); i < e; ++i) {
+      HInstruction* input = defined_by_->InputAt(i);
+      size_t end = predecessors.Get(i)->GetLifetimeEnd();
+      const LiveInterval& input_interval = input->GetLiveInterval()->GetIntervalAt(end - 1);
+      if (input_interval.GetEnd() == end) {
+        // If the input dies at the end of the predecessor, we know its register can
+        // be reused.
+        Location input_location = input_interval.ToLocation();
+        if (input_location.IsRegister()) {
+          return input_location.reg().RegId();
+        }
+      }
+    }
+  } else {
+    LocationSummary* locations = GetDefinedBy()->GetLocations();
+    Location out = locations->Out();
+    if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) {
+      // Try to use the same register as the first input.
+      const LiveInterval& input_interval =
+          GetDefinedBy()->InputAt(0)->GetLiveInterval()->GetIntervalAt(GetStart() - 1);
+      if (input_interval.GetEnd() == GetStart()) {
+        // If the input dies at the start of this instruction, we know its register can
+        // be reused.
+        Location location = input_interval.ToLocation();
+        if (location.IsRegister()) {
+          return location.reg().RegId();
+        }
+      }
+    }
+  }
+  return kNoRegister;
+}
+
+bool LiveInterval::NeedsTwoSpillSlots() const {
+  return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+}
+
+Location LiveInterval::ToLocation() const {
+  if (HasRegister()) {
+    return Location::RegisterLocation(ManagedRegister(GetRegister()));
+  } else {
+    HInstruction* defined_by = GetParent()->GetDefinedBy();
+    if (defined_by->IsConstant()) {
+      return defined_by->GetLocations()->Out();
+    } else if (GetParent()->HasSpillSlot()) {
+      if (NeedsTwoSpillSlots()) {
+        return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+      } else {
+        return Location::StackSlot(GetParent()->GetSpillSlot());
+      }
+    } else {
+      return Location();
+    }
+  }
+}
+
+Location LiveInterval::GetLocationAt(size_t position) const {
+  return GetIntervalAt(position).ToLocation();
+}
+
+const LiveInterval& LiveInterval::GetIntervalAt(size_t position) const {
+  const LiveInterval* current = this;
+  while (!current->Covers(position)) {
+    current = current->GetNextSibling();
+    DCHECK(current != nullptr);
+  }
+  return *current;
+}
+
 }  // namespace art
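
The hint helpers above (FindFirstRegisterHint and FindHintAtDefinition) all boil down to the same test: a candidate register is only worth returning if free_until[reg] reaches the position where this interval needs it. A standalone sketch of that test (not ART code; the real code compares with >= at use positions and > at the definition):

    #include <cassert>
    #include <cstddef>

    static const int kNoRegister = -1;

    // Accept a hinted register only if it stays free until `needed_until`.
    static int PickHint(int hint, size_t needed_until, const size_t* free_until) {
      if (hint != kNoRegister && free_until[hint] >= needed_until) {
        return hint;
      }
      return kNoRegister;
    }

    int main() {
      size_t free_until[4] = {10, 40, 0, 25};
      assert(PickHint(1, 20, free_until) == 1);            // register 1 is free long enough
      assert(PickHint(2, 20, free_until) == kNoRegister);  // register 2 is already taken
      return 0;
    }
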
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index c62e61b..e9bd303 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -23,6 +23,8 @@
 
 class CodeGenerator;
 
+static constexpr int kNoRegister = -1;
+
 class BlockInfo : public ArenaObject {
  public:
   BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values)
@@ -166,10 +168,8 @@
     return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false);
   }
 
-  static LiveInterval* MakeTempInterval(ArenaAllocator* allocator,
-                                        HInstruction* defined_by,
-                                        Primitive::Type type) {
-    return new (allocator) LiveInterval(allocator, type, defined_by, false, kNoRegister, true);
+  static LiveInterval* MakeTempInterval(ArenaAllocator* allocator, Primitive::Type type) {
+    return new (allocator) LiveInterval(allocator, type, nullptr, false, kNoRegister, true);
   }
 
   bool IsFixed() const { return is_fixed_; }
@@ -484,6 +484,31 @@
 
   LiveInterval* GetNextSibling() const { return next_sibling_; }
 
+  // Returns the first register hint whose register stays free, according to
+  // `free_until`, at least until the position where this interval needs it.
+  // If none is found, returns `kNoRegister`.
+  int FindFirstRegisterHint(size_t* free_until) const;
+
+  // If there is enough information at the definition site to pick a register (for
+  // example the output must use the same register as the first input), returns
+  // that register as a hint. Returns kNoRegister otherwise.
+  int FindHintAtDefinition() const;
+
+  // Returns whether the interval needs two stack slots (each of Dex virtual
+  // register size `kVRegSize`) for spilling.
+  bool NeedsTwoSpillSlots() const;
+
+  // Converts the location of the interval to a `Location` object.
+  Location ToLocation() const;
+
+  // Returns the location of the interval (this one or a sibling) that covers `position`.
+  Location GetLocationAt(size_t position) const;
+
+  // Finds the interval that covers `position`.
+  const LiveInterval& GetIntervalAt(size_t position) const;
+
+  bool IsTemp() const { return is_temp_; }
+
  private:
   ArenaAllocator* const allocator_;
 
@@ -567,6 +592,12 @@
     return instructions_from_lifetime_position_.Get(index);
   }
 
+  HInstruction* GetTempUser(LiveInterval* temp) const {
+    // A temporary shares the same lifetime start as the instruction that requires it.
+    DCHECK(temp->IsTemp());
+    return GetInstructionFromPosition(temp->GetStart() / 2);
+  }
+
   size_t GetMaxLifetimePosition() const {
     return instructions_from_lifetime_position_.Size() * 2 - 1;
   }
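
GetTempUser above relies on the liveness numbering: instructions sit at even lifetime positions and the linear-order table is indexed by position / 2, so a temp that starts at its user's position maps straight back to that user. A small sketch of that arithmetic (assumed numbering, not ART code):

    #include <cassert>
    #include <vector>

    int main() {
      // Linear order of instructions; instruction i lives at lifetime position 2 * i.
      std::vector<const char*> linear_order = {"param", "add", "invoke", "return"};
      auto instruction_from_position = [&](size_t position) {
        return linear_order[position / 2];
      };

      size_t temp_start = 4;  // a temp requested by the instruction at position 4
      assert(instruction_from_position(temp_start) == linear_order[2]);  // the "invoke"
      return 0;
    }
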
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index f888d46..3ff24b7 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -243,6 +243,17 @@
 }
 
 
+void X86Assembler::movw(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOperandSizeOverride();
+  EmitUint8(0xC7);
+  EmitOperand(0, dst);
+  CHECK(imm.is_uint16() || imm.is_int16());
+  EmitUint8(imm.value() & 0xFF);
+  EmitUint8(imm.value() >> 8);
+}
+
+
 void X86Assembler::leal(Register dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x8D);
@@ -266,6 +277,14 @@
 }
 
 
+void X86Assembler::movaps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x28);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::movss(XmmRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
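
A worked example for the new movw(Address, Immediate) overload above: after the 0x66 operand-size prefix and the C7 /0 opcode bytes, the 16-bit immediate is emitted low byte first, so `movw [eax], 0x1234` should end in the bytes 34 12. A minimal check of that byte split (standalone, not assembler code):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t value = 0x1234;
      uint8_t low  = value & 0xFF;         // emitted first
      uint8_t high = (value >> 8) & 0xFF;  // emitted second
      assert(low == 0x34);
      assert(high == 0x12);
      return 0;
    }
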
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index ec983d9..1f6f7e6 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -37,6 +37,7 @@
 
   bool is_int8() const { return IsInt(8, value_); }
   bool is_uint8() const { return IsUint(8, value_); }
+  bool is_int16() const { return IsInt(16, value_); }
   bool is_uint16() const { return IsUint(16, value_); }
 
  private:
@@ -251,6 +252,7 @@
   void movsxw(Register dst, const Address& src);
   void movw(Register dst, const Address& src);
   void movw(const Address& dst, Register src);
+  void movw(const Address& dst, const Immediate& imm);
 
   void leal(Register dst, const Address& src);
 
@@ -258,6 +260,7 @@
 
   void setb(Condition condition, Register dst);
 
+  void movaps(XmmRegister dst, XmmRegister src);
   void movss(XmmRegister dst, const Address& src);
   void movss(const Address& dst, XmmRegister src);
   void movss(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 1dcd4b3..705b639 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -234,6 +234,7 @@
 
 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst);
   EmitUint8(0xC6);
   EmitOperand(Register::RAX, dst);
   CHECK(imm.is_int8());
@@ -291,6 +292,18 @@
 }
 
 
+void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOperandSizeOverride();
+  EmitOptionalRex32(dst);
+  EmitUint8(0xC7);
+  EmitOperand(Register::RAX, dst);
+  CHECK(imm.is_uint16() || imm.is_int16());
+  EmitUint8(imm.value() & 0xFF);
+  EmitUint8(imm.value() >> 8);
+}
+
+
 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(dst, src);
@@ -299,6 +312,15 @@
 }
 
 
+void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x28);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1991,7 +2013,7 @@
                            MemberOffset offs) {
   X86_64ManagedRegister dest = mdest.AsX86_64();
   CHECK(dest.IsCpuRegister() && dest.IsCpuRegister());
-  movq(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
+  movl(dest.AsCpuRegister(), Address(base.AsX86_64().AsCpuRegister(), offs));
   if (kPoisonHeapReferences) {
     negl(dest.AsCpuRegister());
   }
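On the 64-bit side, 0x0F 0x28 is the memory/register-to-register form of MOVAPS, so the destination belongs in the ModRM reg field (matching the REX.R bit emitted from `dst`), and `movaps xmm1, xmm2` should assemble to three bytes when no extended registers are involved. A hedged sketch of the expected encoding:

// Sketch: expected encoding of `movaps xmm1, xmm2` (no REX prefix needed).
//   0x0F 0x28   MOVAPS xmm, xmm/m128
//   0xCA        ModRM: mod=11, reg=xmm1 (dst), rm=xmm2 (src)
static const unsigned char kExpectedMovaps[] = {0x0F, 0x28, 0xCA};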
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 1fd65c2..268f72b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -44,6 +44,7 @@
 
   bool is_int8() const { return IsInt(8, value_); }
   bool is_uint8() const { return IsUint(8, value_); }
+  bool is_int16() const { return IsInt(16, value_); }
   bool is_uint16() const { return IsUint(16, value_); }
   bool is_int32() const {
     // This does not work on 32b machines: return IsInt(32, value_);
@@ -295,9 +296,12 @@
   void movsxw(CpuRegister dst, const Address& src);
   void movw(CpuRegister dst, const Address& src);
   void movw(const Address& dst, CpuRegister src);
+  void movw(const Address& dst, const Immediate& imm);
 
   void leaq(CpuRegister dst, const Address& src);
 
+  void movaps(XmmRegister dst, XmmRegister src);
+
   void movss(XmmRegister dst, const Address& src);
   void movss(const Address& dst, XmmRegister src);
   void movss(XmmRegister dst, XmmRegister src);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 09825e2..e1f513d 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -44,8 +44,7 @@
 #include "dex/quick/dex_file_to_method_inliner_map.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
-#include "elf_fixup.h"
-#include "elf_stripper.h"
+#include "elf_writer.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "image_writer.h"
@@ -472,7 +471,7 @@
       PLOG(ERROR) << "Failed to open ELF file: " << oat_filename;
       return false;
     }
-    if (!ElfFixup::Fixup(oat_file.get(), oat_data_begin)) {
+    if (!ElfWriter::Fixup(oat_file.get(), oat_data_begin)) {
       LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
       return false;
     }
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 50b4ece..bbaf0e4 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -29,6 +29,7 @@
 #include "base/stringprintf.h"
 #include "elf_utils.h"
 #include "elf_file.h"
+#include "elf_file_impl.h"
 #include "gc/space/image_space.h"
 #include "image.h"
 #include "instruction_set.h"
@@ -467,15 +468,16 @@
   return true;
 }
 
-template <typename ptr_t>
-bool PatchOat::CheckOatFile(const Elf32_Shdr& patches_sec) {
-  if (patches_sec.sh_type != SHT_OAT_PATCH) {
+template <typename ElfFileImpl, typename ptr_t>
+bool PatchOat::CheckOatFile(ElfFileImpl* oat_file) {
+  auto patches_sec = oat_file->FindSectionByName(".oat_patches");
+  if (patches_sec->sh_type != SHT_OAT_PATCH) {
     return false;
   }
-  ptr_t* patches = reinterpret_cast<ptr_t*>(oat_file_->Begin() + patches_sec.sh_offset);
-  ptr_t* patches_end = patches + (patches_sec.sh_size / sizeof(ptr_t));
-  Elf32_Shdr* oat_data_sec = oat_file_->FindSectionByName(".rodata");
-  Elf32_Shdr* oat_text_sec = oat_file_->FindSectionByName(".text");
+  ptr_t* patches = reinterpret_cast<ptr_t*>(oat_file->Begin() + patches_sec->sh_offset);
+  ptr_t* patches_end = patches + (patches_sec->sh_size / sizeof(ptr_t));
+  auto oat_data_sec = oat_file->FindSectionByName(".rodata");
+  auto oat_text_sec = oat_file->FindSectionByName(".text");
   if (oat_data_sec == nullptr) {
     return false;
   }
@@ -495,14 +497,15 @@
   return true;
 }
 
-bool PatchOat::PatchOatHeader() {
-  Elf32_Shdr *rodata_sec = oat_file_->FindSectionByName(".rodata");
+template <typename ElfFileImpl>
+bool PatchOat::PatchOatHeader(ElfFileImpl* oat_file) {
+  auto rodata_sec = oat_file->FindSectionByName(".rodata");
   if (rodata_sec == nullptr) {
     return false;
   }
-  OatHeader* oat_header = reinterpret_cast<OatHeader*>(oat_file_->Begin() + rodata_sec->sh_offset);
+  OatHeader* oat_header = reinterpret_cast<OatHeader*>(oat_file->Begin() + rodata_sec->sh_offset);
   if (!oat_header->IsValid()) {
-    LOG(ERROR) << "Elf file " << oat_file_->GetFile().GetPath() << " has an invalid oat header";
+    LOG(ERROR) << "Elf file " << oat_file->GetFile().GetPath() << " has an invalid oat header";
     return false;
   }
   oat_header->RelocateOat(delta_);
@@ -510,28 +513,31 @@
 }
 
 bool PatchOat::PatchElf() {
+  if (oat_file_->is_elf64_) {
+    return PatchElf<ElfFileImpl64>(oat_file_->GetImpl64());
+  }
+  return PatchElf<ElfFileImpl32>(oat_file_->GetImpl32());
+}
+
+template <typename ElfFileImpl>
+bool PatchOat::PatchElf(ElfFileImpl* oat_file) {
   TimingLogger::ScopedTiming t("Fixup Elf Text Section", timings_);
-  if (!PatchTextSection()) {
+  if (!PatchTextSection<ElfFileImpl>(oat_file)) {
     return false;
   }
 
-  if (!PatchOatHeader()) {
+  if (!PatchOatHeader<ElfFileImpl>(oat_file)) {
     return false;
   }
 
   bool need_fixup = false;
-  t.NewTiming("Fixup Elf Headers");
-  // Fixup Phdr's
-  for (unsigned int i = 0; i < oat_file_->GetProgramHeaderNum(); i++) {
-    Elf32_Phdr* hdr = oat_file_->GetProgramHeader(i);
-    CHECK(hdr != nullptr);
+  for (unsigned int i = 0; i < oat_file->GetProgramHeaderNum(); i++) {
+    auto hdr = oat_file->GetProgramHeader(i);
     if (hdr->p_vaddr != 0 && hdr->p_vaddr != hdr->p_offset) {
       need_fixup = true;
-      hdr->p_vaddr += delta_;
     }
     if (hdr->p_paddr != 0 && hdr->p_paddr != hdr->p_offset) {
       need_fixup = true;
-      hdr->p_paddr += delta_;
     }
   }
   if (!need_fixup) {
@@ -539,67 +545,39 @@
     // their addr. Therefore we do not need to update these parts.
     return true;
   }
+
+  t.NewTiming("Fixup Elf Headers");
+  // Fixup Phdr's
+  oat_file->FixupProgramHeaders(delta_);
+
   t.NewTiming("Fixup Section Headers");
-  for (unsigned int i = 0; i < oat_file_->GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* hdr = oat_file_->GetSectionHeader(i);
-    CHECK(hdr != nullptr);
-    if (hdr->sh_addr != 0) {
-      hdr->sh_addr += delta_;
-    }
-  }
+  // Fixup Shdr's
+  oat_file->FixupSectionHeaders(delta_);
 
   t.NewTiming("Fixup Dynamics");
-  for (Elf32_Word i = 0; i < oat_file_->GetDynamicNum(); i++) {
-    Elf32_Dyn& dyn = oat_file_->GetDynamic(i);
-    if (IsDynamicSectionPointer(dyn.d_tag, oat_file_->GetHeader().e_machine)) {
-      dyn.d_un.d_ptr += delta_;
-    }
-  }
+  oat_file->FixupDynamic(delta_);
 
   t.NewTiming("Fixup Elf Symbols");
   // Fixup dynsym
-  Elf32_Shdr* dynsym_sec = oat_file_->FindSectionByName(".dynsym");
-  CHECK(dynsym_sec != nullptr);
-  if (!PatchSymbols(dynsym_sec)) {
+  if (!oat_file->FixupSymbols(delta_, true)) {
     return false;
   }
-
   // Fixup symtab
-  Elf32_Shdr* symtab_sec = oat_file_->FindSectionByName(".symtab");
-  if (symtab_sec != nullptr) {
-    if (!PatchSymbols(symtab_sec)) {
-      return false;
-    }
+  if (!oat_file->FixupSymbols(delta_, false)) {
+    return false;
   }
 
   t.NewTiming("Fixup Debug Sections");
-  if (!oat_file_->FixupDebugSections(delta_)) {
+  if (!oat_file->FixupDebugSections(delta_)) {
     return false;
   }
 
   return true;
 }
 
-bool PatchOat::PatchSymbols(Elf32_Shdr* section) {
-  Elf32_Sym* syms = reinterpret_cast<Elf32_Sym*>(oat_file_->Begin() + section->sh_offset);
-  const Elf32_Sym* last_sym =
-      reinterpret_cast<Elf32_Sym*>(oat_file_->Begin() + section->sh_offset + section->sh_size);
-  CHECK_EQ(section->sh_size % sizeof(Elf32_Sym), 0u)
-      << "Symtab section size is not multiple of symbol size";
-  for (; syms < last_sym; syms++) {
-    uint8_t sttype = ELF32_ST_TYPE(syms->st_info);
-    Elf32_Word shndx = syms->st_shndx;
-    if (shndx != SHN_ABS && shndx != SHN_COMMON && shndx != SHN_UNDEF &&
-        (sttype == STT_FUNC || sttype == STT_OBJECT)) {
-      CHECK_NE(syms->st_value, 0u);
-      syms->st_value += delta_;
-    }
-  }
-  return true;
-}
-
-bool PatchOat::PatchTextSection() {
-  Elf32_Shdr* patches_sec = oat_file_->FindSectionByName(".oat_patches");
+template <typename ElfFileImpl>
+bool PatchOat::PatchTextSection(ElfFileImpl* oat_file) {
+  auto patches_sec = oat_file->FindSectionByName(".oat_patches");
   if (patches_sec == nullptr) {
     LOG(ERROR) << ".oat_patches section not found. Aborting patch";
     return false;
@@ -611,9 +589,9 @@
 
   switch (patches_sec->sh_entsize) {
     case sizeof(uint32_t):
-      return PatchTextSection<uint32_t>(*patches_sec);
+      return PatchTextSection<ElfFileImpl, uint32_t>(oat_file);
     case sizeof(uint64_t):
-      return PatchTextSection<uint64_t>(*patches_sec);
+      return PatchTextSection<ElfFileImpl, uint64_t>(oat_file);
     default:
       LOG(ERROR) << ".oat_patches Entsize of " << patches_sec->sh_entsize << "bits "
                  << "is not valid";
@@ -621,14 +599,16 @@
   }
 }
 
-template <typename ptr_t>
-bool PatchOat::PatchTextSection(const Elf32_Shdr& patches_sec) {
-  DCHECK(CheckOatFile<ptr_t>(patches_sec)) << "Oat file invalid";
-  ptr_t* patches = reinterpret_cast<ptr_t*>(oat_file_->Begin() + patches_sec.sh_offset);
-  ptr_t* patches_end = patches + (patches_sec.sh_size / sizeof(ptr_t));
-  Elf32_Shdr* oat_text_sec = oat_file_->FindSectionByName(".text");
+template <typename ElfFileImpl, typename patch_loc_t>
+bool PatchOat::PatchTextSection(ElfFileImpl* oat_file) {
+  bool oat_file_valid = CheckOatFile<ElfFileImpl, patch_loc_t>(oat_file);
+  CHECK(oat_file_valid) << "Oat file invalid";
+  auto patches_sec = oat_file->FindSectionByName(".oat_patches");
+  patch_loc_t* patches = reinterpret_cast<patch_loc_t*>(oat_file->Begin() + patches_sec->sh_offset);
+  patch_loc_t* patches_end = patches + (patches_sec->sh_size / sizeof(patch_loc_t));
+  auto oat_text_sec = oat_file->FindSectionByName(".text");
   CHECK(oat_text_sec != nullptr);
-  byte* to_patch = oat_file_->Begin() + oat_text_sec->sh_offset;
+  byte* to_patch = oat_file->Begin() + oat_text_sec->sh_offset;
   uintptr_t to_patch_end = reinterpret_cast<uintptr_t>(to_patch) + oat_text_sec->sh_size;
 
   for (; patches < patches_end; patches++) {
@@ -768,7 +748,7 @@
     if (f.get() != nullptr) {
       if (fchmod(f->Fd(), 0644) != 0) {
         PLOG(ERROR) << "Unable to make " << name << " world readable";
-        unlink(name);
+        TEMP_FAILURE_RETRY(unlink(name));
         return nullptr;
       }
     }
@@ -1086,11 +1066,11 @@
     if (!success) {
       if (new_oat_out) {
         CHECK(!output_oat_filename.empty());
-        unlink(output_oat_filename.c_str());
+        TEMP_FAILURE_RETRY(unlink(output_oat_filename.c_str()));
       }
       if (new_image_out) {
         CHECK(!output_image_filename.empty());
-        unlink(output_image_filename.c_str());
+        TEMP_FAILURE_RETRY(unlink(output_image_filename.c_str()));
       }
     }
     if (dump_timings) {
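Wrapping the cleanup calls in TEMP_FAILURE_RETRY makes them retry when a signal interrupts the system call; the macro (bionic/glibc style) loops while the call returns -1 with errno set to EINTR. A small sketch of the equivalent expansion, using a hypothetical helper name:

#include <errno.h>
#include <unistd.h>

// Sketch only: what TEMP_FAILURE_RETRY(unlink(path)) boils down to.
static int UnlinkIgnoringEintr(const char* path) {
  int result;
  do {
    result = unlink(path);
  } while (result == -1 && errno == EINTR);
  return result;
}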
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 9086d58..7dd95f5 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -76,12 +76,15 @@
 
   // Patches oat in place, modifying the oat_file given to the constructor.
   bool PatchElf();
-  bool PatchTextSection();
+  template <typename ElfFileImpl>
+  bool PatchElf(ElfFileImpl* oat_file);
+  template <typename ElfFileImpl>
+  bool PatchTextSection(ElfFileImpl* oat_file);
   // Templatized version to actually do the patching with the right sized offsets.
-  template <typename ptr_t> bool PatchTextSection(const Elf32_Shdr& patches_sec);
-  template <typename ptr_t> bool CheckOatFile(const Elf32_Shdr& patches_sec);
-  bool PatchOatHeader();
-  bool PatchSymbols(Elf32_Shdr* section);
+  template <typename ElfFileImpl, typename patch_loc_t> bool PatchTextSection(ElfFileImpl* oat_file);
+  template <typename ElfFileImpl, typename patch_loc_t> bool CheckOatFile(ElfFileImpl* oat_file);
+  template <typename ElfFileImpl>
+  bool PatchOatHeader(ElfFileImpl* oat_file);
 
   bool PatchImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
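The header changes above replace the concrete Elf32 entry points with templated ones; the non-template `PatchElf()` stays as the public entry point and simply dispatches to the right instantiation based on the file's ELF class. A minimal sketch of that dispatch pattern, using hypothetical stand-in types rather than the real ElfFile/ElfFileImpl API:

// Sketch of the 32-/64-bit dispatch pattern; all names here are stand-ins.
struct Elf32ImplStub { static constexpr int kBits = 32; };
struct Elf64ImplStub { static constexpr int kBits = 64; };

template <typename ElfFileImpl>
bool PatchAll(ElfFileImpl* /*file*/) {
  // The real worker walks program headers, section headers, dynamics and
  // symbols using ElfFileImpl's width-specific types.
  return true;
}

bool Dispatch(bool is_elf64, Elf32ImplStub* impl32, Elf64ImplStub* impl64) {
  return is_elf64 ? PatchAll(impl64) : PatchAll(impl32);
}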
diff --git a/runtime/elf.h b/runtime/elf.h
index 6e007a2..60b5248 100644
--- a/runtime/elf.h
+++ b/runtime/elf.h
@@ -1411,6 +1411,7 @@
 
 // BEGIN android-added for <elf.h> compat
 static inline unsigned char ELF32_ST_TYPE(unsigned char st_info) { return st_info & 0x0f; }
+static inline unsigned char ELF64_ST_TYPE(unsigned char st_info) { return st_info & 0x0f; }
 // END android-added for <elf.h> compat
 
 // Symbol table entries for ELF64.
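The new ELF64_ST_TYPE accessor mirrors the 32-bit one (both mask the low nibble of st_info); elf_file.cc below selects between them based on the width of Elf_Addr. A hedged sketch of that compile-time selection, assuming the two accessors declared above:

#include <cstdint>

// Sketch: pick the symbol-type accessor by ELF width, mirroring the
// sizeof(Elf_Addr) check used in FindSymbolByName() below.
template <typename Elf_Addr>
unsigned char SymbolType(unsigned char st_info) {
  return (sizeof(Elf_Addr) == sizeof(uint64_t)) ? ELF64_ST_TYPE(st_info)
                                                : ELF32_ST_TYPE(st_info);
}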
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 65a557b..3b8358d 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -16,6 +16,7 @@
 
 #include "elf_file.h"
 
+#include <inttypes.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -106,7 +107,12 @@
   delete entry;
 }
 
-ElfFile::ElfFile(File* file, bool writable, bool program_header_only)
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::ElfFileImpl(File* file, bool writable, bool program_header_only)
   : file_(file),
     writable_(writable),
     program_header_only_(program_header_only),
@@ -128,9 +134,20 @@
   CHECK(file != nullptr);
 }
 
-ElfFile* ElfFile::Open(File* file, bool writable, bool program_header_only,
-                       std::string* error_msg) {
-  std::unique_ptr<ElfFile> elf_file(new ElfFile(file, writable, program_header_only));
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>*
+    ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::Open(File* file, bool writable, bool program_header_only,
+           std::string* error_msg) {
+  std::unique_ptr<ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>>
+    elf_file(new ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+                 Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+                 (file, writable, program_header_only));
   int prot;
   int flags;
   if (writable) {
@@ -146,15 +163,31 @@
   return elf_file.release();
 }
 
-ElfFile* ElfFile::Open(File* file, int prot, int flags, std::string* error_msg) {
-  std::unique_ptr<ElfFile> elf_file(new ElfFile(file, (prot & PROT_WRITE) == PROT_WRITE, false));
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>*
+    ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::Open(File* file, int prot, int flags, std::string* error_msg) {
+  std::unique_ptr<ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>>
+    elf_file(new ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+                 Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+                 (file, (prot & PROT_WRITE) == PROT_WRITE, false));
   if (!elf_file->Setup(prot, flags, error_msg)) {
     return nullptr;
   }
   return elf_file.release();
 }
 
-bool ElfFile::Setup(int prot, int flags, std::string* error_msg) {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::Setup(int prot, int flags, std::string* error_msg) {
   int64_t temp_file_length = file_->GetLength();
   if (temp_file_length < 0) {
     errno = -temp_file_length;
@@ -163,16 +196,16 @@
     return false;
   }
   size_t file_length = static_cast<size_t>(temp_file_length);
-  if (file_length < sizeof(Elf32_Ehdr)) {
+  if (file_length < sizeof(Elf_Ehdr)) {
     *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF header of "
-                              "%zd bytes: '%s'", file_length, sizeof(Elf32_Ehdr),
+                              "%zd bytes: '%s'", file_length, sizeof(Elf_Ehdr),
                               file_->GetPath().c_str());
     return false;
   }
 
   if (program_header_only_) {
     // first just map ELF header to get program header size information
-    size_t elf_header_size = sizeof(Elf32_Ehdr);
+    size_t elf_header_size = sizeof(Elf_Ehdr);
     if (!SetMap(MemMap::MapFile(elf_header_size, prot, flags, file_->Fd(), 0,
                                 file_->GetPath().c_str(), error_msg),
                 error_msg)) {
@@ -183,7 +216,7 @@
     if (file_length < program_header_size) {
       *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF program "
                                 "header of %zd bytes: '%s'", file_length,
-                                sizeof(Elf32_Ehdr), file_->GetPath().c_str());
+                                sizeof(Elf_Ehdr), file_->GetPath().c_str());
       return false;
     }
     if (!SetMap(MemMap::MapFile(program_header_size, prot, flags, file_->Fd(), 0,
@@ -215,7 +248,7 @@
     }
 
     // Find shstrtab.
-    Elf32_Shdr* shstrtab_section_header = GetSectionNameStringSection();
+    Elf_Shdr* shstrtab_section_header = GetSectionNameStringSection();
     if (shstrtab_section_header == nullptr) {
       *error_msg = StringPrintf("Failed to find shstrtab section header in ELF file: '%s'",
                                 file_->GetPath().c_str());
@@ -236,8 +269,8 @@
     }
 
     // Find other sections from section headers
-    for (Elf32_Word i = 0; i < GetSectionHeaderNum(); i++) {
-      Elf32_Shdr* section_header = GetSectionHeader(i);
+    for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
+      Elf_Shdr* section_header = GetSectionHeader(i);
       if (section_header == nullptr) {
         *error_msg = StringPrintf("Failed to find section header for section %d in ELF file: '%s'",
                                   i, file_->GetPath().c_str());
@@ -311,7 +344,12 @@
   return true;
 }
 
-ElfFile::~ElfFile() {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::~ElfFileImpl() {
   STLDeleteElements(&segments_);
   delete symtab_symbol_table_;
   delete dynsym_symbol_table_;
@@ -321,8 +359,13 @@
   }
 }
 
-bool ElfFile::CheckAndSet(Elf32_Off offset, const char* label,
-                          byte** target, std::string* error_msg) {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::CheckAndSet(Elf32_Off offset, const char* label,
+                  byte** target, std::string* error_msg) {
   if (Begin() + offset >= End()) {
     *error_msg = StringPrintf("Offset %d is out of range for %s in ELF file: '%s'", offset, label,
                               file_->GetPath().c_str());
@@ -332,18 +375,23 @@
   return true;
 }
 
-bool ElfFile::CheckSectionsLinked(const byte* source, const byte* target) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::CheckSectionsLinked(const byte* source, const byte* target) const {
   // Only works in whole-program mode, as we need to iterate over the sections.
   // Note that we normally can't search by type, as duplicates are allowed for most section types.
   if (program_header_only_) {
     return true;
   }
 
-  Elf32_Shdr* source_section = nullptr;
-  Elf32_Word target_index = 0;
+  Elf_Shdr* source_section = nullptr;
+  Elf_Word target_index = 0;
   bool target_found = false;
-  for (Elf32_Word i = 0; i < GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* section_header = GetSectionHeader(i);
+  for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
+    Elf_Shdr* section_header = GetSectionHeader(i);
 
     if (Begin() + section_header->sh_offset == source) {
       // Found the source.
@@ -363,7 +411,12 @@
   return target_found && source_section != nullptr && source_section->sh_link == target_index;
 }
 
-bool ElfFile::CheckSectionsExist(std::string* error_msg) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::CheckSectionsExist(std::string* error_msg) const {
   if (!program_header_only_) {
     // If in full mode, need section headers.
     if (section_headers_start_ == nullptr) {
@@ -432,7 +485,12 @@
   return true;
 }
 
-bool ElfFile::SetMap(MemMap* map, std::string* error_msg) {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::SetMap(MemMap* map, std::string* error_msg) {
   if (map == nullptr) {
     // MemMap::Open should have already set an error.
     DCHECK(!error_msg->empty());
@@ -442,7 +500,7 @@
   CHECK(map_.get() != nullptr) << file_->GetPath();
   CHECK(map_->Begin() != nullptr) << file_->GetPath();
 
-  header_ = reinterpret_cast<Elf32_Ehdr*>(map_->Begin());
+  header_ = reinterpret_cast<Elf_Ehdr*>(map_->Begin());
   if ((ELFMAG0 != header_->e_ident[EI_MAG0])
       || (ELFMAG1 != header_->e_ident[EI_MAG1])
       || (ELFMAG2 != header_->e_ident[EI_MAG2])
@@ -456,9 +514,10 @@
                               header_->e_ident[EI_MAG3]);
     return false;
   }
-  if (ELFCLASS32 != header_->e_ident[EI_CLASS]) {
+  uint8_t elf_class = (sizeof(Elf_Addr) == sizeof(Elf64_Addr)) ? ELFCLASS64 : ELFCLASS32;
+  if (elf_class != header_->e_ident[EI_CLASS]) {
     *error_msg = StringPrintf("Failed to find expected EI_CLASS value %d in %s, found %d",
-                              ELFCLASS32,
+                              elf_class,
                               file_->GetPath().c_str(),
                               header_->e_ident[EI_CLASS]);
     return false;
@@ -495,7 +554,7 @@
     *error_msg = StringPrintf("Failed to find expected e_entry value %d in %s, found %d",
                               0,
                               file_->GetPath().c_str(),
-                              header_->e_entry);
+                              static_cast<int32_t>(header_->e_entry));
     return false;
   }
   if (0 == header_->e_phoff) {
@@ -548,15 +607,15 @@
 
   if (!program_header_only_) {
     if (header_->e_phoff >= Size()) {
-      *error_msg = StringPrintf("Failed to find e_phoff value %d less than %zd in %s",
-                                header_->e_phoff,
+      *error_msg = StringPrintf("Failed to find e_phoff value %" PRIu64 " less than %zd in %s",
+                                static_cast<uint64_t>(header_->e_phoff),
                                 Size(),
                                 file_->GetPath().c_str());
       return false;
     }
     if (header_->e_shoff >= Size()) {
-      *error_msg = StringPrintf("Failed to find e_shoff value %d less than %zd in %s",
-                                header_->e_shoff,
+      *error_msg = StringPrintf("Failed to find e_shoff value %" PRIu64 " less than %zd in %s",
+                                static_cast<uint64_t>(header_->e_shoff),
                                 Size(),
                                 file_->GetPath().c_str());
       return false;
@@ -565,39 +624,64 @@
   return true;
 }
 
-
-Elf32_Ehdr& ElfFile::GetHeader() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Ehdr& ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetHeader() const {
   CHECK(header_ != nullptr);  // Header has been checked in SetMap. This is a sanity check.
   return *header_;
 }
 
-byte* ElfFile::GetProgramHeadersStart() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+byte* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetProgramHeadersStart() const {
   CHECK(program_headers_start_ != nullptr);  // Header has been set in Setup. This is a sanity
                                              // check.
   return program_headers_start_;
 }
 
-byte* ElfFile::GetSectionHeadersStart() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+byte* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSectionHeadersStart() const {
   CHECK(!program_header_only_);              // Only used in "full" mode.
   CHECK(section_headers_start_ != nullptr);  // Is checked in CheckSectionsExist. Sanity check.
   return section_headers_start_;
 }
 
-Elf32_Phdr& ElfFile::GetDynamicProgramHeader() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Phdr& ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetDynamicProgramHeader() const {
   CHECK(dynamic_program_header_ != nullptr);  // Is checked in CheckSectionsExist. Sanity check.
   return *dynamic_program_header_;
 }
 
-Elf32_Dyn* ElfFile::GetDynamicSectionStart() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Dyn* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetDynamicSectionStart() const {
   CHECK(dynamic_section_start_ != nullptr);  // Is checked in CheckSectionsExist. Sanity check.
   return dynamic_section_start_;
 }
 
-static bool IsSymbolSectionType(Elf32_Word section_type) {
-  return ((section_type == SHT_SYMTAB) || (section_type == SHT_DYNSYM));
-}
-
-Elf32_Sym* ElfFile::GetSymbolSectionStart(Elf32_Word section_type) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Sym* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSymbolSectionStart(Elf_Word section_type) const {
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
   switch (section_type) {
     case SHT_SYMTAB: {
@@ -615,7 +699,12 @@
   }
 }
 
-const char* ElfFile::GetStringSectionStart(Elf32_Word section_type) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+const char* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetStringSectionStart(Elf_Word section_type) const {
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
   switch (section_type) {
     case SHT_SYMTAB: {
@@ -631,7 +720,12 @@
   }
 }
 
-const char* ElfFile::GetString(Elf32_Word section_type, Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+const char* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetString(Elf_Word section_type, Elf_Word i) const {
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
   if (i == 0) {
     return nullptr;
@@ -646,19 +740,39 @@
 // WARNING: The following methods do not check for an error condition (non-existent hash section).
 //          It is the caller's job to do this.
 
-Elf32_Word* ElfFile::GetHashSectionStart() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetHashSectionStart() const {
   return hash_section_start_;
 }
 
-Elf32_Word ElfFile::GetHashBucketNum() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetHashBucketNum() const {
   return GetHashSectionStart()[0];
 }
 
-Elf32_Word ElfFile::GetHashChainNum() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetHashChainNum() const {
   return GetHashSectionStart()[1];
 }
 
-Elf32_Word ElfFile::GetHashBucket(size_t i, bool* ok) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetHashBucket(size_t i, bool* ok) const {
   if (i >= GetHashBucketNum()) {
     *ok = false;
     return 0;
@@ -668,7 +782,12 @@
   return GetHashSectionStart()[2 + i];
 }
 
-Elf32_Word ElfFile::GetHashChain(size_t i, bool* ok) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetHashChain(size_t i, bool* ok) const {
   if (i >= GetHashBucketNum()) {
     *ok = false;
     return 0;
@@ -678,22 +797,37 @@
   return GetHashSectionStart()[2 + GetHashBucketNum() + i];
 }
 
-Elf32_Word ElfFile::GetProgramHeaderNum() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetProgramHeaderNum() const {
   return GetHeader().e_phnum;
 }
 
-Elf32_Phdr* ElfFile::GetProgramHeader(Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Phdr* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetProgramHeader(Elf_Word i) const {
   CHECK_LT(i, GetProgramHeaderNum()) << file_->GetPath();  // Sanity check for caller.
   byte* program_header = GetProgramHeadersStart() + (i * GetHeader().e_phentsize);
   if (program_header >= End()) {
     return nullptr;  // Failure condition.
   }
-  return reinterpret_cast<Elf32_Phdr*>(program_header);
+  return reinterpret_cast<Elf_Phdr*>(program_header);
 }
 
-Elf32_Phdr* ElfFile::FindProgamHeaderByType(Elf32_Word type) const {
-  for (Elf32_Word i = 0; i < GetProgramHeaderNum(); i++) {
-    Elf32_Phdr* program_header = GetProgramHeader(i);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Phdr* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindProgamHeaderByType(Elf_Word type) const {
+  for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
+    Elf_Phdr* program_header = GetProgramHeader(i);
     if (program_header->p_type == type) {
       return program_header;
     }
@@ -701,11 +835,21 @@
   return nullptr;
 }
 
-Elf32_Word ElfFile::GetSectionHeaderNum() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSectionHeaderNum() const {
   return GetHeader().e_shnum;
 }
 
-Elf32_Shdr* ElfFile::GetSectionHeader(Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Shdr* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSectionHeader(Elf_Word i) const {
   // Can only access arbitrary sections when we have the whole file, not just program header.
   // Even if we Load(), it doesn't bring in all the sections.
   CHECK(!program_header_only_) << file_->GetPath();
@@ -716,15 +860,20 @@
   if (section_header >= End()) {
     return nullptr;  // Failure condition.
   }
-  return reinterpret_cast<Elf32_Shdr*>(section_header);
+  return reinterpret_cast<Elf_Shdr*>(section_header);
 }
 
-Elf32_Shdr* ElfFile::FindSectionByType(Elf32_Word type) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Shdr* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindSectionByType(Elf_Word type) const {
   // Can only access arbitrary sections when we have the whole file, not just program header.
   // We could change this to switch on known types if they were detected during loading.
   CHECK(!program_header_only_) << file_->GetPath();
-  for (Elf32_Word i = 0; i < GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* section_header = GetSectionHeader(i);
+  for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
+    Elf_Shdr* section_header = GetSectionHeader(i);
     if (section_header->sh_type == type) {
       return section_header;
     }
@@ -746,16 +895,26 @@
   return h;
 }
 
-Elf32_Shdr* ElfFile::GetSectionNameStringSection() const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Shdr* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSectionNameStringSection() const {
   return GetSectionHeader(GetHeader().e_shstrndx);
 }
 
-const byte* ElfFile::FindDynamicSymbolAddress(const std::string& symbol_name) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+const byte* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindDynamicSymbolAddress(const std::string& symbol_name) const {
   // Check that we have a hash section.
   if (GetHashSectionStart() == nullptr) {
     return nullptr;  // Failure condition.
   }
-  const Elf32_Sym* sym = FindDynamicSymbol(symbol_name);
+  const Elf_Sym* sym = FindDynamicSymbol(symbol_name);
   if (sym != nullptr) {
     return base_address_ + sym->st_value;
   } else {
@@ -764,20 +923,25 @@
 }
 
 // WARNING: Only called from FindDynamicSymbolAddress. Elides check for hash section.
-const Elf32_Sym* ElfFile::FindDynamicSymbol(const std::string& symbol_name) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+const Elf_Sym* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindDynamicSymbol(const std::string& symbol_name) const {
   if (GetHashBucketNum() == 0) {
     // No dynamic symbols at all.
     return nullptr;
   }
-  Elf32_Word hash = elfhash(symbol_name.c_str());
-  Elf32_Word bucket_index = hash % GetHashBucketNum();
+  Elf_Word hash = elfhash(symbol_name.c_str());
+  Elf_Word bucket_index = hash % GetHashBucketNum();
   bool ok;
-  Elf32_Word symbol_and_chain_index = GetHashBucket(bucket_index, &ok);
+  Elf_Word symbol_and_chain_index = GetHashBucket(bucket_index, &ok);
   if (!ok) {
     return nullptr;
   }
   while (symbol_and_chain_index != 0 /* STN_UNDEF */) {
-    Elf32_Sym* symbol = GetSymbol(SHT_DYNSYM, symbol_and_chain_index);
+    Elf_Sym* symbol = GetSymbol(SHT_DYNSYM, symbol_and_chain_index);
     if (symbol == nullptr) {
       return nullptr;  // Failure condition.
     }
@@ -793,23 +957,49 @@
   return nullptr;
 }
 
-Elf32_Word ElfFile::GetSymbolNum(Elf32_Shdr& section_header) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::IsSymbolSectionType(Elf_Word section_type) {
+  return ((section_type == SHT_SYMTAB) || (section_type == SHT_DYNSYM));
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSymbolNum(Elf_Shdr& section_header) const {
   CHECK(IsSymbolSectionType(section_header.sh_type))
       << file_->GetPath() << " " << section_header.sh_type;
   CHECK_NE(0U, section_header.sh_entsize) << file_->GetPath();
   return section_header.sh_size / section_header.sh_entsize;
 }
 
-Elf32_Sym* ElfFile::GetSymbol(Elf32_Word section_type,
-                              Elf32_Word i) const {
-  Elf32_Sym* sym_start = GetSymbolSectionStart(section_type);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Sym* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSymbol(Elf_Word section_type,
+                Elf_Word i) const {
+  Elf_Sym* sym_start = GetSymbolSectionStart(section_type);
   if (sym_start == nullptr) {
     return nullptr;
   }
   return sym_start + i;
 }
 
-ElfFile::SymbolTable** ElfFile::GetSymbolTable(Elf32_Word section_type) {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+typename ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::SymbolTable** ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetSymbolTable(Elf_Word section_type) {
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
   switch (section_type) {
     case SHT_SYMTAB: {
@@ -825,9 +1015,14 @@
   }
 }
 
-Elf32_Sym* ElfFile::FindSymbolByName(Elf32_Word section_type,
-                                     const std::string& symbol_name,
-                                     bool build_map) {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Sym* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindSymbolByName(Elf_Word section_type,
+                       const std::string& symbol_name,
+                       bool build_map) {
   CHECK(!program_header_only_) << file_->GetPath();
   CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
 
@@ -836,20 +1031,22 @@
     if (*symbol_table == nullptr) {
       DCHECK(build_map);
       *symbol_table = new SymbolTable;
-      Elf32_Shdr* symbol_section = FindSectionByType(section_type);
+      Elf_Shdr* symbol_section = FindSectionByType(section_type);
       if (symbol_section == nullptr) {
         return nullptr;  // Failure condition.
       }
-      Elf32_Shdr* string_section = GetSectionHeader(symbol_section->sh_link);
+      Elf_Shdr* string_section = GetSectionHeader(symbol_section->sh_link);
       if (string_section == nullptr) {
         return nullptr;  // Failure condition.
       }
       for (uint32_t i = 0; i < GetSymbolNum(*symbol_section); i++) {
-        Elf32_Sym* symbol = GetSymbol(section_type, i);
+        Elf_Sym* symbol = GetSymbol(section_type, i);
         if (symbol == nullptr) {
           return nullptr;  // Failure condition.
         }
-        unsigned char type = ELF32_ST_TYPE(symbol->st_info);
+        unsigned char type = (sizeof(Elf_Addr) == sizeof(Elf64_Addr))
+                             ? ELF64_ST_TYPE(symbol->st_info)
+                             : ELF32_ST_TYPE(symbol->st_info);
         if (type == STT_NOTYPE) {
           continue;
         }
@@ -857,7 +1054,7 @@
         if (name == nullptr) {
           continue;
         }
-        std::pair<SymbolTable::iterator, bool> result =
+        std::pair<typename SymbolTable::iterator, bool> result =
             (*symbol_table)->insert(std::make_pair(name, symbol));
         if (!result.second) {
           // If a duplicate, make sure it has the same logical value. Seen on x86.
@@ -872,7 +1069,7 @@
       }
     }
     CHECK(*symbol_table != nullptr);
-    SymbolTable::const_iterator it = (*symbol_table)->find(symbol_name);
+    typename SymbolTable::const_iterator it = (*symbol_table)->find(symbol_name);
     if (it == (*symbol_table)->end()) {
       return nullptr;
     }
@@ -880,16 +1077,16 @@
   }
 
   // Fall back to linear search
-  Elf32_Shdr* symbol_section = FindSectionByType(section_type);
+  Elf_Shdr* symbol_section = FindSectionByType(section_type);
   if (symbol_section == nullptr) {
     return nullptr;
   }
-  Elf32_Shdr* string_section = GetSectionHeader(symbol_section->sh_link);
+  Elf_Shdr* string_section = GetSectionHeader(symbol_section->sh_link);
   if (string_section == nullptr) {
     return nullptr;
   }
   for (uint32_t i = 0; i < GetSymbolNum(*symbol_section); i++) {
-    Elf32_Sym* symbol = GetSymbol(section_type, i);
+    Elf_Sym* symbol = GetSymbol(section_type, i);
     if (symbol == nullptr) {
       return nullptr;  // Failure condition.
     }
@@ -904,20 +1101,30 @@
   return nullptr;
 }
 
-Elf32_Addr ElfFile::FindSymbolAddress(Elf32_Word section_type,
-                                      const std::string& symbol_name,
-                                      bool build_map) {
-  Elf32_Sym* symbol = FindSymbolByName(section_type, symbol_name, build_map);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Addr ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindSymbolAddress(Elf_Word section_type,
+                        const std::string& symbol_name,
+                        bool build_map) {
+  Elf_Sym* symbol = FindSymbolByName(section_type, symbol_name, build_map);
   if (symbol == nullptr) {
     return 0;
   }
   return symbol->st_value;
 }
 
-const char* ElfFile::GetString(Elf32_Shdr& string_section, Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+const char* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetString(Elf_Shdr& string_section, Elf_Word i) const {
   CHECK(!program_header_only_) << file_->GetPath();
   // TODO: remove this static_cast from enum when using -std=gnu++0x
-  if (static_cast<Elf32_Word>(SHT_STRTAB) != string_section.sh_type) {
+  if (static_cast<Elf_Word>(SHT_STRTAB) != string_section.sh_type) {
     return nullptr;  // Failure condition.
   }
   if (i >= string_section.sh_size) {
@@ -934,18 +1141,33 @@
   return reinterpret_cast<const char*>(string);
 }
 
-Elf32_Word ElfFile::GetDynamicNum() const {
-  return GetDynamicProgramHeader().p_filesz / sizeof(Elf32_Dyn);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetDynamicNum() const {
+  return GetDynamicProgramHeader().p_filesz / sizeof(Elf_Dyn);
 }
 
-Elf32_Dyn& ElfFile::GetDynamic(Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Dyn& ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetDynamic(Elf_Word i) const {
   CHECK_LT(i, GetDynamicNum()) << file_->GetPath();
   return *(GetDynamicSectionStart() + i);
 }
 
-Elf32_Dyn* ElfFile::FindDynamicByType(Elf32_Sword type) const {
-  for (Elf32_Word i = 0; i < GetDynamicNum(); i++) {
-    Elf32_Dyn* dyn = &GetDynamic(i);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Dyn* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindDynamicByType(Elf_Sword type) const {
+  for (Elf_Word i = 0; i < GetDynamicNum(); i++) {
+    Elf_Dyn* dyn = &GetDynamic(i);
     if (dyn->d_tag == type) {
       return dyn;
     }
@@ -953,8 +1175,13 @@
   return NULL;
 }
 
-Elf32_Word ElfFile::FindDynamicValueByType(Elf32_Sword type) const {
-  Elf32_Dyn* dyn = FindDynamicByType(type);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindDynamicValueByType(Elf_Sword type) const {
+  Elf_Dyn* dyn = FindDynamicByType(type);
   if (dyn == NULL) {
     return 0;
   } else {
@@ -962,53 +1189,88 @@
   }
 }
 
-Elf32_Rel* ElfFile::GetRelSectionStart(Elf32_Shdr& section_header) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Rel* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetRelSectionStart(Elf_Shdr& section_header) const {
   CHECK(SHT_REL == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
-  return reinterpret_cast<Elf32_Rel*>(Begin() + section_header.sh_offset);
+  return reinterpret_cast<Elf_Rel*>(Begin() + section_header.sh_offset);
 }
 
-Elf32_Word ElfFile::GetRelNum(Elf32_Shdr& section_header) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetRelNum(Elf_Shdr& section_header) const {
   CHECK(SHT_REL == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
   CHECK_NE(0U, section_header.sh_entsize) << file_->GetPath();
   return section_header.sh_size / section_header.sh_entsize;
 }
 
-Elf32_Rel& ElfFile::GetRel(Elf32_Shdr& section_header, Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Rel& ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetRel(Elf_Shdr& section_header, Elf_Word i) const {
   CHECK(SHT_REL == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
   CHECK_LT(i, GetRelNum(section_header)) << file_->GetPath();
   return *(GetRelSectionStart(section_header) + i);
 }
 
-Elf32_Rela* ElfFile::GetRelaSectionStart(Elf32_Shdr& section_header) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Rela* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+  Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+  ::GetRelaSectionStart(Elf_Shdr& section_header) const {
   CHECK(SHT_RELA == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
-  return reinterpret_cast<Elf32_Rela*>(Begin() + section_header.sh_offset);
+  return reinterpret_cast<Elf_Rela*>(Begin() + section_header.sh_offset);
 }
 
-Elf32_Word ElfFile::GetRelaNum(Elf32_Shdr& section_header) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Word ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetRelaNum(Elf_Shdr& section_header) const {
   CHECK(SHT_RELA == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
   return section_header.sh_size / section_header.sh_entsize;
 }
 
-Elf32_Rela& ElfFile::GetRela(Elf32_Shdr& section_header, Elf32_Word i) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Rela& ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetRela(Elf_Shdr& section_header, Elf_Word i) const {
   CHECK(SHT_RELA == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
   CHECK_LT(i, GetRelaNum(section_header)) << file_->GetPath();
   return *(GetRelaSectionStart(section_header) + i);
 }
 
 // Based on bionic phdr_table_get_load_size
-size_t ElfFile::GetLoadedSize() const {
-  Elf32_Addr min_vaddr = 0xFFFFFFFFu;
-  Elf32_Addr max_vaddr = 0x00000000u;
-  for (Elf32_Word i = 0; i < GetProgramHeaderNum(); i++) {
-    Elf32_Phdr* program_header = GetProgramHeader(i);
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+size_t ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GetLoadedSize() const {
+  Elf_Addr min_vaddr = 0xFFFFFFFFu;
+  Elf_Addr max_vaddr = 0x00000000u;
+  for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
+    Elf_Phdr* program_header = GetProgramHeader(i);
     if (program_header->p_type != PT_LOAD) {
       continue;
     }
-    Elf32_Addr begin_vaddr = program_header->p_vaddr;
+    Elf_Addr begin_vaddr = program_header->p_vaddr;
     if (begin_vaddr < min_vaddr) {
        min_vaddr = begin_vaddr;
     }
-    Elf32_Addr end_vaddr = program_header->p_vaddr + program_header->p_memsz;
+    Elf_Addr end_vaddr = program_header->p_vaddr + program_header->p_memsz;
     if (end_vaddr > max_vaddr) {
       max_vaddr = end_vaddr;
     }
@@ -1020,7 +1282,12 @@
   return loaded_size;
 }
 
-bool ElfFile::Load(bool executable, std::string* error_msg) {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::Load(bool executable, std::string* error_msg) {
   CHECK(program_header_only_) << file_->GetPath();
 
   if (executable) {
@@ -1057,8 +1324,8 @@
   }
 
   bool reserved = false;
-  for (Elf32_Word i = 0; i < GetProgramHeaderNum(); i++) {
-    Elf32_Phdr* program_header = GetProgramHeader(i);
+  for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
+    Elf_Phdr* program_header = GetProgramHeader(i);
     if (program_header == nullptr) {
       *error_msg = StringPrintf("No program header for entry %d in ELF file %s.",
                                 i, file_->GetPath().c_str());
@@ -1137,8 +1404,8 @@
     }
     if (file_length < (program_header->p_offset + program_header->p_memsz)) {
       *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF segment "
-                                "%d of %d bytes: '%s'", file_length, i,
-                                program_header->p_offset + program_header->p_memsz,
+                                "%d of %" PRIu64 " bytes: '%s'", file_length, i,
+                                static_cast<uint64_t>(program_header->p_offset + program_header->p_memsz),
                                 file_->GetPath().c_str());
       return false;
     }
@@ -1170,10 +1437,10 @@
                               file_->GetPath().c_str());
     return false;
   }
-  dynamic_section_start_ = reinterpret_cast<Elf32_Dyn*>(dsptr);
+  dynamic_section_start_ = reinterpret_cast<Elf_Dyn*>(dsptr);
 
-  for (Elf32_Word i = 0; i < GetDynamicNum(); i++) {
-    Elf32_Dyn& elf_dyn = GetDynamic(i);
+  for (Elf_Word i = 0; i < GetDynamicNum(); i++) {
+    Elf_Dyn& elf_dyn = GetDynamic(i);
     byte* d_ptr = base_address_ + elf_dyn.d_un.d_ptr;
     switch (elf_dyn.d_tag) {
       case DT_HASH: {
@@ -1182,7 +1449,7 @@
                                     d_ptr, file_->GetPath().c_str());
           return false;
         }
-        hash_section_start_ = reinterpret_cast<Elf32_Word*>(d_ptr);
+        hash_section_start_ = reinterpret_cast<Elf_Word*>(d_ptr);
         break;
       }
       case DT_STRTAB: {
@@ -1200,7 +1467,7 @@
                                     d_ptr, file_->GetPath().c_str());
           return false;
         }
-        dynsym_section_start_ = reinterpret_cast<Elf32_Sym*>(d_ptr);
+        dynsym_section_start_ = reinterpret_cast<Elf_Sym*>(d_ptr);
         break;
       }
       case DT_NULL: {
@@ -1228,7 +1495,12 @@
   return true;
 }
 
-bool ElfFile::ValidPointer(const byte* start) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::ValidPointer(const byte* start) const {
   for (size_t i = 0; i < segments_.size(); ++i) {
     const MemMap* segment = segments_[i];
     if (segment->Begin() <= start && start < segment->End()) {
@@ -1239,14 +1511,19 @@
 }
 
 
-Elf32_Shdr* ElfFile::FindSectionByName(const std::string& name) const {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+Elf_Shdr* ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FindSectionByName(const std::string& name) const {
   CHECK(!program_header_only_);
-  Elf32_Shdr* shstrtab_sec = GetSectionNameStringSection();
+  Elf_Shdr* shstrtab_sec = GetSectionNameStringSection();
   if (shstrtab_sec == nullptr) {
     return nullptr;
   }
   for (uint32_t i = 0; i < GetSectionHeaderNum(); i++) {
-    Elf32_Shdr* shdr = GetSectionHeader(i);
+    Elf_Shdr* shdr = GetSectionHeader(i);
     if (shdr == nullptr) {
       return nullptr;
     }
@@ -1731,14 +2008,19 @@
   return true;
 }
 
-bool ElfFile::FixupDebugSections(off_t base_address_delta) {
-  const Elf32_Shdr* debug_info = FindSectionByName(".debug_info");
-  const Elf32_Shdr* debug_abbrev = FindSectionByName(".debug_abbrev");
-  const Elf32_Shdr* eh_frame = FindSectionByName(".eh_frame");
-  const Elf32_Shdr* debug_str = FindSectionByName(".debug_str");
-  const Elf32_Shdr* debug_line = FindSectionByName(".debug_line");
-  const Elf32_Shdr* strtab_sec = FindSectionByName(".strtab");
-  const Elf32_Shdr* symtab_sec = FindSectionByName(".symtab");
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FixupDebugSections(off_t base_address_delta) {
+  const Elf_Shdr* debug_info = FindSectionByName(".debug_info");
+  const Elf_Shdr* debug_abbrev = FindSectionByName(".debug_abbrev");
+  const Elf_Shdr* eh_frame = FindSectionByName(".eh_frame");
+  const Elf_Shdr* debug_str = FindSectionByName(".debug_str");
+  const Elf_Shdr* debug_line = FindSectionByName(".debug_line");
+  const Elf_Shdr* strtab_sec = FindSectionByName(".strtab");
+  const Elf_Shdr* symtab_sec = FindSectionByName(".symtab");
 
   if (debug_info == nullptr || debug_abbrev == nullptr ||
       debug_str == nullptr || strtab_sec == nullptr || symtab_sec == nullptr) {
@@ -1781,7 +2063,12 @@
   return FixupDebugInfo(base_address_delta, info_iter.get());
 }
 
-void ElfFile::GdbJITSupport() {
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+void ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::GdbJITSupport() {
   // We only get here if we are only mapping the program header.
   DCHECK(program_header_only_);
 
@@ -1789,15 +2076,18 @@
   std::string error_msg;
   // Make it MAP_PRIVATE so we can just give it to gdb if all the necessary
   // sections are there.
-  std::unique_ptr<ElfFile> all_ptr(Open(const_cast<File*>(file_), PROT_READ | PROT_WRITE,
-                                        MAP_PRIVATE, &error_msg));
+  std::unique_ptr<ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+      Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>>
+      all_ptr(Open(const_cast<File*>(file_), PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE, &error_msg));
   if (all_ptr.get() == nullptr) {
     return;
   }
-  ElfFile& all = *all_ptr;
+  ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+      Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>& all = *all_ptr;
 
   // We need the eh_frame for gdb but debug info might be present without it.
-  const Elf32_Shdr* eh_frame = all.FindSectionByName(".eh_frame");
+  const Elf_Shdr* eh_frame = all.FindSectionByName(".eh_frame");
   if (eh_frame == nullptr) {
     return;
   }
@@ -1806,7 +2096,7 @@
   // We need to add in a strtab and symtab to the image.
   // all is MAP_PRIVATE so it can be written to freely.
   // We also already have strtab and symtab so we are fine there.
-  Elf32_Ehdr& elf_hdr = all.GetHeader();
+  Elf_Ehdr& elf_hdr = all.GetHeader();
   elf_hdr.e_entry = 0;
   elf_hdr.e_phoff = 0;
   elf_hdr.e_phnum = 0;
@@ -1824,4 +2114,477 @@
   gdb_file_mapping_.reset(all_ptr.release());
 }
 
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::Strip(std::string* error_msg) {
+  // ELF files produced by MCLinker look roughly like this
+  //
+  // +------------+
+  // | Elf_Ehdr   | contains number of Elf_Shdr and offset to first
+  // +------------+
+  // | Elf_Phdr   | program headers
+  // | Elf_Phdr   |
+  // | ...        |
+  // | Elf_Phdr   |
+  // +------------+
+  // | section    | mixture of needed and unneeded sections
+  // +------------+
+  // | section    |
+  // +------------+
+  // | ...        |
+  // +------------+
+  // | section    |
+  // +------------+
+  // | Elf_Shdr   | section headers
+  // | Elf_Shdr   |
+  // | ...        | contains offset to section start
+  // | Elf_Shdr   |
+  // +------------+
+  //
+  // To strip:
+  // - leave the Elf_Ehdr and Elf_Phdr values in place.
+  // - walk the sections making a new set of Elf_Shdr section headers for what we want to keep
+  // - move the sections we are keeping up to fill the gaps left by the sections we strip
+  // - write new Elf_Shdr section headers to end of file, updating Elf_Ehdr
+  // - truncate rest of file
+  //
+
+  std::vector<Elf_Shdr> section_headers;
+  std::vector<Elf_Word> section_headers_original_indexes;
+  section_headers.reserve(GetSectionHeaderNum());
+
+
+  Elf_Shdr* string_section = GetSectionNameStringSection();
+  CHECK(string_section != nullptr);
+  for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
+    Elf_Shdr* sh = GetSectionHeader(i);
+    CHECK(sh != nullptr);
+    const char* name = GetString(*string_section, sh->sh_name);
+    if (name == nullptr) {
+      CHECK_EQ(0U, i);
+      section_headers.push_back(*sh);
+      section_headers_original_indexes.push_back(0);
+      continue;
+    }
+    if (StartsWith(name, ".debug")
+        || (strcmp(name, ".strtab") == 0)
+        || (strcmp(name, ".symtab") == 0)) {
+      continue;
+    }
+    section_headers.push_back(*sh);
+    section_headers_original_indexes.push_back(i);
+  }
+  CHECK_NE(0U, section_headers.size());
+  CHECK_EQ(section_headers.size(), section_headers_original_indexes.size());
+
+  // Section 0 is the NULL section; section data starts at the offset of the first real section.
+  CHECK(GetSectionHeader(1) != nullptr);
+  Elf_Off offset = GetSectionHeader(1)->sh_offset;
+  for (size_t i = 1; i < section_headers.size(); i++) {
+    Elf_Shdr& new_sh = section_headers[i];
+    Elf_Shdr* old_sh = GetSectionHeader(section_headers_original_indexes[i]);
+    CHECK(old_sh != nullptr);
+    CHECK_EQ(new_sh.sh_name, old_sh->sh_name);
+    if (old_sh->sh_addralign > 1) {
+      offset = RoundUp(offset, old_sh->sh_addralign);
+    }
+    if (old_sh->sh_offset == offset) {
+      // already in place
+      offset += old_sh->sh_size;
+      continue;
+    }
+    // shift section earlier
+    memmove(Begin() + offset,
+            Begin() + old_sh->sh_offset,
+            old_sh->sh_size);
+    new_sh.sh_offset = offset;
+    offset += old_sh->sh_size;
+  }
+
+  Elf_Off shoff = offset;
+  size_t section_headers_size_in_bytes = section_headers.size() * sizeof(Elf_Shdr);
+  memcpy(Begin() + offset, &section_headers[0], section_headers_size_in_bytes);
+  offset += section_headers_size_in_bytes;
+
+  GetHeader().e_shnum = section_headers.size();
+  GetHeader().e_shoff = shoff;
+  int result = ftruncate(file_->Fd(), offset);
+  if (result != 0) {
+    *error_msg = StringPrintf("Failed to truncate while stripping ELF file: '%s': %s",
+                              file_->GetPath().c_str(), strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+static const bool DEBUG_FIXUP = false;
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::Fixup(uintptr_t base_address) {
+  if (!FixupDynamic(base_address)) {
+    LOG(WARNING) << "Failed to fixup .dynamic in " << file_->GetPath();
+    return false;
+  }
+  if (!FixupSectionHeaders(base_address)) {
+    LOG(WARNING) << "Failed to fixup section headers in " << file_->GetPath();
+    return false;
+  }
+  if (!FixupProgramHeaders(base_address)) {
+    LOG(WARNING) << "Failed to fixup program headers in " << file_->GetPath();
+    return false;
+  }
+  if (!FixupSymbols(base_address, true)) {
+    LOG(WARNING) << "Failed to fixup .dynsym in " << file_->GetPath();
+    return false;
+  }
+  if (!FixupSymbols(base_address, false)) {
+    LOG(WARNING) << "Failed to fixup .symtab in " << file_->GetPath();
+    return false;
+  }
+  if (!FixupRelocations(base_address)) {
+    LOG(WARNING) << "Failed to fixup .rel.dyn in " << file_->GetPath();
+    return false;
+  }
+  if (!FixupDebugSections(base_address)) {
+    LOG(WARNING) << "Failed to fixup debug sections in " << file_->GetPath();
+    return false;
+  }
+  return true;
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FixupDynamic(uintptr_t base_address) {
+  for (Elf_Word i = 0; i < GetDynamicNum(); i++) {
+    Elf_Dyn& elf_dyn = GetDynamic(i);
+    Elf_Word d_tag = elf_dyn.d_tag;
+    if (IsDynamicSectionPointer(d_tag, GetHeader().e_machine)) {
+      Elf_Addr d_ptr = elf_dyn.d_un.d_ptr;
+      if (DEBUG_FIXUP) {
+        LOG(INFO) << StringPrintf("In %s moving Elf_Dyn[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
+                                  GetFile().GetPath().c_str(), i,
+                                  static_cast<uint64_t>(d_ptr),
+                                  static_cast<uint64_t>(d_ptr + base_address));
+      }
+      d_ptr += base_address;
+      elf_dyn.d_un.d_ptr = d_ptr;
+    }
+  }
+  return true;
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FixupSectionHeaders(uintptr_t base_address) {
+  for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
+    Elf_Shdr* sh = GetSectionHeader(i);
+    CHECK(sh != nullptr);
+    // 0 implies that the section will not exist in the memory of the process
+    if (sh->sh_addr == 0) {
+      continue;
+    }
+    if (DEBUG_FIXUP) {
+      LOG(INFO) << StringPrintf("In %s moving Elf_Shdr[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
+                                GetFile().GetPath().c_str(), i,
+                                static_cast<uint64_t>(sh->sh_addr),
+                                static_cast<uint64_t>(sh->sh_addr + base_address));
+    }
+    sh->sh_addr += base_address;
+  }
+  return true;
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FixupProgramHeaders(uintptr_t base_address) {
+  // TODO: ELFObjectFile doesn't give access to Elf_Phdr, so we do the fixup ourselves for now.
+  for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
+    Elf_Phdr* ph = GetProgramHeader(i);
+    CHECK(ph != nullptr);
+    CHECK_EQ(ph->p_vaddr, ph->p_paddr) << GetFile().GetPath() << " i=" << i;
+    CHECK((ph->p_align == 0) || (0 == ((ph->p_vaddr - ph->p_offset) & (ph->p_align - 1))))
+            << GetFile().GetPath() << " i=" << i;
+    if (DEBUG_FIXUP) {
+      LOG(INFO) << StringPrintf("In %s moving Elf_Phdr[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
+                                GetFile().GetPath().c_str(), i,
+                                static_cast<uint64_t>(ph->p_vaddr),
+                                static_cast<uint64_t>(ph->p_vaddr + base_address));
+    }
+    ph->p_vaddr += base_address;
+    ph->p_paddr += base_address;
+    CHECK((ph->p_align == 0) || (0 == ((ph->p_vaddr - ph->p_offset) & (ph->p_align - 1))))
+            << GetFile().GetPath() << " i=" << i;
+  }
+  return true;
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FixupSymbols(uintptr_t base_address, bool dynamic) {
+  Elf_Word section_type = dynamic ? SHT_DYNSYM : SHT_SYMTAB;
+  // TODO: Unfortunately, ELFObjectFile has protected symbol access, so use ElfFile.
+  Elf_Shdr* symbol_section = FindSectionByType(section_type);
+  if (symbol_section == nullptr) {
+    // file is missing optional .symtab
+    CHECK(!dynamic) << GetFile().GetPath();
+    return true;
+  }
+  for (uint32_t i = 0; i < GetSymbolNum(*symbol_section); i++) {
+    Elf_Sym* symbol = GetSymbol(section_type, i);
+    CHECK(symbol != nullptr);
+    if (symbol->st_value != 0) {
+      if (DEBUG_FIXUP) {
+        LOG(INFO) << StringPrintf("In %s moving Elf_Sym[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
+                                  GetFile().GetPath().c_str(), i,
+                                  static_cast<uint64_t>(symbol->st_value),
+                                  static_cast<uint64_t>(symbol->st_value + base_address));
+      }
+      symbol->st_value += base_address;
+    }
+  }
+  return true;
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+bool ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+    Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel, Elf_Rela, Elf_Dyn, Elf_Off>
+    ::FixupRelocations(uintptr_t base_address) {
+  for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
+    Elf_Shdr* sh = GetSectionHeader(i);
+    CHECK(sh != nullptr);
+    if (sh->sh_type == SHT_REL) {
+      for (uint32_t i = 0; i < GetRelNum(*sh); i++) {
+        Elf_Rel& rel = GetRel(*sh, i);
+        if (DEBUG_FIXUP) {
+          LOG(INFO) << StringPrintf("In %s moving Elf_Rel[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
+                                    GetFile().GetPath().c_str(), i,
+                                    static_cast<uint64_t>(rel.r_offset),
+                                    static_cast<uint64_t>(rel.r_offset + base_address));
+        }
+        rel.r_offset += base_address;
+      }
+    } else if (sh->sh_type == SHT_RELA) {
+      for (uint32_t i = 0; i < GetRelaNum(*sh); i++) {
+        Elf_Rela& rela = GetRela(*sh, i);
+        if (DEBUG_FIXUP) {
+          LOG(INFO) << StringPrintf("In %s moving Elf_Rela[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
+                                    GetFile().GetPath().c_str(), i,
+                                    static_cast<uint64_t>(rela.r_offset),
+                                    static_cast<uint64_t>(rela.r_offset + base_address));
+        }
+        rela.r_offset += base_address;
+      }
+    }
+  }
+  return true;
+}
+
+// Explicit instantiations
+template class ElfFileImpl<Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Word,
+    Elf32_Sword, Elf32_Addr, Elf32_Sym, Elf32_Rel, Elf32_Rela, Elf32_Dyn, Elf32_Off>;
+template class ElfFileImpl<Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Word,
+    Elf64_Sword, Elf64_Addr, Elf64_Sym, Elf64_Rel, Elf64_Rela, Elf64_Dyn, Elf64_Off>;
+
+ElfFile::ElfFile(ElfFileImpl32* elf32) : is_elf64_(false) {
+  CHECK_NE(elf32, static_cast<ElfFileImpl32*>(nullptr));
+  elf_.elf32_ = elf32;
+}
+
+ElfFile::ElfFile(ElfFileImpl64* elf64) : is_elf64_(true) {
+  CHECK_NE(elf64, static_cast<ElfFileImpl64*>(nullptr));
+  elf_.elf64_ = elf64;
+}
+
+ElfFile::~ElfFile() {
+  if (is_elf64_) {
+    CHECK_NE(elf_.elf64_, static_cast<ElfFileImpl64*>(nullptr));
+    delete elf_.elf64_;
+  } else {
+    CHECK_NE(elf_.elf32_, static_cast<ElfFileImpl32*>(nullptr));
+    delete elf_.elf32_;
+  }
+}
+
+ElfFile* ElfFile::Open(File* file, bool writable, bool program_header_only, std::string* error_msg) {
+  if (file->GetLength() < EI_NIDENT) {
+    *error_msg = StringPrintf("File %s is too short to be a valid ELF file",
+                              file->GetPath().c_str());
+    return nullptr;
+  }
+  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT, PROT_READ, MAP_PRIVATE, file->Fd(), 0,
+                                              file->GetPath().c_str(), error_msg));
+  if (map == nullptr || map->Size() != EI_NIDENT) {
+    return nullptr;
+  }
+  byte *header = map->Begin();
+  if (header[EI_CLASS] == ELFCLASS64) {
+    ElfFileImpl64* elf_file_impl = ElfFileImpl64::Open(file, writable, program_header_only, error_msg);
+    if (elf_file_impl == nullptr)
+      return nullptr;
+    return new ElfFile(elf_file_impl);
+  } else if (header[EI_CLASS] == ELFCLASS32) {
+    ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file, writable, program_header_only, error_msg);
+    if (elf_file_impl == nullptr)
+      return nullptr;
+    return new ElfFile(elf_file_impl);
+  } else {
+    *error_msg = StringPrintf("Failed to find expected EI_CLASS value %d or %d in %s, found %d",
+                              ELFCLASS32, ELFCLASS64,
+                              file->GetPath().c_str(),
+                              header[EI_CLASS]);
+    return nullptr;
+  }
+}
+
+ElfFile* ElfFile::Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg) {
+  if (file->GetLength() < EI_NIDENT) {
+    *error_msg = StringPrintf("File %s is too short to be a valid ELF file",
+                              file->GetPath().c_str());
+    return nullptr;
+  }
+  std::unique_ptr<MemMap> map(MemMap::MapFile(EI_NIDENT, PROT_READ, MAP_PRIVATE, file->Fd(), 0,
+                                              file->GetPath().c_str(), error_msg));
+  if (map == nullptr || map->Size() != EI_NIDENT) {
+    return nullptr;
+  }
+  byte *header = map->Begin();
+  if (header[EI_CLASS] == ELFCLASS64) {
+    ElfFileImpl64* elf_file_impl = ElfFileImpl64::Open(file, mmap_prot, mmap_flags, error_msg);
+    if (elf_file_impl == nullptr)
+      return nullptr;
+    return new ElfFile(elf_file_impl);
+  } else if (header[EI_CLASS] == ELFCLASS32) {
+    ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file, mmap_prot, mmap_flags, error_msg);
+    if (elf_file_impl == nullptr)
+      return nullptr;
+    return new ElfFile(elf_file_impl);
+  } else {
+    *error_msg = StringPrintf("Failed to find expected EI_CLASS value %d or %d in %s, found %d",
+                              ELFCLASS32, ELFCLASS64,
+                              file->GetPath().c_str(),
+                              header[EI_CLASS]);
+    return nullptr;
+  }
+}
+
+#define DELEGATE_TO_IMPL(func, ...) \
+  if (is_elf64_) { \
+    CHECK_NE(elf_.elf64_, static_cast<ElfFileImpl64*>(nullptr)); \
+    return elf_.elf64_->func(__VA_ARGS__); \
+  } else { \
+    CHECK_NE(elf_.elf32_, static_cast<ElfFileImpl32*>(nullptr)); \
+    return elf_.elf32_->func(__VA_ARGS__); \
+  }
+
+bool ElfFile::Load(bool executable, std::string* error_msg) {
+  DELEGATE_TO_IMPL(Load, executable, error_msg);
+}
+
+const byte* ElfFile::FindDynamicSymbolAddress(const std::string& symbol_name) const {
+  DELEGATE_TO_IMPL(FindDynamicSymbolAddress, symbol_name);
+}
+
+size_t ElfFile::Size() const {
+  DELEGATE_TO_IMPL(Size);
+}
+
+byte* ElfFile::Begin() const {
+  DELEGATE_TO_IMPL(Begin);
+}
+
+byte* ElfFile::End() const {
+  DELEGATE_TO_IMPL(End);
+}
+
+const File& ElfFile::GetFile() const {
+  DELEGATE_TO_IMPL(GetFile);
+}
+
+bool ElfFile::GetSectionOffsetAndSize(const char* section_name, uint64_t* offset, uint64_t* size) {
+  if (is_elf64_) {
+    CHECK_NE(elf_.elf64_, static_cast<ElfFileImpl64*>(nullptr));
+
+    Elf64_Shdr *shdr = elf_.elf64_->FindSectionByName(section_name);
+    if (shdr == nullptr)
+      return false;
+
+    if (offset != nullptr)
+      *offset = shdr->sh_offset;
+    if (size != nullptr)
+      *size = shdr->sh_size;
+    return true;
+  } else {
+    CHECK_NE(elf_.elf32_, static_cast<ElfFileImpl32*>(nullptr));
+
+    Elf32_Shdr *shdr = elf_.elf32_->FindSectionByName(section_name);
+    if (shdr == nullptr)
+      return false;
+
+    if (offset != nullptr)
+      *offset = shdr->sh_offset;
+    if (size != nullptr)
+      *size = shdr->sh_size;
+    return true;
+  }
+}
+
+uint64_t ElfFile::FindSymbolAddress(unsigned section_type,
+                                    const std::string& symbol_name,
+                                    bool build_map) {
+  DELEGATE_TO_IMPL(FindSymbolAddress, section_type, symbol_name, build_map);
+}
+
+size_t ElfFile::GetLoadedSize() const {
+  DELEGATE_TO_IMPL(GetLoadedSize);
+}
+
+bool ElfFile::Strip(File* file, std::string* error_msg) {
+  std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file, true, false, error_msg));
+  if (elf_file.get() == nullptr) {
+    return false;
+  }
+
+  if (elf_file->is_elf64_)
+    return elf_file->elf_.elf64_->Strip(error_msg);
+  else
+    return elf_file->elf_.elf32_->Strip(error_msg);
+}
+
+bool ElfFile::Fixup(uintptr_t base_address) {
+  DELEGATE_TO_IMPL(Fixup, base_address);
+}
+
+ElfFileImpl32* ElfFile::GetImpl32() const {
+  CHECK(!is_elf64_);
+  CHECK_NE(elf_.elf32_, static_cast<ElfFileImpl32*>(nullptr));
+  return elf_.elf32_;
+}
+
+ElfFileImpl64* ElfFile::GetImpl64() const {
+  CHECK(is_elf64_);
+  CHECK_NE(elf_.elf64_, static_cast<ElfFileImpl64*>(nullptr));
+  return elf_.elf64_;
+}
+
 }  // namespace art
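
ElfFile::Open above decides between ElfFileImpl32 and ElfFileImpl64 by mapping the first EI_NIDENT bytes and inspecting e_ident[EI_CLASS]. A minimal standalone sketch of that probe, using only <elf.h> and stdio instead of ART's File/MemMap wrappers (the function name and return type are illustrative, not ART API):

#include <elf.h>
#include <cstdio>

enum class ElfClass { kNone, kElf32, kElf64 };

// Peek at the identification bytes of an ELF file and report its class.
ElfClass PeekElfClass(const char* path) {
  unsigned char ident[EI_NIDENT] = {};
  FILE* f = fopen(path, "rb");
  if (f == nullptr) {
    return ElfClass::kNone;
  }
  size_t read = fread(ident, 1, sizeof(ident), f);
  fclose(f);
  if (read != sizeof(ident)) {
    return ElfClass::kNone;  // Too short to be a valid ELF file.
  }
  if (ident[EI_CLASS] == ELFCLASS64) return ElfClass::kElf64;
  if (ident[EI_CLASS] == ELFCLASS32) return ElfClass::kElf32;
  return ElfClass::kNone;  // Unknown or corrupt header.
}
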
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index 916d693..ea6538b 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -17,24 +17,13 @@
 #ifndef ART_RUNTIME_ELF_FILE_H_
 #define ART_RUNTIME_ELF_FILE_H_
 
-#include <map>
-#include <memory>
-#include <vector>
+#include <string>
 
 #include "base/unix_file/fd_file.h"
-#include "globals.h"
-#include "elf_utils.h"
-#include "mem_map.h"
-#include "os.h"
+#include "elf_file_impl.h"
 
 namespace art {
 
-// Interface to GDB JIT for backtrace information.
-extern "C" {
-  struct JITCodeEntry;
-}
-
-
 // Used for compile time and runtime for ElfFile access. Because of
 // the need for use at runtime, cannot directly use LLVM classes such as
 // ELFObjectFile.
@@ -46,162 +35,50 @@
   static ElfFile* Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg);
   ~ElfFile();
 
+  const bool is_elf64_;
+
   // Load segments into memory based on PT_LOAD program headers
-
-  const File& GetFile() const {
-    return *file_;
-  }
-
-  byte* Begin() const {
-    return map_->Begin();
-  }
-
-  byte* End() const {
-    return map_->End();
-  }
-
-  size_t Size() const {
-    return map_->Size();
-  }
-
-  Elf32_Ehdr& GetHeader() const;
-
-  Elf32_Word GetProgramHeaderNum() const;
-  Elf32_Phdr* GetProgramHeader(Elf32_Word) const;
-
-  Elf32_Word GetSectionHeaderNum() const;
-  Elf32_Shdr* GetSectionHeader(Elf32_Word) const;
-  Elf32_Shdr* FindSectionByType(Elf32_Word type) const;
-  Elf32_Shdr* FindSectionByName(const std::string& name) const;
-
-  Elf32_Shdr* GetSectionNameStringSection() const;
-
-  // Find .dynsym using .hash for more efficient lookup than FindSymbolAddress.
-  const byte* FindDynamicSymbolAddress(const std::string& symbol_name) const;
-
-  Elf32_Word GetSymbolNum(Elf32_Shdr&) const;
-  Elf32_Sym* GetSymbol(Elf32_Word section_type, Elf32_Word i) const;
-
-  // Find address of symbol in specified table, returning 0 if it is
-  // not found. See FindSymbolByName for an explanation of build_map.
-  Elf32_Addr FindSymbolAddress(Elf32_Word section_type,
-                               const std::string& symbol_name,
-                               bool build_map);
-
-  // Lookup a string given string section and offset. Returns nullptr for
-  // special 0 offset.
-  const char* GetString(Elf32_Shdr&, Elf32_Word) const;
-
-  Elf32_Word GetDynamicNum() const;
-  Elf32_Dyn& GetDynamic(Elf32_Word) const;
-
-  Elf32_Word GetRelNum(Elf32_Shdr&) const;
-  Elf32_Rel& GetRel(Elf32_Shdr&, Elf32_Word) const;
-
-  Elf32_Word GetRelaNum(Elf32_Shdr&) const;
-  Elf32_Rela& GetRela(Elf32_Shdr&, Elf32_Word) const;
-
-  // Returns the expected size when the file is loaded at runtime
-  size_t GetLoadedSize() const;
-
-  // Load segments into memory based on PT_LOAD program headers.
-  // executable is true at run time, false at compile time.
   bool Load(bool executable, std::string* error_msg);
 
-  bool FixupDebugSections(off_t base_address_delta);
+  const byte* FindDynamicSymbolAddress(const std::string& symbol_name) const;
+
+  size_t Size() const;
+
+  byte* Begin() const;
+
+  byte* End() const;
+
+  const File& GetFile() const;
+
+  bool GetSectionOffsetAndSize(const char* section_name, uint64_t* offset, uint64_t* size);
+
+  uint64_t FindSymbolAddress(unsigned section_type,
+                             const std::string& symbol_name,
+                             bool build_map);
+
+  size_t GetLoadedSize() const;
+
+  // Strip an ELF file of unneeded debugging information.
+  // Returns true on success, false on failure.
+  static bool Strip(File* file, std::string* error_msg);
+
+  // Fixup an ELF file so that the oat header will be loaded at oat_data_begin.
+  // Returns true on success, false on failure.
+  static bool Fixup(File* file, uintptr_t oat_data_begin);
+
+  bool Fixup(uintptr_t base_address);
+
+  ElfFileImpl32* GetImpl32() const;
+  ElfFileImpl64* GetImpl64() const;
 
  private:
-  ElfFile(File* file, bool writable, bool program_header_only);
+  explicit ElfFile(ElfFileImpl32* elf32);
+  explicit ElfFile(ElfFileImpl64* elf64);
 
-  bool Setup(int prot, int flags, std::string* error_msg);
-
-  bool SetMap(MemMap* map, std::string* error_msg);
-
-  byte* GetProgramHeadersStart() const;
-  byte* GetSectionHeadersStart() const;
-  Elf32_Phdr& GetDynamicProgramHeader() const;
-  Elf32_Dyn* GetDynamicSectionStart() const;
-  Elf32_Sym* GetSymbolSectionStart(Elf32_Word section_type) const;
-  const char* GetStringSectionStart(Elf32_Word section_type) const;
-  Elf32_Rel* GetRelSectionStart(Elf32_Shdr&) const;
-  Elf32_Rela* GetRelaSectionStart(Elf32_Shdr&) const;
-  Elf32_Word* GetHashSectionStart() const;
-  Elf32_Word GetHashBucketNum() const;
-  Elf32_Word GetHashChainNum() const;
-  Elf32_Word GetHashBucket(size_t i, bool* ok) const;
-  Elf32_Word GetHashChain(size_t i, bool* ok) const;
-
-  typedef std::map<std::string, Elf32_Sym*> SymbolTable;
-  SymbolTable** GetSymbolTable(Elf32_Word section_type);
-
-  bool ValidPointer(const byte* start) const;
-
-  const Elf32_Sym* FindDynamicSymbol(const std::string& symbol_name) const;
-
-  // Check that certain sections and their dependencies exist.
-  bool CheckSectionsExist(std::string* error_msg) const;
-
-  // Check that the link of the first section links to the second section.
-  bool CheckSectionsLinked(const byte* source, const byte* target) const;
-
-  // Check whether the offset is in range, and set to target to Begin() + offset if OK.
-  bool CheckAndSet(Elf32_Off offset, const char* label, byte** target, std::string* error_msg);
-
-  // Find symbol in specified table, returning nullptr if it is not found.
-  //
-  // If build_map is true, builds a map to speed repeated access. The
-  // map does not included untyped symbol values (aka STT_NOTYPE)
-  // since they can contain duplicates. If build_map is false, the map
-  // will be used if it was already created. Typically build_map
-  // should be set unless only a small number of symbols will be
-  // looked up.
-  Elf32_Sym* FindSymbolByName(Elf32_Word section_type,
-                              const std::string& symbol_name,
-                              bool build_map);
-
-  Elf32_Phdr* FindProgamHeaderByType(Elf32_Word type) const;
-
-  Elf32_Dyn* FindDynamicByType(Elf32_Sword type) const;
-  Elf32_Word FindDynamicValueByType(Elf32_Sword type) const;
-
-  // Lookup a string by section type. Returns nullptr for special 0 offset.
-  const char* GetString(Elf32_Word section_type, Elf32_Word) const;
-
-  const File* const file_;
-  const bool writable_;
-  const bool program_header_only_;
-
-  // ELF header mapping. If program_header_only_ is false, will
-  // actually point to the entire elf file.
-  std::unique_ptr<MemMap> map_;
-  Elf32_Ehdr* header_;
-  std::vector<MemMap*> segments_;
-
-  // Pointer to start of first PT_LOAD program segment after Load()
-  // when program_header_only_ is true.
-  byte* base_address_;
-
-  // The program header should always available but use GetProgramHeadersStart() to be sure.
-  byte* program_headers_start_;
-
-  // Conditionally available values. Use accessors to ensure they exist if they are required.
-  byte* section_headers_start_;
-  Elf32_Phdr* dynamic_program_header_;
-  Elf32_Dyn* dynamic_section_start_;
-  Elf32_Sym* symtab_section_start_;
-  Elf32_Sym* dynsym_section_start_;
-  char* strtab_section_start_;
-  char* dynstr_section_start_;
-  Elf32_Word* hash_section_start_;
-
-  SymbolTable* symtab_symbol_table_;
-  SymbolTable* dynsym_symbol_table_;
-
-  // Support for GDB JIT
-  byte* jit_elf_image_;
-  JITCodeEntry* jit_gdb_entry_;
-  std::unique_ptr<ElfFile> gdb_file_mapping_;
-  void GdbJITSupport();
+  union ElfFileContainer {
+    ElfFileImpl32* elf32_;
+    ElfFileImpl64* elf64_;
+  } elf_;
 };
 
 }  // namespace art
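
The new ElfFile is a thin tagged-union wrapper: is_elf64_ records which member of ElfFileContainer is live, and the DELEGATE_TO_IMPL macro in elf_file.cc forwards each call accordingly. A reduced sketch of the same shape, using placeholder Impl32/Impl64 types rather than the ART classes:

#include <cstddef>

struct Impl32 { std::size_t Size() const { return 32; } };
struct Impl64 { std::size_t Size() const { return 64; } };

class Wrapper {
 public:
  explicit Wrapper(Impl32* p) : is64_(false) { u_.p32 = p; }
  explicit Wrapper(Impl64* p) : is64_(true)  { u_.p64 = p; }
  ~Wrapper() { if (is64_) delete u_.p64; else delete u_.p32; }

  std::size_t Size() const {
    // Same shape as DELEGATE_TO_IMPL: branch once on the flag,
    // then forward to whichever implementation is live.
    return is64_ ? u_.p64->Size() : u_.p32->Size();
  }

 private:
  const bool is64_;
  union { Impl32* p32; Impl64* p64; } u_;
};
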
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
new file mode 100644
index 0000000..942dc291
--- /dev/null
+++ b/runtime/elf_file_impl.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ELF_FILE_IMPL_H_
+#define ART_RUNTIME_ELF_FILE_IMPL_H_
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "base/unix_file/fd_file.h"
+#include "globals.h"
+#include "elf_utils.h"
+#include "mem_map.h"
+#include "os.h"
+
+namespace art {
+
+extern "C" {
+  struct JITCodeEntry;
+}
+
+template <typename Elf_Ehdr, typename Elf_Phdr, typename Elf_Shdr, typename Elf_Word,
+          typename Elf_Sword, typename Elf_Addr, typename Elf_Sym, typename Elf_Rel,
+          typename Elf_Rela, typename Elf_Dyn, typename Elf_Off>
+class ElfFileImpl {
+ public:
+  static ElfFileImpl* Open(File* file, bool writable, bool program_header_only, std::string* error_msg);
+  static ElfFileImpl* Open(File* file, int mmap_prot, int mmap_flags, std::string* error_msg);
+  ~ElfFileImpl();
+
+  const File& GetFile() const {
+    return *file_;
+  }
+
+  byte* Begin() const {
+    return map_->Begin();
+  }
+
+  byte* End() const {
+    return map_->End();
+  }
+
+  size_t Size() const {
+    return map_->Size();
+  }
+
+  Elf_Ehdr& GetHeader() const;
+
+  Elf_Word GetProgramHeaderNum() const;
+  Elf_Phdr* GetProgramHeader(Elf_Word) const;
+
+  Elf_Word GetSectionHeaderNum() const;
+  Elf_Shdr* GetSectionHeader(Elf_Word) const;
+  Elf_Shdr* FindSectionByType(Elf_Word type) const;
+  Elf_Shdr* FindSectionByName(const std::string& name) const;
+
+  Elf_Shdr* GetSectionNameStringSection() const;
+
+  // Find .dynsym using .hash for more efficient lookup than FindSymbolAddress.
+  const byte* FindDynamicSymbolAddress(const std::string& symbol_name) const;
+
+  static bool IsSymbolSectionType(Elf_Word section_type);
+  Elf_Word GetSymbolNum(Elf_Shdr&) const;
+  Elf_Sym* GetSymbol(Elf_Word section_type, Elf_Word i) const;
+
+  // Find address of symbol in specified table, returning 0 if it is
+  // not found. See FindSymbolByName for an explanation of build_map.
+  Elf_Addr FindSymbolAddress(Elf_Word section_type,
+                             const std::string& symbol_name,
+                             bool build_map);
+
+  // Lookup a string given string section and offset. Returns nullptr for
+  // special 0 offset.
+  const char* GetString(Elf_Shdr&, Elf_Word) const;
+
+  Elf_Word GetDynamicNum() const;
+  Elf_Dyn& GetDynamic(Elf_Word) const;
+
+  Elf_Word GetRelNum(Elf_Shdr&) const;
+  Elf_Rel& GetRel(Elf_Shdr&, Elf_Word) const;
+
+  Elf_Word GetRelaNum(Elf_Shdr&) const;
+  Elf_Rela& GetRela(Elf_Shdr&, Elf_Word) const;
+
+  // Returns the expected size when the file is loaded at runtime
+  size_t GetLoadedSize() const;
+
+  // Load segments into memory based on PT_LOAD program headers.
+  // executable is true at run time, false at compile time.
+  bool Load(bool executable, std::string* error_msg);
+
+  bool Fixup(uintptr_t base_address);
+  bool FixupDynamic(uintptr_t base_address);
+  bool FixupSectionHeaders(uintptr_t base_address);
+  bool FixupProgramHeaders(uintptr_t base_address);
+  bool FixupSymbols(uintptr_t base_address, bool dynamic);
+  bool FixupRelocations(uintptr_t base_address);
+  bool FixupDebugSections(off_t base_address_delta);
+
+  bool Strip(std::string* error_msg);
+
+ private:
+  ElfFileImpl(File* file, bool writable, bool program_header_only);
+
+  bool Setup(int prot, int flags, std::string* error_msg);
+
+  bool SetMap(MemMap* map, std::string* error_msg);
+
+  byte* GetProgramHeadersStart() const;
+  byte* GetSectionHeadersStart() const;
+  Elf_Phdr& GetDynamicProgramHeader() const;
+  Elf_Dyn* GetDynamicSectionStart() const;
+  Elf_Sym* GetSymbolSectionStart(Elf_Word section_type) const;
+  const char* GetStringSectionStart(Elf_Word section_type) const;
+  Elf_Rel* GetRelSectionStart(Elf_Shdr&) const;
+  Elf_Rela* GetRelaSectionStart(Elf_Shdr&) const;
+  Elf_Word* GetHashSectionStart() const;
+  Elf_Word GetHashBucketNum() const;
+  Elf_Word GetHashChainNum() const;
+  Elf_Word GetHashBucket(size_t i, bool* ok) const;
+  Elf_Word GetHashChain(size_t i, bool* ok) const;
+
+  typedef std::map<std::string, Elf_Sym*> SymbolTable;
+  SymbolTable** GetSymbolTable(Elf_Word section_type);
+
+  bool ValidPointer(const byte* start) const;
+
+  const Elf_Sym* FindDynamicSymbol(const std::string& symbol_name) const;
+
+  // Check that certain sections and their dependencies exist.
+  bool CheckSectionsExist(std::string* error_msg) const;
+
+  // Check that the link of the first section links to the second section.
+  bool CheckSectionsLinked(const byte* source, const byte* target) const;
+
+  // Check whether the offset is in range, and set to target to Begin() + offset if OK.
+  bool CheckAndSet(Elf32_Off offset, const char* label, byte** target, std::string* error_msg);
+
+  // Find symbol in specified table, returning nullptr if it is not found.
+  //
+  // If build_map is true, builds a map to speed repeated access. The
+  // map does not include untyped symbol values (aka STT_NOTYPE)
+  // since they can contain duplicates. If build_map is false, the map
+  // will be used if it was already created. Typically build_map
+  // should be set unless only a small number of symbols will be
+  // looked up.
+  Elf_Sym* FindSymbolByName(Elf_Word section_type,
+                            const std::string& symbol_name,
+                            bool build_map);
+
+  Elf_Phdr* FindProgamHeaderByType(Elf_Word type) const;
+
+  Elf_Dyn* FindDynamicByType(Elf_Sword type) const;
+  Elf_Word FindDynamicValueByType(Elf_Sword type) const;
+
+  // Lookup a string by section type. Returns nullptr for special 0 offset.
+  const char* GetString(Elf_Word section_type, Elf_Word) const;
+
+  const File* const file_;
+  const bool writable_;
+  const bool program_header_only_;
+
+  // ELF header mapping. If program_header_only_ is false, this will
+  // actually point to the entire ELF file.
+  std::unique_ptr<MemMap> map_;
+  Elf_Ehdr* header_;
+  std::vector<MemMap*> segments_;
+
+  // Pointer to start of first PT_LOAD program segment after Load()
+  // when program_header_only_ is true.
+  byte* base_address_;
+
+  // The program headers should always be available, but use GetProgramHeadersStart() to be sure.
+  byte* program_headers_start_;
+
+  // Conditionally available values. Use accessors to ensure they exist if they are required.
+  byte* section_headers_start_;
+  Elf_Phdr* dynamic_program_header_;
+  Elf_Dyn* dynamic_section_start_;
+  Elf_Sym* symtab_section_start_;
+  Elf_Sym* dynsym_section_start_;
+  char* strtab_section_start_;
+  char* dynstr_section_start_;
+  Elf_Word* hash_section_start_;
+
+  SymbolTable* symtab_symbol_table_;
+  SymbolTable* dynsym_symbol_table_;
+
+  // Support for GDB JIT
+  byte* jit_elf_image_;
+  JITCodeEntry* jit_gdb_entry_;
+  std::unique_ptr<ElfFileImpl<Elf_Ehdr, Elf_Phdr, Elf_Shdr, Elf_Word,
+                  Elf_Sword, Elf_Addr, Elf_Sym, Elf_Rel,
+                  Elf_Rela, Elf_Dyn, Elf_Off>> gdb_file_mapping_;
+  void GdbJITSupport();
+};
+
+// Explicitly instantiated in elf_file.cc
+typedef ElfFileImpl<Elf32_Ehdr, Elf32_Phdr, Elf32_Shdr, Elf32_Word, Elf32_Sword,
+                    Elf32_Addr, Elf32_Sym, Elf32_Rel, Elf32_Rela, Elf32_Dyn, Elf32_Off> ElfFileImpl32;
+typedef ElfFileImpl<Elf64_Ehdr, Elf64_Phdr, Elf64_Shdr, Elf64_Word, Elf64_Sword,
+                    Elf64_Addr, Elf64_Sym, Elf64_Rel, Elf64_Rela, Elf64_Dyn, Elf64_Off> ElfFileImpl64;
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ELF_FILE_IMPL_H_
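
ElfFileImpl takes the whole family of ELF types as template parameters, so one function body serves both ELFCLASS32 and ELFCLASS64 files. A small illustration of why that works, assuming the standard <elf.h> structs (MaxLoadEnd is a made-up helper, loosely modeled on GetLoadedSize above, not part of the patch):

#include <elf.h>
#include <cstdint>

// One template body handles both word sizes because the 32- and 64-bit
// program header structs expose the same field names.
template <typename Elf_Ehdr, typename Elf_Phdr>
uint64_t MaxLoadEnd(const Elf_Ehdr& ehdr, const Elf_Phdr* phdrs) {
  uint64_t max_end = 0;
  for (int i = 0; i < ehdr.e_phnum; ++i) {
    if (phdrs[i].p_type != PT_LOAD) {
      continue;  // Only loadable segments contribute to the mapped size.
    }
    uint64_t end = phdrs[i].p_vaddr + phdrs[i].p_memsz;
    if (end > max_end) {
      max_end = end;
    }
  }
  return max_end;
}

// Instantiated as MaxLoadEnd<Elf32_Ehdr, Elf32_Phdr>(...) or
// MaxLoadEnd<Elf64_Ehdr, Elf64_Phdr>(...), mirroring ElfFileImpl32/ElfFileImpl64.
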
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1dbbb70..f970ef8 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -492,19 +492,18 @@
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
 
-    if (method->IsStatic() && !method->GetDeclaringClass()->IsInitialized()) {
+    StackHandleScope<1> hs(self);
+    MethodHelper mh(hs.NewHandle(method));
+    if (mh.Get()->IsStatic() && !mh.Get()->GetDeclaringClass()->IsInitialized()) {
       // Ensure static method's class is initialized.
       StackHandleScope<1> hs(self);
-      Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
+      Handle<mirror::Class> h_class(hs.NewHandle(mh.Get()->GetDeclaringClass()));
       if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-        DCHECK(Thread::Current()->IsExceptionPending()) << PrettyMethod(method);
+        DCHECK(Thread::Current()->IsExceptionPending()) << PrettyMethod(mh.Get());
         self->PopManagedStackFragment(fragment);
         return 0;
       }
     }
-
-    StackHandleScope<1> hs(self);
-    MethodHelper mh(hs.NewHandle(method));
     JValue result = interpreter::EnterInterpreterFromStub(self, mh, code_item, *shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index 7e770f6..e571a0e 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -77,7 +77,7 @@
     return nullptr;
   }
   uint32_t idx = ExtractIndex(iref);
-  mirror::Object* obj = table_[idx].GetReference()->Read<kWithoutReadBarrier>();
+  mirror::Object* obj = table_[idx].GetReference()->Read<kReadBarrierOption>();
   VerifyObject(obj);
   return obj;
 }
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 07224ef..8fb1712 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -529,10 +529,10 @@
   }
 
   self->PushShadowFrame(shadow_frame);
-  ArtMethod* method = shadow_frame->GetMethod();
+  DCHECK_EQ(shadow_frame->GetMethod(), mh.Get());
   // Ensure static methods are initialized.
-  if (method->IsStatic()) {
-    mirror::Class* declaring_class = method->GetDeclaringClass();
+  if (mh.Get()->IsStatic()) {
+    mirror::Class* declaring_class = mh.Get()->GetDeclaringClass();
     if (UNLIKELY(!declaring_class->IsInitialized())) {
       StackHandleScope<1> hs(self);
       HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
@@ -546,15 +546,15 @@
     }
   }
 
-  if (LIKELY(!method->IsNative())) {
+  if (LIKELY(!mh.Get()->IsNative())) {
     result->SetJ(Execute(self, mh, code_item, *shadow_frame, JValue()).GetJ());
   } else {
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
     CHECK(!Runtime::Current()->IsStarted());
-    Object* receiver = method->IsStatic() ? nullptr : shadow_frame->GetVRegReference(0);
-    uint32_t* args = shadow_frame->GetVRegArgs(method->IsStatic() ? 0 : 1);
-    UnstartedRuntimeJni(self, method, receiver, args, result);
+    Object* receiver = mh.Get()->IsStatic() ? nullptr : shadow_frame->GetVRegReference(0);
+    uint32_t* args = shadow_frame->GetVRegArgs(mh.Get()->IsStatic() ? 0 : 1);
+    UnstartedRuntimeJni(self, mh.Get(), receiver, args, result);
   }
 
   self->PopShadowFrame();
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 733f1d1..52583ae 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -569,9 +569,9 @@
 
     // We need to do runtime check on reference assignment. We need to load the shorty
     // to get the exact type of each reference argument.
-    const DexFile::TypeList* params = method->GetParameterTypeList();
+    const DexFile::TypeList* params = mh.Get()->GetParameterTypeList();
     uint32_t shorty_len = 0;
-    const char* shorty = method->GetShorty(&shorty_len);
+    const char* shorty = mh.Get()->GetShorty(&shorty_len);
 
     // TODO: find a cleaner way to separate non-range and range information without duplicating code.
     uint32_t arg[5];  // only used in invoke-XXX.
@@ -585,7 +585,7 @@
     // Handle receiver apart since it's not part of the shorty.
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
-    if (!method->IsStatic()) {
+    if (!mh.Get()->IsStatic()) {
       size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
@@ -609,7 +609,7 @@
               self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                        "Ljava/lang/VirtualMachineError;",
                                        "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
-                                       method->GetName(), shorty_pos,
+                                       mh.Get()->GetName(), shorty_pos,
                                        o->GetClass()->GetDescriptor(&temp1),
                                        arg_type->GetDescriptor(&temp2));
               return false;
@@ -658,15 +658,15 @@
 
   // Do the call now.
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    if (kIsDebugBuild && method->GetEntryPointFromInterpreter() == nullptr) {
-      LOG(FATAL) << "Attempt to invoke non-executable method: " << PrettyMethod(method);
+    if (kIsDebugBuild && mh.Get()->GetEntryPointFromInterpreter() == nullptr) {
+      LOG(FATAL) << "Attempt to invoke non-executable method: " << PrettyMethod(mh.Get());
     }
     if (kIsDebugBuild && Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly() &&
-        !method->IsNative() && !method->IsProxyMethod() &&
-        method->GetEntryPointFromInterpreter() == artInterpreterToCompiledCodeBridge) {
-      LOG(FATAL) << "Attempt to call compiled code when -Xint: " << PrettyMethod(method);
+        !mh.Get()->IsNative() && !mh.Get()->IsProxyMethod() &&
+        mh.Get()->GetEntryPointFromInterpreter() == artInterpreterToCompiledCodeBridge) {
+      LOG(FATAL) << "Attempt to call compiled code when -Xint: " << PrettyMethod(mh.Get());
     }
-    (method->GetEntryPointFromInterpreter())(self, mh, code_item, new_shadow_frame, result);
+    (mh.Get()->GetEntryPointFromInterpreter())(self, mh, code_item, new_shadow_frame, result);
   } else {
     UnstartedRuntimeInvoke(self, mh, code_item, new_shadow_frame, result, first_dest_reg);
   }
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index bf979c1..dea3014 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -56,6 +56,10 @@
 
 namespace art {
 
+// Consider turning this on when there are errors which could be related to JNI array copies, such
+// as things not rendering correctly. E.g. b/16858794
+static constexpr bool kWarnJniAbort = false;
+
 // Section 12.3.2 of the JNI spec describes JNI class descriptors. They're
 // separated with slashes but aren't wrapped with "L;" like regular descriptors
 // (i.e. "a/b/C" rather than "La/b/C;"). Arrays of reference types are an
@@ -2375,10 +2379,13 @@
                             reinterpret_cast<void*>(elements), array_data);
         return;
       }
-    }
-    // Don't need to copy if we had a direct pointer.
-    if (mode != JNI_ABORT && is_copy) {
-      memcpy(array_data, elements, bytes);
+      if (mode != JNI_ABORT) {
+        memcpy(array_data, elements, bytes);
+      } else if (kWarnJniAbort && memcmp(array_data, elements, bytes) != 0) {
+        // Warn if we have JNI_ABORT and the arrays don't match since this is usually an error.
+        LOG(WARNING) << "Possible incorrect JNI_ABORT in Release*ArrayElements";
+        soa.Self()->DumpJavaStack(LOG(WARNING));
+      }
     }
     if (mode != JNI_COMMIT) {
       if (is_copy) {
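
The kWarnJniAbort check leans on the Release<Type>ArrayElements contract: mode 0 copies back and frees the buffer, JNI_COMMIT copies back but keeps it, and JNI_ABORT frees it without copying back, so a JNI_ABORT buffer that differs from the managed array usually signals a caller bug. A small, self-contained native method showing those modes (standard JNI only; the function name is illustrative):

#include <jni.h>
#include <cstring>

void FillWithZeros(JNIEnv* env, jintArray array) {
  jboolean is_copy;
  jint* elems = env->GetIntArrayElements(array, &is_copy);
  if (elems == nullptr) {
    return;  // OutOfMemoryError is already pending.
  }
  jsize len = env->GetArrayLength(array);
  memset(elems, 0, len * sizeof(jint));
  // Mode 0: copy the changes back (if this was a copy) and release the buffer.
  env->ReleaseIntArrayElements(array, elems, 0);
  // JNI_COMMIT would copy back but keep the buffer alive for further use;
  // JNI_ABORT would release the buffer and discard the writes made above.
}
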
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index a896f3e..a8a8307 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -47,9 +47,11 @@
                                   std::string* error_msg) {
   std::unique_ptr<OatFile> oat_file(new OatFile(location, false));
   oat_file->elf_file_.reset(elf_file);
-  Elf32_Shdr* hdr = elf_file->FindSectionByName(".rodata");
-  oat_file->begin_ = elf_file->Begin() + hdr->sh_offset;
-  oat_file->end_ = elf_file->Begin() + hdr->sh_size + hdr->sh_offset;
+  uint64_t offset, size;
+  bool has_section = elf_file->GetSectionOffsetAndSize(".rodata", &offset, &size);
+  CHECK(has_section);
+  oat_file->begin_ = elf_file->Begin() + offset;
+  oat_file->end_ = elf_file->Begin() + size + offset;
   return oat_file->Setup(error_msg) ? oat_file.release() : nullptr;
 }
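
GetSectionOffsetAndSize lets OatFile locate .rodata without caring whether the underlying ELF is 32- or 64-bit. For reference, the lookup it hides (scan the section headers and compare names against the .shstrtab strings) looks roughly like this when done by hand on a 64-bit file with plain <elf.h>; FindSection64 is a hypothetical helper, not ART code:

#include <elf.h>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

bool FindSection64(const char* path, const char* name,
                   uint64_t* offset, uint64_t* size) {
  FILE* f = fopen(path, "rb");
  if (f == nullptr) {
    return false;
  }
  Elf64_Ehdr ehdr;
  bool ok = fread(&ehdr, sizeof(ehdr), 1, f) == 1 &&
            ehdr.e_ident[EI_CLASS] == ELFCLASS64;
  std::vector<Elf64_Shdr> shdrs(ok ? ehdr.e_shnum : 0);
  ok = ok && ehdr.e_shstrndx < shdrs.size() &&
       fseek(f, static_cast<long>(ehdr.e_shoff), SEEK_SET) == 0 &&
       fread(shdrs.data(), sizeof(Elf64_Shdr), shdrs.size(), f) == shdrs.size();
  std::string names;
  if (ok) {
    // Section names live in the string table section indexed by e_shstrndx.
    const Elf64_Shdr& shstrtab = shdrs[ehdr.e_shstrndx];
    names.resize(shstrtab.sh_size);
    ok = fseek(f, static_cast<long>(shstrtab.sh_offset), SEEK_SET) == 0 &&
         fread(&names[0], 1, names.size(), f) == names.size();
  }
  fclose(f);
  if (!ok) {
    return false;
  }
  for (const Elf64_Shdr& sh : shdrs) {
    if (sh.sh_name < names.size() && strcmp(&names[sh.sh_name], name) == 0) {
      *offset = sh.sh_offset;
      *size = sh.sh_size;
      return true;
    }
  }
  return false;
}
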
 
diff --git a/test/005-annotations/build b/test/005-annotations/build
index 1690213..2474055 100644
--- a/test/005-annotations/build
+++ b/test/005-annotations/build
@@ -23,5 +23,6 @@
 ${JAVAC} -d classes `find src -name '*.java'`
 
 # ...but not at run time.
-rm classes/android/test/anno/MissingAnnotation.class
+rm 'classes/android/test/anno/MissingAnnotation.class'
+rm 'classes/android/test/anno/ClassWithInnerAnnotationClass$MissingInnerAnnotationClass.class'
 ${DX} -JXmx256m --debug --dex --output=$TEST_NAME.jar classes
diff --git a/test/005-annotations/src/android/test/anno/ClassWithInnerAnnotationClass.java b/test/005-annotations/src/android/test/anno/ClassWithInnerAnnotationClass.java
new file mode 100644
index 0000000..c69e01a
--- /dev/null
+++ b/test/005-annotations/src/android/test/anno/ClassWithInnerAnnotationClass.java
@@ -0,0 +1,8 @@
+package android.test.anno;
+
+import java.lang.annotation.*;
+
+public class ClassWithInnerAnnotationClass {
+  @Retention(RetentionPolicy.SOURCE)
+  public @interface MissingInnerAnnotationClass {}
+}
diff --git a/test/005-annotations/src/android/test/anno/TestAnnotations.java b/test/005-annotations/src/android/test/anno/TestAnnotations.java
index 4eabb12..1deff33 100644
--- a/test/005-annotations/src/android/test/anno/TestAnnotations.java
+++ b/test/005-annotations/src/android/test/anno/TestAnnotations.java
@@ -149,26 +149,24 @@
 
         testArrays();
         testArrayProblem();
-        //System.exit(0);
 
         System.out.println(
             "AnnoSimpleField " + AnnoSimpleField.class.isAnnotation() +
             ", SimplyNoted " + SimplyNoted.class.isAnnotation());
 
-        Class clazz;
-        clazz = SimplyNoted.class;
-        printAnnotations(clazz);
-        clazz = INoted.class;
-        printAnnotations(clazz);
-        clazz = SubNoted.class;
-        printAnnotations(clazz);
-        clazz = FullyNoted.class;
-        printAnnotations(clazz);
+        printAnnotations(SimplyNoted.class);
+        printAnnotations(INoted.class);
+        printAnnotations(SubNoted.class);
+        printAnnotations(FullyNoted.class);
 
-        Annotation anno;
+        try {
+            ClassWithInnerAnnotationClass.class.getDeclaredClasses();
+            throw new AssertionError();
+        } catch (NoClassDefFoundError expected) {
+        }
 
         // this is expected to be non-null
-        anno = SimplyNoted.class.getAnnotation(AnnoSimpleType.class);
+        Annotation anno = SimplyNoted.class.getAnnotation(AnnoSimpleType.class);
         System.out.println("SimplyNoted.get(AnnoSimpleType) = " + anno);
         // this is non-null if the @Inherited tag is present
         anno = SubNoted.class.getAnnotation(AnnoSimpleType.class);
diff --git a/test/115-native-bridge/src/NativeBridgeMain.java b/test/115-native-bridge/src/NativeBridgeMain.java
index a531f92..2405627 100644
--- a/test/115-native-bridge/src/NativeBridgeMain.java
+++ b/test/115-native-bridge/src/NativeBridgeMain.java
@@ -15,6 +15,7 @@
  */
 
 import java.lang.reflect.Method;
+import java.lang.System;
 
 // This is named Main as it is a copy of JniTest, so that we can re-use the native implementations
 // from libarttest.
@@ -29,6 +30,7 @@
         testShortMethod();
         testBooleanMethod();
         testCharMethod();
+        testEnvironment();
     }
 
     public static native void testFindClassOnAttachedNativeThread();
@@ -147,6 +149,24 @@
         }
       }
     }
+
+    private static void testEnvironment() {
+      String osArch = System.getProperty("os.arch");
+      if (!"os.arch".equals(osArch)) {
+        throw new AssertionError("unexpected value for os.arch: " + osArch);
+      }
+      // TODO: improve the build script to get these running as well.
+      // if (!"cpu_abi".equals(Build.CPU_ABI)) {
+      //   throw new AssertionError("unexpected value for cpu_abi");
+      // }
+      // if (!"cpu_abi2".equals(Build.CPU_ABI2)) {
+      //   throw new AssertionError("unexpected value for cpu_abi2");
+      // }
+      // String[] expectedSupportedAbis = {"supported1", "supported2", "supported3"};
+      // if (!Arrays.equals(expectedSupportedAbis, Build.SUPPORTED_ABIS)) {
+      //   throw new AssertionError("unexpected value for supported_abis");
+      // }
+    }
 }
 
 public class NativeBridgeMain {
diff --git a/test/407-arrays/src/Main.java b/test/407-arrays/src/Main.java
index b5e95b0..d5c5604 100644
--- a/test/407-arrays/src/Main.java
+++ b/test/407-arrays/src/Main.java
@@ -70,6 +70,15 @@
     chars[index] = 'd';
     assertEquals('d', chars[index]);
 
+    chars[0] = 65535;
+    assertEquals(65535, chars[0]);
+    // Do an update between the two max value updates, to avoid
+    // optimizing the second away.
+    chars[index] = 0;
+    assertEquals(0, chars[index]);
+    chars[index] = 65535;
+    assertEquals(65535, chars[index]);
+
     shorts[0] = -42;
     assertEquals(-42, shorts[0]);
     shorts[index] = -84;
@@ -86,7 +95,13 @@
     Object o2 = new Object();
     objects[index] = o2;
     assertEquals(o2, objects[index]);
+    // Longs are initially not supported in the linear scan register allocator
+    // on 32-bit targets. So we call out to a long helper to ensure this method
+    // gets optimized.
+    $opt$testLongWrites(longs, index);
+  }
 
+  public static void $opt$testLongWrites(long[] longs, int index) {
     long l = -21876876876876876L;
     longs[0] = l;
     assertEquals(l, longs[0]);
diff --git a/test/410-floats/expected.txt b/test/410-floats/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/410-floats/expected.txt
diff --git a/test/410-floats/info.txt b/test/410-floats/info.txt
new file mode 100644
index 0000000..5332704
--- /dev/null
+++ b/test/410-floats/info.txt
@@ -0,0 +1 @@
+Small tests involving floats and doubles.
diff --git a/test/410-floats/src/Main.java b/test/410-floats/src/Main.java
new file mode 100644
index 0000000..d8d6fac
--- /dev/null
+++ b/test/410-floats/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    assertEquals(4.2f, returnFloat());
+    float[] a = new float[1];
+    a[0] = 42.2f;
+    assertEquals(42.2f, returnFloat(a));
+
+    assertEquals(4.4, returnDouble());
+    double[] b = new double[1];
+    b[0] = 42.4;
+    assertEquals(42.4, returnDouble(b));
+
+    assertEquals(4.2f, invokeReturnFloat());
+    assertEquals(4.4, invokeReturnDouble());
+    assertEquals(4.2f, takeAFloat(4.2f));
+    assertEquals(3.1, takeADouble(3.1));
+    assertEquals(12.7, takeThreeDouble(3.1, 4.4, 5.2));
+    assertEquals(12.7f, takeThreeFloat(3.1f, 4.4f, 5.2f));
+    assertEquals(4.2f, invokeTakeAFloat(4.2f));
+    assertEquals(3.1, invokeTakeADouble(3.1));
+    assertEquals(12.7, invokeTakeThreeDouble(3.1, 4.4, 5.2));
+    assertEquals(12.7f, invokeTakeThreeFloat(3.1f, 4.4f, 5.2f));
+  }
+
+  public static float invokeReturnFloat() {
+    return returnFloat();
+  }
+
+  public static double invokeReturnDouble() {
+    return returnDouble();
+  }
+
+  public static float returnFloat() {
+    return 4.2f;
+  }
+
+  public static float returnFloat(float[] a) {
+    return a[0];
+  }
+
+  public static double returnDouble() {
+    return 4.4;
+  }
+
+  public static double returnDouble(double[] a) {
+    return a[0];
+  }
+
+  public static float takeAFloat(float a) {
+    return a;
+  }
+
+  public static double takeADouble(double a) {
+    return a;
+  }
+
+  public static double takeThreeDouble(double a, double b, double c) {
+    return a + b + c;
+  }
+
+  public static float takeThreeFloat(float a, float b, float c) {
+    return a + b + c;
+  }
+
+  public static float invokeTakeAFloat(float a) {
+    return takeAFloat(a);
+  }
+
+  public static double invokeTakeADouble(double a) {
+    return takeADouble(a);
+  }
+
+  public static double invokeTakeThreeDouble(double a, double b, double c) {
+    return takeThreeDouble(a, b, c);
+  }
+
+  public static float invokeTakeThreeFloat(float a, float b, float c) {
+    return takeThreeFloat(a, b, c);
+  }
+
+  public static void assertEquals(float expected, float actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+
+  public static void assertEquals(double expected, double actual) {
+    if (expected != actual) {
+      throw new AssertionError("Expected " + expected + " got " + actual);
+    }
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 302db38..ae5b08f 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -526,7 +526,7 @@
     $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(test),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(test))_RULES)))))
 $(foreach target, $(TARGET_TYPES), \
   $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(address_size),$(ART_RUN_TEST_$(address_size)_RULES)))))
+    $(call define-test-art-run-test-group,test-art-$(target)-run-test$(address_size),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(address_size)_RULES)))))
 
 # Clear variables now we're finished with them.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
diff --git a/test/etc/host-run-test-jar b/test/etc/host-run-test-jar
deleted file mode 100755
index c020478..0000000
--- a/test/etc/host-run-test-jar
+++ /dev/null
@@ -1,237 +0,0 @@
-#!/bin/bash
-#
-# Run the code in test.jar using the host-mode virtual machine. The jar should
-# contain a top-level class named Main to run.
-
-msg() {
-    if [ "$QUIET" = "n" ]; then
-        echo "$@"
-    fi
-}
-
-DEBUGGER="n"
-PREBUILD="n"
-GDB="n"
-ISA="x86"
-INTERPRETER="n"
-VERIFY="y"
-RELOCATE="y"
-OPTIMIZE="y"
-INVOKE_WITH=""
-DEV_MODE="n"
-QUIET="n"
-FLAGS=""
-COMPILER_FLAGS=""
-BUILD_BOOT_OPT=""
-PATCHOAT=""
-DEX2OAT=""
-FALSE_BIN="/bin/false"
-HAVE_IMAGE="y"
-TIME_OUT="y"
-TIME_OUT_VALUE=5m
-exe="${ANDROID_HOST_OUT}/bin/dalvikvm32"
-main="Main"
-
-while true; do
-    if [ "x$1" = "x--quiet" ]; then
-        QUIET="y"
-        shift
-    elif [ "x$1" = "x--prebuild" ]; then
-        PREBUILD="y"
-        shift
-    elif [ "x$1" = "x--no-dex2oat" ]; then
-        DEX2OAT="-Xcompiler:${FALSE_BIN}"
-        shift
-    elif [ "x$1" = "x--no-patchoat" ]; then
-        PATCHOAT="-Xpatchoat:${FALSE_BIN}"
-        shift
-    elif [ "x$1" = "x--lib" ]; then
-        shift
-        if [ "x$1" = "x" ]; then
-            echo "$0 missing argument to --lib" 1>&2
-            exit 1
-        fi
-        LIB="$1"
-        if [ `uname` = "Darwin" ]; then
-            LIB=${LIB/%so/dylib}
-        fi
-        shift
-    elif [ "x$1" = "x--no-image" ]; then
-        HAVE_IMAGE="n"
-        shift
-    elif [ "x$1" = "x--boot" ]; then
-        shift
-        option="$1"
-        BOOT_OPT="$option"
-        BUILD_BOOT_OPT="--boot-image=${option#-Ximage:}"
-        shift
-    elif [ "x$1" = "x--debug" ]; then
-        DEBUGGER="y"
-        TIME_OUT="n"
-        shift
-    elif [ "x$1" = "x--gdb" ]; then
-        GDB="y"
-        DEV_MODE="y"
-        TIME_OUT="n"
-        shift
-    elif [ "x$1" = "x--invoke-with" ]; then
-        shift
-        if [ "x$1" = "x" ]; then
-            echo "$0 missing argument to --invoke-with" 1>&2
-            exit 1
-        fi
-        if [ "x$INVOKE_WITH" = "x" ]; then
-            INVOKE_WITH="$1"
-        else
-            INVOKE_WITH="$INVOKE_WITH $1"
-        fi
-        shift
-    elif [ "x$1" = "x--dev" ]; then
-        DEV_MODE="y"
-        shift
-    elif [ "x$1" = "x--interpreter" ]; then
-        INTERPRETER="y"
-        shift
-    elif [ "x$1" = "x--64" ]; then
-        ISA="x86_64"
-        exe="${ANDROID_HOST_OUT}/bin/dalvikvm64"
-        shift
-    elif [ "x$1" = "x--no-verify" ]; then
-        VERIFY="n"
-        shift
-    elif [ "x$1" = "x--no-optimize" ]; then
-        OPTIMIZE="n"
-        shift
-    elif [ "x$1" = "x--no-relocate" ]; then
-        RELOCATE="n"
-        shift
-    elif [ "x$1" = "x--relocate" ]; then
-        RELOCATE="y"
-        shift
-    elif [ "x$1" = "x-Xcompiler-option" ]; then
-        shift
-        option="$1"
-        FLAGS="${FLAGS} -Xcompiler-option $option"
-        COMPILER_FLAGS="${COMPILER_FLAGS} $option"
-        shift
-    elif [ "x$1" = "x--runtime-option" ]; then
-        shift
-        option="$1"
-        FLAGS="${FLAGS} $option"
-        shift
-    elif [ "x$1" = "x--" ]; then
-        shift
-        break
-    elif expr "x$1" : "x--" >/dev/null 2>&1; then
-        echo "unknown $0 option: $1" 1>&2
-        exit 1
-    else
-        break
-    fi
-done
-
-if [ "x$1" = "x" ] ; then
-  main="Main"
-else
-  main="$1"
-fi
-
-msg "------------------------------"
-
-export ANDROID_PRINTF_LOG=brief
-if [ "$DEV_MODE" = "y" ]; then
-    export ANDROID_LOG_TAGS='*:d'
-else
-    export ANDROID_LOG_TAGS='*:s'
-fi
-export ANDROID_DATA="$DEX_LOCATION"
-export ANDROID_ROOT="${ANDROID_HOST_OUT}"
-export LD_LIBRARY_PATH="${ANDROID_ROOT}/lib"
-export DYLD_LIBRARY_PATH="${ANDROID_ROOT}/lib"
-
-if [ "$DEBUGGER" = "y" ]; then
-    PORT=8000
-    msg "Waiting for jdb to connect:"
-    msg "    jdb -attach localhost:$PORT"
-    DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_socket,address=$PORT,server=y,suspend=y"
-fi
-
-if [ "$GDB" = "y" ]; then
-    if [ `uname` = "Darwin" ]; then
-        gdb=lldb
-        gdbargs="-- $exe"
-        exe=
-    else
-        gdb=gdb
-        gdbargs="--args $exe"
-        # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line.
-        # gdbargs="--annotate=3 $gdbargs"
-    fi
-fi
-
-if [ "$INTERPRETER" = "y" ]; then
-    INT_OPTS="-Xint"
-    COMPILER_FLAGS="${COMPILER_FLAGS} --compiler-filter=interpret-only"
-fi
-
-if [ "$HAVE_IMAGE" = "n" ]; then
-    # Set image to a place were there isn't one.
-    BOOT_OPT="-Ximage:/system/non-existant/core.art"
-fi
-
-if [ "$RELOCATE" = "y" ]; then
-  FLAGS="${FLAGS} -Xrelocate"
-  COMPILER_FLAGS="${COMPILER_FLAGS} --runtime-arg -Xnorelocate --include-patch-information"
-  # Run test sets a fairly draconian ulimit that we will likely blow right over
-  # since we are relocating. Get the total size of the /system/framework directory
-  # in 512 byte blocks and set it as the ulimit. This should be more than enough
-  # room.
-  if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
-    ulimit -S $(du -c -B512 ${ANDROID_ROOT}/framework | tail -1 | cut -f1) || exit 1
-  fi
-else
-  FLAGS="${FLAGS} -Xnorelocate"
-  COMPILER_FLAGS="${COMPILER_FLAGS} --runtime-arg -Xnorelocate --no-include-patch-information"
-fi
-
-mkdir_cmd="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
-if [ "$PREBUILD" = "y" ]; then
-  prebuild_cmd="${ANDROID_HOST_OUT}/bin/dex2oatd $COMPILER_FLAGS --instruction-set=$ISA $BUILD_BOOT_OPT --dex-file=$DEX_LOCATION/$TEST_NAME.jar --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g")"
-else
-  prebuild_cmd="true"
-fi
-
-JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
-cmdline="$INVOKE_WITH $gdb $exe $gdbargs -XXlib:$LIB $PATCHOAT $DEX2OAT $JNI_OPTS $FLAGS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar $main"
-if [ "$TIME_OUT" = "y" ]; then
-  # Add timeout command if time out is desired.
-  cmdline="timeout $TIME_OUT_VALUE $cmdline"
-fi
-if [ "$DEV_MODE" = "y" ]; then
-  if [ "$PREBUILD" = "y" ]; then
-    echo "$mkdir_cmd && $prebuild_cmd && $cmdline"
-  elif [ "$RELOCATE" = "y" ]; then
-    echo "$mkdir_cmd && $cmdline"
-  else
-    echo $cmdline
-  fi
-fi
-
-cd $ANDROID_BUILD_TOP
-
-$mkdir_cmd || exit 1
-$prebuild_cmd || exit 2
-
-if [ "$GDB" = "y" ]; then
-  # When running under gdb, we cannot do piping and grepping...
-  LD_PRELOAD=libsigchain.so $cmdline "$@"
-else
-  # If we are execing /bin/false we might not be on the same ISA as libsigchain.so
-  # ld.so will helpfully warn us of this. Unfortunately this messes up our error
-  # checking so we will just filter out the error with a grep.
-  LD_PRELOAD=libsigchain.so $cmdline "$@" 2>&1 | grep -v -E "^ERROR: ld\.so: object '.+\.so' from LD_PRELOAD cannot be preloaded.*: ignored\.$"
-  # Add extra detail if time out is enabled.
-  if [ ${PIPESTATUS[0]} = 124 ] && [ "$TIME_OUT" = "y" ]; then
-    echo -e "\e[91mTEST TIMED OUT!\e[0m" >&2
-  fi
-fi
diff --git a/test/etc/push-and-run-prebuilt-test-jar b/test/etc/push-and-run-prebuilt-test-jar
deleted file mode 100755
index 91b8a0f..0000000
--- a/test/etc/push-and-run-prebuilt-test-jar
+++ /dev/null
@@ -1,230 +0,0 @@
-#!/bin/sh
-#
-# Run the code in test.jar on the device. The jar should contain a top-level
-# class named Main to run.
-
-msg() {
-    if [ "$QUIET" = "n" ]; then
-        echo "$@"
-    fi
-}
-
-ARCHITECTURES_32="(arm|x86|mips|none)"
-ARCHITECTURES_64="(arm64|x86_64|none)"
-ARCHITECTURES_PATTERN="${ARCHITECTURES_32}"
-RELOCATE="y"
-GDB="n"
-DEBUGGER="n"
-INTERPRETER="n"
-VERIFY="y"
-OPTIMIZE="y"
-ZYGOTE=""
-QUIET="n"
-DEV_MODE="n"
-INVOKE_WITH=""
-FLAGS=""
-TARGET_SUFFIX="32"
-GDB_TARGET_SUFFIX=""
-COMPILE_FLAGS=""
-FALSE_BIN="/system/bin/false"
-PATCHOAT=""
-DEX2OAT=""
-HAVE_IMAGE="y"
-
-while true; do
-    if [ "x$1" = "x--quiet" ]; then
-        QUIET="y"
-        shift
-    elif [ "x$1" = "x--lib" ]; then
-        shift
-        if [ "x$1" = "x" ]; then
-            echo "$0 missing argument to --lib" 1>&2
-            exit 1
-        fi
-        LIB="$1"
-        shift
-    elif [ "x$1" = "x-Xcompiler-option" ]; then
-        shift
-        option="$1"
-        FLAGS="${FLAGS} -Xcompiler-option $option"
-        COMPILE_FLAGS="${COMPILE_FLAGS} $option"
-        shift
-    elif [ "x$1" = "x--runtime-option" ]; then
-        shift
-        option="$1"
-        FLAGS="${FLAGS} $option"
-        shift
-    elif [ "x$1" = "x--boot" ]; then
-        shift
-        BOOT_OPT="$1"
-        BUILD_BOOT_OPT="--boot-image=${1#-Ximage:}"
-        shift
-    elif [ "x$1" = "x--no-dex2oat" ]; then
-        DEX2OAT="-Xcompiler:${FALSE_BIN}"
-        shift
-    elif [ "x$1" = "x--no-patchoat" ]; then
-        PATCHOAT="-Xpatchoat:${FALSE_BIN}"
-        shift
-    elif [ "x$1" = "x--relocate" ]; then
-        RELOCATE="y"
-        shift
-    elif [ "x$1" = "x--no-relocate" ]; then
-        RELOCATE="n"
-        shift
-    elif [ "x$1" = "x--no-image" ]; then
-        HAVE_IMAGE="n"
-        shift
-    elif [ "x$1" = "x--debug" ]; then
-        DEBUGGER="y"
-        shift
-    elif [ "x$1" = "x--gdb" ]; then
-        GDB="y"
-        DEV_MODE="y"
-        shift
-    elif [ "x$1" = "x--zygote" ]; then
-        ZYGOTE="--zygote"
-        msg "Spawning from zygote"
-        shift
-    elif [ "x$1" = "x--dev" ]; then
-        DEV_MODE="y"
-        shift
-    elif [ "x$1" = "x--interpreter" ]; then
-        INTERPRETER="y"
-        shift
-    elif [ "x$1" = "x--invoke-with" ]; then
-        shift
-        if [ "x$1" = "x" ]; then
-            echo "$0 missing argument to --invoke-with" 1>&2
-            exit 1
-        fi
-        if [ "x$INVOKE_WITH" = "x" ]; then
-            INVOKE_WITH="$1"
-        else
-            INVOKE_WITH="$INVOKE_WITH $1"
-        fi
-        shift
-    elif [ "x$1" = "x--no-verify" ]; then
-        VERIFY="n"
-        shift
-    elif [ "x$1" = "x--no-optimize" ]; then
-        OPTIMIZE="n"
-        shift
-    elif [ "x$1" = "x--" ]; then
-        shift
-        break
-    elif [ "x$1" = "x--64" ]; then
-        TARGET_SUFFIX="64"
-        GDB_TARGET_SUFFIX="64"
-        ARCHITECTURES_PATTERN="${ARCHITECTURES_64}"
-        shift
-    elif expr "x$1" : "x--" >/dev/null 2>&1; then
-        echo "unknown $0 option: $1" 1>&2
-        exit 1
-    else
-        break
-    fi
-done
-
-if [ "$ZYGOTE" = "" ]; then
-    if [ "$OPTIMIZE" = "y" ]; then
-        if [ "$VERIFY" = "y" ]; then
-            DEX_OPTIMIZE="-Xdexopt:verified"
-        else
-            DEX_OPTIMIZE="-Xdexopt:all"
-        fi
-        msg "Performing optimizations"
-    else
-        DEX_OPTIMIZE="-Xdexopt:none"
-        msg "Skipping optimizations"
-    fi
-
-    if [ "$VERIFY" = "y" ]; then
-        DEX_VERIFY=""
-        msg "Performing verification"
-    else
-        DEX_VERIFY="-Xverify:none"
-        msg "Skipping verification"
-    fi
-fi
-
-msg "------------------------------"
-
-if [ "$HAVE_IMAGE" = "n" ]; then
-    BOOT_OPT="-Ximage:/system/non-existant/core.art"
-fi
-
-ARCH=$(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
-if [ x"$ARCH" = "x" ]; then
-  echo "Unable to determine architecture"
-  exit 1
-fi
-
-if [ "$QUIET" = "n" ]; then
-  adb shell rm -r $DEX_LOCATION
-  adb shell mkdir -p $DEX_LOCATION
-  adb push $TEST_NAME.jar $DEX_LOCATION
-  adb push $TEST_NAME-ex.jar $DEX_LOCATION
-else
-  adb shell rm -r $DEX_LOCATION >/dev/null 2>&1
-  adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
-  adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
-  adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
-fi
-
-if [ "$DEBUGGER" = "y" ]; then
-  # Use this instead for ddms and connect by running 'ddms':
-  # DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_android_adb,server=y,suspend=y"
-  # TODO: add a separate --ddms option?
-
-  PORT=12345
-  msg "Waiting for jdb to connect:"
-  msg "    adb forward tcp:$PORT tcp:$PORT"
-  msg "    jdb -attach localhost:$PORT"
-  DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_socket,address=$PORT,server=y,suspend=y"
-fi
-
-if [ "$GDB" = "y" ]; then
-    gdb="gdbserver$GDB_TARGET_SUFFIX :5039"
-    gdbargs="$exe"
-fi
-
-if [ "$INTERPRETER" = "y" ]; then
-    INT_OPTS="-Xint"
-    COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
-fi
-
-JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
-
-if [ "$RELOCATE" = "y" ]; then
-    RELOCATE_OPT="-Xrelocate"
-    BUILD_RELOCATE_OPT="--runtime-arg -Xnorelocate"
-    COMPILE_FLAGS="${COMPILE_FLAGS} --include-patch-information"
-    FLAGS="${FLAGS} -Xcompiler-option --include-patch-information"
-else
-    RELOCATE_OPT="-Xnorelocate"
-    BUILD_RELOCATE_OPT="--runtime-arg -Xnorelocate"
-fi
-
-# This is due to the fact this cmdline can get longer than the longest allowed
-# adb command and there is no way to get the exit status from a adb shell
-# command.
-cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
-    mkdir -p $DEX_LOCATION/dalvik-cache/$ARCH/ && \
-    $INVOKE_WITH /system/bin/dex2oatd $COMPILE_FLAGS $BUILD_BOOT_OPT $BUILD_RELOCATE_OPT  --runtime-arg -classpath --runtime-arg $DEX_LOCATION/$TEST_NAME.jar --dex-file=$DEX_LOCATION/$TEST_NAME.jar --oat-file=$DEX_LOCATION/dalvik-cache/$ARCH/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") --instruction-set=$ARCH && \
-    $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $PATCHOAT $DEX2OAT $ZYGOTE $JNI_OPTS $RELOCATE_OPT $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main $@"
-cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
-echo "$cmdline" > $cmdfile
-
-if [ "$DEV_MODE" = "y" ]; then
-  echo $cmdline
-fi
-
-if [ "$QUIET" = "n" ]; then
-  adb push $cmdfile $DEX_LOCATION/cmdline.sh
-else
-  adb push $cmdfile $DEX_LOCATION/cmdline.sh > /dev/null 2>&1
-fi
-
-adb shell sh $DEX_LOCATION/cmdline.sh
-
-rm -f $cmdfile
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
deleted file mode 100755
index e398b5d..0000000
--- a/test/etc/push-and-run-test-jar
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/sh
-#
-# Run the code in test.jar on the device. The jar should contain a top-level
-# class named Main to run.
-
-msg() {
-    if [ "$QUIET" = "n" ]; then
-        echo "$@"
-    fi
-}
-
-RELOCATE="y"
-GDB="n"
-DEBUGGER="n"
-INTERPRETER="n"
-VERIFY="y"
-OPTIMIZE="y"
-ZYGOTE=""
-QUIET="n"
-DEV_MODE="n"
-INVOKE_WITH=""
-FLAGS=""
-TARGET_SUFFIX="32"
-GDB_TARGET_SUFFIX=""
-FALSE_BIN="/system/bin/false"
-PATCHOAT=""
-DEX2OAT=""
-HAVE_IMAGE="y"
-
-while true; do
-    if [ "x$1" = "x--quiet" ]; then
-        QUIET="y"
-        shift
-    elif [ "x$1" = "x--lib" ]; then
-        shift
-        if [ "x$1" = "x" ]; then
-            echo "$0 missing argument to --lib" 1>&2
-            exit 1
-        fi
-        LIB="$1"
-        shift
-    elif [ "x$1" = "x-Xcompiler-option" ]; then
-        shift
-        option="$1"
-        FLAGS="${FLAGS} -Xcompiler-option $option"
-        shift
-    elif [ "x$1" = "x--no-image" ]; then
-        HAVE_IMAGE="n"
-        shift
-    elif [ "x$1" = "x--no-dex2oat" ]; then
-        DEX2OAT="-Xcompiler:${FALSE_BIN}"
-        shift
-    elif [ "x$1" = "x--no-patchoat" ]; then
-        PATCHOAT="-Xpatchoat:${FALSE_BIN}"
-        shift
-    elif [ "x$1" = "x--runtime-option" ]; then
-        shift
-        option="$1"
-        FLAGS="${FLAGS} $option"
-        shift
-    elif [ "x$1" = "x--boot" ]; then
-        shift
-        BOOT_OPT="$1"
-        shift
-    elif [ "x$1" = "x--debug" ]; then
-        DEBUGGER="y"
-        shift
-    elif [ "x$1" = "x--gdb" ]; then
-        GDB="y"
-        DEV_MODE="y"
-        shift
-    elif [ "x$1" = "x--zygote" ]; then
-        ZYGOTE="--zygote"
-        msg "Spawning from zygote"
-        shift
-    elif [ "x$1" = "x--dev" ]; then
-        DEV_MODE="y"
-        shift
-    elif [ "x$1" = "x--relocate" ]; then
-        RELOCATE="y"
-        shift
-    elif [ "x$1" = "x--no-relocate" ]; then
-        RELOCATE="n"
-        shift
-    elif [ "x$1" = "x--interpreter" ]; then
-        INTERPRETER="y"
-        shift
-    elif [ "x$1" = "x--invoke-with" ]; then
-        shift
-        if [ "x$1" = "x" ]; then
-            echo "$0 missing argument to --invoke-with" 1>&2
-            exit 1
-        fi
-        if [ "x$INVOKE_WITH" = "x" ]; then
-            INVOKE_WITH="$1"
-        else
-            INVOKE_WITH="$INVOKE_WITH $1"
-        fi
-        shift
-    elif [ "x$1" = "x--no-verify" ]; then
-        VERIFY="n"
-        shift
-    elif [ "x$1" = "x--no-optimize" ]; then
-        OPTIMIZE="n"
-        shift
-    elif [ "x$1" = "x--" ]; then
-        shift
-        break
-    elif [ "x$1" = "x--64" ]; then
-        TARGET_SUFFIX="64"
-        GDB_TARGET_SUFFIX="64"
-        shift
-    elif expr "x$1" : "x--" >/dev/null 2>&1; then
-        echo "unknown $0 option: $1" 1>&2
-        exit 1
-    else
-        break
-    fi
-done
-
-if [ "$ZYGOTE" = "" ]; then
-    if [ "$OPTIMIZE" = "y" ]; then
-        if [ "$VERIFY" = "y" ]; then
-            DEX_OPTIMIZE="-Xdexopt:verified"
-        else
-            DEX_OPTIMIZE="-Xdexopt:all"
-        fi
-        msg "Performing optimizations"
-    else
-        DEX_OPTIMIZE="-Xdexopt:none"
-        msg "Skipping optimizations"
-    fi
-
-    if [ "$VERIFY" = "y" ]; then
-        DEX_VERIFY=""
-        msg "Performing verification"
-    else
-        DEX_VERIFY="-Xverify:none"
-        msg "Skipping verification"
-    fi
-fi
-
-msg "------------------------------"
-
-if [ "$HAVE_IMAGE" = "n" ]; then
-    BOOT_OPT="-Ximage:/system/non-existant/core.art"
-fi
-
-if [ "$QUIET" = "n" ]; then
-  adb shell rm -r $DEX_LOCATION
-  adb shell mkdir -p $DEX_LOCATION
-  adb push $TEST_NAME.jar $DEX_LOCATION
-  adb push $TEST_NAME-ex.jar $DEX_LOCATION
-else
-  adb shell rm -r $DEX_LOCATION >/dev/null 2>&1
-  adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
-  adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
-  adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
-fi
-
-if [ "$DEBUGGER" = "y" ]; then
-  # Use this instead for ddms and connect by running 'ddms':
-  # DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_android_adb,server=y,suspend=y"
-  # TODO: add a separate --ddms option?
-
-  PORT=12345
-  msg "Waiting for jdb to connect:"
-  msg "    adb forward tcp:$PORT tcp:$PORT"
-  msg "    jdb -attach localhost:$PORT"
-  DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_socket,address=$PORT,server=y,suspend=y"
-fi
-
-if [ "$GDB" = "y" ]; then
-    gdb="gdbserver$GDB_TARGET_SUFFIX :5039"
-    gdbargs="$exe"
-fi
-
-if [ "$INTERPRETER" = "y" ]; then
-    INT_OPTS="-Xint"
-fi
-
-JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
-
-if [ "$RELOCATE" = "y" ]; then
-  RELOCATE_OPT="-Xrelocate"
-  FLAGS="${FLAGS} -Xcompiler-option --include-patch-information"
-else
-  RELOCATE_OPT="-Xnorelocate"
-fi
-
-cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
-    $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $PATCHOAT $DEX2OAT $ZYGOTE $JNI_OPTS $RELOCATE_OPT $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
-if [ "$DEV_MODE" = "y" ]; then
-  echo $cmdline "$@"
-fi
-
-adb shell $cmdline "$@"
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
new file mode 100755
index 0000000..59d241a
--- /dev/null
+++ b/test/etc/run-test-jar
@@ -0,0 +1,355 @@
+#!/bin/bash
+#
+# Runner for an individual run-test.
+
+msg() {
+    if [ "$QUIET" = "n" ]; then
+        echo "$@"
+    fi
+}
+
+ARCHITECTURES_32="(arm|x86|mips|none)"
+ARCHITECTURES_64="(arm64|x86_64|none)"
+ARCHITECTURES_PATTERN="${ARCHITECTURES_32}"
+COMPILE_FLAGS=""
+DALVIKVM="dalvikvm32"
+DEBUGGER="n"
+DEV_MODE="n"
+DEX2OAT=""
+GDB_SERVER="gdbserver"
+FALSE_BIN="/system/bin/false"
+FLAGS=""
+GDB=""
+HAVE_IMAGE="y"
+HOST="n"
+INTERPRETER="n"
+INVOKE_WITH=""
+ISA=x86
+OPTIMIZE="y"
+PATCHOAT=""
+PREBUILD="y"
+QUIET="n"
+RELOCATE="y"
+TIME_OUT="y"
+TIME_OUT_VALUE=5m
+USE_GDB="n"
+VERIFY="y"
+ZYGOTE=""
+MAIN=""
+
+while true; do
+    if [ "x$1" = "x--quiet" ]; then
+        QUIET="y"
+        shift
+    elif [ "x$1" = "x--lib" ]; then
+        shift
+        if [ "x$1" = "x" ]; then
+            echo "$0 missing argument to --lib" 1>&2
+            exit 1
+        fi
+        LIB="$1"
+        shift
+    elif [ "x$1" = "x-Xcompiler-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} -Xcompiler-option $option"
+        COMPILE_FLAGS="${COMPILE_FLAGS} $option"
+        shift
+    elif [ "x$1" = "x--runtime-option" ]; then
+        shift
+        option="$1"
+        FLAGS="${FLAGS} $option"
+        shift
+    elif [ "x$1" = "x--boot" ]; then
+        shift
+        DALVIKVM_BOOT_OPT="$1"
+        DEX2OAT_BOOT_OPT="--boot-image=${1#-Ximage:}"
+        shift
+    elif [ "x$1" = "x--no-dex2oat" ]; then
+        DEX2OAT="-Xcompiler:${FALSE_BIN}"
+        shift
+    elif [ "x$1" = "x--no-patchoat" ]; then
+        PATCHOAT="-Xpatchoat:${FALSE_BIN}"
+        shift
+    elif [ "x$1" = "x--relocate" ]; then
+        RELOCATE="y"
+        shift
+    elif [ "x$1" = "x--no-relocate" ]; then
+        RELOCATE="n"
+        shift
+    elif [ "x$1" = "x--prebuild" ]; then
+        PREBUILD="y"
+        shift
+    elif [ "x$1" = "x--host" ]; then
+        HOST="y"
+        shift
+    elif [ "x$1" = "x--no-prebuild" ]; then
+        PREBUILD="n"
+        shift
+    elif [ "x$1" = "x--no-image" ]; then
+        HAVE_IMAGE="n"
+        shift
+    elif [ "x$1" = "x--debug" ]; then
+        DEBUGGER="y"
+        TIME_OUT="n"
+        shift
+    elif [ "x$1" = "x--gdb" ]; then
+        USE_GDB="y"
+        DEV_MODE="y"
+        TIME_OUT="n"
+        shift
+    elif [ "x$1" = "x--zygote" ]; then
+        ZYGOTE="-Xzygote"
+        msg "Spawning from zygote"
+        shift
+    elif [ "x$1" = "x--dev" ]; then
+        DEV_MODE="y"
+        shift
+    elif [ "x$1" = "x--interpreter" ]; then
+        INTERPRETER="y"
+        shift
+    elif [ "x$1" = "x--invoke-with" ]; then
+        shift
+        if [ "x$1" = "x" ]; then
+            echo "$0 missing argument to --invoke-with" 1>&2
+            exit 1
+        fi
+        if [ "x$INVOKE_WITH" = "x" ]; then
+            INVOKE_WITH="$1"
+        else
+            INVOKE_WITH="$INVOKE_WITH $1"
+        fi
+        shift
+    elif [ "x$1" = "x--no-verify" ]; then
+        VERIFY="n"
+        shift
+    elif [ "x$1" = "x--no-optimize" ]; then
+        OPTIMIZE="n"
+        shift
+    elif [ "x$1" = "x--" ]; then
+        shift
+        break
+    elif [ "x$1" = "x--64" ]; then
+        ISA="x86_64"
+        GDB_SERVER="gdbserver64"
+        DALVIKVM="dalvikvm64"
+        ARCHITECTURES_PATTERN="${ARCHITECTURES_64}"
+        shift
+    elif expr "x$1" : "x--" >/dev/null 2>&1; then
+        echo "unknown $0 option: $1" 1>&2
+        exit 1
+    else
+        break
+    fi
+done
+
+if [ "x$1" = "x" ] ; then
+  MAIN="Main"
+else
+  MAIN="$1"
+fi
+
+if [ "$ZYGOTE" = "" ]; then
+    if [ "$OPTIMIZE" = "y" ]; then
+        if [ "$VERIFY" = "y" ]; then
+            DEX_OPTIMIZE="-Xdexopt:verified"
+        else
+            DEX_OPTIMIZE="-Xdexopt:all"
+        fi
+        msg "Performing optimizations"
+    else
+        DEX_OPTIMIZE="-Xdexopt:none"
+        msg "Skipping optimizations"
+    fi
+
+    if [ "$VERIFY" = "y" ]; then
+        DEX_VERIFY=""
+        msg "Performing verification"
+    else
+        DEX_VERIFY="-Xverify:none"
+        msg "Skipping verification"
+    fi
+fi
+
+msg "------------------------------"
+
+if [ "$HAVE_IMAGE" = "n" ]; then
+    DALVIKVM_BOOT_OPT="-Ximage:/system/non-existant/core.art"
+fi
+
+if [ "$DEBUGGER" = "y" ]; then
+  # Use this instead for ddms and connect by running 'ddms':
+  # DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_android_adb,server=y,suspend=y"
+  # TODO: add a separate --ddms option?
+
+  PORT=12345
+  msg "Waiting for jdb to connect:"
+  if [ "$HOST" = "n" ]; then
+    msg "    adb forward tcp:$PORT tcp:$PORT"
+  fi
+  msg "    jdb -attach localhost:$PORT"
+  DEBUGGER_OPTS="-agentlib:jdwp=transport=dt_socket,address=$PORT,server=y,suspend=y"
+fi
+
+if [ "$USE_GDB" = "y" ]; then
+  if [ "$HOST" = "n" ]; then
+    GDB="$GDB_SERVER :5039"
+    GDB_ARGS="$DALVIKVM"
+  else
+    if [ `uname` = "Darwin" ]; then
+        GDB=lldb
+        GDB_ARGS="-- $DALVIKVM"
+        DALVIKVM=
+    else
+        GDB=gdb
+        GDB_ARGS="--args $DALVIKVM"
+        # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line.
+        # GDB_ARGS="--annotate=3 $GDB_ARGS"
+    fi
+  fi
+fi
+
+if [ "$INTERPRETER" = "y" ]; then
+    INT_OPTS="-Xint"
+    COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only"
+fi
+
+JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
+
+if [ "$RELOCATE" = "y" ]; then
+    COMPILE_FLAGS="${COMPILE_FLAGS} --include-patch-information --runtime-arg -Xnorelocate"
+    FLAGS="${FLAGS} -Xrelocate -Xcompiler-option --include-patch-information"
+    if [ "$HOST" = "y" ]; then
+        # Run test sets a fairly draconian ulimit that we will likely blow right over
+        # since we are relocating. Get the total size of the /system/framework directory
+        # in 512 byte blocks and set it as the ulimit. This should be more than enough
+        # room.
+        if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
+          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework | tail -1 | cut -f1) || exit 1
+        fi
+    fi
+else
+    FLAGS="$FLAGS -Xnorelocate"
+    COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate"
+fi
+
+if [ "$HOST" = "n" ]; then
+  ISA=$(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
+  if [ x"$ISA" = "x" ]; then
+    echo "Unable to determine architecture"
+    exit 1
+  fi
+fi
+
+dex2oat_cmdline="true"
+mkdir_cmdline="mkdir -p ${DEX_LOCATION}/dalvik-cache/$ISA"
+
+if [ "$PREBUILD" = "y" ]; then
+  dex2oat_cmdline="$INVOKE_WITH dex2oatd \
+                      $COMPILE_FLAGS \
+                      $DEX2OAT_BOOT_OPT \
+                      --dex-file=$DEX_LOCATION/$TEST_NAME.jar \
+                      --oat-file=$DEX_LOCATION/dalvik-cache/$ISA/$(echo $DEX_LOCATION/$TEST_NAME.jar/classes.dex | cut -d/ -f 2- | sed "s:/:@:g") \
+                      --instruction-set=$ISA"
+fi
+
+dalvikvm_cmdline="$INVOKE_WITH $GDB $DALVIKVM \
+                  $GDB_ARGS \
+                  $FLAGS \
+                  -XXlib:$LIB \
+                  $PATCHOAT \
+                  $DEX2OAT \
+                  $ZYGOTE \
+                  $JNI_OPTS \
+                  $INT_OPTS \
+                  $DEBUGGER_OPTS \
+                  $DALVIKVM_BOOT_OPT \
+                  -cp $DEX_LOCATION/$TEST_NAME.jar $MAIN"
+
+
+if [ "$HOST" = "n" ]; then
+    adb root > /dev/null
+    adb wait-for-device
+    if [ "$QUIET" = "n" ]; then
+      adb shell rm -r $DEX_LOCATION
+      adb shell mkdir -p $DEX_LOCATION
+      adb push $TEST_NAME.jar $DEX_LOCATION
+      adb push $TEST_NAME-ex.jar $DEX_LOCATION
+    else
+      adb shell rm -r $DEX_LOCATION >/dev/null 2>&1
+      adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
+      adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
+      adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
+    fi
+
+    # Create a script with the command. The command can get longer than the longest
+    # allowed adb command and there is no way to get the exit status from an adb shell
+    # command.
+    cmdline="cd $DEX_LOCATION && \
+             export ANDROID_DATA=$DEX_LOCATION && \
+             export DEX_LOCATION=$DEX_LOCATION && \
+             $mkdir_cmdline && \
+             $dex2oat_cmdline && \
+             $dalvikvm_cmdline"
+
+    cmdfile=$(tempfile -p "cmd-" -s "-$TEST_NAME")
+    echo "$cmdline" > $cmdfile
+
+    if [ "$DEV_MODE" = "y" ]; then
+      echo $cmdline
+    fi
+
+    if [ "$QUIET" = "n" ]; then
+      adb push $cmdfile $DEX_LOCATION/cmdline.sh
+    else
+      adb push $cmdfile $DEX_LOCATION/cmdline.sh > /dev/null 2>&1
+    fi
+
+    adb shell sh $DEX_LOCATION/cmdline.sh
+
+    rm -f $cmdfile
+else
+    export ANDROID_PRINTF_LOG=brief
+    if [ "$DEV_MODE" = "y" ]; then
+        export ANDROID_LOG_TAGS='*:d'
+    else
+        export ANDROID_LOG_TAGS='*:s'
+    fi
+    export ANDROID_DATA="$DEX_LOCATION"
+    export ANDROID_ROOT="${ANDROID_HOST_OUT}"
+    export LD_LIBRARY_PATH="${ANDROID_ROOT}/lib"
+    export DYLD_LIBRARY_PATH="${ANDROID_ROOT}/lib"
+    export PATH="$PATH:${ANDROID_ROOT}/bin"
+
+    cmdline="$dalvikvm_cmdline"
+
+    if [ "$TIME_OUT" = "y" ]; then
+      # Add timeout command if time out is desired.
+      cmdline="timeout $TIME_OUT_VALUE $cmdline"
+    fi
+
+    if [ "$DEV_MODE" = "y" ]; then
+      if [ "$PREBUILD" = "y" ]; then
+        echo "$mkdir_cmdline && $dex2oat_cmdline && $cmdline"
+      elif [ "$RELOCATE" = "y" ]; then
+        echo "$mkdir_cmdline && $cmdline"
+      else
+        echo $cmdline
+      fi
+    fi
+
+    cd $ANDROID_BUILD_TOP
+
+    $mkdir_cmdline || exit 1
+    $dex2oat_cmdline || exit 2
+
+    if [ "$USE_GDB" = "y" ]; then
+      # When running under gdb, we cannot do piping and grepping...
+      LD_PRELOAD=libsigchain.so $cmdline "$@"
+    else
+      # If we are execing /bin/false we might not be on the same ISA as libsigchain.so.
+      # ld.so will helpfully warn us of this. Unfortunately this messes up our error
+      # checking, so we will just filter out the error with a grep.
+      LD_PRELOAD=libsigchain.so $cmdline "$@" 2>&1 | grep -v -E "^ERROR: ld\.so: object '.+\.so' from LD_PRELOAD cannot be preloaded.*: ignored\.$"
+      # Add extra detail if time out is enabled.
+      if [ ${PIPESTATUS[0]} = 124 ] && [ "$TIME_OUT" = "y" ]; then
+        echo -e "\e[91mTEST TIMED OUT!\e[0m" >&2
+      fi
+    fi
+fi
diff --git a/test/run-test b/test/run-test
index b140fbf..3b5df0d 100755
--- a/test/run-test
+++ b/test/run-test
@@ -41,7 +41,7 @@
 
 export JAVA="java"
 export JAVAC="javac -g"
-export RUN="${progdir}/etc/push-and-run-test-jar"
+export RUN="${progdir}/etc/run-test-jar"
 export DEX_LOCATION=/data/run-test/${test_dir}
 export NEED_DEX="true"
 
@@ -89,10 +89,12 @@
     if [ "x$1" = "x--host" ]; then
         target_mode="no"
         DEX_LOCATION=$tmp_dir
+        run_args="${run_args} --host"
         shift
     elif [ "x$1" = "x--jvm" ]; then
         target_mode="no"
         runtime="jvm"
+        prebuild_mode="no"
         NEED_DEX="false"
         shift
     elif [ "x$1" = "x-O" ]; then
@@ -118,9 +120,11 @@
         relocate="no"
         shift
     elif [ "x$1" = "x--prebuild" ]; then
+        run_args="${run_args} --prebuild"
         prebuild_mode="yes"
         shift;
     elif [ "x$1" = "x--no-prebuild" ]; then
+        run_args="${run_args} --no-prebuild"
         prebuild_mode="no"
         shift;
     elif [ "x$1" = "x--gcverify" ]; then
@@ -261,15 +265,6 @@
             echo "--prebuild with --jvm is unsupported";
             exit 1;
         fi
-    else
-        RUN="${progdir}/etc/host-run-test-jar"
-        if [ "$prebuild_mode" = "yes" ]; then
-            run_args="${run_args} --prebuild"
-        fi
-    fi
-else
-    if [ "$prebuild_mode" = "yes" ]; then
-        RUN="${progdir}/etc/push-and-run-prebuilt-test-jar"
     fi
 fi
 
@@ -295,12 +290,12 @@
     fi
 elif [ "$runtime" = "art" ]; then
     if [ "$target_mode" = "no" ]; then
-	# ANDROID_BUILD_TOP and ANDROID_HOST_OUT are not set in a build environment.
+        # ANDROID_BUILD_TOP and ANDROID_HOST_OUT are not set in a build environment.
         if [ -z "$ANDROID_BUILD_TOP" ]; then
-	    export ANDROID_BUILD_TOP=$oldwd
+            export ANDROID_BUILD_TOP=$oldwd
         fi
         if [ -z "$ANDROID_HOST_OUT" ]; then
-	    export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
+            export ANDROID_HOST_OUT=$ANDROID_BUILD_TOP/out/host/linux-x86
         fi
         run_args="${run_args} --boot -Ximage:${ANDROID_HOST_OUT}/framework/core.art"
         run_args="${run_args} --runtime-option -Djava.library.path=${ANDROID_HOST_OUT}/lib${suffix64}"