Merge "Revert "Revert "Revert "Unfortunately, the test still hits too many run failures.""""
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index a221cfc..7b38e5e 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -243,14 +243,16 @@
 
 ifeq ($(HOST_OS),linux)
   # Larger frame-size for host clang builds today
-  art_host_non_debug_cflags += -Wframe-larger-than=2600
+  ifndef SANITIZE_HOST
+    art_host_non_debug_cflags += -Wframe-larger-than=2700
+  endif
   art_target_non_debug_cflags += -Wframe-larger-than=1728
 endif
 
 ifndef LIBART_IMG_HOST_BASE_ADDRESS
   $(error LIBART_IMG_HOST_BASE_ADDRESS unset)
 endif
-ART_HOST_CFLAGS += $(art_cflags) -DANDROID_SMP=1 -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
+ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
 ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
 
 ifndef LIBART_IMG_TARGET_BASE_ADDRESS
@@ -281,18 +283,6 @@
 ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MIN_DELTA=$(LIBART_IMG_TARGET_MIN_BASE_ADDRESS_DELTA)
 ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MAX_DELTA=$(LIBART_IMG_TARGET_MAX_BASE_ADDRESS_DELTA)
 
-ifeq ($(TARGET_CPU_SMP),true)
-  ART_TARGET_CFLAGS += -DANDROID_SMP=1
-else
-  ifeq ($(TARGET_CPU_SMP),false)
-    ART_TARGET_CFLAGS += -DANDROID_SMP=0
-  else
-    $(warning TARGET_CPU_SMP should be (true|false), found $(TARGET_CPU_SMP))
-    # Make sure we emit barriers for the worst case.
-    ART_TARGET_CFLAGS += -DANDROID_SMP=1
-  endif
-endif
-
 # To use oprofile_android --callgraph, uncomment this and recompile with "mmm art -B -j16"
 # ART_TARGET_CFLAGS += -fno-omit-frame-pointer -marm -mapcs
 
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 2951350..7e28b37 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -70,8 +70,15 @@
 
 RUNTIME_GTEST_COMMON_SRC_FILES := \
   runtime/arch/arch_test.cc \
+  runtime/arch/instruction_set_test.cc \
+  runtime/arch/instruction_set_features_test.cc \
   runtime/arch/memcmp16_test.cc \
   runtime/arch/stub_test.cc \
+  runtime/arch/arm/instruction_set_features_arm_test.cc \
+  runtime/arch/arm64/instruction_set_features_arm64_test.cc \
+  runtime/arch/mips/instruction_set_features_mips_test.cc \
+  runtime/arch/x86/instruction_set_features_x86_test.cc \
+  runtime/arch/x86_64/instruction_set_features_x86_64_test.cc \
   runtime/barrier_test.cc \
   runtime/base/bit_field_test.cc \
   runtime/base/bit_vector_test.cc \
@@ -109,7 +116,6 @@
   runtime/handle_scope_test.cc \
   runtime/indenter_test.cc \
   runtime/indirect_reference_table_test.cc \
-  runtime/instruction_set_test.cc \
   runtime/intern_table_test.cc \
   runtime/interpreter/safe_math_test.cc \
   runtime/java_vm_ext_test.cc \
@@ -186,7 +192,9 @@
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
-  compiler/utils//assembler_thumb_test.cc \
+  compiler/utils/arm/assembler_arm32_test.cc \
+  compiler/utils/arm/assembler_thumb2_test.cc \
+  compiler/utils/assembler_thumb_test.cc \
   compiler/utils/x86/assembler_x86_test.cc \
   compiler/utils/x86_64/assembler_x86_64_test.cc
 
@@ -203,7 +211,7 @@
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
 LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler libdl
-LOCAL_STATIC_LIBRARIES += libgtest_libc++
+LOCAL_STATIC_LIBRARIES += libgtest
 LOCAL_CLANG := $(ART_TARGET_CLANG)
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index 9fe3807..e8b363b 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -23,6 +23,13 @@
 
 include art/build/Android.common_build.mk
 
+ifeq ($(DEX2OAT_HOST_INSTRUCTION_SET_FEATURES),)
+  DEX2OAT_HOST_INSTRUCTION_SET_FEATURES := default
+endif
+ifeq ($($(HOST_2ND_ARCH_VAR_PREFIX)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES),)
+  $(HOST_2ND_ARCH_VAR_PREFIX)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES := default
+endif
+
 # Use dex2oat debug version for better error reporting
 # $(1): compiler - default, optimizing or interpreter.
 # $(2): pic/no-pic
@@ -91,7 +98,7 @@
 	  $$(addprefix --dex-location=,$$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$$(PRIVATE_CORE_OAT_NAME) \
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(3)ART_HOST_ARCH) \
-	  --instruction-set-features=$$($(3)HOST_INSTRUCTION_SET_FEATURES) \
+	  --instruction-set-features=$$($(3)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES) \
 	  --host --android-root=$$(HOST_OUT) --include-patch-information \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS)
 
@@ -194,7 +201,7 @@
 	  $$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$(PRIVATE_CORE_OAT_NAME) \
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(3)TARGET_ARCH) \
-	  --instruction-set-features=$$($(3)TARGET_INSTRUCTION_SET_FEATURES) \
+	  --instruction-set-features=$$($(3)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
 	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1)
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index bfdb537..97387a1 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -16,6 +16,7 @@
 
 #include "common_compiler_test.h"
 
+#include "arch/instruction_set_features.h"
 #include "class_linker.h"
 #include "compiled_method.h"
 #include "dex/quick_compiler_callbacks.h"
@@ -156,7 +157,7 @@
                                               method_inliner_map_.get(),
                                               compiler_kind, instruction_set,
                                               instruction_set_features_.get(),
-                                              true, new std::set<std::string>,
+                                              true, new std::set<std::string>, nullptr,
                                               2, true, true, timer_.get(), ""));
   }
   // We typically don't generate an image in unit tests, disable this optimization by default.
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 0361cd1..7f76eef 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 
-#include "instruction_set.h"
+#include "arch/instruction_set.h"
 #include "method_reference.h"
 #include "utils.h"
 #include "utils/array_ref.h"
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 5d877fd..b56fd6f 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -610,21 +610,22 @@
 // LIR fixup kinds for Arm
 enum FixupKind {
   kFixupNone,
-  kFixupLabel,       // For labels we just adjust the offset.
-  kFixupLoad,        // Mostly for immediates.
-  kFixupVLoad,       // FP load which *may* be pc-relative.
-  kFixupCBxZ,        // Cbz, Cbnz.
-  kFixupTBxZ,        // Tbz, Tbnz.
-  kFixupPushPop,     // Not really pc relative, but changes size based on args.
-  kFixupCondBranch,  // Conditional branch
-  kFixupT1Branch,    // Thumb1 Unconditional branch
-  kFixupT2Branch,    // Thumb2 Unconditional branch
-  kFixupBlx1,        // Blx1 (start of Blx1/Blx2 pair).
-  kFixupBl1,         // Bl1 (start of Bl1/Bl2 pair).
-  kFixupAdr,         // Adr.
-  kFixupMovImmLST,   // kThumb2MovImm16LST.
-  kFixupMovImmHST,   // kThumb2MovImm16HST.
-  kFixupAlign4,      // Align to 4-byte boundary.
+  kFixupLabel,             // For labels we just adjust the offset.
+  kFixupLoad,              // Mostly for immediates.
+  kFixupVLoad,             // FP load which *may* be pc-relative.
+  kFixupCBxZ,              // Cbz, Cbnz.
+  kFixupTBxZ,              // Tbz, Tbnz.
+  kFixupPushPop,           // Not really pc relative, but changes size based on args.
+  kFixupCondBranch,        // Conditional branch
+  kFixupT1Branch,          // Thumb1 Unconditional branch
+  kFixupT2Branch,          // Thumb2 Unconditional branch
+  kFixupBlx1,              // Blx1 (start of Blx1/Blx2 pair).
+  kFixupBl1,               // Bl1 (start of Bl1/Bl2 pair).
+  kFixupAdr,               // Adr.
+  kFixupMovImmLST,         // kThumb2MovImm16LST.
+  kFixupMovImmHST,         // kThumb2MovImm16HST.
+  kFixupAlign4,            // Align to 4-byte boundary.
+  kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
 };
 std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
 
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index b4eebb3..a3b4df3 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -536,8 +536,8 @@
       if (direct_code == 0) {
         // kInvokeTgt := arg0_ref->entrypoint
         cg->LoadWordDisp(arg0_ref,
-                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                         cg->TargetPtrReg(kInvokeTgt));
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                             kArmPointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
       }
       break;
     default:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 57544b5..cab039b 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -16,6 +16,7 @@
 
 /* This file contains codegen for the Thumb2 ISA. */
 
+#include "arch/instruction_set_features.h"
 #include "arm_lir.h"
 #include "codegen_arm.h"
 #include "dex/quick/mir_to_lir-inl.h"
@@ -1119,7 +1120,9 @@
 }
 
 bool ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-#if ANDROID_SMP != 0
+  if (!cu_->GetInstructionSetFeatures()->IsSmp()) {
+    return false;
+  }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
   LIR* barrier = last_lir_insn_;
 
@@ -1149,9 +1152,6 @@
   DCHECK(!barrier->flags.use_def_invalid);
   barrier->u.m.def_mask = &kEncodeAll;
   return ret;
-#else
-  return false;
-#endif
 }
 
 void ArmMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
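
The GenMemBarrier() rewrite above (mirrored below for arm64, mips, and x86) is the core of this patch's ANDROID_SMP removal: the compile-time #if becomes a per-compilation query on the target's InstructionSetFeatures. A minimal sketch of the pattern, using simplified stand-in types rather than the real Mir2Lir interface:

    struct Features {
      bool IsSmp() const { return smp_; }
      bool smp_;
    };

    bool GenMemBarrier(const Features* features) {
      if (!features->IsSmp()) {
        return false;  // Single-core target: no barrier instruction needed.
      }
      // ... emit the ISA's barrier (dmb / sync / mfence), as each backend does ...
      return true;
    }

This lets one compiler binary serve both SMP and non-SMP targets, where the old preprocessor flag baked the choice into the build.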
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 0d5aa90..0c7812b 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#include "arm_lir.h"
 #include "codegen_arm.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "arm_lir.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 
@@ -974,7 +976,7 @@
   LIR* load;
   if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
       !cu_->compiler_driver->GetInstructionSetFeatures()->
-          AsArmInstructionSetFeatures()->HasLpae()) {
+          AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
     // Only 64-bit load needs special handling.
     // If the cpu supports LPAE, aligned LDRD is atomic - fall through to LoadBaseDisp().
     DCHECK(!r_dest.IsFloat());  // See RegClassForFieldLoadSave().
@@ -1104,7 +1106,7 @@
   LIR* store;
   if (is_volatile == kVolatile && (size == k64 || size == kDouble) &&
       !cu_->compiler_driver->GetInstructionSetFeatures()->
-          AsArmInstructionSetFeatures()->HasLpae()) {
+          AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd()) {
     // Only 64-bit store needs special handling.
     // If the cpu supports LPAE, aligned STRD is atomic - fall through to StoreBaseDisp().
     // Use STREXD for the atomic store. (Expect displacement > 0, don't optimize for == 0.)
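
The HasLpae() to HasAtomicLdrdAndStrd() rename names the property these two call sites actually rely on: on cores where aligned LDRD/STRD are single-copy atomic (LPAE implies this), a volatile 64-bit access can take the plain load/store path; otherwise it must use an exclusive LDREXD/STREXD sequence. A sketch of the predicate, under an assumed simplified signature:

    bool NeedsExclusive64BitAccess(bool is_volatile, bool is_wide,
                                   bool has_atomic_ldrd_strd) {
      // Plain aligned LDRD/STRD is only atomic when the CPU guarantees it;
      // volatile 64-bit accesses otherwise fall back to LDREXD/STREXD.
      return is_volatile && is_wide && !has_atomic_ldrd_strd;
    }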
diff --git a/compiler/dex/quick/arm64/arm64_lir.h b/compiler/dex/quick/arm64/arm64_lir.h
index 973279e..f8a7310 100644
--- a/compiler/dex/quick/arm64/arm64_lir.h
+++ b/compiler/dex/quick/arm64/arm64_lir.h
@@ -320,6 +320,7 @@
   kA64Mul3rrr,       // mul [00011011000] rm[20-16] [011111] rn[9-5] rd[4-0].
   kA64Msub4rrrr,     // msub[s0011011000] rm[20-16] [1] ra[14-10] rn[9-5] rd[4-0].
   kA64Neg3rro,       // neg alias of "sub arg0, rzr, arg1, arg2".
+  kA64Nop0,          // nop alias of "hint #0" [11010101000000110010000000011111].
   kA64Orr3Rrl,       // orr [s01100100] N[22] imm_r[21-16] imm_s[15-10] rn[9-5] rd[4-0].
   kA64Orr4rrro,      // orr [s0101010] shift[23-22] [0] rm[20-16] imm_6[15-10] rn[9-5] rd[4-0].
   kA64Ret,           // ret [11010110010111110000001111000000].
@@ -332,7 +333,7 @@
   kA64Scvtf2fw,      // scvtf  [000111100s100010000000] rn[9-5] rd[4-0].
   kA64Scvtf2fx,      // scvtf  [100111100s100010000000] rn[9-5] rd[4-0].
   kA64Sdiv3rrr,      // sdiv[s0011010110] rm[20-16] [000011] rn[9-5] rd[4-0].
-  kA64Smaddl4xwwx,   // smaddl [10011011001] rm[20-16] [0] ra[14-10] rn[9-5] rd[4-0].
+  kA64Smull3xww,     // smull [10011011001] rm[20-16] [011111] rn[9-5] rd[4-0].
   kA64Smulh3xxx,     // smulh [10011011010] rm[20-16] [011111] rn[9-5] rd[4-0].
   kA64Stp4ffXD,      // stp [0s10110100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
   kA64Stp4rrXD,      // stp [s010100100] imm_7[21-15] rt2[14-10] rn[9-5] rt[4-0].
diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc
index da7ac87..cab11cc 100644
--- a/compiler/dex/quick/arm64/assemble_arm64.cc
+++ b/compiler/dex/quick/arm64/assemble_arm64.cc
@@ -14,8 +14,10 @@
  * limitations under the License.
  */
 
-#include "arm64_lir.h"
 #include "codegen_arm64.h"
+
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arm64_lir.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
 namespace art {
@@ -468,13 +470,17 @@
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "mul", "!0r, !1r, !2r", kFixupNone),
     ENCODING_MAP(WIDE(kA64Msub4rrrr), SF_VARIANTS(0x1b008000),
-                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 14, 10,
-                 kFmtRegR, 20, 16, IS_QUAD_OP | REG_DEF0_USE123,
-                 "msub", "!0r, !1r, !3r, !2r", kFixupNone),
+                 kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
+                 kFmtRegR, 14, 10, IS_QUAD_OP | REG_DEF0_USE123 | NEEDS_FIXUP,
+                 "msub", "!0r, !1r, !2r, !3r", kFixupA53Erratum835769),
     ENCODING_MAP(WIDE(kA64Neg3rro), SF_VARIANTS(0x4b0003e0),
                  kFmtRegR, 4, 0, kFmtRegR, 20, 16, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "neg", "!0r, !1r!2o", kFixupNone),
+    ENCODING_MAP(kA64Nop0, NO_VARIANTS(0xd503201f),
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND,
+                 "nop", "", kFixupNone),
     ENCODING_MAP(WIDE(kA64Orr3Rrl), SF_VARIANTS(0x32000000),
                  kFmtRegROrSp, 4, 0, kFmtRegR, 9, 5, kFmtBitBlt, 22, 10,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
@@ -523,10 +529,10 @@
                  kFmtRegR, 4, 0, kFmtRegR, 9, 5, kFmtRegR, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "sdiv", "!0r, !1r, !2r", kFixupNone),
-    ENCODING_MAP(WIDE(kA64Smaddl4xwwx), NO_VARIANTS(0x9b200000),
+    ENCODING_MAP(kA64Smull3xww, NO_VARIANTS(0x9b207c00),
                  kFmtRegX, 4, 0, kFmtRegW, 9, 5, kFmtRegW, 20, 16,
-                 kFmtRegX, 14, 10, IS_QUAD_OP | REG_DEF0_USE123,
-                 "smaddl", "!0x, !1w, !2w, !3x", kFixupNone),
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "smull", "!0x, !1w, !2w", kFixupNone),
     ENCODING_MAP(kA64Smulh3xxx, NO_VARIANTS(0x9b407c00),
                  kFmtRegX, 4, 0, kFmtRegX, 9, 5, kFmtRegX, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
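
The kA64Smull3xww base encoding follows directly from the kA64Smaddl4xwwx one it replaces: smull xd, wn, wm is the smaddl alias with the accumulator ra fixed to the zero register xzr (register 31), and ra occupies bits [14:10]. Checking the arithmetic:

    #include <cstdint>

    constexpr uint32_t kSmaddlBase = 0x9b200000;  // ra field [14:10] still zero.
    constexpr uint32_t kXzr = 31;                 // Encoding of the zero register.
    constexpr uint32_t kSmullBase = kSmaddlBase | (kXzr << 10);
    static_assert(kSmullBase == 0x9b207c00, "smull == smaddl with ra = xzr");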
@@ -881,6 +887,14 @@
             LOG(FATAL) << "Invalid jump range in kFixupT1Branch";
           }
           lir->operands[0] = delta >> 2;
+          if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && lir->operands[0] == 1) {
+            // Useless branch.
+            offset_adjustment -= lir->flags.size;
+            lir->flags.is_nop = true;
+            // Don't unlink - just set to do-nothing.
+            lir->flags.fixup = kFixupNone;
+            res = kRetryAll;
+          }
           break;
         }
         case kFixupLoad:
@@ -980,6 +994,30 @@
           lir->operands[1] = delta;
           break;
         }
+        case kFixupA53Erratum835769:
+          // Avoid emitting code that could trigger Cortex A53's erratum 835769.
+          // This fixup should be carried out for all multiply-accumulate instructions: madd, msub,
+          // smaddl, smsubl, umaddl and umsubl.
+          if (cu_->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()
+              ->NeedFixCortexA53_835769()) {
+            // Check that this is a 64-bit multiply-accumulate.
+            if (IS_WIDE(lir->opcode)) {
+              uint64_t prev_insn_flags = EncodingMap[UNWIDE(lir->prev->opcode)].flags;
+              // Check that the instruction preceding the multiply-accumulate is a load or store.
+              if ((prev_insn_flags & IS_LOAD) != 0 || (prev_insn_flags & IS_STORE) != 0) {
+                // Insert a NOP between the load/store and the multiply-accumulate.
+                LIR* new_lir = RawLIR(lir->dalvik_offset, kA64Nop0, 0, 0, 0, 0, 0, NULL);
+                new_lir->offset = lir->offset;
+                new_lir->flags.fixup = kFixupNone;
+                new_lir->flags.size = EncodingMap[kA64Nop0].size;
+                InsertLIRBefore(lir, new_lir);
+                lir->offset += new_lir->flags.size;
+                offset_adjustment += new_lir->flags.size;
+                res = kRetryAll;
+              }
+            }
+          }
+          break;
         default:
           LOG(FATAL) << "Unexpected case " << lir->flags.fixup;
       }
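
ARM erratum 835769 can make some Cortex-A53 cores produce a wrong result when a 64-bit multiply-accumulate immediately follows a 64-bit load or store, which is why the new fixup above inserts a NOP between any such pair. A minimal sketch of the same scan-and-insert idea over a hypothetical flat instruction list (the real code operates on ART's LIR and retries assembly after inserting):

    #include <cstddef>
    #include <vector>

    struct Insn {
      bool is_load_or_store;
      bool is_wide_mul_accumulate;  // 64-bit madd/msub/smaddl/smsubl/umaddl/umsubl.
    };

    void ApplyA53Erratum835769Fixup(std::vector<Insn>* code) {
      for (size_t i = 1; i < code->size(); ++i) {
        if ((*code)[i].is_wide_mul_accumulate && (*code)[i - 1].is_load_or_store) {
          // Separate the memory access from the multiply-accumulate with a NOP.
          code->insert(code->begin() + i, Insn{false, false});
          ++i;  // The pair is now split; move past the inserted NOP.
        }
      }
    }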
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index a9a58a3..3e5b7bf 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -296,7 +296,8 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm64);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+    !FrameNeedsStackCheck(frame_size_, kArm64);
 
   NewLIR0(kPseudoMethodEntry);
 
@@ -320,7 +321,7 @@
       // TODO: If the frame size is small enough, is it possible to make this a pre-indexed load,
       //       so that we can avoid the following "sub sp" when spilling?
       OpRegRegImm(kOpSub, rs_x8, rs_sp, GetStackOverflowReservedBytes(kArm64));
-      LoadWordDisp(rs_x8, 0, rs_x8);
+      Load32Disp(rs_x8, 0, rs_wzr);
       MarkPossibleStackOverflowException();
     }
   }
@@ -472,8 +473,8 @@
       if (direct_code == 0) {
         // kInvokeTgt := arg0_ref->entrypoint
         cg->LoadWordDisp(arg0_ref,
-                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
-                         cg->TargetPtrReg(kInvokeTgt));
+                         mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                             kArm64PointerSize).Int32Value(), cg->TargetPtrReg(kInvokeTgt));
       }
       break;
     default:
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e57f99c..0e00698 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -16,6 +16,7 @@
 
 /* This file contains codegen for the Arm64 ISA. */
 
+#include "arch/instruction_set_features.h"
 #include "arm64_lir.h"
 #include "codegen_arm64.h"
 #include "dex/quick/mir_to_lir-inl.h"
@@ -427,8 +428,7 @@
   rl_src = LoadValue(rl_src, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage r_long_mul = AllocTemp();
-  NewLIR4(kA64Smaddl4xwwx, As64BitReg(r_long_mul).GetReg(),
-          r_magic.GetReg(), rl_src.reg.GetReg(), rxzr);
+  NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
   switch (pattern) {
     case Divide3:
       OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
@@ -648,7 +648,7 @@
     }
     OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
     NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
-            r_src1.GetReg(), r_src2.GetReg());
+            r_src2.GetReg(), r_src1.GetReg());
     FreeTemp(temp);
   }
   return rl_result;
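
The operand swap in NewLIR4 above goes with the encoding-map reordering in assemble_arm64.cc: A64 msub computes rd = ra - rn * rm, so for a remainder the dividend must land in ra and quotient * divisor in rn * rm. A small reference sketch of the arithmetic (not ART code):

    #include <cstdint>

    int64_t Msub(int64_t rn, int64_t rm, int64_t ra) { return ra - rn * rm; }

    // rem = Msub(quotient, divisor, dividend); e.g. for 17 / 5:
    //   quotient = 3, and Msub(3, 5, 17) == 17 - 15 == 2, the expected remainder.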
@@ -979,7 +979,9 @@
 }
 
 bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-#if ANDROID_SMP != 0
+  if (!cu_->GetInstructionSetFeatures()->IsSmp()) {
+    return false;
+  }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
   LIR* barrier = last_lir_insn_;
 
@@ -1015,9 +1017,6 @@
   DCHECK(!barrier->flags.use_def_invalid);
   barrier->u.m.def_mask = &kEncodeAll;
   return ret;
-#else
-  return false;
-#endif
 }
 
 void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
@@ -1147,11 +1146,6 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
-  // If index is constant, just fold it into the data offset
-  if (constant_index) {
-    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
-  }
-
   /* null object? */
   GenNullCheck(rl_array.reg, opt_flags);
 
@@ -1165,42 +1159,22 @@
   } else {
     ForceImplicitNullCheck(rl_array.reg, opt_flags);
   }
-  if (rl_dest.wide || rl_dest.fp || constant_index) {
-    RegStorage reg_ptr;
-    if (constant_index) {
-      reg_ptr = rl_array.reg;  // NOTE: must not alter reg_ptr in constant case.
-    } else {
-      // No special indexed operation, lea + load w/ displacement
-      reg_ptr = AllocTempRef();
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
-                       EncodeShift(kA64Lsl, scale));
-      FreeTemp(rl_index.reg);
-    }
+  if (constant_index) {
     rl_result = EvalLoc(rl_dest, reg_class, true);
 
     if (needs_range_check) {
-      if (constant_index) {
-        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
-      } else {
-        GenArrayBoundsCheck(rl_index.reg, reg_len);
-      }
+      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
       FreeTemp(reg_len);
     }
+    // Fold the constant index into the data offset.
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
     if (rl_result.ref) {
-      LoadRefDisp(reg_ptr, data_offset, rl_result.reg, kNotVolatile);
+      LoadRefDisp(rl_array.reg, data_offset, rl_result.reg, kNotVolatile);
     } else {
-      LoadBaseDisp(reg_ptr, data_offset, rl_result.reg, size, kNotVolatile);
-    }
-    if (!constant_index) {
-      FreeTemp(reg_ptr);
-    }
-    if (rl_dest.wide) {
-      StoreValueWide(rl_dest, rl_result);
-    } else {
-      StoreValue(rl_dest, rl_result);
+      LoadBaseDisp(rl_array.reg, data_offset, rl_result.reg, size, kNotVolatile);
     }
   } else {
-    // Offset base, then use indexed load
+    // Offset base, then use indexed load.
     RegStorage reg_ptr = AllocTempRef();
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
     FreeTemp(rl_array.reg);
@@ -1211,11 +1185,15 @@
       FreeTemp(reg_len);
     }
     if (rl_result.ref) {
-      LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale);
+      LoadRefIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale);
     } else {
-      LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size);
+      LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
     }
     FreeTemp(reg_ptr);
+  }
+  if (rl_dest.wide) {
+    StoreValueWide(rl_dest, rl_result);
+  } else {
     StoreValue(rl_dest, rl_result);
   }
 }
@@ -1237,11 +1215,6 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
-  // If index is constant, just fold it into the data offset.
-  if (constant_index) {
-    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
-  }
-
   rl_array = LoadValue(rl_array, kRefReg);
   if (!constant_index) {
     rl_index = LoadValue(rl_index, kCoreReg);
@@ -1274,24 +1247,18 @@
     ForceImplicitNullCheck(rl_array.reg, opt_flags);
   }
   /* at this point, reg_ptr points to array, 2 live temps */
-  if (rl_src.wide || rl_src.fp || constant_index) {
-    if (rl_src.wide) {
-      rl_src = LoadValueWide(rl_src, reg_class);
-    } else {
-      rl_src = LoadValue(rl_src, reg_class);
-    }
-    if (!constant_index) {
-      OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, As64BitReg(rl_index.reg),
-                       EncodeShift(kA64Lsl, scale));
-    }
+  if (rl_src.wide) {
+    rl_src = LoadValueWide(rl_src, reg_class);
+  } else {
+    rl_src = LoadValue(rl_src, reg_class);
+  }
+  if (constant_index) {
     if (needs_range_check) {
-      if (constant_index) {
-        GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
-      } else {
-        GenArrayBoundsCheck(rl_index.reg, reg_len);
-      }
+      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
       FreeTemp(reg_len);
     }
+    // Fold the constant index into the data offset.
+    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
     if (rl_src.ref) {
       StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
     } else {
@@ -1300,15 +1267,14 @@
   } else {
     /* reg_ptr -> array data */
     OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
-    rl_src = LoadValue(rl_src, reg_class);
     if (needs_range_check) {
       GenArrayBoundsCheck(rl_index.reg, reg_len);
       FreeTemp(reg_len);
     }
     if (rl_src.ref) {
-      StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale);
+      StoreRefIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale);
     } else {
-      StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size);
+      StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
     }
   }
   if (allocated_reg_ptr_temp) {
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 061ee07..98ddc36 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include "arch/arm/instruction_set_features_arm.h"
 #include "dex/compiler_ir.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/arm/arm_lir.h"
@@ -161,6 +162,10 @@
   if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
     return GenExplicitNullCheck(m_reg, opt_flags);
   }
+  // If the null check has not been eliminated, reset redundant store tracking.
+  if ((opt_flags & MIR_IGNORE_NULL_CHECK) == 0) {
+    ResetDefTracking();
+  }
   return nullptr;
 }
 
@@ -425,7 +430,11 @@
       RegLocation loc = UpdateLoc(info->args[i]);
       if (loc.location == kLocPhysReg) {
         ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
-        Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        if (loc.ref) {
+          StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+        } else {
+          Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg);
+        }
       }
     }
     /*
@@ -481,9 +490,17 @@
   } else if (!info->is_range) {
     // TUNING: interleave
     for (int i = 0; i < elems; i++) {
-      RegLocation rl_arg = LoadValue(info->args[i], kCoreReg);
-      Store32Disp(ref_reg,
-                  mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
+      RegLocation rl_arg;
+      if (info->args[i].ref) {
+        rl_arg = LoadValue(info->args[i], kRefReg);
+        StoreRefDisp(ref_reg,
+                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg,
+                    kNotVolatile);
+      } else {
+        rl_arg = LoadValue(info->args[i], kCoreReg);
+        Store32Disp(ref_reg,
+                    mirror::Array::DataOffset(component_size).Int32Value() + i * 4, rl_arg.reg);
+      }
       // If the LoadValue caused a temp to be allocated, free it
       if (IsTemp(rl_arg.reg)) {
         FreeTemp(rl_arg.reg);
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 4cb12f1..a7900ae 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -476,9 +476,10 @@
 static bool CommonCallCodeLoadCodePointerIntoInvokeTgt(const RegStorage* alt_from,
                                                        const CompilationUnit* cu, Mir2Lir* cg) {
   if (cu->instruction_set != kX86 && cu->instruction_set != kX86_64) {
+    int32_t offset = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+        InstructionSetPointerSize(cu->instruction_set)).Int32Value();
     // Get the compiled code address [use *alt_from or kArg0, set kInvokeTgt]
-    cg->LoadWordDisp(alt_from == nullptr ? cg->TargetReg(kArg0, kRef) : *alt_from,
-                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value(),
+    cg->LoadWordDisp(alt_from == nullptr ? cg->TargetReg(kArg0, kRef) : *alt_from, offset,
                      cg->TargetPtrReg(kInvokeTgt));
     return true;
   }
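
EntryPointFromQuickCompiledCodeOffset() now takes an explicit pointer size because the entry point's offset within mirror::ArtMethod differs between 32-bit and 64-bit targets. The call sites in this patch derive the size from the instruction set; a sketch of the mapping they rely on (simplified enum, standard pointer widths):

    #include <cstddef>

    enum InstructionSet { kArm, kThumb2, kArm64, kX86, kX86_64, kMips };  // Subset.

    size_t PointerSize(InstructionSet isa) {
      switch (isa) {
        case kArm64:
        case kX86_64:
          return 8;
        default:  // kArm, kThumb2, kX86, kMips.
          return 4;
      }
    }

This is also what the x86 backend's cu_->target64 ? 8 : 4 below spells out inline.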
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index ca71c30..0d1d9bf 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -15,6 +15,7 @@
  */
 
 #include "codegen_mips.h"
+
 #include "dex/quick/mir_to_lir-inl.h"
 #include "mips_lir.h"
 
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 01784e2..ed73ef0 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -17,6 +17,7 @@
 /* This file contains codegen for the Mips ISA */
 
 #include "codegen_mips.h"
+
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 0a7aa99..495d85e 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -15,6 +15,7 @@
  */
 
 #include "codegen_mips.h"
+
 #include "dex/quick/mir_to_lir-inl.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "mips_lir.h"
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index d58ddb0..fb47238 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -17,6 +17,7 @@
 /* This file contains codegen for the Mips ISA */
 
 #include "codegen_mips.h"
+
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "entrypoints/quick/quick_entrypoints.h"
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 3615916..3df8f2e 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -214,44 +214,43 @@
   rF30 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 30,
   rF31 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 31,
 #endif
-#if (FR_BIT == 0)
-  rD0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+  // Double precision registers where the FPU is in 32-bit mode.
+  rD0_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
+  rD1_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  rD2_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  rD3_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  rD4_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  rD5_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  rD6_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  rD7_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
 #if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rD8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
-  rD9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
-  rD10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
-  rD11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
-  rD12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
-  rD13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
-  rD14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
-  rD15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
+  rD8_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 16,
+  rD9_fr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 18,
+  rD10_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 20,
+  rD11_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 22,
+  rD12_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 24,
+  rD13_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 26,
+  rD14_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 28,
+  rD15_fr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 30,
 #endif
-#else
-  rD0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
-  rD1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
-  rD2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
-  rD3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
-  rD4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
-  rD5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
-  rD6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
-  rD7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
+  // Double precision registers where the FPU is in 64-bit mode.
+  rD0_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  0,
+  rD1_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  1,
+  rD2_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  2,
+  rD3_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  3,
+  rD4_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  4,
+  rD5_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  5,
+  rD6_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  6,
+  rD7_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  7,
 #if 0  // TODO: expand resource mask to enable use of all MIPS fp registers.
-  rD8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
-  rD9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
-  rD10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
-  rD11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
-  rD12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
-  rD13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
-  rD14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
-  rD15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
-#endif
+  rD8_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  8,
+  rD9_fr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint |  9,
+  rD10_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
+  rD11_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
+  rD12_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
+  rD13_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
+  rD14_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
+  rD15_fr1 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
 #endif
 };
 
@@ -309,14 +308,23 @@
 constexpr RegStorage rs_rF14(RegStorage::kValid | rF14);
 constexpr RegStorage rs_rF15(RegStorage::kValid | rF15);
 
-constexpr RegStorage rs_rD0(RegStorage::kValid | rD0);
-constexpr RegStorage rs_rD1(RegStorage::kValid | rD1);
-constexpr RegStorage rs_rD2(RegStorage::kValid | rD2);
-constexpr RegStorage rs_rD3(RegStorage::kValid | rD3);
-constexpr RegStorage rs_rD4(RegStorage::kValid | rD4);
-constexpr RegStorage rs_rD5(RegStorage::kValid | rD5);
-constexpr RegStorage rs_rD6(RegStorage::kValid | rD6);
-constexpr RegStorage rs_rD7(RegStorage::kValid | rD7);
+constexpr RegStorage rs_rD0_fr0(RegStorage::kValid | rD0_fr0);
+constexpr RegStorage rs_rD1_fr0(RegStorage::kValid | rD1_fr0);
+constexpr RegStorage rs_rD2_fr0(RegStorage::kValid | rD2_fr0);
+constexpr RegStorage rs_rD3_fr0(RegStorage::kValid | rD3_fr0);
+constexpr RegStorage rs_rD4_fr0(RegStorage::kValid | rD4_fr0);
+constexpr RegStorage rs_rD5_fr0(RegStorage::kValid | rD5_fr0);
+constexpr RegStorage rs_rD6_fr0(RegStorage::kValid | rD6_fr0);
+constexpr RegStorage rs_rD7_fr0(RegStorage::kValid | rD7_fr0);
+
+constexpr RegStorage rs_rD0_fr1(RegStorage::kValid | rD0_fr1);
+constexpr RegStorage rs_rD1_fr1(RegStorage::kValid | rD1_fr1);
+constexpr RegStorage rs_rD2_fr1(RegStorage::kValid | rD2_fr1);
+constexpr RegStorage rs_rD3_fr1(RegStorage::kValid | rD3_fr1);
+constexpr RegStorage rs_rD4_fr1(RegStorage::kValid | rD4_fr1);
+constexpr RegStorage rs_rD5_fr1(RegStorage::kValid | rD5_fr1);
+constexpr RegStorage rs_rD6_fr1(RegStorage::kValid | rD6_fr1);
+constexpr RegStorage rs_rD7_fr1(RegStorage::kValid | rD7_fr1);
 
 // TODO: reduce/eliminate use of these.
 #define rMIPS_SUSPEND rS0
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 4a340ec..185112d 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -20,6 +20,7 @@
 
 #include <string>
 
+#include "arch/mips/instruction_set_features_mips.h"
 #include "backend_mips.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
@@ -34,8 +35,12 @@
 static constexpr RegStorage sp_regs_arr[] =
     {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
      rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
-static constexpr RegStorage dp_regs_arr[] =
-    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
+static constexpr RegStorage dp_fr0_regs_arr[] =
+    {rs_rD0_fr0, rs_rD1_fr0, rs_rD2_fr0, rs_rD3_fr0, rs_rD4_fr0, rs_rD5_fr0, rs_rD6_fr0,
+     rs_rD7_fr0};
+static constexpr RegStorage dp_fr1_regs_arr[] =
+    {rs_rD0_fr1, rs_rD1_fr1, rs_rD2_fr1, rs_rD3_fr1, rs_rD4_fr1, rs_rD5_fr1, rs_rD6_fr1,
+     rs_rD7_fr1};
 static constexpr RegStorage reserved_regs_arr[] =
     {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA};
 static constexpr RegStorage core_temps_arr[] =
@@ -44,17 +49,23 @@
 static constexpr RegStorage sp_temps_arr[] =
     {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10,
      rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15};
-static constexpr RegStorage dp_temps_arr[] =
-    {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7};
+static constexpr RegStorage dp_fr0_temps_arr[] =
+    {rs_rD0_fr0, rs_rD1_fr0, rs_rD2_fr0, rs_rD3_fr0, rs_rD4_fr0, rs_rD5_fr0, rs_rD6_fr0,
+     rs_rD7_fr0};
+static constexpr RegStorage dp_fr1_temps_arr[] =
+    {rs_rD0_fr1, rs_rD1_fr1, rs_rD2_fr1, rs_rD3_fr1, rs_rD4_fr1, rs_rD5_fr1, rs_rD6_fr1,
+     rs_rD7_fr1};
 
 static constexpr ArrayRef<const RegStorage> empty_pool;
 static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr);
 static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr);
-static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr);
+static constexpr ArrayRef<const RegStorage> dp_fr0_regs(dp_fr0_regs_arr);
+static constexpr ArrayRef<const RegStorage> dp_fr1_regs(dp_fr1_regs_arr);
 static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr);
 static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr);
 static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr);
-static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr);
+static constexpr ArrayRef<const RegStorage> dp_fr0_temps(dp_fr0_temps_arr);
+static constexpr ArrayRef<const RegStorage> dp_fr1_temps(dp_fr1_temps_arr);
 
 RegLocation MipsMir2Lir::LocCReturn() {
   return mips_loc_c_return;
@@ -129,14 +140,17 @@
  * Decode the register id.
  */
 ResourceMask MipsMir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
-  return reg.IsDouble()
-      /* Each double register is equal to a pair of single-precision FP registers */
-#if (FR_BIT == 0)
-      ? ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0)
-#else
-      ? ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0)
-#endif
-      : ResourceMask::Bit(reg.IsSingle() ? reg.GetRegNum() + kMipsFPReg0 : reg.GetRegNum());
+  if (reg.IsDouble()) {
+    if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
+      return ResourceMask::TwoBits((reg.GetRegNum() & ~1) + kMipsFPReg0);
+    } else {
+      return ResourceMask::TwoBits(reg.GetRegNum() * 2 + kMipsFPReg0);
+    }
+  } else if (reg.IsSingle()) {
+    return ResourceMask::Bit(reg.GetRegNum() + kMipsFPReg0);
+  } else {
+    return ResourceMask::Bit(reg.GetRegNum());
+  }
 }
 
 ResourceMask MipsMir2Lir::GetPCUseDefEncoding() const {
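
The old FR_BIT compile-time switch is gone: whether the MIPS FPU runs in 32-bit (FR=0) or 64-bit (FR=1) mode is now a runtime property queried through Is32BitFloatingPoint(). In FR=0 mode a double overlays an even/odd pair of single-precision registers, which is why the rewritten mask above covers two bits at (regnum & ~1); in FR=1 mode each single is half of one double. The single-to-double aliasing used later in this file boils down to:

    int DoubleRegNumForSingle(int sp_reg_num, bool fpu_is_32bit) {
      // FR=0: doubles carry even register numbers and pair F(n), F(n+1).
      // FR=1: F(n) is a half of double number n / 2.
      return fpu_is_32bit ? (sp_reg_num & ~1) : (sp_reg_num >> 1);
    }

    // e.g. F5 maps to double register number 4 (the F4/F5 pair) under FR=0,
    // but to double number 2 under FR=1.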
@@ -382,14 +396,25 @@
   Clobber(rs_rF13);
   Clobber(rs_rF14);
   Clobber(rs_rF15);
-  Clobber(rs_rD0);
-  Clobber(rs_rD1);
-  Clobber(rs_rD2);
-  Clobber(rs_rD3);
-  Clobber(rs_rD4);
-  Clobber(rs_rD5);
-  Clobber(rs_rD6);
-  Clobber(rs_rD7);
+  if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint()) {
+    Clobber(rs_rD0_fr0);
+    Clobber(rs_rD1_fr0);
+    Clobber(rs_rD2_fr0);
+    Clobber(rs_rD3_fr0);
+    Clobber(rs_rD4_fr0);
+    Clobber(rs_rD5_fr0);
+    Clobber(rs_rD6_fr0);
+    Clobber(rs_rD7_fr0);
+  } else {
+    Clobber(rs_rD0_fr1);
+    Clobber(rs_rD1_fr1);
+    Clobber(rs_rD2_fr1);
+    Clobber(rs_rD3_fr1);
+    Clobber(rs_rD4_fr1);
+    Clobber(rs_rD5_fr1);
+    Clobber(rs_rD6_fr1);
+    Clobber(rs_rD7_fr1);
+  }
 }
 
 RegLocation MipsMir2Lir::GetReturnWideAlt() {
@@ -420,33 +445,37 @@
   FreeTemp(rs_rMIPS_ARG3);
 }
 
-bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  UNUSED(barrier_kind);
-#if ANDROID_SMP != 0
-  NewLIR1(kMipsSync, 0 /* Only stype currently supported */);
-  return true;
-#else
-  return false;
-#endif
+bool MipsMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind ATTRIBUTE_UNUSED) {
+  if (cu_->GetInstructionSetFeatures()->IsSmp()) {
+    NewLIR1(kMipsSync, 0 /* Only stype currently supported */);
+    return true;
+  } else {
+    return false;
+  }
 }
 
 void MipsMir2Lir::CompilerInitializeRegAlloc() {
+  const bool fpu_is_32bit =
+      cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint();
   reg_pool_.reset(new (arena_) RegisterPool(this, arena_, core_regs, empty_pool /* core64 */,
-                                            sp_regs, dp_regs,
+                                            sp_regs,
+                                            fpu_is_32bit ? dp_fr0_regs : dp_fr1_regs,
                                             reserved_regs, empty_pool /* reserved64 */,
                                             core_temps, empty_pool /* core64_temps */,
-                                            sp_temps, dp_temps));
+                                            sp_temps,
+                                            fpu_is_32bit ? dp_fr0_temps : dp_fr1_temps));
 
   // Target-specific adjustments.
 
   // Alias single precision floats to appropriate half of overlapping double.
   for (RegisterInfo* info : reg_pool_->sp_regs_) {
     int sp_reg_num = info->GetReg().GetRegNum();
-#if (FR_BIT == 0)
-    int dp_reg_num = sp_reg_num & ~1;
-#else
-    int dp_reg_num = sp_reg_num >> 1;
-#endif
+    int dp_reg_num;
+    if (fpu_is_32bit) {
+      dp_reg_num = sp_reg_num & ~1;
+    } else {
+      dp_reg_num = sp_reg_num >> 1;
+    }
     RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | dp_reg_num);
     RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
     // Double precision register's master storage should refer to itself.
@@ -465,11 +494,11 @@
   // TODO: adjust when we roll to hard float calling convention.
   reg_pool_->next_core_reg_ = 2;
   reg_pool_->next_sp_reg_ = 2;
-#if (FR_BIT == 0)
-  reg_pool_->next_dp_reg_ = 2;
-#else
-  reg_pool_->next_dp_reg_ = 1;
-#endif
+  if (fpu_is_32bit) {
+    reg_pool_->next_dp_reg_ = 2;
+  } else {
+    reg_pool_->next_dp_reg_ = 1;
+  }
 }
 
 /*
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index a7dc84f..18f1cde 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -15,6 +15,8 @@
  */
 
 #include "codegen_mips.h"
+
+#include "arch/mips/instruction_set_features_mips.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "dex/reg_storage_eq.h"
 #include "mips_lir.h"
@@ -304,20 +306,22 @@
     case kOpXor:
       return OpRegRegReg(op, r_dest_src1, r_dest_src1, r_src2);
     case kOp2Byte:
-#if __mips_isa_rev >= 2
-      res = NewLIR2(kMipsSeb, r_dest_src1.GetReg(), r_src2.GetReg());
-#else
-      res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 24);
-      OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 24);
-#endif
+      if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+          ->IsMipsIsaRevGreaterThanEqual2()) {
+        res = NewLIR2(kMipsSeb, r_dest_src1.GetReg(), r_src2.GetReg());
+      } else {
+        res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 24);
+        OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 24);
+      }
       return res;
     case kOp2Short:
-#if __mips_isa_rev >= 2
-      res = NewLIR2(kMipsSeh, r_dest_src1.GetReg(), r_src2.GetReg());
-#else
-      res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 16);
-      OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 16);
-#endif
+      if (cu_->GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()
+          ->IsMipsIsaRevGreaterThanEqual2()) {
+        res = NewLIR2(kMipsSeh, r_dest_src1.GetReg(), r_src2.GetReg());
+      } else {
+        res = OpRegRegImm(kOpLsl, r_dest_src1, r_src2, 16);
+        OpRegRegImm(kOpAsr, r_dest_src1, r_dest_src1, 16);
+      }
       return res;
     case kOp2Char:
        return NewLIR3(kMipsAndi, r_dest_src1.GetReg(), r_src2.GetReg(), 0xFFFF);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 92ef70d..70ef991 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -201,6 +201,16 @@
     RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
     RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
 
+    if (cu_->instruction_set == kX86) {
+      // Can't handle double split between reg & memory.  Flush reg half to memory.
+      if (rl_dest.reg.IsDouble() && (reg_arg_low.Valid() != reg_arg_high.Valid())) {
+        DCHECK(reg_arg_low.Valid());
+        DCHECK(!reg_arg_high.Valid());
+        Store32Disp(TargetPtrReg(kSp), offset, reg_arg_low);
+        reg_arg_low = RegStorage::InvalidReg();
+      }
+    }
+
     if (reg_arg_low.Valid() && reg_arg_high.Valid()) {
       OpRegCopyWide(rl_dest.reg, RegStorage::MakeRegPair(reg_arg_low, reg_arg_high));
     } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) {
@@ -1238,7 +1248,7 @@
     if (opcode == kMirOpCheck) {
       // Combine check and work halves of throwing instruction.
       MIR* work_half = mir->meta.throw_insn;
-      mir->dalvikInsn.opcode = work_half->dalvikInsn.opcode;
+      mir->dalvikInsn = work_half->dalvikInsn;
       mir->optimization_flags = work_half->optimization_flags;
       mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
       opcode = work_half->dalvikInsn.opcode;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index bacc6d2..13ebc1e 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
 #define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
 
-#include "invoke_type.h"
+#include "arch/instruction_set.h"
 #include "compiled_method.h"
 #include "dex/compiler_enums.h"
 #include "dex/compiler_ir.h"
@@ -26,9 +26,9 @@
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
-#include "instruction_set.h"
-#include "leb128.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "invoke_type.h"
+#include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
 #include "utils/arena_allocator.h"
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index a54c55f..8d4cb3c 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -613,7 +613,8 @@
 }
 
 uintptr_t QuickCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
-  return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCode());
+  return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
+      InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
 }
 
 bool QuickCompiler::WriteElf(art::File* file,
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index db2f272..ead31b3 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -18,6 +18,7 @@
 #include <inttypes.h>
 #include <string>
 
+#include "arch/instruction_set_features.h"
 #include "backend_x86.h"
 #include "codegen_x86.h"
 #include "dex/compiler_internals.h"
@@ -594,7 +595,9 @@
 }
 
 bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-#if ANDROID_SMP != 0
+  if (!cu_->GetInstructionSetFeatures()->IsSmp()) {
+    return false;
+  }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
   LIR* mem_barrier = last_lir_insn_;
 
@@ -630,9 +633,6 @@
     mem_barrier->u.m.def_mask = &kEncodeAll;
   }
   return ret;
-#else
-  return false;
-#endif
 }
 
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
@@ -1006,7 +1006,8 @@
       call_insn = CallWithLinkerFixup(method_info.GetTargetMethod(), method_info.GetSharpType());
     } else {
       call_insn = OpMem(kOpBlx, TargetReg(kArg0, kRef),
-                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                            cu_->target64 ? 8 : 4).Int32Value());
     }
   } else {
     call_insn = GenInvokeNoInlineCall(this, method_info.GetSharpType());
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c1c79ca..ad3222c 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -488,6 +488,7 @@
       case kOpAdc:
       case kOpAnd:
       case kOpXor:
+      case kOpMul:
         break;
       default:
         LOG(FATAL) << "Bad case in OpRegRegReg " << op;
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 1805d59..ebf7874 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -240,7 +240,8 @@
   bool can_sharpen_super_based_on_type = (*invoke_type == kSuper) &&
       (referrer_class != methods_class) && referrer_class->IsSubClass(methods_class) &&
       resolved_method->GetMethodIndex() < methods_class->GetVTableLength() &&
-      (methods_class->GetVTableEntry(resolved_method->GetMethodIndex()) == resolved_method);
+      (methods_class->GetVTableEntry(resolved_method->GetMethodIndex()) == resolved_method) &&
+      !resolved_method->IsAbstract();
 
   if (can_sharpen_virtual_based_on_type || can_sharpen_super_based_on_type) {
     // Sharpen a virtual call into a direct call. The method_idx is into referrer's
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index aab94c0..08041e8 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -330,7 +330,8 @@
                                Compiler::Kind compiler_kind,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures* instruction_set_features,
-                               bool image, std::set<std::string>* image_classes, size_t thread_count,
+                               bool image, std::set<std::string>* image_classes,
+                               std::set<std::string>* compiled_classes, size_t thread_count,
                                bool dump_stats, bool dump_passes, CumulativeLogger* timer,
                                const std::string& profile_file)
     : profile_present_(false), compiler_options_(compiler_options),
@@ -346,6 +347,7 @@
       non_relative_linker_patch_count_(0u),
       image_(image),
       image_classes_(image_classes),
+      classes_to_compile_(compiled_classes),
       thread_count_(thread_count),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
@@ -570,7 +572,7 @@
                                                                class_def);
   }
   CompileMethod(code_item, access_flags, invoke_type, class_def_idx, method_idx, jclass_loader,
-                *dex_file, dex_to_dex_compilation_level);
+                *dex_file, dex_to_dex_compilation_level, true);
 
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 
@@ -613,6 +615,17 @@
   }
 }
 
+bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
+  if (!IsImage()) {
+    return true;
+  } else {
+    if (classes_to_compile_ == nullptr) {
+      return true;
+    }
+    return classes_to_compile_->find(descriptor) != classes_to_compile_->end();
+  }
+}
+
 static void ResolveExceptionsForMethod(MutableMethodHelper* mh,
     std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
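
IsClassToCompile() above is a straightforward allow-list: outside image builds, or when no list was supplied, every class is eligible; for an image build with an explicit list, only the listed descriptors are considered for compilation. A condensed sketch of the rule:

    #include <set>
    #include <string>

    bool ShouldCompileClass(bool is_image, const std::set<std::string>* allow_list,
                            const char* descriptor) {
      if (!is_image || allow_list == nullptr) {
        return true;  // No restriction without an image build and a list.
      }
      return allow_list->find(descriptor) != allow_list->end();
    }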
@@ -1916,6 +1929,10 @@
     it.Next();
   }
   CompilerDriver* driver = manager->GetCompiler();
+
+  bool compilation_enabled = driver->IsClassToCompile(
+      dex_file.StringByTypeIdx(class_def.class_idx_));
+
   // Compile direct methods
   int64_t previous_direct_method_idx = -1;
   while (it.HasNextDirectMethod()) {
@@ -1929,7 +1946,8 @@
     previous_direct_method_idx = method_idx;
     driver->CompileMethod(it.GetMethodCodeItem(), it.GetMethodAccessFlags(),
                           it.GetMethodInvokeType(class_def), class_def_index,
-                          method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level);
+                          method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level,
+                          compilation_enabled);
     it.Next();
   }
   // Compile virtual methods
@@ -1945,7 +1963,8 @@
     previous_virtual_method_idx = method_idx;
     driver->CompileMethod(it.GetMethodCodeItem(), it.GetMethodAccessFlags(),
                           it.GetMethodInvokeType(class_def), class_def_index,
-                          method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level);
+                          method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level,
+                          compilation_enabled);
     it.Next();
   }
   DCHECK(!it.HasNext());
@@ -1977,7 +1996,8 @@
                                    InvokeType invoke_type, uint16_t class_def_idx,
                                    uint32_t method_idx, jobject class_loader,
                                    const DexFile& dex_file,
-                                   DexToDexCompilationLevel dex_to_dex_compilation_level) {
+                                   DexToDexCompilationLevel dex_to_dex_compilation_level,
+                                   bool compilation_enabled) {
   CompiledMethod* compiled_method = nullptr;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
 
@@ -1994,7 +2014,8 @@
     // Abstract methods don't have code.
   } else {
     MethodReference method_ref(&dex_file, method_idx);
-    bool compile = verification_results_->IsCandidateForCompilation(method_ref, access_flags);
+    bool compile = compilation_enabled &&
+                   verification_results_->IsCandidateForCompilation(method_ref, access_flags);
     if (compile) {
       // NOTE: if compiler declines to compile this method, it will return nullptr.
       compiled_method = compiler_->Compile(code_item, access_flags, invoke_type, class_def_idx,
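
The classes_to_compile_ filter introduced above only applies when building an
image, and a missing set means "compile everything". A minimal standalone
sketch of that rule, with a plain std::set and a bool flag standing in for the
driver's members (illustrative, not ART code):

    #include <set>
    #include <string>

    // Same three-way rule as CompilerDriver::IsClassToCompile above.
    static bool IsClassToCompile(bool is_image,
                                 const std::set<std::string>* classes_to_compile,
                                 const char* descriptor) {
      if (!is_image) {
        return true;  // The filter only applies to image compilation.
      }
      if (classes_to_compile == nullptr) {
        return true;  // No filter supplied: compile every class.
      }
      return classes_to_compile->count(descriptor) != 0;
    }
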
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 682b17a..437a1a9 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -21,6 +21,7 @@
 #include <string>
 #include <vector>
 
+#include "arch/instruction_set.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "class_reference.h"
@@ -28,7 +29,6 @@
 #include "compiler.h"
 #include "dex_file.h"
 #include "driver/compiler_options.h"
-#include "instruction_set.h"
 #include "invoke_type.h"
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
@@ -51,6 +51,7 @@
 class DexCompilationUnit;
 class DexFileToMethodInlinerMap;
 struct InlineIGetIPutData;
+class InstructionSetFeatures;
 class OatWriter;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
@@ -91,6 +92,7 @@
                           InstructionSet instruction_set,
                           const InstructionSetFeatures* instruction_set_features,
                           bool image, std::set<std::string>* image_classes,
+                          std::set<std::string>* compiled_classes,
                           size_t thread_count, bool dump_stats, bool dump_passes,
                           CumulativeLogger* timer, const std::string& profile_file);
 
@@ -374,6 +376,9 @@
   // Checks if the class given by its descriptor is one of the image_classes_.
   bool IsImageClass(const char* descriptor) const;
 
+  // Checks if the provided class should be compiled, i.e., is in classes_to_compile_.
+  bool IsClassToCompile(const char* descriptor) const;
+
   void RecordClassStatus(ClassReference ref, mirror::Class::Status status)
       LOCKS_EXCLUDED(compiled_classes_lock_);
 
@@ -475,7 +480,8 @@
   void CompileMethod(const DexFile::CodeItem* code_item, uint32_t access_flags,
                      InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx,
                      jobject class_loader, const DexFile& dex_file,
-                     DexToDexCompilationLevel dex_to_dex_compilation_level)
+                     DexToDexCompilationLevel dex_to_dex_compilation_level,
+                     bool compilation_enabled)
       LOCKS_EXCLUDED(compiled_methods_lock_);
 
   static void CompileClass(const ParallelCompilationManager* context, size_t class_def_index)
@@ -514,6 +520,11 @@
   // included in the image.
   std::unique_ptr<std::set<std::string>> image_classes_;
 
+  // If image_ is true, specifies the classes that will be compiled for
+  // the image. Note that if classes_to_compile_ is nullptr, all classes
+  // are compiled.
+  std::unique_ptr<std::set<std::string>> classes_to_compile_;
+
   size_t thread_count_;
 
   class AOTCompilationStats;
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 7f30565..273b62d 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -17,12 +17,12 @@
 #ifndef ART_COMPILER_ELF_BUILDER_H_
 #define ART_COMPILER_ELF_BUILDER_H_
 
+#include "arch/instruction_set.h"
 #include "base/stl_util.h"
 #include "base/value_object.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
-#include "instruction_set.h"
 
 namespace art {
 
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e2be3e..dac1ef4 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -105,13 +105,16 @@
     ASSERT_TRUE(success_image);
     bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer->GetOatDataBegin());
     ASSERT_TRUE(success_fixup);
+
+    ASSERT_EQ(dup_oat->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
+                                               << oat_file.GetFilename();
   }
 
   {
     std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
     ASSERT_TRUE(file.get() != NULL);
     ImageHeader image_header;
-    file->ReadFully(&image_header, sizeof(image_header));
+    ASSERT_TRUE(file->ReadFully(&image_header, sizeof(image_header)));
     ASSERT_TRUE(image_header.IsValid());
     ASSERT_GE(image_header.GetImageBitmapOffset(), sizeof(image_header));
     ASSERT_NE(0U, image_header.GetImageBitmapSize());
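
FlushCloseOrErase is ART's File helper; the contract it enforces is "flush and
close, or erase the partial output and report failure", with 0 meaning success
(hence the ASSERT_EQ against 0 above). A rough stdio-based illustration of
that contract, not ART's actual implementation:

    #include <cstdio>

    // Flush and close f; on any failure remove the file at path so no
    // stale, partially written artifact survives. Returns 0 on success.
    static int FlushCloseOrEraseSketch(std::FILE* f, const char* path) {
      bool ok = (std::fflush(f) == 0);
      ok = (std::fclose(f) == 0) && ok;  // Close even if the flush failed.
      if (!ok) {
        std::remove(path);
        return -1;
      }
      return 0;
    }
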
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index cf2cddb..b03727b 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -68,11 +68,11 @@
 namespace art {
 
 bool ImageWriter::PrepareImageAddressSpace() {
+  target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
   {
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
-    ComputeEagerResolvedStrings();
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -149,6 +149,11 @@
 
   SetOatChecksumFromElfFile(oat_file.get());
 
+  if (oat_file->FlushCloseOrErase() != 0) {
+    LOG(ERROR) << "Failed to flush and close oat file " << oat_filename << " for " << oat_location;
+    return false;
+  }
+
   std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str()));
   ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin());
   if (image_file.get() == NULL) {
@@ -157,6 +162,7 @@
   }
   if (fchmod(image_file->Fd(), 0644) != 0) {
     PLOG(ERROR) << "Failed to make image file world readable: " << image_filename;
+    image_file->Erase();
     return false;
   }
 
@@ -164,6 +170,7 @@
   CHECK_EQ(image_end_, image_header->GetImageSize());
   if (!image_file->WriteFully(image_->Begin(), image_end_)) {
     PLOG(ERROR) << "Failed to write image file " << image_filename;
+    image_file->Erase();
     return false;
   }
 
@@ -173,9 +180,14 @@
                          image_header->GetImageBitmapSize(),
                          image_header->GetImageBitmapOffset())) {
     PLOG(ERROR) << "Failed to write image file " << image_filename;
+    image_file->Erase();
     return false;
   }
 
+  if (image_file->FlushCloseOrErase() != 0) {
+    PLOG(ERROR) << "Failed to flush and close image file " << image_filename;
+    return false;
+  }
   return true;
 }
 
@@ -215,7 +227,14 @@
 void ImageWriter::AssignImageOffset(mirror::Object* object) {
   DCHECK(object != nullptr);
   SetImageOffset(object, image_end_);
-  image_end_ += RoundUp(object->SizeOf(), 8);  // 64-bit alignment
+  size_t object_size;
+  if (object->IsArtMethod()) {
+    // Methods are sized based on the target pointer size.
+    object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
+  } else {
+    object_size = object->SizeOf();
+  }
+  image_end_ += RoundUp(object_size, 8);  // 64-bit alignment
   DCHECK_LT(image_end_, image_->Size());
 }
 
@@ -265,6 +284,149 @@
   return true;
 }
 
+// Count the number of strings in the heap, storing the result through arg (a size_t*).
+static void CountStringsCallback(Object* obj, void* arg)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (obj->GetClass()->IsStringClass()) {
+    ++*reinterpret_cast<size_t*>(arg);
+  }
+}
+
+// Collect all java.lang.String objects in the heap and put them in the output strings_ array.
+class StringCollector {
+ public:
+  StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index)
+      : strings_(strings), index_(index) {
+  }
+  static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    auto* collector = reinterpret_cast<StringCollector*>(arg);
+    if (obj->GetClass()->IsStringClass()) {
+      collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString());
+    }
+  }
+  size_t GetIndex() const {
+    return index_;
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::String>> strings_;
+  size_t index_;
+};
+
+// Compare strings based on length, used for sorting strings by length / reverse length.
+class StringLengthComparator {
+ public:
+  explicit StringLengthComparator(Handle<mirror::ObjectArray<mirror::String>> strings)
+      : strings_(strings) {
+  }
+  bool operator()(size_t a, size_t b) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return strings_->GetWithoutChecks(a)->GetLength() < strings_->GetWithoutChecks(b)->GetLength();
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::String>> strings_;
+};
+
+// Standard lexicographic less-than comparison over the chars_ array.
+class SubstringComparator {
+ public:
+  explicit SubstringComparator(const std::vector<uint16_t>* const chars) : chars_(chars) {
+  }
+  bool operator()(const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b) {
+    return std::lexicographical_compare(chars_->begin() + a.first,
+                                        chars_->begin() + a.first + a.second,
+                                        chars_->begin() + b.first,
+                                        chars_->begin() + b.first + b.second);
+  }
+
+ private:
+  const std::vector<uint16_t>* const chars_;
+};
+
+void ImageWriter::ProcessStrings() {
+  size_t total_strings = 0;
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    heap->VisitObjects(CountStringsCallback, &total_strings);  // Count the strings.
+  }
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  auto strings = hs.NewHandle(cl->AllocStringArray(self, total_strings));
+  StringCollector string_collector(strings, 0U);
+  {
+    ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+    // Read strings into the array.
+    heap->VisitObjects(StringCollector::Callback, &string_collector);
+  }
+  // Some strings could have gotten freed if AllocStringArray caused a GC.
+  CHECK_LE(string_collector.GetIndex(), total_strings);
+  total_strings = string_collector.GetIndex();
+  size_t total_length = 0;
+  std::vector<size_t> reverse_sorted_strings;
+  for (size_t i = 0; i < total_strings; ++i) {
+    mirror::String* s = strings->GetWithoutChecks(i);
+    // Record the string's index and accumulate the total length.
+    total_length += s->GetLength();
+    reverse_sorted_strings.push_back(i);
+  }
+  // Sort by reverse length.
+  StringLengthComparator comparator(strings);
+  std::sort(reverse_sorted_strings.rbegin(), reverse_sorted_strings.rend(), comparator);
+  // Deduplicate prefixes and add strings to the char array.
+  std::vector<uint16_t> combined_chars(total_length, 0U);
+  size_t num_chars = 0;
+  // Characters of strings that are a proper (non-equal) prefix of another string.
+  // We don't count the savings from equal strings since those would get interned later anyway.
+  size_t prefix_saved_chars = 0;
+  std::set<std::pair<size_t, size_t>, SubstringComparator> existing_strings((
+      SubstringComparator(&combined_chars)));
+  for (size_t i = 0; i < total_strings; ++i) {
+    mirror::String* s = strings->GetWithoutChecks(reverse_sorted_strings[i]);
+    // Add the string to the end of the char array.
+    size_t length = s->GetLength();
+    for (size_t j = 0; j < length; ++j) {
+      combined_chars[num_chars++] = s->CharAt(j);
+    }
+    // Try to see if the string exists as a prefix of an existing string.
+    size_t new_offset = 0;
+    std::pair<size_t, size_t> new_string(num_chars - length, length);
+    auto it = existing_strings.lower_bound(new_string);
+    bool is_prefix = false;
+    if (it != existing_strings.end()) {
+      CHECK_LE(length, it->second);
+      is_prefix = std::equal(combined_chars.begin() + it->first,
+                             combined_chars.begin() + it->first + it->second,
+                             combined_chars.begin() + new_string.first);
+    }
+    if (is_prefix) {
+      // The string is a prefix of an existing one; point its offset at the existing chars.
+      new_offset = it->first;
+      // Remove the added chars.
+      num_chars -= length;
+      if (it->second != length) {
+        prefix_saved_chars += length;
+      }
+    } else {
+      new_offset = new_string.first;
+      existing_strings.insert(new_string);
+    }
+    s->SetOffset(new_offset);
+  }
+  // Allocate and update the char arrays.
+  auto* array = mirror::CharArray::Alloc(self, num_chars);
+  for (size_t i = 0; i < num_chars; ++i) {
+    array->SetWithoutChecks<false>(i, combined_chars[i]);
+  }
+  for (size_t i = 0; i < total_strings; ++i) {
+    strings->GetWithoutChecks(i)->SetArray(array);
+  }
+  LOG(INFO) << "Total # image strings=" << total_strings << " combined length="
+      << total_length << " prefix saved chars=" << prefix_saved_chars;
+  ComputeEagerResolvedStrings();
+}
+
 void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) {
   if (!obj->GetClass()->IsStringClass()) {
     return;
@@ -293,7 +455,7 @@
   }
 }
 
-void ImageWriter::ComputeEagerResolvedStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::ComputeEagerResolvedStrings() {
   ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
   Runtime::Current()->GetHeap()->VisitObjects(ComputeEagerResolvedStringsCallback, this);
 }
@@ -364,8 +526,7 @@
   return true;
 }
 
-void ImageWriter::CheckNonImageClassesRemoved()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::CheckNonImageClassesRemoved() {
   if (compiler_driver_.GetImageClasses() != nullptr) {
     gc::Heap* heap = Runtime::Current()->GetHeap();
     ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
@@ -484,13 +645,14 @@
   }
   //
   size_t num_reference_fields = h_class->NumReferenceInstanceFields();
+  MemberOffset field_offset = h_class->GetFirstReferenceInstanceFieldOffset();
   for (size_t i = 0; i < num_reference_fields; ++i) {
-    mirror::ArtField* field = h_class->GetInstanceField(i);
-    MemberOffset field_offset = field->GetOffset();
     mirror::Object* value = obj->GetFieldObject<mirror::Object>(field_offset);
     if (value != nullptr) {
       WalkFieldsInOrder(value);
     }
+    field_offset = MemberOffset(field_offset.Uint32Value() +
+                                sizeof(mirror::HeapReference<mirror::Object>));
   }
 }
 
@@ -507,13 +669,14 @@
     // Walk static fields of a Class.
     if (h_obj->IsClass()) {
       size_t num_static_fields = klass->NumReferenceStaticFields();
+      MemberOffset field_offset = klass->GetFirstReferenceStaticFieldOffset();
       for (size_t i = 0; i < num_static_fields; ++i) {
-        mirror::ArtField* field = klass->GetStaticField(i);
-        MemberOffset field_offset = field->GetOffset();
         mirror::Object* value = h_obj->GetFieldObject<mirror::Object>(field_offset);
         if (value != nullptr) {
           WalkFieldsInOrder(value);
         }
+        field_offset = MemberOffset(field_offset.Uint32Value() +
+                                    sizeof(mirror::HeapReference<mirror::Object>));
       }
     } else if (h_obj->IsObjectArray()) {
       // Walk elements of an object array.
@@ -585,9 +748,7 @@
                                     compile_pic_);
 }
 
-
-void ImageWriter::CopyAndFixupObjects()
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+void ImageWriter::CopyAndFixupObjects() {
   ScopedAssertNoThreadSuspension ants(Thread::Current(), "ImageWriter");
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // TODO: heap validation can't handle this fix up pass
@@ -610,7 +771,14 @@
   size_t offset = image_writer->GetImageOffset(obj);
   uint8_t* dst = image_writer->image_->Begin() + offset;
   const uint8_t* src = reinterpret_cast<const uint8_t*>(obj);
-  size_t n = obj->SizeOf();
+  size_t n;
+  if (obj->IsArtMethod()) {
+    // Size without pointer fields, since we don't want to overrun the buffer if the target
+    // ArtMethod is 32-bit but the source is 64-bit.
+    n = mirror::ArtMethod::SizeWithoutPointerFields();
+  } else {
+    n = obj->SizeOf();
+  }
   DCHECK_LT(offset + n, image_writer->image_->Size());
   memcpy(dst, src, n);
   Object* copy = reinterpret_cast<Object*>(dst);
@@ -690,6 +858,10 @@
   }
   if (orig->IsArtMethod<kVerifyNone>()) {
     FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy));
+  } else if (orig->IsClass() && orig->AsClass()->IsArtMethodClass()) {
+    // Set the right size for the target.
+    size_t size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
+    down_cast<mirror::Class*>(copy)->SetObjectSizeWithoutChecks(size);
   }
 }
 
@@ -748,29 +920,48 @@
 void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
   // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
   // oat_begin_
+  // For 64-bit targets we need to repack the current runtime's pointer-sized fields into the
+  // right locations.
+  // Copy all of the pointer fields from the runtime methods to the target methods first, since
+  // we did a bytewise copy earlier.
+  copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+      orig->GetEntryPointFromPortableCompiledCode(), target_ptr_size_);
+  copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(orig->GetEntryPointFromInterpreter(),
+                                                         target_ptr_size_);
+  copy->SetEntryPointFromJniPtrSize<kVerifyNone>(orig->GetEntryPointFromJni(), target_ptr_size_);
+  copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+      orig->GetEntryPointFromQuickCompiledCode(), target_ptr_size_);
+  copy->SetNativeGcMapPtrSize<kVerifyNone>(orig->GetNativeGcMap(), target_ptr_size_);
 
   // The resolution method has a special trampoline to call.
   Runtime* runtime = Runtime::Current();
   if (UNLIKELY(orig == runtime->GetResolutionMethod())) {
-    copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_resolution_trampoline_offset_));
-    copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_resolution_trampoline_offset_));
+    copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+        GetOatAddress(portable_resolution_trampoline_offset_), target_ptr_size_);
+    copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+        GetOatAddress(quick_resolution_trampoline_offset_), target_ptr_size_);
   } else if (UNLIKELY(orig == runtime->GetImtConflictMethod() ||
                       orig == runtime->GetImtUnimplementedMethod())) {
-    copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_imt_conflict_trampoline_offset_));
-    copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_imt_conflict_trampoline_offset_));
+    copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+        GetOatAddress(portable_imt_conflict_trampoline_offset_), target_ptr_size_);
+    copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+        GetOatAddress(quick_imt_conflict_trampoline_offset_), target_ptr_size_);
   } else {
     // We assume all methods have code. If they don't currently then we set them to use the
     // resolution trampoline. Abstract methods never have code and so we need to make sure their
     // use results in an AbstractMethodError. We use the interpreter to achieve this.
     if (UNLIKELY(orig->IsAbstract())) {
-      copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_to_interpreter_bridge_offset_));
-      copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_to_interpreter_bridge_offset_));
-      copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*>
-          (const_cast<uint8_t*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_))));
+      copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+          GetOatAddress(portable_to_interpreter_bridge_offset_), target_ptr_size_);
+      copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
+          GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_);
+      copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(
+          reinterpret_cast<EntryPointFromInterpreter*>(const_cast<uint8_t*>(
+                  GetOatAddress(interpreter_to_interpreter_bridge_offset_))), target_ptr_size_);
     } else {
       bool quick_is_interpreted;
       const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted);
-      copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code);
+      copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(quick_code, target_ptr_size_);
 
       // Portable entrypoint:
       const uint8_t* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset());
@@ -793,18 +984,19 @@
         // initialization.
         portable_code = GetOatAddress(portable_resolution_trampoline_offset_);
       }
-      copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code);
-
+      copy->SetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(
+          portable_code, target_ptr_size_);
       // JNI entrypoint:
       if (orig->IsNative()) {
         // The native method's pointer is set to a stub to lookup via dlsym.
         // Note this is not the code_ pointer, that is handled above.
-        copy->SetNativeMethod<kVerifyNone>(GetOatAddress(jni_dlsym_lookup_offset_));
+        copy->SetEntryPointFromJniPtrSize<kVerifyNone>(GetOatAddress(jni_dlsym_lookup_offset_),
+                                                       target_ptr_size_);
       } else {
         // Normal (non-abstract non-native) methods have various tables to relocate.
         uint32_t native_gc_map_offset = orig->GetOatNativeGcMapOffset();
         const uint8_t* native_gc_map = GetOatAddress(native_gc_map_offset);
-        copy->SetNativeGcMap<kVerifyNone>(reinterpret_cast<const uint8_t*>(native_gc_map));
+        copy->SetNativeGcMapPtrSize<kVerifyNone>(native_gc_map, target_ptr_size_);
       }
 
       // Interpreter entrypoint:
@@ -812,9 +1004,11 @@
       uint32_t interpreter_code = (quick_is_interpreted && portable_is_interpreted)
           ? interpreter_to_interpreter_bridge_offset_
           : interpreter_to_compiled_code_bridge_offset_;
-      copy->SetEntryPointFromInterpreter<kVerifyNone>(
+      EntryPointFromInterpreter* interpreter_entrypoint =
           reinterpret_cast<EntryPointFromInterpreter*>(
-              const_cast<uint8_t*>(GetOatAddress(interpreter_code))));
+              const_cast<uint8_t*>(GetOatAddress(interpreter_code)));
+      copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(
+          interpreter_entrypoint, target_ptr_size_);
     }
   }
 }
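
ProcessStrings visits strings longest-first, so by the time a string is seen,
any superstring it could share a prefix with is already in the combined
buffer; a lexicographic lower_bound over the placed (offset, length) pairs
then locates such a superstring if one exists. A runnable standalone sketch of
that idea, with std::string standing in for mirror::String and its uint16_t
chars:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <set>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::string> strings = {"he", "hello", "hell", "world"};
      // Longest first: superstrings are placed before their prefixes.
      std::sort(strings.begin(), strings.end(),
                [](const std::string& a, const std::string& b) {
                  return a.size() > b.size();
                });
      std::string combined;  // Shared backing buffer (cf. combined_chars).
      // (offset, length) pairs of placed strings, ordered by content.
      auto cmp = [&combined](const std::pair<size_t, size_t>& a,
                             const std::pair<size_t, size_t>& b) {
        return combined.compare(a.first, a.second,
                                combined, b.first, b.second) < 0;
      };
      std::set<std::pair<size_t, size_t>, decltype(cmp)> existing(cmp);
      for (const std::string& s : strings) {
        size_t offset = combined.size();
        combined += s;  // Tentatively append at the end of the buffer.
        std::pair<size_t, size_t> candidate(offset, s.size());
        auto it = existing.lower_bound(candidate);
        // If s is a prefix of the entry found, reuse that entry's chars.
        if (it != existing.end() &&
            combined.compare(it->first, s.size(), s) == 0) {
          combined.resize(offset);  // Drop the duplicate copy.
          offset = it->first;
        } else {
          existing.insert(candidate);
        }
        std::cout << s << " -> offset " << offset << "\n";
      }
      std::cout << "combined: \"" << combined << "\"\n";  // e.g. "helloworld"
    }
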
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index b0cf2b2..2fec0aa 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -26,12 +26,12 @@
 
 #include "base/macros.h"
 #include "driver/compiler_driver.h"
+#include "gc/space/space.h"
 #include "mem_map.h"
 #include "oat_file.h"
 #include "mirror/dex_cache.h"
 #include "os.h"
 #include "safe_map.h"
-#include "gc/space/space.h"
 
 namespace art {
 
@@ -47,7 +47,8 @@
         portable_imt_conflict_trampoline_offset_(0), portable_resolution_trampoline_offset_(0),
         portable_to_interpreter_bridge_offset_(0), quick_generic_jni_trampoline_offset_(0),
         quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0),
-        quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic) {
+        quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic),
+        target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())) {
     CHECK_NE(image_begin, 0U);
   }
 
@@ -136,13 +137,16 @@
   static void ComputeEagerResolvedStringsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Combine string char arrays.
+  void ProcessStrings() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static bool NonImageClassesVisitor(mirror::Class* c, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Verify unwanted classes removed.
-  void CheckNonImageClassesRemoved();
+  void CheckNonImageClassesRemoved() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -164,7 +168,7 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Creates the contiguous image in memory and adjusts pointers.
-  void CopyAndFixupObjects();
+  void CopyAndFixupObjects() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void FixupMethod(mirror::ArtMethod* orig, mirror::ArtMethod* copy)
@@ -221,6 +225,9 @@
   uint32_t quick_to_interpreter_bridge_offset_;
   const bool compile_pic_;
 
+  // Size of pointers on the target architecture.
+  size_t target_ptr_size_;
+
   friend class FixupVisitor;
   friend class FixupClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
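
target_ptr_size_ exists because boot images can be cross-compiled: the offsets
of pointer-typed ArtMethod fields depend on the target's pointer width, not
the compiling host's. A sketch of the arithmetic under a hypothetical layout
(the enum and field block below are illustrative, not ART's real definitions):

    #include <cstddef>

    enum class Isa { kArm, kThumb2, kArm64, kX86, kX86_64, kMips };

    // In the spirit of InstructionSetPointerSize().
    static size_t PointerSize(Isa isa) {
      switch (isa) {
        case Isa::kArm64:
        case Isa::kX86_64:
          return 8U;
        default:
          return 4U;  // The remaining ISAs are 32-bit.
      }
    }

    // Offset of the index-th pointer-sized entry point following a fixed
    // block of header_bytes bytes of 32-bit fields (hypothetical layout).
    static size_t EntryPointOffset(size_t header_bytes, size_t index, Isa isa) {
      return header_bytes + index * PointerSize(isa);
    }
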
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 3c3aa02..c3fe75b 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -307,7 +307,9 @@
   }
 
   // 9. Plant call to native code associated with method.
-  __ Call(main_jni_conv->MethodStackOffset(), mirror::ArtMethod::NativeMethodOffset(),
+  MemberOffset jni_entrypoint_offset = mirror::ArtMethod::EntryPointFromJniOffset(
+      InstructionSetPointerSize(instruction_set));
+  __ Call(main_jni_conv->MethodStackOffset(), jni_entrypoint_offset,
           mr_conv->InterproceduralScratchRegister());
 
   // 10. Fix differences in result widths.
diff --git a/compiler/llvm/ir_builder.h b/compiler/llvm/ir_builder.h
index 03498ef..990ba02 100644
--- a/compiler/llvm/ir_builder.h
+++ b/compiler/llvm/ir_builder.h
@@ -101,10 +101,8 @@
   // Extend memory barrier
   //--------------------------------------------------------------------------
   void CreateMemoryBarrier(MemBarrierKind barrier_kind) {
-#if ANDROID_SMP
     // TODO: select atomic ordering according to given barrier kind.
     CreateFence(::llvm::SequentiallyConsistent);
-#endif
   }
 
   //--------------------------------------------------------------------------
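
With the guard removed, CreateMemoryBarrier emits a fence unconditionally, and
sequential consistency remains the conservative default until the TODO above
is addressed. A plain C++ analogue of the emitted fence, for reference:

    #include <atomic>

    // Conservative full barrier, matching ::llvm::SequentiallyConsistent.
    static void EmitMemoryBarrierSketch() {
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }
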
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 97b7cc9..ce4ed6d 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "arch/instruction_set_features.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiler.h"
@@ -97,7 +98,7 @@
 
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> insn_features(
-      InstructionSetFeatures::FromFeatureString(insn_set, "default", &error_msg));
+      InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
   ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
   compiler_options_.reset(new CompilerOptions);
   verification_results_.reset(new VerificationResults(compiler_options_.get()));
@@ -109,8 +110,8 @@
                                             verification_results_.get(),
                                             method_inliner_map_.get(),
                                             compiler_kind, insn_set,
-                                            insn_features.get(), false, nullptr, 2, true, true,
-                                            timer_.get(), ""));
+                                            insn_features.get(), false, nullptr, nullptr, 2, true,
+                                            true, timer_.get(), ""));
   jobject class_loader = nullptr;
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
@@ -198,7 +199,7 @@
     InstructionSet insn_set = kX86;
     std::string error_msg;
     std::unique_ptr<const InstructionSetFeatures> insn_features(
-        InstructionSetFeatures::FromFeatureString(insn_set, "default", &error_msg));
+        InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg));
     ASSERT_TRUE(insn_features.get() != nullptr) << error_msg;
     std::vector<const DexFile*> dex_files;
     uint32_t image_file_location_oat_checksum = 0;
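
The switch from FromFeatureString to FromVariant reflects that "default" here
names a CPU variant rather than a raw feature string. The calling pattern, as
a fragment assuming arch/instruction_set_features.h:

    std::string error_msg;
    std::unique_ptr<const InstructionSetFeatures> features(
        InstructionSetFeatures::FromVariant(kX86, "default", &error_msg));
    CHECK(features != nullptr) << "Unknown variant: " << error_msg;
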
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 659c332..c6beb36 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -309,7 +309,7 @@
     arm::Thumb2Assembler assembler;
     assembler.LoadFromOffset(
         arm::kLoadWord, arm::PC, arm::R0,
-        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
     assembler.bkpt(0);
     std::vector<uint8_t> thunk_code(assembler.CodeSize());
     MemoryRegion code(thunk_code.data(), thunk_code.size());
@@ -363,7 +363,8 @@
     // The thunk just uses the entry point in the ArtMethod. This works even for calls
     // to the generic JNI and interpreter trampolines.
     arm64::Arm64Assembler assembler;
-    Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+    Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+        kArm64PointerSize).Int32Value());
     assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
     std::vector<uint8_t> thunk_code(assembler.CodeSize());
     MemoryRegion code(thunk_code.data(), thunk_code.size());
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index d3ac4e0..be8631a 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -41,25 +41,29 @@
  */
 class Temporaries : public ValueObject {
  public:
-  Temporaries(HGraph* graph, size_t count) : graph_(graph), count_(count), index_(0) {
-    graph_->UpdateNumberOfTemporaries(count_);
-  }
+  explicit Temporaries(HGraph* graph) : graph_(graph), index_(0) {}
 
   void Add(HInstruction* instruction) {
-    // We currently only support vreg size temps.
-    DCHECK(instruction->GetType() != Primitive::kPrimLong
-           && instruction->GetType() != Primitive::kPrimDouble);
-    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_++);
+    HInstruction* temp = new (graph_->GetArena()) HTemporary(index_);
     instruction->GetBlock()->AddInstruction(temp);
+
     DCHECK(temp->GetPrevious() == instruction);
+
+    size_t offset;
+    if (instruction->GetType() == Primitive::kPrimLong
+        || instruction->GetType() == Primitive::kPrimDouble) {
+      offset = 2;
+    } else {
+      offset = 1;
+    }
+    index_ += offset;
+
+    graph_->UpdateTemporariesVRegSlots(index_);
   }
 
  private:
   HGraph* const graph_;
 
-  // The total number of temporaries that will be used.
-  const size_t count_;
-
   // Current index in the temporary stack, updated by `Add`.
   size_t index_;
 };
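
Callers of the reworked Temporaries no longer pre-declare a count: Add()
advances the index by one vreg slot, or two for wide (long/double) values, and
reports the running maximum to the graph. A usage fragment (the instruction
names are hypothetical):

    Temporaries temps(graph_);
    temps.Add(null_check);   // Reference value: one vreg slot.
    temps.Add(long_value);   // kPrimLong value: two vreg slots.
    // UpdateTemporariesVRegSlots(3) has been called on the graph by now.
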
@@ -115,37 +119,37 @@
 }
 
 template<typename T>
-void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_offset) {
+void HGraphBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) {
   int32_t target_offset = instruction.GetTargetOffset();
-  PotentiallyAddSuspendCheck(target_offset, dex_offset);
+  PotentiallyAddSuspendCheck(target_offset, dex_pc);
   HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(first, second);
   current_block_->AddInstruction(comparison);
   HInstruction* ifinst = new (arena_) HIf(comparison);
   current_block_->AddInstruction(ifinst);
-  HBasicBlock* target = FindBlockStartingAt(dex_offset + target_offset);
+  HBasicBlock* target = FindBlockStartingAt(dex_pc + target_offset);
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
-  target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+  target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
   current_block_ = nullptr;
 }
 
 template<typename T>
-void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_offset) {
+void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
   int32_t target_offset = instruction.GetTargetOffset();
-  PotentiallyAddSuspendCheck(target_offset, dex_offset);
+  PotentiallyAddSuspendCheck(target_offset, dex_pc);
   HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
   T* comparison = new (arena_) T(value, GetIntConstant(0));
   current_block_->AddInstruction(comparison);
   HInstruction* ifinst = new (arena_) HIf(comparison);
   current_block_->AddInstruction(ifinst);
-  HBasicBlock* target = FindBlockStartingAt(dex_offset + target_offset);
+  HBasicBlock* target = FindBlockStartingAt(dex_pc + target_offset);
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
-  target = FindBlockStartingAt(dex_offset + instruction.SizeInCodeUnits());
+  target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
   DCHECK(target != nullptr);
   current_block_->AddSuccessor(target);
   current_block_ = nullptr;
@@ -192,13 +196,13 @@
 
   InitializeParameters(code_item.ins_size_);
 
-  size_t dex_offset = 0;
+  size_t dex_pc = 0;
   while (code_ptr < code_end) {
-    // Update the current block if dex_offset starts a new block.
-    MaybeUpdateCurrentBlock(dex_offset);
+    // Update the current block if dex_pc starts a new block.
+    MaybeUpdateCurrentBlock(dex_pc);
     const Instruction& instruction = *Instruction::At(code_ptr);
-    if (!AnalyzeDexInstruction(instruction, dex_offset)) return nullptr;
-    dex_offset += instruction.SizeInCodeUnits();
+    if (!AnalyzeDexInstruction(instruction, dex_pc)) return nullptr;
+    dex_pc += instruction.SizeInCodeUnits();
     code_ptr += instruction.SizeInCodeUnits();
   }
 
@@ -239,25 +243,25 @@
 
   // Iterate over all instructions and find branching instructions. Create blocks for
   // the locations these instructions branch to.
-  size_t dex_offset = 0;
+  size_t dex_pc = 0;
   while (code_ptr < code_end) {
     const Instruction& instruction = *Instruction::At(code_ptr);
     if (instruction.IsBranch()) {
-      int32_t target = instruction.GetTargetOffset() + dex_offset;
+      int32_t target = instruction.GetTargetOffset() + dex_pc;
       // Create a block for the target instruction.
       if (FindBlockStartingAt(target) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, target);
         branch_targets_.Put(target, block);
       }
-      dex_offset += instruction.SizeInCodeUnits();
+      dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
-      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_offset) == nullptr)) {
-        block = new (arena_) HBasicBlock(graph_, dex_offset);
-        branch_targets_.Put(dex_offset, block);
+      if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+        block = new (arena_) HBasicBlock(graph_, dex_pc);
+        branch_targets_.Put(dex_pc, block);
       }
     } else {
       code_ptr += instruction.SizeInCodeUnits();
-      dex_offset += instruction.SizeInCodeUnits();
+      dex_pc += instruction.SizeInCodeUnits();
     }
   }
 }
@@ -291,6 +295,16 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_23x(const Instruction& instruction,
+                              Primitive::Type type,
+                              uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
   HInstruction* second = LoadLocal(instruction.VRegB(), type);
@@ -299,6 +313,16 @@
 }
 
 template<typename T>
+void HGraphBuilder::Binop_12x(const Instruction& instruction,
+                              Primitive::Type type,
+                              uint32_t dex_pc) {
+  HInstruction* first = LoadLocal(instruction.VRegA(), type);
+  HInstruction* second = LoadLocal(instruction.VRegB(), type);
+  current_block_->AddInstruction(new (arena_) T(type, first, second, dex_pc));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
+template<typename T>
 void HGraphBuilder::Binop_22s(const Instruction& instruction, bool reverse) {
   HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt);
   HInstruction* second = GetIntConstant(instruction.VRegC_22s());
@@ -332,7 +356,7 @@
 }
 
 bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
-                                uint32_t dex_offset,
+                                uint32_t dex_pc,
                                 uint32_t method_idx,
                                 uint32_t number_of_vreg_arguments,
                                 bool is_range,
@@ -380,7 +404,7 @@
     uintptr_t direct_method;
     int table_index;
     InvokeType optimized_invoke_type = invoke_type;
-    compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_offset, true, true,
+    compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
                                         &optimized_invoke_type, &target_method, &table_index,
                                         &direct_code, &direct_method);
     if (table_index == -1) {
@@ -389,29 +413,29 @@
 
     if (optimized_invoke_type == kVirtual) {
       invoke = new (arena_) HInvokeVirtual(
-          arena_, number_of_arguments, return_type, dex_offset, table_index);
+          arena_, number_of_arguments, return_type, dex_pc, table_index);
     } else if (optimized_invoke_type == kInterface) {
       invoke = new (arena_) HInvokeInterface(
-          arena_, number_of_arguments, return_type, dex_offset, method_idx, table_index);
+          arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
     } else if (optimized_invoke_type == kDirect) {
       // For this compiler, sharpening only works if we compile PIC.
       DCHECK(compiler_driver_->GetCompilerOptions().GetCompilePic());
       // Treat invoke-direct like static calls for now.
       invoke = new (arena_) HInvokeStatic(
-          arena_, number_of_arguments, return_type, dex_offset, target_method.dex_method_index);
+          arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index);
     }
   } else {
     DCHECK(invoke_type == kDirect || invoke_type == kStatic);
     // Treat invoke-direct like static calls for now.
     invoke = new (arena_) HInvokeStatic(
-        arena_, number_of_arguments, return_type, dex_offset, method_idx);
+        arena_, number_of_arguments, return_type, dex_pc, method_idx);
   }
 
   size_t start_index = 0;
-  Temporaries temps(graph_, is_instance_call ? 1 : 0);
+  Temporaries temps(graph_);
   if (is_instance_call) {
     HInstruction* arg = LoadLocal(is_range ? register_index : args[0], Primitive::kPrimNot);
-    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_offset);
+    HNullCheck* null_check = new (arena_) HNullCheck(arg, dex_pc);
     current_block_->AddInstruction(null_check);
     temps.Add(null_check);
     invoke->SetArgumentAt(0, null_check);
@@ -425,7 +449,7 @@
     bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
     if (!is_range && is_wide && args[i] + 1 != args[i + 1]) {
       LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol()
-                   << " at " << dex_offset;
+                   << " at " << dex_pc;
       // We do not support non-sequential register pairs.
       return false;
     }
@@ -443,7 +467,7 @@
 }
 
 bool HGraphBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
-                                             uint32_t dex_offset,
+                                             uint32_t dex_pc,
                                              bool is_put) {
   uint32_t source_or_dest_reg = instruction.VRegA_22c();
   uint32_t obj_reg = instruction.VRegB_22c();
@@ -464,9 +488,9 @@
   Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
 
   HInstruction* object = LoadLocal(obj_reg, Primitive::kPrimNot);
-  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_offset));
+  current_block_->AddInstruction(new (arena_) HNullCheck(object, dex_pc));
   if (is_put) {
-    Temporaries temps(graph_, 1);
+    Temporaries temps(graph_);
     HInstruction* null_check = current_block_->GetLastInstruction();
     // We need one temporary for the null check.
     temps.Add(null_check);
@@ -490,7 +514,7 @@
 
 
 bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
-                                           uint32_t dex_offset,
+                                           uint32_t dex_pc,
                                            bool is_put) {
   uint32_t source_or_dest_reg = instruction.VRegA_21c();
   uint16_t field_index = instruction.VRegB_21c();
@@ -520,18 +544,18 @@
   }
 
   HLoadClass* constant = new (arena_) HLoadClass(
-      storage_index, is_referrers_class, dex_offset);
+      storage_index, is_referrers_class, dex_pc);
   current_block_->AddInstruction(constant);
 
   HInstruction* cls = constant;
   if (!is_initialized) {
-    cls = new (arena_) HClinitCheck(constant, dex_offset);
+    cls = new (arena_) HClinitCheck(constant, dex_pc);
     current_block_->AddInstruction(cls);
   }
 
   if (is_put) {
     // We need to keep the class alive before loading the value.
-    Temporaries temps(graph_, 1);
+    Temporaries temps(graph_);
     temps.Add(cls);
     HInstruction* value = LoadLocal(source_or_dest_reg, field_type);
     DCHECK_EQ(value->GetType(), field_type);
@@ -544,29 +568,46 @@
   return true;
 }
 
-void HGraphBuilder::BuildCheckedDiv(uint16_t out_reg,
-                                    uint16_t first_reg,
-                                    int32_t second_reg,
-                                    uint32_t dex_offset,
-                                    Primitive::Type type,
-                                    bool second_is_lit) {
-  DCHECK(type == Primitive::kPrimInt);
+void HGraphBuilder::BuildCheckedDivRem(uint16_t out_vreg,
+                                       uint16_t first_vreg,
+                                       int64_t second_vreg_or_constant,
+                                       uint32_t dex_pc,
+                                       Primitive::Type type,
+                                       bool second_is_constant,
+                                       bool isDiv) {
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
-  HInstruction* first = LoadLocal(first_reg, type);
-  HInstruction* second = second_is_lit ? GetIntConstant(second_reg) : LoadLocal(second_reg, type);
-  if (!second->IsIntConstant() || (second->AsIntConstant()->GetValue() == 0)) {
-    second = new (arena_) HDivZeroCheck(second, dex_offset);
-    Temporaries temps(graph_, 1);
+  HInstruction* first = LoadLocal(first_vreg, type);
+  HInstruction* second = nullptr;
+  if (second_is_constant) {
+    if (type == Primitive::kPrimInt) {
+      second = GetIntConstant(second_vreg_or_constant);
+    } else {
+      second = GetLongConstant(second_vreg_or_constant);
+    }
+  } else {
+    second = LoadLocal(second_vreg_or_constant, type);
+  }
+
+  if (!second_is_constant
+      || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0)
+      || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) {
+    second = new (arena_) HDivZeroCheck(second, dex_pc);
+    Temporaries temps(graph_);
     current_block_->AddInstruction(second);
     temps.Add(current_block_->GetLastInstruction());
   }
 
-  current_block_->AddInstruction(new (arena_) HDiv(type, first, second));
-  UpdateLocal(out_reg, current_block_->GetLastInstruction());
+  if (isDiv) {
+    current_block_->AddInstruction(new (arena_) HDiv(type, first, second, dex_pc));
+  } else {
+    current_block_->AddInstruction(new (arena_) HRem(type, first, second, dex_pc));
+  }
+  UpdateLocal(out_vreg, current_block_->GetLastInstruction());
 }
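
The zero-check gating in BuildCheckedDivRem reduces to a single predicate:
only a divisor that is a constant already known to be non-zero may skip the
runtime HDivZeroCheck. As a condensed sketch:

    #include <cstdint>

    // Condensed form of the condition guarding HDivZeroCheck insertion above.
    static bool NeedsDivZeroCheck(bool divisor_is_constant, int64_t divisor) {
      return !divisor_is_constant || divisor == 0;
    }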
 
 void HGraphBuilder::BuildArrayAccess(const Instruction& instruction,
-                                     uint32_t dex_offset,
+                                     uint32_t dex_pc,
                                      bool is_put,
                                      Primitive::Type anticipated_type) {
   uint8_t source_or_dest_reg = instruction.VRegA_23x();
@@ -574,10 +615,10 @@
   uint8_t index_reg = instruction.VRegC_23x();
 
   // We need one temporary for the null check, one for the index, and one for the length.
-  Temporaries temps(graph_, 3);
+  Temporaries temps(graph_);
 
   HInstruction* object = LoadLocal(array_reg, Primitive::kPrimNot);
-  object = new (arena_) HNullCheck(object, dex_offset);
+  object = new (arena_) HNullCheck(object, dex_pc);
   current_block_->AddInstruction(object);
   temps.Add(object);
 
@@ -585,28 +626,28 @@
   current_block_->AddInstruction(length);
   temps.Add(length);
   HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt);
-  index = new (arena_) HBoundsCheck(index, length, dex_offset);
+  index = new (arena_) HBoundsCheck(index, length, dex_pc);
   current_block_->AddInstruction(index);
   temps.Add(index);
   if (is_put) {
     HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type);
     // TODO: Insert a type check node if the type is Object.
     current_block_->AddInstruction(new (arena_) HArraySet(
-        object, index, value, anticipated_type, dex_offset));
+        object, index, value, anticipated_type, dex_pc));
   } else {
     current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type));
     UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction());
   }
 }
 
-void HGraphBuilder::BuildFilledNewArray(uint32_t dex_offset,
+void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
                                         uint32_t type_index,
                                         uint32_t number_of_vreg_arguments,
                                         bool is_range,
                                         uint32_t* args,
                                         uint32_t register_index) {
   HInstruction* length = GetIntConstant(number_of_vreg_arguments);
-  HInstruction* object = new (arena_) HNewArray(length, dex_offset, type_index);
+  HInstruction* object = new (arena_) HNewArray(length, dex_pc, type_index);
   current_block_->AddInstruction(object);
 
   const char* descriptor = dex_file_->StringByTypeIdx(type_index);
@@ -618,13 +659,13 @@
   bool is_reference_array = (primitive == 'L') || (primitive == '[');
   Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt;
 
-  Temporaries temps(graph_, 1);
+  Temporaries temps(graph_);
   temps.Add(object);
   for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
     HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
     HInstruction* index = GetIntConstant(i);
     current_block_->AddInstruction(
-        new (arena_) HArraySet(object, index, value, type, dex_offset));
+        new (arena_) HArraySet(object, index, value, type, dex_pc));
   }
   latest_result_ = object;
 }
@@ -634,26 +675,26 @@
                                        const T* data,
                                        uint32_t element_count,
                                        Primitive::Type anticipated_type,
-                                       uint32_t dex_offset) {
+                                       uint32_t dex_pc) {
   for (uint32_t i = 0; i < element_count; ++i) {
     HInstruction* index = GetIntConstant(i);
     HInstruction* value = GetIntConstant(data[i]);
     current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, anticipated_type, dex_offset));
+      object, index, value, anticipated_type, dex_pc));
   }
 }
 
-void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_offset) {
-  Temporaries temps(graph_, 1);
+void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc) {
+  Temporaries temps(graph_);
   HInstruction* array = LoadLocal(instruction.VRegA_31t(), Primitive::kPrimNot);
-  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_offset);
+  HNullCheck* null_check = new (arena_) HNullCheck(array, dex_pc);
   current_block_->AddInstruction(null_check);
   temps.Add(null_check);
 
   HInstruction* length = new (arena_) HArrayLength(null_check);
   current_block_->AddInstruction(length);
 
-  int32_t payload_offset = instruction.VRegB_31t() + dex_offset;
+  int32_t payload_offset = instruction.VRegB_31t() + dex_pc;
   const Instruction::ArrayDataPayload* payload =
       reinterpret_cast<const Instruction::ArrayDataPayload*>(code_start_ + payload_offset);
   const uint8_t* data = payload->data;
@@ -662,7 +703,7 @@
   // This DEX instruction performs the bounds check before doing any stores,
   // so a single check against the last index suffices.
   HInstruction* last_index = GetIntConstant(payload->element_count - 1);
-  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_offset));
+  current_block_->AddInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc));
 
   switch (payload->element_width) {
     case 1:
@@ -670,27 +711,27 @@
                          reinterpret_cast<const int8_t*>(data),
                          element_count,
                          Primitive::kPrimByte,
-                         dex_offset);
+                         dex_pc);
       break;
     case 2:
       BuildFillArrayData(null_check,
                          reinterpret_cast<const int16_t*>(data),
                          element_count,
                          Primitive::kPrimShort,
-                         dex_offset);
+                         dex_pc);
       break;
     case 4:
       BuildFillArrayData(null_check,
                          reinterpret_cast<const int32_t*>(data),
                          element_count,
                          Primitive::kPrimInt,
-                         dex_offset);
+                         dex_pc);
       break;
     case 8:
       BuildFillWideArrayData(null_check,
                              reinterpret_cast<const int64_t*>(data),
                              element_count,
-                             dex_offset);
+                             dex_pc);
       break;
     default:
       LOG(FATAL) << "Unknown element width for " << payload->element_width;
@@ -700,12 +741,12 @@
 void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
                                            const int64_t* data,
                                            uint32_t element_count,
-                                           uint32_t dex_offset) {
+                                           uint32_t dex_pc) {
   for (uint32_t i = 0; i < element_count; ++i) {
     HInstruction* index = GetIntConstant(i);
     HInstruction* value = GetLongConstant(data[i]);
     current_block_->AddInstruction(new (arena_) HArraySet(
-      object, index, value, Primitive::kPrimLong, dex_offset));
+      object, index, value, Primitive::kPrimLong, dex_pc));
   }
 }
 
@@ -713,7 +754,7 @@
                                    uint8_t destination,
                                    uint8_t reference,
                                    uint16_t type_index,
-                                   uint32_t dex_offset) {
+                                   uint32_t dex_pc) {
   bool type_known_final;
   bool type_known_abstract;
   bool is_referrers_class;
@@ -724,32 +765,32 @@
     return false;
   }
   HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
-  HLoadClass* cls = new (arena_) HLoadClass(type_index, is_referrers_class, dex_offset);
+  HLoadClass* cls = new (arena_) HLoadClass(type_index, is_referrers_class, dex_pc);
   current_block_->AddInstruction(cls);
   // The class needs a temporary before being used by the type check.
-  Temporaries temps(graph_, 1);
+  Temporaries temps(graph_);
   temps.Add(cls);
   if (instruction.Opcode() == Instruction::INSTANCE_OF) {
     current_block_->AddInstruction(
-        new (arena_) HInstanceOf(object, cls, type_known_final, dex_offset));
+        new (arena_) HInstanceOf(object, cls, type_known_final, dex_pc));
     UpdateLocal(destination, current_block_->GetLastInstruction());
   } else {
     DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
     current_block_->AddInstruction(
-        new (arena_) HCheckCast(object, cls, type_known_final, dex_offset));
+        new (arena_) HCheckCast(object, cls, type_known_final, dex_pc));
   }
   return true;
 }
 
-void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset) {
+void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc) {
   if (target_offset <= 0) {
     // Unconditionally add a suspend check to backward branches. We can remove
     // them after we recognize loops in the graph.
-    current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_offset));
+    current_block_->AddInstruction(new (arena_) HSuspendCheck(dex_pc));
   }
 }
 
-bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_offset) {
+bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
   if (current_block_ == nullptr) {
     return true;  // Dead code
   }
@@ -852,8 +893,8 @@
     }
 
 #define IF_XX(comparison, cond) \
-    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_offset); break; \
-    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_offset); break
+    case Instruction::IF_##cond: If_22t<comparison>(instruction, dex_pc); break; \
+    case Instruction::IF_##cond##Z: If_21t<comparison>(instruction, dex_pc); break
 
     IF_XX(HEqual, EQ);
     IF_XX(HNotEqual, NE);
@@ -866,8 +907,8 @@
     case Instruction::GOTO_16:
     case Instruction::GOTO_32: {
       int32_t offset = instruction.GetTargetOffset();
-      PotentiallyAddSuspendCheck(offset, dex_offset);
-      HBasicBlock* target = FindBlockStartingAt(offset + dex_offset);
+      PotentiallyAddSuspendCheck(offset, dex_pc);
+      HBasicBlock* target = FindBlockStartingAt(offset + dex_pc);
       DCHECK(target != nullptr);
       current_block_->AddInstruction(new (arena_) HGoto());
       current_block_->AddSuccessor(target);
@@ -904,7 +945,7 @@
       uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
       uint32_t args[5];
       instruction.GetVarArgs(args);
-      if (!BuildInvoke(instruction, dex_offset, method_idx,
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
                        number_of_vreg_arguments, false, args, -1)) {
         return false;
       }
@@ -919,7 +960,7 @@
       uint32_t method_idx = instruction.VRegB_3rc();
       uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
       uint32_t register_index = instruction.VRegC();
-      if (!BuildInvoke(instruction, dex_offset, method_idx,
+      if (!BuildInvoke(instruction, dex_pc, method_idx,
                        number_of_vreg_arguments, true, nullptr, register_index)) {
         return false;
       }
@@ -961,11 +1002,36 @@
       break;
     }
 
+    case Instruction::INT_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat);
+      break;
+    }
+
+    case Instruction::INT_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble);
+      break;
+    }
+
     case Instruction::LONG_TO_INT: {
       Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt);
       break;
     }
 
+    case Instruction::INT_TO_BYTE: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte);
+      break;
+    }
+
+    case Instruction::INT_TO_SHORT: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort);
+      break;
+    }
+
+    case Instruction::INT_TO_CHAR: {
+      Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar);
+      break;
+    }
+
     case Instruction::ADD_INT: {
       Binop_23x<HAdd>(instruction, Primitive::kPrimInt);
       break;
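The six new `*_TO_*` cases funnel into `Conversion_12x`, whose body is outside this hunk. For orientation, a plausible reconstruction from its declaration (visible in the builder.h hunk below) and the pattern of the sibling `_12x` helpers; a sketch, not a quote of the tree:

    void HGraphBuilder::Conversion_12x(const Instruction& instruction,
                                       Primitive::Type input_type,
                                       Primitive::Type result_type) {
      // Read vB with the input type, emit the conversion, write the result to vA.
      HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
      current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first));
      UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
    }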
@@ -1032,18 +1098,36 @@
     }
 
     case Instruction::DIV_INT: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_offset, Primitive::kPrimInt, false);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, true);
       break;
     }
 
     case Instruction::DIV_FLOAT: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat);
+      Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::DIV_DOUBLE: {
-      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble);
+      Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
+      break;
+    }
+
+    case Instruction::REM_INT: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimLong, false, false);
       break;
     }
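All four DIV/REM flavors route through the renamed `BuildCheckedDivRem` (declared in the builder.h hunk below): `is_div` selects HDiv versus HRem, and widening the second operand to `int64_t` lets the same helper carry the literal forms. A hedged sketch of the zero-check placement implied by these call sites, with `GetConstant` and `IsZeroLiteral` as hypothetical stand-ins for details not shown in the patch:

    HInstruction* second = second_is_lit
        ? GetConstant(type, second_reg_or_constant)   // hypothetical constant lookup
        : LoadLocal(second_reg_or_constant, type);
    if (!second_is_lit || IsZeroLiteral(second)) {    // hypothetical predicate
      // Only divisors not known to be non-zero need the runtime check.
      second = new (arena_) HDivZeroCheck(second, dex_pc);
      current_block_->AddInstruction(second);
    }
    HInstruction* first = LoadLocal(first_reg, type);
    HInstruction* op = is_div
        ? static_cast<HInstruction*>(new (arena_) HDiv(type, first, second, dex_pc))
        : static_cast<HInstruction*>(new (arena_) HRem(type, first, second, dex_pc));
    current_block_->AddInstruction(op);
    UpdateLocal(out_reg, op);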
 
@@ -1133,18 +1217,36 @@
     }
 
     case Instruction::DIV_INT_2ADDR: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
-                      dex_offset, Primitive::kPrimInt, false);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, true);
+      break;
+    }
+
+    case Instruction::DIV_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, true);
+      break;
+    }
+
+    case Instruction::REM_INT_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimInt, false, false);
+      break;
+    }
+
+    case Instruction::REM_LONG_2ADDR: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(),
+                         dex_pc, Primitive::kPrimLong, false, false);
       break;
     }
 
     case Instruction::DIV_FLOAT_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat);
+      Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc);
       break;
     }
 
     case Instruction::DIV_DOUBLE_2ADDR: {
-      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble);
+      Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc);
       break;
     }
 
@@ -1240,14 +1342,21 @@
 
     case Instruction::DIV_INT_LIT16:
     case Instruction::DIV_INT_LIT8: {
-      BuildCheckedDiv(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
-                      dex_offset, Primitive::kPrimInt, true);
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, true);
+      break;
+    }
+
+    case Instruction::REM_INT_LIT16:
+    case Instruction::REM_INT_LIT8: {
+      BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(),
+                         dex_pc, Primitive::kPrimInt, true, false);
       break;
     }
 
     case Instruction::NEW_INSTANCE: {
       current_block_->AddInstruction(
-          new (arena_) HNewInstance(dex_offset, instruction.VRegB_21c()));
+          new (arena_) HNewInstance(dex_pc, instruction.VRegB_21c()));
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
     }
@@ -1255,7 +1364,7 @@
     case Instruction::NEW_ARRAY: {
       HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
       current_block_->AddInstruction(
-          new (arena_) HNewArray(length, dex_offset, instruction.VRegC_22c()));
+          new (arena_) HNewArray(length, dex_pc, instruction.VRegC_22c()));
       UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
       break;
     }
@@ -1265,7 +1374,7 @@
       uint32_t type_index = instruction.VRegB_35c();
       uint32_t args[5];
       instruction.GetVarArgs(args);
-      BuildFilledNewArray(dex_offset, type_index, number_of_vreg_arguments, false, args, 0);
+      BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
       break;
     }
 
@@ -1274,12 +1383,12 @@
       uint32_t type_index = instruction.VRegB_3rc();
       uint32_t register_index = instruction.VRegC_3rc();
       BuildFilledNewArray(
-          dex_offset, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+          dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
       break;
     }
 
     case Instruction::FILL_ARRAY_DATA: {
-      BuildFillArrayData(instruction, dex_offset);
+      BuildFillArrayData(instruction, dex_pc);
       break;
     }
 
@@ -1305,7 +1414,7 @@
     case Instruction::IGET_BYTE:
     case Instruction::IGET_CHAR:
     case Instruction::IGET_SHORT: {
-      if (!BuildInstanceFieldAccess(instruction, dex_offset, false)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
         return false;
       }
       break;
@@ -1318,7 +1427,7 @@
     case Instruction::IPUT_BYTE:
     case Instruction::IPUT_CHAR:
     case Instruction::IPUT_SHORT: {
-      if (!BuildInstanceFieldAccess(instruction, dex_offset, true)) {
+      if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
         return false;
       }
       break;
@@ -1331,7 +1440,7 @@
     case Instruction::SGET_BYTE:
     case Instruction::SGET_CHAR:
     case Instruction::SGET_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_offset, false)) {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, false)) {
         return false;
       }
       break;
@@ -1344,7 +1453,7 @@
     case Instruction::SPUT_BYTE:
     case Instruction::SPUT_CHAR:
     case Instruction::SPUT_SHORT: {
-      if (!BuildStaticFieldAccess(instruction, dex_offset, true)) {
+      if (!BuildStaticFieldAccess(instruction, dex_pc, true)) {
         return false;
       }
       break;
@@ -1352,11 +1461,11 @@
 
 #define ARRAY_XX(kind, anticipated_type)                                          \
     case Instruction::AGET##kind: {                                               \
-      BuildArrayAccess(instruction, dex_offset, false, anticipated_type);         \
+      BuildArrayAccess(instruction, dex_pc, false, anticipated_type);             \
       break;                                                                      \
     }                                                                             \
     case Instruction::APUT##kind: {                                               \
-      BuildArrayAccess(instruction, dex_offset, true, anticipated_type);          \
+      BuildArrayAccess(instruction, dex_pc, true, anticipated_type);              \
       break;                                                                      \
     }
 
@@ -1372,7 +1481,7 @@
       HInstruction* object = LoadLocal(instruction.VRegB_12x(), Primitive::kPrimNot);
       // No need for a temporary for the null check, since it is the only input
       // of the following instruction.
-      object = new (arena_) HNullCheck(object, dex_offset);
+      object = new (arena_) HNullCheck(object, dex_pc);
       current_block_->AddInstruction(object);
       current_block_->AddInstruction(new (arena_) HArrayLength(object));
       UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction());
@@ -1380,13 +1489,13 @@
     }
 
     case Instruction::CONST_STRING: {
-      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_21c(), dex_offset));
+      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_21c(), dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
 
     case Instruction::CONST_STRING_JUMBO: {
-      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_31c(), dex_offset));
+      current_block_->AddInstruction(new (arena_) HLoadString(instruction.VRegB_31c(), dex_pc));
       UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction());
       break;
     }
@@ -1403,7 +1512,7 @@
         return false;
       }
       current_block_->AddInstruction(
-          new (arena_) HLoadClass(type_index, is_referrers_class, dex_offset));
+          new (arena_) HLoadClass(type_index, is_referrers_class, dex_pc));
       UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
       break;
     }
@@ -1416,7 +1525,7 @@
 
     case Instruction::THROW: {
       HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot);
-      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_offset));
+      current_block_->AddInstruction(new (arena_) HThrow(exception, dex_pc));
       // A throw instruction must branch to the exit block.
       current_block_->AddSuccessor(exit_block_);
       // We finished building this block. Set the current block to null to avoid
@@ -1429,7 +1538,7 @@
       uint8_t destination = instruction.VRegA_22c();
       uint8_t reference = instruction.VRegB_22c();
       uint16_t type_index = instruction.VRegC_22c();
-      if (!BuildTypeCheck(instruction, destination, reference, type_index, dex_offset)) {
+      if (!BuildTypeCheck(instruction, destination, reference, type_index, dex_pc)) {
         return false;
       }
       break;
@@ -1438,7 +1547,7 @@
     case Instruction::CHECK_CAST: {
       uint8_t reference = instruction.VRegA_21c();
       uint16_t type_index = instruction.VRegB_21c();
-      if (!BuildTypeCheck(instruction, -1, reference, type_index, dex_offset)) {
+      if (!BuildTypeCheck(instruction, -1, reference, type_index, dex_pc)) {
         return false;
       }
       break;
@@ -1448,7 +1557,7 @@
       current_block_->AddInstruction(new (arena_) HMonitorOperation(
           LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
           HMonitorOperation::kEnter,
-          dex_offset));
+          dex_pc));
       break;
     }
 
@@ -1456,7 +1565,7 @@
       current_block_->AddInstruction(new (arena_) HMonitorOperation(
           LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot),
           HMonitorOperation::kExit,
-          dex_offset));
+          dex_pc));
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 9cf8305..897bcec 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -76,7 +76,7 @@
   // Analyzes the dex instruction and adds HInstructions to the graph
   // to execute that instruction. Returns whether the instruction can
   // be handled.
-  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_offset);
+  bool AnalyzeDexInstruction(const Instruction& instruction, uint32_t dex_pc);
 
   // Finds all instructions that start a new block, and populates branch_targets_ with
   // the newly created blocks.
@@ -92,7 +92,7 @@
   HLocal* GetLocalAt(int register_index) const;
   void UpdateLocal(int register_index, HInstruction* instruction) const;
   HInstruction* LoadLocal(int register_index, Primitive::Type type) const;
-  void PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_offset);
+  void PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc);
   void InitializeParameters(uint16_t number_of_parameters);
 
   template<typename T>
@@ -102,44 +102,51 @@
   void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
+  void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
+  void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
+
+  template<typename T>
   void Binop_22b(const Instruction& instruction, bool reverse);
 
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_offset);
-  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_offset);
+  template<typename T> void If_21t(const Instruction& instruction, uint32_t dex_pc);
+  template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc);
 
   void Conversion_12x(const Instruction& instruction,
                       Primitive::Type input_type,
                       Primitive::Type result_type);
 
-  void BuildCheckedDiv(uint16_t out_reg,
-                       uint16_t first_reg,
-                       int32_t second_reg,  // can be a constant
-                       uint32_t dex_offset,
-                       Primitive::Type type,
-                       bool second_is_lit);
+  void BuildCheckedDivRem(uint16_t out_reg,
+                          uint16_t first_reg,
+                          int64_t second_reg_or_constant,
+                          uint32_t dex_pc,
+                          Primitive::Type type,
+                          bool second_is_lit,
+                          bool is_div);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
   // Builds an instance field access node and returns whether the instruction is supported.
-  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_put);
+  bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
   // Builds a static field access node and returns whether the instruction is supported.
-  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_offset, bool is_put);
+  bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
 
   void BuildArrayAccess(const Instruction& instruction,
-                        uint32_t dex_offset,
+                        uint32_t dex_pc,
                         bool is_get,
                         Primitive::Type anticipated_type);
 
   // Builds an invocation node and returns whether the instruction is supported.
   bool BuildInvoke(const Instruction& instruction,
-                   uint32_t dex_offset,
+                   uint32_t dex_pc,
                    uint32_t method_idx,
                    uint32_t number_of_vreg_arguments,
                    bool is_range,
@@ -147,14 +154,14 @@
                    uint32_t register_index);
 
   // Builds a new array node and the instructions that fill it.
-  void BuildFilledNewArray(uint32_t dex_offset,
+  void BuildFilledNewArray(uint32_t dex_pc,
                            uint32_t type_index,
                            uint32_t number_of_vreg_arguments,
                            bool is_range,
                            uint32_t* args,
                            uint32_t register_index);
 
-  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_offset);
+  void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
 
   // Fills the given object with data as specified in the fill-array-data
   // instruction. Currently only used for non-reference and non-floating point
@@ -164,14 +171,14 @@
                           const T* data,
                           uint32_t element_count,
                           Primitive::Type anticipated_type,
-                          uint32_t dex_offset);
+                          uint32_t dex_pc);
 
   // Fills the given object with data as specified in the fill-array-data
   // instruction. The data must be for long and double arrays.
   void BuildFillWideArrayData(HInstruction* object,
                               const int64_t* data,
                               uint32_t element_count,
-                              uint32_t dex_offset);
+                              uint32_t dex_pc);
 
   // Builds an `HInstanceOf` or an `HCheckCast` instruction.
   // Returns whether we succeeded in building the instruction.
@@ -179,7 +186,7 @@
                       uint8_t destination,
                       uint8_t reference,
                       uint16_t type_index,
-                      uint32_t dex_offset);
+                      uint32_t dex_pc);
 
   ArenaAllocator* const arena_;
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6b5ec1d..0b59327 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -51,7 +51,7 @@
     MarkNotLeaf();
   }
   ComputeFrameSize(GetGraph()->GetNumberOfLocalVRegs()
-                     + GetGraph()->GetNumberOfTemporaries()
+                     + GetGraph()->GetTemporariesVRegSlots()
                      + 1 /* filler */,
                    0, /* the baseline compiler does not have live registers at slow path */
                    GetGraph()->GetMaximumNumberOfOutVRegs()
@@ -150,12 +150,15 @@
 
 Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
   uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
+  // The type of the previous instruction tells us whether we need a single or double stack slot.
+  Primitive::Type type = temp->GetType();
+  int32_t temp_size = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble) ? 2 : 1;
   // Use the temporary region (right below the dex registers).
   int32_t slot = GetFrameSize() - FrameEntrySpillSize()
                                 - kVRegSize  // filler
                                 - (number_of_locals * kVRegSize)
-                                - ((1 + temp->GetIndex()) * kVRegSize);
-  return Location::StackSlot(slot);
+                                - ((temp_size + temp->GetIndex()) * kVRegSize);
+  return temp_size == 2 ? Location::DoubleStackSlot(slot) : Location::StackSlot(slot);
 }
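A worked instance of the slot arithmetic above (frame numbers invented for the example): with a 64-byte frame, a 12-byte entry spill area, three local vregs and `kVRegSize == 4`, a single-slot temp at index 0 lands at offset 32, while a long/double temp claims two vreg slots and therefore starts one slot lower:

    #include <cassert>
    #include <cstdint>

    int32_t TempSlot(int32_t frame_size, int32_t spill_size, int32_t locals,
                     int32_t temp_index, int32_t temp_size /* 1 or 2 vregs */) {
      constexpr int32_t kVRegSize = 4;
      return frame_size - spill_size
                        - kVRegSize                     // filler
                        - (locals * kVRegSize)
                        - ((temp_size + temp_index) * kVRegSize);
    }

    int main() {
      assert(TempSlot(64, 12, 3, 0, 1) == 32);  // int-sized temp: StackSlot(32)
      assert(TempSlot(64, 12, 3, 0, 2) == 28);  // long/double temp: DoubleStackSlot(28)
      return 0;
    }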
 
 int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
@@ -586,12 +589,14 @@
       if (locations->RegisterContainsObject(i)) {
         locations->SetStackBit(stack_offset / kVRegSize);
       }
+      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
       stack_offset += SaveCoreRegister(stack_offset, i);
     }
   }
 
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (register_set->ContainsFloatingPointRegister(i)) {
+      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
       stack_offset += SaveFloatingPointRegister(stack_offset, i);
     }
   }
@@ -602,12 +607,14 @@
   size_t stack_offset = first_register_slot_in_slow_path_;
   for (size_t i = 0, e = GetNumberOfCoreRegisters(); i < e; ++i) {
     if (register_set->ContainsCoreRegister(i)) {
+      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
       stack_offset += RestoreCoreRegister(stack_offset, i);
     }
   }
 
   for (size_t i = 0, e = GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (register_set->ContainsFloatingPointRegister(i)) {
+      DCHECK_LT(stack_offset, GetFrameSize() - FrameEntrySpillSize());
       stack_offset += RestoreFloatingPointRegister(stack_offset, i);
     }
   }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ac4fc67..f906eb8 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -17,9 +17,9 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_field.h"
 #include "globals.h"
-#include "instruction_set.h"
 #include "locations.h"
 #include "memory_region.h"
 #include "nodes.h"
@@ -168,6 +168,15 @@
 
   void EmitParallelMoves(Location from1, Location to1, Location from2, Location to2);
 
+  static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
+    if (kIsDebugBuild) {
+      if (type == Primitive::kPrimNot && value->IsIntConstant()) {
+        CHECK_EQ(value->AsIntConstant()->GetValue(), 0);
+      }
+    }
+    return type == Primitive::kPrimNot && !value->IsIntConstant();
+  }
+
  protected:
   CodeGenerator(HGraph* graph,
                 size_t number_of_core_registers,
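An aside on the new `StoreNeedsWriteBarrier` helper above: the card-marking barrier exists so the generational GC can find references written into already-scanned objects, and writing null can never create such a reference. The builder models null as `IntConstant(0)`, which is exactly what the debug block cross-checks before the constant is exempted. A minimal host-side restatement, with hypothetical stand-in types:

    #include <cassert>

    struct Value { bool is_int_constant; };  // stand-in for the HInstruction queries

    bool NeedsWriteBarrier(bool is_reference_store, Value v) {
      // Only a real (non-null) reference store can introduce a pointer that
      // card marking must record; null is represented as IntConstant(0).
      return is_reference_store && !v.is_int_constant;
    }

    int main() {
      assert(!NeedsWriteBarrier(true,  {true}));   // storing null: no barrier
      assert( NeedsWriteBarrier(true,  {false}));  // storing an object: barrier
      assert(!NeedsWriteBarrier(false, {true}));   // primitive store: no barrier
      return 0;
    }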
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6f5a3cb..1701ef5 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -41,7 +41,7 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
 static constexpr int kCurrentMethodStackOffset = 0;
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2 };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr SRegister kRuntimeParameterFpuRegisters[] = { };
@@ -80,7 +80,7 @@
  public:
   explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     arm_codegen->InvokeRuntime(
@@ -96,7 +96,7 @@
  public:
   explicit DivZeroCheckSlowPathARM(HDivZeroCheck* instruction) : instruction_(instruction) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     arm_codegen->InvokeRuntime(
@@ -112,7 +112,7 @@
  public:
   StackOverflowCheckSlowPathARM() {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
     __ LoadFromOffset(kLoadWord, PC, TR,
         QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value());
@@ -124,10 +124,10 @@
 
 class SuspendCheckSlowPathARM : public SlowPathCodeARM {
  public:
-  explicit SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
+  SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
       : instruction_(instruction), successor_(successor) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     codegen->SaveLiveRegisters(instruction_->GetLocations());
@@ -166,7 +166,7 @@
         index_location_(index_location),
         length_location_(length_location) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     // We're moving two locations to locations that could overlap, so we need a parallel
@@ -199,7 +199,7 @@
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
   }
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = at_->GetLocations();
 
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
@@ -245,7 +245,7 @@
  public:
   explicit LoadStringSlowPathARM(HLoadString* instruction) : instruction_(instruction) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -281,7 +281,7 @@
         object_class_(object_class),
         dex_pc_(dex_pc) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -670,13 +670,13 @@
       __ LoadSFromOffset(destination.As<SRegister>(), SP, source.GetStackIndex());
     }
   } else {
-    DCHECK(destination.IsStackSlot());
+    DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
       __ StoreToOffset(kStoreWord, source.As<Register>(), SP, destination.GetStackIndex());
     } else if (source.IsFpuRegister()) {
       __ StoreSToOffset(source.As<SRegister>(), SP, destination.GetStackIndex());
     } else {
-      DCHECK(source.IsStackSlot());
+      DCHECK(source.IsStackSlot()) << source;
       __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
@@ -778,26 +778,29 @@
     return;
   }
 
-  if (instruction->IsIntConstant()) {
-    int32_t value = instruction->AsIntConstant()->GetValue();
-    if (location.IsRegister()) {
-      __ LoadImmediate(location.As<Register>(), value);
-    } else {
-      DCHECK(location.IsStackSlot());
-      __ LoadImmediate(IP, value);
-      __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-    }
-  } else if (instruction->IsLongConstant()) {
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegisterPair()) {
-      __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
-      __ LoadImmediate(location.AsRegisterPairHigh<Register>(), High32Bits(value));
-    } else {
-      DCHECK(location.IsDoubleStackSlot());
-      __ LoadImmediate(IP, Low32Bits(value));
-      __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
-      __ LoadImmediate(IP, High32Bits(value));
-      __ StoreToOffset(kStoreWord, IP, SP, location.GetHighStackIndex(kArmWordSize));
+  if (locations != nullptr && locations->Out().IsConstant()) {
+    HConstant* const_to_move = locations->Out().GetConstant();
+    if (const_to_move->IsIntConstant()) {
+      int32_t value = const_to_move->AsIntConstant()->GetValue();
+      if (location.IsRegister()) {
+        __ LoadImmediate(location.As<Register>(), value);
+      } else {
+        DCHECK(location.IsStackSlot());
+        __ LoadImmediate(IP, value);
+        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
+      }
+    } else if (const_to_move->IsLongConstant()) {
+      int64_t value = const_to_move->AsLongConstant()->GetValue();
+      if (location.IsRegisterPair()) {
+        __ LoadImmediate(location.AsRegisterPairLow<Register>(), Low32Bits(value));
+        __ LoadImmediate(location.AsRegisterPairHigh<Register>(), High32Bits(value));
+      } else {
+        DCHECK(location.IsDoubleStackSlot());
+        __ LoadImmediate(IP, Low32Bits(value));
+        __ StoreToOffset(kStoreWord, IP, SP, location.GetStackIndex());
+        __ LoadImmediate(IP, High32Bits(value));
+        __ StoreToOffset(kStoreWord, IP, SP, location.GetHighStackIndex(kArmWordSize));
+      }
     }
   } else if (instruction->IsLoadLocal()) {
     uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
@@ -822,7 +825,12 @@
     }
   } else if (instruction->IsTemporary()) {
     Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    Move32(location, temp_location);
+    if (temp_location.IsStackSlot()) {
+      Move32(location, temp_location);
+    } else {
+      DCHECK(temp_location.IsDoubleStackSlot());
+      Move64(location, temp_location);
+    }
   } else {
     DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
     switch (instruction->GetType()) {
@@ -1180,7 +1188,8 @@
       kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()));
   // LR = temp[offset_of_quick_compiled_code]
   __ LoadFromOffset(kLoadWord, LR, temp,
-                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value());
+                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                         kArmWordSize).Int32Value());
   // LR()
   __ blx(LR);
 
@@ -1221,7 +1230,8 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.As<Register>(), class_offset);
   }
   // temp = temp->GetMethodAt(method_offset);
-  uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value();
+  uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kArmWordSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -1257,7 +1267,8 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.As<Register>(), class_offset);
   }
   // temp = temp->GetImtEntryAt(method_offset);
-  uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value();
+  uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kArmWordSize).Int32Value();
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -1343,10 +1354,42 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   switch (result_type) {
+    case Primitive::kPrimByte:
+      switch (input_type) {
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-byte' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case Primitive::kPrimShort:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-short' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimInt:
       switch (input_type) {
         case Primitive::kPrimLong:
-          // long-to-int conversion.
+          // Processing a Dex `long-to-int' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
@@ -1369,7 +1412,7 @@
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          // int-to-long conversion.
+          // Processing a Dex `int-to-long' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
@@ -1386,10 +1429,67 @@
       }
       break;
 
+    case Primitive::kPrimChar:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-char' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimDouble:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Type conversion from " << input_type
-                 << " to " << result_type << " not yet implemented";
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimFloat:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
       break;
 
     default:
@@ -1405,10 +1505,40 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   switch (result_type) {
+    case Primitive::kPrimByte:
+      switch (input_type) {
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-byte' instruction.
+          __ sbfx(out.As<Register>(), in.As<Register>(), 0, 8);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case Primitive::kPrimShort:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-short' instruction.
+          __ sbfx(out.As<Register>(), in.As<Register>(), 0, 16);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimInt:
       switch (input_type) {
         case Primitive::kPrimLong:
-          // long-to-int conversion.
+          // Processing a Dex `long-to-int' instruction.
           DCHECK(out.IsRegister());
           if (in.IsRegisterPair()) {
             __ Mov(out.As<Register>(), in.AsRegisterPairLow<Register>());
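The byte and short cases above lean on ARM's bitfield extracts: `sbfx rd, rn, #0, #w` sign-extends the low `w` bits, and `ubfx` (used for `int-to-char` in a later hunk) zero-extends them. A host-side model of both, spelling out the shift-pair equivalence:

    #include <cassert>
    #include <cstdint>

    int32_t SignExtend(uint32_t value, int width) {   // models sbfx rd, rn, #0, #width
      int shift = 32 - width;
      // Arithmetic right shift of a negative value; well-defined on the
      // two's-complement targets ART supports.
      return static_cast<int32_t>(value << shift) >> shift;
    }

    uint32_t ZeroExtend(uint32_t value, int width) {  // models ubfx rd, rn, #0, #width
      return value & ((1u << width) - 1);
    }

    int main() {
      assert(SignExtend(0x000000FF, 8)  == -1);      // int-to-byte: (byte)255 == -1
      assert(SignExtend(0x00012345, 16) == 0x2345);  // int-to-short keeps the low 16 bits
      assert(ZeroExtend(0xFFFF1234, 16) == 0x1234);  // int-to-char zero-extends
      return 0;
    }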
@@ -1440,7 +1570,7 @@
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          // int-to-long conversion.
+          // Processing a Dex `int-to-long' instruction.
           DCHECK(out.IsRegisterPair());
           DCHECK(in.IsRegister());
           __ Mov(out.AsRegisterPairLow<Register>(), in.As<Register>());
@@ -1462,10 +1592,69 @@
       }
       break;
 
+    case Primitive::kPrimChar:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-char' instruction.
+          __ ubfx(out.As<Register>(), in.As<Register>(), 0, 16);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar: {
+          // Processing a Dex `int-to-float' instruction.
+          __ vmovsr(out.As<SRegister>(), in.As<Register>());
+          __ vcvtsi(out.As<SRegister>(), out.As<SRegister>());
+          break;
+        }
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimDouble:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Type conversion from " << input_type
-                 << " to " << result_type << " not yet implemented";
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar: {
+          // Processing a Dex `int-to-double' instruction.
+          __ vmovsr(out.AsFpuRegisterPairLow<SRegister>(), in.As<Register>());
+          __ vcvtdi(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
+                    out.AsFpuRegisterPairLow<SRegister>());
+          break;
+        }
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimFloat:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
       break;
 
     default:
@@ -1693,8 +1882,11 @@
 }
 
 void LocationsBuilderARM::VisitDiv(HDiv* div) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
+  LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
       locations->SetInAt(0, Location::RequiresRegister());
@@ -1703,7 +1895,13 @@
       break;
     }
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R0,R2.
+      locations->SetOut(Location::RegisterPairLocation(R0, R2));
       break;
     }
     case Primitive::kPrimFloat:
@@ -1732,7 +1930,15 @@
     }
 
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
+
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc());
       break;
     }
 
@@ -1753,10 +1959,90 @@
   }
 }
 
+void LocationsBuilderARM::VisitRem(HRem* rem) {
+  LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      locations->AddTemp(Location::RequiresRegister());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R2,R3.
+      locations->SetOut(Location::RegisterPairLocation(R2, R3));
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
+  LocationSummary* locations = rem->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt: {
+      Register reg1 = first.As<Register>();
+      Register reg2 = second.As<Register>();
+      Register temp = locations->GetTemp(0).As<Register>();
+
+      // temp = reg1 / reg2  (integer division)
+      // temp = temp * reg2
+      // dest = reg1 - temp
+      __ sdiv(temp, reg1, reg2);
+      __ mul(temp, temp, reg2);
+      __ sub(out.As<Register>(), reg1, ShifterOperand(temp));
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(R2, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(R3, out.AsRegisterPairHigh<Register>());
+
+      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc());
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
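The three-instruction integer sequence above is correct because `sdiv` truncates toward zero, exactly like Java division, so `a - (a / b) * b` reproduces Java's `%` including its behavior on negative operands. A host-side check (C++ integer division also truncates toward zero):

    #include <cassert>

    int EmulatedRem(int a, int b) {
      int temp = a / b;  // sdiv temp, reg1, reg2
      temp = temp * b;   // mul  temp, temp, reg2
      return a - temp;   // sub  out, reg1, temp
    }

    int main() {
      assert(EmulatedRem(7, 3)  ==  1);
      assert(EmulatedRem(-7, 3) == -1);  // sign follows the dividend, as in Java
      assert(EmulatedRem(7, -3) ==  1);
      return 0;
    }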
 void LocationsBuilderARM::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -1769,9 +2055,36 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  DCHECK(value.IsRegister()) << value;
-  __ cmp(value.As<Register>(), ShifterOperand(0));
-  __ b(slow_path->GetEntryLabel(), EQ);
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ cmp(value.As<Register>(), ShifterOperand(0));
+        __ b(slow_path->GetEntryLabel(), EQ);
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ b(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      if (value.IsRegisterPair()) {
+        __ orrs(IP,
+                value.AsRegisterPairLow<Register>(),
+                ShifterOperand(value.AsRegisterPairHigh<Register>()));
+        __ b(slow_path->GetEntryLabel(), EQ);
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ b(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+  }
 }
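Because the divisor location is now `RegisterOrConstant`, the emitted check can degenerate at compile time: a non-zero constant emits nothing, and a constant zero becomes an unconditional branch to the slow path. A small decision model of the branches above (the enum is hypothetical):

    #include <cassert>
    #include <cstdint>

    enum class Emit { kNothing, kUnconditionalBranch, kCompareAndBranch };

    Emit DivZeroCheckCode(bool divisor_is_constant, int64_t constant_value) {
      if (divisor_is_constant) {
        return constant_value == 0 ? Emit::kUnconditionalBranch : Emit::kNothing;
      }
      return Emit::kCompareAndBranch;  // cmp/orrs against zero, then b(..., EQ)
    }

    int main() {
      assert(DivZeroCheckCode(true, 42) == Emit::kNothing);
      assert(DivZeroCheckCode(true, 0)  == Emit::kUnconditionalBranch);
      assert(DivZeroCheckCode(false, 0) == Emit::kCompareAndBranch);
      return 0;
    }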
 
 void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
@@ -1918,11 +2231,12 @@
 void LocationsBuilderARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  bool is_object_type = instruction->GetFieldType() == Primitive::kPrimNot;
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue());
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // Temporary registers for the write barrier.
-  if (is_object_type) {
+  if (needs_write_barrier) {
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -1953,7 +2267,7 @@
     case Primitive::kPrimNot: {
       Register value = locations->InAt(1).As<Register>();
       __ StoreToOffset(kStoreWord, value, obj, offset);
-      if (field_type == Primitive::kPrimNot) {
+      if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
         Register temp = locations->GetTemp(0).As<Register>();
         Register card = locations->GetTemp(1).As<Register>();
         codegen_->MarkGCCard(temp, card, obj, value);
@@ -2186,10 +2500,14 @@
 
 void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
-  bool is_object = value_type == Primitive::kPrimNot;
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool needs_runtime_call = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (is_object) {
+      instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+  if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -2198,6 +2516,12 @@
     locations->SetInAt(0, Location::RequiresRegister());
     locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
     locations->SetInAt(2, Location::RequiresRegister());
+
+    if (needs_write_barrier) {
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -2206,6 +2530,9 @@
   Register obj = locations->InAt(0).As<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
+  bool needs_runtime_call = locations->WillCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -2236,21 +2563,29 @@
       break;
     }
 
-    case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register value = locations->InAt(2).As<Register>();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreToOffset(kStoreWord, value, obj, offset);
-      } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_4));
-        __ StoreToOffset(kStoreWord, value, IP, data_offset);
-      }
-      break;
-    }
-
+    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc());
+      if (!needs_runtime_call) {
+        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        Register value = locations->InAt(2).As<Register>();
+        if (index.IsConstant()) {
+          size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ StoreToOffset(kStoreWord, value, obj, offset);
+        } else {
+          DCHECK(index.IsRegister()) << index;
+          __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_4));
+          __ StoreToOffset(kStoreWord, value, IP, data_offset);
+        }
+        if (needs_write_barrier) {
+          DCHECK_EQ(value_type, Primitive::kPrimNot);
+          Register temp = locations->GetTemp(0).As<Register>();
+          Register card = locations->GetTemp(1).As<Register>();
+          codegen_->MarkGCCard(temp, card, obj, value);
+        }
+      } else {
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc());
+      }
       break;
     }
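Summing up the new ArraySet policy (editorial note): a store that may need a type check still goes through the `pAputObject` runtime helper, which performs its own checks and barrier; everything else is a plain store, plus a card mark only when a non-null reference is written. As a decision table in code, with hypothetical names:

    #include <cassert>
    #include <cstring>

    const char* ArraySetLowering(bool needs_type_check,
                                 bool is_reference,
                                 bool value_is_null_constant) {
      if (needs_type_check) return "runtime call: pAputObject";
      if (is_reference && !value_is_null_constant) return "store + MarkGCCard";
      return "plain store";
    }

    int main() {
      assert(std::strcmp(ArraySetLowering(true,  true,  false), "runtime call: pAputObject") == 0);
      assert(std::strcmp(ArraySetLowering(false, true,  false), "store + MarkGCCard") == 0);
      assert(std::strcmp(ArraySetLowering(false, true,  true),  "plain store") == 0);
      assert(std::strcmp(ArraySetLowering(false, false, false), "plain store") == 0);
      return 0;
    }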
 
@@ -2602,11 +2937,12 @@
 void LocationsBuilderARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  bool is_object_type = instruction->GetFieldType() == Primitive::kPrimNot;
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(instruction->GetFieldType(), instruction->GetValue());
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // Temporary registers for the write barrier.
-  if (is_object_type) {
+  if (needs_write_barrier) {
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -2637,7 +2973,7 @@
     case Primitive::kPrimNot: {
       Register value = locations->InAt(1).As<Register>();
       __ StoreToOffset(kStoreWord, value, cls, offset);
-      if (field_type == Primitive::kPrimNot) {
+      if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
         Register temp = locations->GetTemp(0).As<Register>();
         Register card = locations->GetTemp(1).As<Register>();
         codegen_->MarkGCCard(temp, card, cls, value);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index acc3fd6..c00fac1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -28,7 +28,8 @@
 class CodeGeneratorARM;
 class SlowPathCodeARM;
 
-static constexpr size_t kArmWordSize = 4;
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kArmWordSize = kArmPointerSize;
 
 static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
 static constexpr RegisterPair kParameterCorePairRegisters[] = { R1_R2, R2_R3 };
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 887a4ef..82dced5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -48,18 +48,28 @@
   return type == Primitive::kPrimFloat || type == Primitive::kPrimDouble;
 }
 
+bool IsIntegralType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      return true;
+    default:
+      return false;
+  }
+}
+
 bool Is64BitType(Primitive::Type type) {
   return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
 }
 
 // Convenience helpers to ease conversion to and from VIXL operands.
+static_assert((SP == 31) && (WSP == 31) && (XZR == 32) && (WZR == 32),
+              "Unexpected values for register codes.");
 
 int VIXLRegCodeFromART(int code) {
-  // TODO: static check?
-  DCHECK_EQ(SP, 31);
-  DCHECK_EQ(WSP, 31);
-  DCHECK_EQ(XZR, 32);
-  DCHECK_EQ(WZR, 32);
   if (code == SP) {
     return vixl::kSPRegInternalCode;
   }
@@ -70,11 +80,6 @@
 }
 
 int ARTRegCodeFromVIXL(int code) {
-  // TODO: static check?
-  DCHECK_EQ(SP, 31);
-  DCHECK_EQ(WSP, 31);
-  DCHECK_EQ(XZR, 32);
-  DCHECK_EQ(WZR, 32);
   if (code == vixl::kSPRegInternalCode) {
     return SP;
   }
@@ -128,6 +133,17 @@
                         instr->InputAt(input_index)->GetType());
 }
 
+CPURegister OutputCPURegister(HInstruction* instr) {
+  return IsFPType(instr->GetType()) ? static_cast<CPURegister>(OutputFPRegister(instr))
+                                    : static_cast<CPURegister>(OutputRegister(instr));
+}
+
+CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+  return IsFPType(instr->InputAt(index)->GetType())
+      ? static_cast<CPURegister>(InputFPRegisterAt(instr, index))
+      : static_cast<CPURegister>(InputRegisterAt(instr, index));
+}
+
 int64_t Int64ConstantFrom(Location location) {
   HConstant* instr = location.GetConstant();
   return instr->IsIntConstant() ? instr->AsIntConstant()->GetValue()
@@ -151,14 +167,18 @@
   return MemOperand(sp, location.GetStackIndex());
 }
 
-MemOperand HeapOperand(const Register& base, Offset offset) {
+MemOperand HeapOperand(const Register& base, size_t offset) {
   // A heap reference must be 32 bits, so it fits in a W register.
   DCHECK(base.IsW());
-  return MemOperand(base.X(), offset.SizeValue());
+  return MemOperand(base.X(), offset);
 }
 
-MemOperand HeapOperandFrom(Location location, Primitive::Type type, Offset offset) {
-  return HeapOperand(RegisterFrom(location, type), offset);
+MemOperand HeapOperand(const Register& base, Offset offset) {
+  return HeapOperand(base, offset.SizeValue());
+}
+
+MemOperand HeapOperandFrom(Location location, Offset offset) {
+  return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
 }
 
 Location LocationFrom(const Register& reg) {
@@ -227,7 +247,8 @@
   return ARM64ReturnLocation(return_type);
 }
 
-#define __ reinterpret_cast<Arm64Assembler*>(codegen->GetAssembler())->vixl_masm_->
+#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->
+#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, x).Int32Value()
 
 class SlowPathCodeARM64 : public SlowPathCode {
  public:
@@ -245,45 +266,125 @@
 
 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
-                                    Location index_location,
-                                    Location length_location)
-      : instruction_(instruction),
-        index_location_(index_location),
-        length_location_(length_location) {}
+  BoundsCheckSlowPathARM64() {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorARM64* arm64_codegen = reinterpret_cast<CodeGeneratorARM64*>(codegen);
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    InvokeRuntimeCallingConvention calling_convention;
-    arm64_codegen->MoveHelper(LocationFrom(calling_convention.GetRegisterAt(0)),
-                              index_location_, Primitive::kPrimInt);
-    arm64_codegen->MoveHelper(LocationFrom(calling_convention.GetRegisterAt(1)),
-                              length_location_, Primitive::kPrimInt);
-    size_t offset = QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pThrowArrayBounds).SizeValue();
-    __ Ldr(lr, MemOperand(tr, offset));
-    __ Blr(lr);
-    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    __ Brk(__LINE__);  // TODO: Unimplemented BoundsCheckSlowPathARM64.
   }
 
  private:
-  HBoundsCheck* const instruction_;
-  const Location index_location_;
-  const Location length_location_;
-
   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
 };
 
+class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    __ Bind(GetEntryLabel());
+    arm64_codegen->InvokeRuntime(
+        QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc());
+  }
+
+ private:
+  HDivZeroCheck* const instruction_;
+  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
+};
+
+class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  LoadClassSlowPathARM64(HLoadClass* cls,
+                         HInstruction* at,
+                         uint32_t dex_pc,
+                         bool do_clinit)
+      : cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = at_->GetLocations();
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex());
+    arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W());
+    int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
+                                            : QUICK_ENTRY_POINT(pInitializeType);
+    arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_);
+
+    // Move the class to the desired location.
+    Location out = locations->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      Primitive::Type type = at_->GetType();
+      arm64_codegen->MoveHelper(out, calling_convention.GetReturnLocation(type), type);
+    }
+
+    codegen->RestoreLiveRegisters(locations);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The class this slow path will load.
+  HLoadClass* const cls_;
+
+  // The instruction where this slow path is happening.
+  // (Might be the load class or an initialization check).
+  HInstruction* const at_;
+
+  // The dex PC of `at_`.
+  const uint32_t dex_pc_;
+
+  // Whether to initialize the class.
+  const bool do_clinit_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
+};
+
+class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit LoadStringSlowPathARM64(HLoadString* instruction) : instruction_(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+
+    __ Bind(GetEntryLabel());
+    codegen->SaveLiveRegisters(locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(0).W());
+    __ Mov(calling_convention.GetRegisterAt(1).W(), instruction_->GetStringIndex());
+    arm64_codegen->InvokeRuntime(
+        QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc());
+    Primitive::Type type = instruction_->GetType();
+    arm64_codegen->MoveHelper(locations->Out(), calling_convention.GetReturnLocation(type), type);
+
+    codegen->RestoreLiveRegisters(locations);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  HLoadString* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
+};
+
 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit NullCheckSlowPathARM64(HNullCheck* instr) : instruction_(instr) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
-    int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pThrowNullPointer).Int32Value();
-    __ Ldr(lr, MemOperand(tr, offset));
-    __ Blr(lr);
-    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    arm64_codegen->InvokeRuntime(
+        QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc());
   }
 
  private:
@@ -298,13 +399,18 @@
                                      HBasicBlock* successor)
       : instruction_(instruction), successor_(successor) {}
 
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    size_t offset = QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pTestSuspend).SizeValue();
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
-    __ Ldr(lr, MemOperand(tr, offset));
-    __ Blr(lr);
-    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
-    __ B(GetReturnLabel());
+    codegen->SaveLiveRegisters(instruction_->GetLocations());
+    arm64_codegen->InvokeRuntime(
+        QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc());
+    codegen->RestoreLiveRegisters(instruction_->GetLocations());
+    if (successor_ == nullptr) {
+      __ B(GetReturnLabel());
+    } else {
+      __ B(arm64_codegen->GetLabelOf(successor_));
+    }
   }
 
   vixl::Label* GetReturnLabel() {
@@ -324,6 +430,20 @@
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
 };
 
+class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  TypeCheckSlowPathARM64() {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    __ Brk(__LINE__);  // TODO: Unimplemented TypeCheckSlowPathARM64.
+    __ B(GetExitLabel());
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
+};
+
 #undef __
 
 Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -356,11 +476,12 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this) {}
 
-#define __ reinterpret_cast<Arm64Assembler*>(GetAssembler())->vixl_masm_->
+#undef __
+#define __ GetVIXLAssembler()->
 
 void CodeGeneratorARM64::GenerateFrameEntry() {
   // TODO: Add proper support for the stack overflow check.
-  UseScratchRegisterScope temps(assembler_.vixl_masm_);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireX();
   __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
   __ Ldr(temp, MemOperand(temp, 0));
@@ -378,7 +499,7 @@
   // ...                       : other preserved registers.
   // sp[frame_size - regs_size]: first preserved register.
   // ...                       : reserved frame space.
-  // sp[0]                     : context pointer.
+  // sp[0]                     : current method.
 }
 
 void CodeGeneratorARM64::GenerateFrameExit() {
@@ -413,7 +534,7 @@
       __ Mov(dst, value);
     } else {
       DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
-      UseScratchRegisterScope temps(assembler_.vixl_masm_);
+      UseScratchRegisterScope temps(GetVIXLAssembler());
       Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
       __ Mov(temp, value);
       __ Str(temp, StackOperandFrom(location));
@@ -465,7 +586,7 @@
 }
 
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
-  UseScratchRegisterScope temps(assembler_.vixl_masm_);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
   Register temp = temps.AcquireX();
   vixl::Label done;
@@ -522,6 +643,19 @@
   stream << Arm64ManagedRegister::FromDRegister(DRegister(reg));
 }
 
+void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
+  if (constant->IsIntConstant() || constant->IsLongConstant()) {
+    __ Mov(Register(destination),
+           constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
+                                     : constant->AsLongConstant()->GetValue());
+  } else if (constant->IsFloatConstant()) {
+    __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
+  } else {
+    DCHECK(constant->IsDoubleConstant());
+    __ Fmov(FPRegister(destination), constant->AsDoubleConstant()->GetValue());
+  }
+}
+
 void CodeGeneratorARM64::MoveHelper(Location destination,
                                     Location source,
                                     Primitive::Type type) {
@@ -544,13 +678,7 @@
     } else if (source.IsFpuRegister()) {
       __ Fmov(dst, FPRegisterFrom(source, type));
     } else {
-      HConstant* cst = source.GetConstant();
-      if (cst->IsFloatConstant()) {
-        __ Fmov(dst, cst->AsFloatConstant()->GetValue());
-      } else {
-        DCHECK(cst->IsDoubleConstant());
-        __ Fmov(dst, cst->AsDoubleConstant()->GetValue());
-      }
+      MoveConstant(dst, source.GetConstant());
     }
   } else {
     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
@@ -558,8 +686,21 @@
       __ Str(RegisterFrom(source, type), StackOperandFrom(destination));
     } else if (source.IsFpuRegister()) {
       __ Str(FPRegisterFrom(source, type), StackOperandFrom(destination));
+    } else if (source.IsConstant()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      HConstant* cst = source.GetConstant();
+      CPURegister temp;
+      if (cst->IsIntConstant() || cst->IsLongConstant()) {
+        temp = cst->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+      } else {
+        DCHECK(cst->IsFloatConstant() || cst->IsDoubleConstant());
+        temp = cst->IsFloatConstant() ? temps.AcquireS() : temps.AcquireD();
+      }
+      MoveConstant(temp, cst);
+      __ Str(temp, StackOperandFrom(destination));
     } else {
-      UseScratchRegisterScope temps(assembler_.vixl_masm_);
+      DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
+      UseScratchRegisterScope temps(GetVIXLAssembler());
       Register temp = destination.IsDoubleStackSlot() ? temps.AcquireX() : temps.AcquireW();
       __ Ldr(temp, StackOperandFrom(source));
       __ Str(temp, StackOperandFrom(destination));
@@ -568,61 +709,89 @@
 }
 
 void CodeGeneratorARM64::Load(Primitive::Type type,
-                              vixl::Register dst,
+                              vixl::CPURegister dst,
                               const vixl::MemOperand& src) {
   switch (type) {
     case Primitive::kPrimBoolean:
-      __ Ldrb(dst, src);
+      __ Ldrb(Register(dst), src);
       break;
     case Primitive::kPrimByte:
-      __ Ldrsb(dst, src);
+      __ Ldrsb(Register(dst), src);
       break;
     case Primitive::kPrimShort:
-      __ Ldrsh(dst, src);
+      __ Ldrsh(Register(dst), src);
       break;
     case Primitive::kPrimChar:
-      __ Ldrh(dst, src);
+      __ Ldrh(Register(dst), src);
       break;
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
     case Primitive::kPrimLong:
-      DCHECK(dst.Is64Bits() == (type == Primitive::kPrimLong));
-      __ Ldr(dst, src);
-      break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
+      DCHECK(dst.Is64Bits() == Is64BitType(type));
+      __ Ldr(dst, src);
+      break;
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
   }
 }
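
The switch above selects the load by operand size and signedness. C++'s integer
conversions make the same choices, which gives an easy way to sanity-check the
mapping (a sketch, not ART code):

    #include <cstdint>

    int32_t LoadBoolean(const uint8_t* p) { return *p; }  // Ldrb:  zero-extend 8 bits
    int32_t LoadByte(const int8_t* p)     { return *p; }  // Ldrsb: sign-extend 8 bits
    int32_t LoadShort(const int16_t* p)   { return *p; }  // Ldrsh: sign-extend 16 bits
    int32_t LoadChar(const uint16_t* p)   { return *p; }  // Ldrh:  zero-extend (Java char is unsigned)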
 
 void CodeGeneratorARM64::Store(Primitive::Type type,
-                               vixl::Register rt,
+                               vixl::CPURegister rt,
                                const vixl::MemOperand& dst) {
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
-      __ Strb(rt, dst);
+      __ Strb(Register(rt), dst);
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
-      __ Strh(rt, dst);
+      __ Strh(Register(rt), dst);
       break;
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
     case Primitive::kPrimLong:
-      DCHECK(rt.Is64Bits() == (type == Primitive::kPrimLong));
-      __ Str(rt, dst);
-      break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
+      DCHECK(rt.Is64Bits() == Is64BitType(type));
+      __ Str(rt, dst);
+      break;
     case Primitive::kPrimVoid:
       LOG(FATAL) << "Unreachable type " << type;
   }
 }
 
-#undef __
-#define __ GetAssembler()->vixl_masm_->
+void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
+  DCHECK(current_method.IsW());
+  __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
+}
+
+void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
+                                       HInstruction* instruction,
+                                       uint32_t dex_pc) {
+  __ Ldr(lr, MemOperand(tr, entry_point_offset));
+  __ Blr(lr);
+  RecordPcInfo(instruction, dex_pc);
+  DCHECK(instruction->IsSuspendCheck()
+      || instruction->IsBoundsCheck()
+      || instruction->IsNullCheck()
+      || instruction->IsDivZeroCheck()
+      || !IsLeafMethod());
+}
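
InvokeRuntime centralizes the Ldr-lr/Blr-lr/RecordPcInfo sequence the slow paths
previously open-coded. A rough C++ model of that dispatch, with invented types
(Thread here is only a stand-in for the object behind the tr register):

    #include <cstddef>
    #include <cstdio>
    #include <cstring>

    struct Thread {  // stand-in: a table of entry points hanging off tr
      void (*pThrowDivZero)();
      void (*pTestSuspend)();
    };

    void InvokeRuntime(Thread* tr, std::ptrdiff_t entry_point_offset) {
      void (*entry)();  // Ldr lr, [tr, #entry_point_offset]
      std::memcpy(&entry, reinterpret_cast<char*>(tr) + entry_point_offset,
                  sizeof(entry));
      entry();  // Blr lr; RecordPcInfo would map the return address to a dex PC
    }

    int main() {
      Thread t{[] { std::puts("throw div-zero"); }, [] { std::puts("suspend"); }};
      InvokeRuntime(&t, offsetof(Thread, pTestSuspend));  // prints "suspend"
    }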
+
+void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
+                                                                     vixl::Register class_reg) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp = temps.AcquireW();
+  __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
+  // Even if the initialized flag is set, we may be in a situation where caches are not synced
+  // properly. Therefore, we do a memory fence.
+  __ Dmb(InnerShareable, BarrierAll);
+  __ Bind(slow_path->GetExitLabel());
+}
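
The Dmb after the status load is what makes the fast path safe: if another
thread just ran the <clinit>, its writes must become visible before this thread
uses the class. A C++ model of the check (the status values are assumptions,
not ART's):

    #include <atomic>

    enum ClassStatus { kStatusResolved = 5, kStatusInitialized = 10 };  // assumed
    struct Class { std::atomic<int> status{kStatusResolved}; };

    bool NeedsInitSlowPath(const Class& cls) {
      int s = cls.status.load(std::memory_order_relaxed);  // Ldr temp, [class]
      if (s < kStatusInitialized) return true;             // Cmp; B.lt slow_path
      // Dmb ish is a full barrier in the generated code; acquire ordering is
      // the part this fast path actually relies on.
      std::atomic_thread_fence(std::memory_order_acquire);
      return false;
    }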
 
 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                              CodeGeneratorARM64* codegen)
@@ -631,27 +800,14 @@
         codegen_(codegen) {}
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
-  M(And)                                                   \
-  M(CheckCast)                                             \
-  M(ClinitCheck)                                           \
-  M(DivZeroCheck)                                          \
-  M(InstanceOf)                                            \
-  M(InvokeInterface)                                       \
-  M(LoadClass)                                             \
-  M(LoadException)                                         \
-  M(LoadString)                                            \
-  M(MonitorOperation)                                      \
-  M(Or)                                                    \
   M(ParallelMove)                                          \
-  M(StaticFieldGet)                                        \
-  M(StaticFieldSet)                                        \
-  M(Throw)                                                 \
-  M(TypeConversion)                                        \
-  M(Xor)                                                    \
+  M(Rem)
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
 enum UnimplementedInstructionBreakCode {
+  // Using a base helps identify when we hit such breakpoints.
+  UnimplementedInstructionBreakCodeBaseCode = 0x900,
 #define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
   FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
 #undef ENUM_UNIMPLEMENTED_INSTRUCTION
@@ -670,9 +826,9 @@
 #undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS
 
 #undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
+#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION
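
FOR_EACH_UNIMPLEMENTED_INSTRUCTION is a classic X-macro: one list expands into
both the break-code enum and the visitor stubs, so the two can never drift
apart. A standalone sketch of the same technique:

    #include <cstdio>

    #define FOR_EACH_TODO(M) M(ParallelMove) M(Rem)

    enum TodoBreakCode {
      kTodoBreakBase = 0x900,  // a base makes the codes recognizable in a debugger
    #define ENUM_TODO(name) kTodoBreak##name,
      FOR_EACH_TODO(ENUM_TODO)
    #undef ENUM_TODO
    };

    #define DEFINE_TODO_VISITOR(name)                                         \
      void Visit##name() {                                                    \
        std::printf("brk #0x%x\n", static_cast<unsigned>(kTodoBreak##name));  \
      }
    FOR_EACH_TODO(DEFINE_TODO_VISITOR)
    #undef DEFINE_TODO_VISITOR

    int main() {
      VisitParallelMove();  // brk #0x901
      VisitRem();           // brk #0x902
    }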
 
-void LocationsBuilderARM64::HandleAddSub(HBinaryOperation* instr) {
-  DCHECK(instr->IsAdd() || instr->IsSub());
+void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
   DCHECK_EQ(instr->InputCount(), 2U);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   Primitive::Type type = instr->GetResultType();
@@ -688,7 +844,7 @@
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetInAt(1, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
 
     default:
@@ -696,9 +852,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::HandleAddSub(HBinaryOperation* instr) {
-  DCHECK(instr->IsAdd() || instr->IsSub());
-
+void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
   Primitive::Type type = instr->GetType();
 
   switch (type) {
@@ -709,8 +863,15 @@
       Operand rhs = InputOperandAt(instr, 1);
       if (instr->IsAdd()) {
         __ Add(dst, lhs, rhs);
-      } else {
+      } else if (instr->IsAnd()) {
+        __ And(dst, lhs, rhs);
+      } else if (instr->IsOr()) {
+        __ Orr(dst, lhs, rhs);
+      } else if (instr->IsSub()) {
         __ Sub(dst, lhs, rhs);
+      } else {
+        DCHECK(instr->IsXor());
+        __ Eor(dst, lhs, rhs);
       }
       break;
     }
@@ -721,22 +882,32 @@
       FPRegister rhs = InputFPRegisterAt(instr, 1);
       if (instr->IsAdd()) {
         __ Fadd(dst, lhs, rhs);
-      } else {
+      } else if (instr->IsSub()) {
         __ Fsub(dst, lhs, rhs);
+      } else {
+        LOG(FATAL) << "Unexpected floating-point binary operation";
       }
       break;
     }
     default:
-      LOG(FATAL) << "Unexpected add/sub type " << type;
+      LOG(FATAL) << "Unexpected binary operation type " << type;
   }
 }
 
 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
-  HandleAddSub(instruction);
+  HandleBinaryOp(instruction);
 }
 
 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
-  HandleAddSub(instruction);
+  HandleBinaryOp(instruction);
+}
+
+void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
+  HandleBinaryOp(instruction);
 }
 
 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
@@ -751,11 +922,10 @@
   LocationSummary* locations = instruction->GetLocations();
   Primitive::Type type = instruction->GetType();
   Register obj = InputRegisterAt(instruction, 0);
-  Register out = OutputRegister(instruction);
   Location index = locations->InAt(1);
   size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
   MemOperand source(obj);
-  UseScratchRegisterScope temps(GetAssembler()->vixl_masm_);
+  UseScratchRegisterScope temps(GetVIXLAssembler());
 
   if (index.IsConstant()) {
     offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -767,7 +937,7 @@
     source = MemOperand(temp, offset);
   }
 
-  codegen_->Load(type, out, source);
+  codegen_->Load(type, OutputCPURegister(instruction), source);
 }
 
 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
@@ -801,18 +971,16 @@
 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
   if (value_type == Primitive::kPrimNot) {
-    __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAputObject).Int32Value()));
-    __ Blr(lr);
-    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
-    DCHECK(!codegen_->IsLeafMethod());
+    codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc());
   } else {
     LocationSummary* locations = instruction->GetLocations();
     Register obj = InputRegisterAt(instruction, 0);
-    Register value = InputRegisterAt(instruction, 2);
+    CPURegister value = InputCPURegisterAt(instruction, 2);
     Location index = locations->InAt(1);
     size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
     MemOperand destination(obj);
-    UseScratchRegisterScope temps(GetAssembler()->vixl_masm_);
+    UseScratchRegisterScope temps(GetVIXLAssembler());
 
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
@@ -828,6 +996,66 @@
   }
 }
 
+void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
+  BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64();
+  codegen_->AddSlowPath(slow_path);
+
+  __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
+  __ B(slow_path->GetEntryLabel(), hs);
+}
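
A single Cmp plus B.hs (unsigned higher-or-same) covers both failure modes: a
negative index reinterpreted as unsigned is huge, so it trips the same
comparison as index >= length. The equivalent C++:

    #include <cstdint>

    bool BoundsCheckFails(int32_t index, int32_t length) {
      // Cmp index, length; B.hs slow_path -- hs is unsigned >=, so
      // index == -1 compares as 0xFFFFFFFF and also takes the slow path.
      return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
    }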
+
+void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register obj = InputRegisterAt(instruction, 0);
+  Register cls = InputRegisterAt(instruction, 1);
+  Register temp = temps.AcquireW();
+
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64();
+  codegen_->AddSlowPath(slow_path);
+
+  // TODO: Avoid this check if we know `obj` is not null.
+  __ Cbz(obj, slow_path->GetExitLabel());
+  // Compare the class of `obj` with `cls`.
+  __ Ldr(temp, HeapOperand(obj, mirror::Object::ClassOffset()));
+  __ Cmp(temp, cls);
+  __ B(ne, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (check->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
+  // We assume the class is not null.
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+      check->GetLoadClass(), check, check->GetDexPc(), true);
+  codegen_->AddSlowPath(slow_path);
+  GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
+}
+
 void LocationsBuilderARM64::VisitCompare(HCompare* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -846,7 +1074,7 @@
       Register result = OutputRegister(instruction);
       Register left = InputRegisterAt(instruction, 0);
       Operand right = InputOperandAt(instruction, 1);
-      __ Subs(result, left, right);
+      __ Subs(result.X(), left, right);
       __ B(eq, &done);
       __ Mov(result, 1);
       __ Cneg(result, result, le);
@@ -893,6 +1121,7 @@
 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
+#undef DEFINE_CONDITION_VISITORS
 #undef FOR_EACH_CONDITION_INSTRUCTION
 
 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
@@ -936,6 +1165,33 @@
   }
 }
 
+void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  SlowPathCodeARM64* slow_path =
+      new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction);
+  codegen_->AddSlowPath(slow_path);
+  Location value = instruction->GetLocations()->InAt(0);
+
+  if (value.IsConstant()) {
+    int64_t divisor = Int64ConstantFrom(value);
+    if (divisor == 0) {
+      __ B(slow_path->GetEntryLabel());
+    } else {
+      LOG(FATAL) << "Divisions by non-null constants should have been optimized away.";
+    }
+  } else {
+    __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+  }
+}
+
 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -955,7 +1211,7 @@
   UNUSED(exit);
   if (kIsDebugBuild) {
     down_cast<Arm64Assembler*>(GetAssembler())->Comment("Unreachable");
-    __ Brk(0);    // TODO: Introduce special markers for such code locations.
+    __ Brk(__LINE__);    // TODO: Introduce special markers for such code locations.
   }
 }
 
@@ -1038,7 +1294,7 @@
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   MemOperand field = MemOperand(InputRegisterAt(instruction, 0),
                                 instruction->GetFieldOffset().Uint32Value());
-  codegen_->Load(instruction->GetType(), OutputRegister(instruction), field);
+  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
 }
 
 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
@@ -1049,14 +1305,56 @@
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   Primitive::Type field_type = instruction->GetFieldType();
-  Register value = InputRegisterAt(instruction, 1);
+  CPURegister value = InputCPURegisterAt(instruction, 1);
   Register obj = InputRegisterAt(instruction, 0);
   codegen_->Store(field_type, value, MemOperand(obj, instruction->GetFieldOffset().Uint32Value()));
   if (field_type == Primitive::kPrimNot) {
-    codegen_->MarkGCCard(obj, value);
+    codegen_->MarkGCCard(obj, Register(value));
   }
 }
 
+void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary::CallKind call_kind =
+      instruction->IsClassFinal() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), true);  // The output does overlap inputs.
+}
+
+void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register obj = InputRegisterAt(instruction, 0);
+  Register cls = InputRegisterAt(instruction, 1);
+  Register out = OutputRegister(instruction);
+
+  vixl::Label done;
+
+  // Return 0 if `obj` is null.
+  // TODO: Avoid this check if we know `obj` is not null.
+  __ Mov(out, 0);
+  __ Cbz(obj, &done);
+
+  // Compare the class of `obj` with `cls`.
+  __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset()));
+  __ Cmp(out, cls);
+  if (instruction->IsClassFinal()) {
+    // Classes must be equal for the instanceof to succeed.
+    __ Cset(out, eq);
+  } else {
+    // If the classes are not equal, we go into a slow path.
+    DCHECK(locations->OnlyCallsOnSlowPath());
+    SlowPathCodeARM64* slow_path =
+        new (GetGraph()->GetArena()) TypeCheckSlowPathARM64();
+    codegen_->AddSlowPath(slow_path);
+    __ B(ne, slow_path->GetEntryLabel());
+    __ Mov(out, 1);
+    __ Bind(slow_path->GetExitLabel());
+  }
+
+  __ Bind(&done);
+}
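
For a final class the whole test collapses to a null check plus one pointer
compare; only the non-final case needs the (still unimplemented) slow path.
The fast path in plain C++, with invented types:

    struct Class {};
    struct Object { const Class* klass; };

    bool InstanceOfFinalClass(const Object* obj, const Class* cls) {
      if (obj == nullptr) return false;  // Mov out, 0; Cbz obj, done
      return obj->klass == cls;          // Ldr; Cmp; Cset out, eq
    }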
+
 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
   locations->SetOut(Location::ConstantLocation(constant));
@@ -1067,14 +1365,6 @@
   UNUSED(constant);
 }
 
-void LocationsBuilderARM64::VisitInvokeStatic(HInvokeStatic* invoke) {
-  HandleInvoke(invoke);
-}
-
-void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  HandleInvoke(invoke);
-}
-
 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
@@ -1092,6 +1382,50 @@
   }
 }
 
+void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
+  Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
+          (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+  Location receiver = invoke->GetLocations()->InAt(0);
+  Offset class_offset = mirror::Object::ClassOffset();
+  Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+
+  // The register ip1 is required to be used for the hidden argument in
+  // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
+  UseScratchRegisterScope scratch_scope(GetVIXLAssembler());
+  scratch_scope.Exclude(ip1);
+  __ Mov(ip1, invoke->GetDexMethodIndex());
+
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ Ldr(temp, StackOperandFrom(receiver));
+    __ Ldr(temp, HeapOperand(temp, class_offset));
+  } else {
+    __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
+  }
+  // temp = temp->GetImtEntryAt(method_offset);
+  __ Ldr(temp, HeapOperand(temp, method_offset));
+  // lr = temp->GetEntryPoint();
+  __ Ldr(lr, HeapOperand(temp, entry_point));
+  // lr();
+  __ Blr(lr);
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
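
The IMT lookup hashes the dex method index into a fixed-size table embedded in
the Class object, which is exactly why conflicts (the b/18116999 TODO) are
possible. The slot arithmetic, with assumed constants rather than ART's real
values:

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t kImtSize = 64;                  // assumed table size
    constexpr uint32_t kEmbeddedImTableOffset = 0x80;  // assumed offset in Class
    constexpr uint32_t kImTableEntrySize = 8;          // assumed entry size

    uint32_t ImtSlotOffset(uint32_t imt_index) {
      return kEmbeddedImTableOffset + (imt_index % kImtSize) * kImTableEntrySize;
    }

    int main() {
      std::printf("0x%x\n", ImtSlotOffset(70));  // 70 % 64 == 6 -> 0x80 + 48
    }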
+
+void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderARM64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  HandleInvoke(invoke);
+}
+
 void InstructionCodeGeneratorARM64::VisitInvokeStatic(HInvokeStatic* invoke) {
   Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
   // Make sure that ArtMethod* is passed in W0 as per the calling convention
@@ -1107,7 +1441,7 @@
   // Currently we implement the app -> app logic, which looks up in the resolve cache.
 
   // temp = method;
-  __ Ldr(temp, MemOperand(sp, kCurrentMethodStackOffset));
+  codegen_->LoadCurrentMethod(temp);
   // temp = temp->dex_cache_resolved_methods_;
   __ Ldr(temp, MemOperand(temp.X(),
                           mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
@@ -1115,7 +1449,8 @@
   __ Ldr(temp, MemOperand(temp.X(), index_in_cache));
   // lr = temp->entry_point_from_quick_compiled_code_;
   __ Ldr(lr, MemOperand(temp.X(),
-                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                            kArm64WordSize).SizeValue()));
   // lr();
   __ Blr(lr);
 
@@ -1130,7 +1465,7 @@
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
     invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
   Offset class_offset = mirror::Object::ClassOffset();
-  Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset();
+  Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
@@ -1138,8 +1473,7 @@
     __ Ldr(temp.W(), MemOperand(temp, class_offset.SizeValue()));
   } else {
     DCHECK(receiver.IsRegister());
-    __ Ldr(temp.W(), HeapOperandFrom(receiver, Primitive::kPrimNot,
-                                     class_offset));
+    __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
   }
   // temp = temp->GetMethodAt(method_offset);
   __ Ldr(temp.W(), MemOperand(temp, method_offset));
@@ -1151,6 +1485,50 @@
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
+void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
+  LocationSummary::CallKind call_kind = cls->CanCallRuntime() ? LocationSummary::kCallOnSlowPath
+                                                              : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
+  Register out = OutputRegister(cls);
+  if (cls->IsReferrersClass()) {
+    DCHECK(!cls->CanCallRuntime());
+    DCHECK(!cls->MustGenerateClinitCheck());
+    codegen_->LoadCurrentMethod(out);
+    __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset()));
+  } else {
+    DCHECK(cls->CanCallRuntime());
+    codegen_->LoadCurrentMethod(out);
+    __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheResolvedTypesOffset()));
+    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+
+    SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(out, slow_path->GetEntryLabel());
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
+    }
+  }
+}
+
+void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
+  MemOperand exception = MemOperand(tr, Thread::ExceptionOffset<kArm64WordSize>().Int32Value());
+  __ Ldr(OutputRegister(instruction), exception);
+  __ Str(wzr, exception);
+}
+
 void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) {
   load->SetLocations(nullptr);
 }
@@ -1160,6 +1538,24 @@
   UNUSED(load);
 }
 
+void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+  codegen_->AddSlowPath(slow_path);
+
+  Register out = OutputRegister(load);
+  codegen_->LoadCurrentMethod(out);
+  __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheStringsOffset()));
+  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+  __ Cbz(out, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
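
VisitLoadString is the standard cache-with-slow-path shape: read the dex-cache
slot and, only if it is null, fall into LoadStringSlowPathARM64, which calls
pResolveString and moves the result back. The same shape in plain C++ (String,
DexCache and ResolveString are stand-ins):

    #include <cstdint>

    struct String {};
    struct DexCache { String* strings[1024] = {}; };

    String* ResolveString(DexCache& cache, uint32_t idx) {  // stand-in slow path
      static String interned;
      return cache.strings[idx] = &interned;
    }

    String* LoadString(DexCache& cache, uint32_t string_index) {
      String* s = cache.strings[string_index];    // Ldr out, [cache, #slot]
      if (s != nullptr) return s;                 // Cbz out, slow_path
      return ResolveString(cache, string_index);  // slow path fills the cache
    }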
+
 void LocationsBuilderARM64::VisitLocal(HLocal* local) {
   local->SetLocations(nullptr);
 }
@@ -1178,6 +1574,20 @@
   UNUSED(constant);
 }
 
+void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
+  codegen_->InvokeRuntime(instruction->IsEnter()
+        ? QUICK_ENTRY_POINT(pLockObject) : QUICK_ENTRY_POINT(pUnlockObject),
+      instruction,
+      instruction->GetDexPc());
+}
+
 void LocationsBuilderARM64::VisitMul(HMul* mul) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
@@ -1193,7 +1603,7 @@
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetInAt(1, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
 
     default:
@@ -1223,15 +1633,15 @@
       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
+    case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RegisterOrConstant(neg->InputAt(0)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
-    }
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
 
     default:
@@ -1248,7 +1658,7 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
+      __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
       break;
 
     default:
@@ -1273,14 +1683,10 @@
   DCHECK(type_index.Is(w0));
   Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot);
   DCHECK(current_method.Is(w1));
-  __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
+  codegen_->LoadCurrentMethod(current_method);
   __ Mov(type_index, instruction->GetTypeIndex());
-  int32_t quick_entrypoint_offset =
-      QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocArrayWithAccessCheck).Int32Value();
-  __ Ldr(lr, MemOperand(tr, quick_entrypoint_offset));
-  __ Blr(lr);
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
-  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->InvokeRuntime(
+      QUICK_ENTRY_POINT(pAllocArrayWithAccessCheck), instruction, instruction->GetDexPc());
 }
 
 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -1298,14 +1704,10 @@
   DCHECK(type_index.Is(w0));
   Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot);
   DCHECK(current_method.Is(w1));
-  __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
+  codegen_->LoadCurrentMethod(current_method);
   __ Mov(type_index, instruction->GetTypeIndex());
-  int32_t quick_entrypoint_offset =
-      QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocObjectWithAccessCheck).Int32Value();
-  __ Ldr(lr, MemOperand(tr, quick_entrypoint_offset));
-  __ Blr(lr);
-  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
-  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->InvokeRuntime(
+      QUICK_ENTRY_POINT(pAllocObjectWithAccessCheck), instruction, instruction->GetDexPc());
 }
 
 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -1354,6 +1756,14 @@
   }
 }
 
+void LocationsBuilderARM64::VisitOr(HOr* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
+  HandleBinaryOp(instruction);
+}
+
 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
@@ -1434,31 +1844,43 @@
 }
 
 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
-  HandleAddSub(instruction);
+  HandleBinaryOp(instruction);
 }
 
 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
-  HandleAddSub(instruction);
+  HandleBinaryOp(instruction);
 }
 
-void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
+void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  Register cls = InputRegisterAt(instruction, 0);
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), MemOperand(cls, offset));
+}
+
+void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  if (instruction->HasUses()) {
-    locations->SetOut(Location::SameAsFirstInput());
-  }
 }
 
-void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(
-      instruction, locations->InAt(0), locations->InAt(1));
-  codegen_->AddSlowPath(slow_path);
+void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  CPURegister value = InputCPURegisterAt(instruction, 1);
+  Register cls = InputRegisterAt(instruction, 0);
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Primitive::Type field_type = instruction->GetFieldType();
 
-  __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
-  __ B(slow_path->GetEntryLabel(), hs);
+  codegen_->Store(field_type, value, MemOperand(cls, offset));
+  if (field_type == Primitive::kPrimNot) {
+    codegen_->MarkGCCard(cls, Register(value));
+  }
 }
 
 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -1485,5 +1907,74 @@
   UNUSED(temp);
 }
 
+void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
+  codegen_->InvokeRuntime(
+      QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc());
+}
+
+void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
+  Primitive::Type input_type = conversion->GetInputType();
+  Primitive::Type result_type = conversion->GetResultType();
+  if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
+      (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
+    LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
+  }
+
+  if (IsFPType(input_type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+
+  if (IsFPType(result_type)) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
+  Primitive::Type result_type = conversion->GetResultType();
+  Primitive::Type input_type = conversion->GetInputType();
+
+  DCHECK_NE(input_type, result_type);
+
+  if (IsIntegralType(result_type) && IsIntegralType(input_type)) {
+    int result_size = Primitive::ComponentSize(result_type);
+    int input_size = Primitive::ComponentSize(input_type);
+    int min_size = kBitsPerByte * std::min(result_size, input_size);
+    if ((result_type == Primitive::kPrimChar) ||
+        ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
+      __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size);
+    } else {
+      __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size);
+    }
+    return;
+  }
+
+  LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+             << " to " << result_type;
+}
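
Each integral conversion lowers to a single bitfield extract over the low
min_size bits: zero-extension (Ubfx) whenever char is the result type or the
widening source, sign-extension (Sbfx) otherwise. Portable C++ equivalents of
the two extracts (assuming arithmetic right shift on signed values, as all
mainstream compilers provide):

    #include <cassert>
    #include <cstdint>

    int64_t Sbfx(int64_t value, int width) {  // Sbfx out, in, #0, #width
      int shift = 64 - width;
      return static_cast<int64_t>(static_cast<uint64_t>(value) << shift) >> shift;
    }
    uint64_t Ubfx(uint64_t value, int width) {  // Ubfx out, in, #0, #width
      return value & ((uint64_t{1} << width) - 1);
    }

    int main() {
      assert(Sbfx(0x1FF, 8) == -1);         // int-to-byte: low 8 bits, sign-extended
      assert(Ubfx(0x12345, 16) == 0x2345);  // int-to-char: low 16 bits, zero-extended
    }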
+
+void LocationsBuilderARM64::VisitXor(HXor* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
+  HandleBinaryOp(instruction);
+}
+
+#undef __
+#undef QUICK_ENTRY_POINT
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 54e87f4..a40f27f 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -29,8 +29,11 @@
 namespace arm64 {
 
 class CodeGeneratorARM64;
+class SlowPathCodeARM64;
 
-static constexpr size_t kArm64WordSize = 8;
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kArm64WordSize = kArm64PointerSize;
+
 static const vixl::Register kParameterCoreRegisters[] = {
   vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7
 };
@@ -103,9 +106,11 @@
   void LoadCurrentMethod(XRegister reg);
 
   Arm64Assembler* GetAssembler() const { return assembler_; }
+  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
 
  private:
-  void HandleAddSub(HBinaryOperation* instr);
+  void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+  void HandleBinaryOp(HBinaryOperation* instr);
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
@@ -124,7 +129,7 @@
 #undef DECLARE_VISIT_INSTRUCTION
 
  private:
-  void HandleAddSub(HBinaryOperation* instr);
+  void HandleBinaryOp(HBinaryOperation* instr);
   void HandleInvoke(HInvoke* instr);
 
   CodeGeneratorARM64* const codegen_;
@@ -162,9 +167,10 @@
     return kArm64WordSize;
   }
 
-  uintptr_t GetAddressOf(HBasicBlock* block ATTRIBUTE_UNUSED) const OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: GetAddressOf";
-    return 0u;
+  uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
+    vixl::Label* block_entry_label = GetLabelOf(block);
+    DCHECK(block_entry_label->IsBound());
+    return block_entry_label->location();
   }
 
   size_t FrameEntrySpillSize() const OVERRIDE;
@@ -172,6 +178,7 @@
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
   Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
+  vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; }
 
   // Emit a write barrier.
   void MarkGCCard(vixl::Register object, vixl::Register value);
@@ -185,18 +192,18 @@
 
   Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
 
-  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE {
+  size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
     UNUSED(stack_index);
     UNUSED(reg_id);
-    UNIMPLEMENTED(INFO) << "TODO: SaveCoreRegister";
-    return 0;
+    LOG(INFO) << "CodeGeneratorARM64::SaveCoreRegister()";
+    return kArm64WordSize;
   }
 
-  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE {
+  size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
     UNUSED(stack_index);
     UNUSED(reg_id);
-    UNIMPLEMENTED(INFO) << "TODO: RestoreCoreRegister";
-    return 0;
+    LOG(INFO) << "CodeGeneratorARM64::RestoreCoreRegister()";
+    return kArm64WordSize;
   }
 
   // The number of registers that can be allocated. The register allocator may
@@ -226,9 +233,14 @@
   }
 
   // Code generation helpers.
+  void MoveConstant(vixl::CPURegister destination, HConstant* constant);
   void MoveHelper(Location destination, Location source, Primitive::Type type);
-  void Load(Primitive::Type type, vixl::Register dst, const vixl::MemOperand& src);
-  void Store(Primitive::Type type, vixl::Register rt, const vixl::MemOperand& dst);
+  void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
+  void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
+  void LoadCurrentMethod(vixl::Register current_method);
+
+  // Generate code to invoke a runtime entry point.
+  void InvokeRuntime(int32_t offset, HInstruction* instruction, uint32_t dex_pc);
 
   ParallelMoveResolver* GetMoveResolver() OVERRIDE {
     UNIMPLEMENTED(INFO) << "TODO: MoveResolver";
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 100f380..3c53cea 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -36,12 +36,15 @@
 static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
-static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
 static constexpr size_t kRuntimeParameterCoreRegistersLength =
     arraysize(kRuntimeParameterCoreRegisters);
 static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
 static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
 
+// Marker for places that can be updated once we don't follow the quick ABI.
+static constexpr bool kFollowsQuickABI = true;
+
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
   InvokeRuntimeCallingConvention()
@@ -100,19 +103,24 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
 };
 
-class DivMinusOneSlowPathX86 : public SlowPathCodeX86 {
+class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 {
  public:
-  explicit DivMinusOneSlowPathX86(Register reg) : reg_(reg) {}
+  explicit DivRemMinusOneSlowPathX86(Register reg, bool is_div) : reg_(reg), is_div_(is_div) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ negl(reg_);
+    if (is_div_) {
+      __ negl(reg_);
+    } else {
+      __ movl(reg_, Immediate(0));
+    }
     __ jmp(GetExitLabel());
   }
 
  private:
   Register reg_;
-  DISALLOW_COPY_AND_ASSIGN(DivMinusOneSlowPathX86);
+  bool is_div_;
+  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
 };
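
The rename reflects what the slow path is for: x86 idiv raises a #DE fault on
INT_MIN / -1 instead of wrapping, so the generated code branches here and
materializes the Java-defined results directly. Why negl and movl $0 are the
right answers:

    #include <cassert>
    #include <climits>
    #include <cstdint>

    int32_t DivByMinusOne(int32_t dividend) {  // negl reg_ (two's-complement wrap)
      return static_cast<int32_t>(0u - static_cast<uint32_t>(dividend));
    }
    int32_t RemByMinusOne(int32_t /* dividend */) {  // movl $0, reg_
      return 0;
    }

    int main() {
      assert(DivByMinusOne(INT_MIN) == INT_MIN);  // Java: MIN_VALUE / -1 == MIN_VALUE
      assert(RemByMinusOne(INT_MIN) == 0);        // Java: MIN_VALUE % -1 == 0
    }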
 
 class StackOverflowCheckSlowPathX86 : public SlowPathCodeX86 {
@@ -427,6 +435,7 @@
   blocked_core_registers_[ESP] = true;
 
   // TODO: We currently don't use Quick's callee saved registers.
+  DCHECK(kFollowsQuickABI);
   blocked_core_registers_[EBP] = true;
   blocked_core_registers_[ESI] = true;
   blocked_core_registers_[EDI] = true;
@@ -575,7 +584,7 @@
       __ movss(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     }
   } else {
-    DCHECK(destination.IsStackSlot());
+    DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>());
     } else if (source.IsFpuRegister()) {
@@ -636,7 +645,7 @@
       LOG(FATAL) << "Unimplemented";
     }
   } else {
-    DCHECK(destination.IsDoubleStackSlot());
+    DCHECK(destination.IsDoubleStackSlot()) << destination;
     if (source.IsRegisterPair()) {
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
@@ -662,31 +671,44 @@
 }
 
 void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
-  if (instruction->IsIntConstant()) {
-    Immediate imm(instruction->AsIntConstant()->GetValue());
-    if (location.IsRegister()) {
-      __ movl(location.As<Register>(), imm);
-    } else if (location.IsStackSlot()) {
-      __ movl(Address(ESP, location.GetStackIndex()), imm);
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
-    }
-  } else if (instruction->IsLongConstant()) {
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegister()) {
-      __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
-      __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
-    } else if (location.IsDoubleStackSlot()) {
-      __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
-      __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  }
+
+  if (locations != nullptr && locations->Out().IsConstant()) {
+    HConstant* const_to_move = locations->Out().GetConstant();
+    if (const_to_move->IsIntConstant()) {
+      Immediate imm(const_to_move->AsIntConstant()->GetValue());
+      if (location.IsRegister()) {
+        __ movl(location.As<Register>(), imm);
+      } else if (location.IsStackSlot()) {
+        __ movl(Address(ESP, location.GetStackIndex()), imm);
+      } else {
+        DCHECK(location.IsConstant());
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
+    } else if (const_to_move->IsLongConstant()) {
+      int64_t value = const_to_move->AsLongConstant()->GetValue();
+      if (location.IsRegisterPair()) {
+        __ movl(location.AsRegisterPairLow<Register>(), Immediate(Low32Bits(value)));
+        __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
+      } else if (location.IsDoubleStackSlot()) {
+        __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
+        __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
+      } else {
+        DCHECK(location.IsConstant());
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
     }
   } else if (instruction->IsTemporary()) {
     Location temp_location = GetTemporaryLocation(instruction->AsTemporary());
-    Move32(location, temp_location);
+    if (temp_location.IsStackSlot()) {
+      Move32(location, temp_location);
+    } else {
+      DCHECK(temp_location.IsDoubleStackSlot());
+      Move64(location, temp_location);
+    }
   } else if (instruction->IsLoadLocal()) {
     int slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
     switch (instruction->GetType()) {
@@ -718,12 +740,12 @@
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
       case Primitive::kPrimFloat:
-        Move32(location, instruction->GetLocations()->Out());
+        Move32(location, locations->Out());
         break;
 
       case Primitive::kPrimLong:
       case Primitive::kPrimDouble:
-        Move64(location, instruction->GetLocations()->Out());
+        Move64(location, locations->Out());
         break;
 
       default:
@@ -1098,7 +1120,8 @@
   // temp = temp[index_in_cache]
   __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
   // (temp + offset_of_quick_compiled_code)()
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+  __ call(Address(
+      temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1162,7 +1185,8 @@
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+  __ call(Address(
+      temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1197,7 +1221,8 @@
   // temp = temp->GetImtEntryAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value()));
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86WordSize).Int32Value()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1277,10 +1302,42 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   switch (result_type) {
+    case Primitive::kPrimByte:
+      switch (input_type) {
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-byte' instruction.
+          locations->SetInAt(0, Location::Any());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case Primitive::kPrimShort:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-short' instruction.
+          locations->SetInAt(0, Location::Any());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimInt:
       switch (input_type) {
         case Primitive::kPrimLong:
-          // long-to-int conversion.
+          // Processing a Dex `long-to-int' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
@@ -1303,7 +1360,7 @@
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          // int-to-long conversion.
+          // Processing a Dex `int-to-long' instruction.
           locations->SetInAt(0, Location::RegisterLocation(EAX));
           locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
           break;
@@ -1320,10 +1377,67 @@
       }
       break;
 
+    case Primitive::kPrimChar:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-char' instruction.
+          locations->SetInAt(0, Location::Any());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimDouble:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Type conversion from " << input_type
-                 << " to " << result_type << " not yet implemented";
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimFloat:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
       break;
 
     default:
@@ -1339,10 +1453,56 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   switch (result_type) {
+    case Primitive::kPrimByte:
+      switch (input_type) {
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-byte' instruction.
+          if (in.IsRegister()) {
+            __ movsxb(out.As<Register>(), in.As<ByteRegister>());
+          } else if (in.IsStackSlot()) {
+            __ movsxb(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
+            __ movl(out.As<Register>(), Immediate(static_cast<int8_t>(value)));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case Primitive::kPrimShort:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-short' instruction.
+          if (in.IsRegister()) {
+            __ movsxw(out.As<Register>(), in.As<Register>());
+          } else if (in.IsStackSlot()) {
+            __ movsxw(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
+            __ movl(out.As<Register>(), Immediate(static_cast<int16_t>(value)));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimInt:
       switch (input_type) {
         case Primitive::kPrimLong:
-          // long-to-int conversion.
+          // Processing a Dex `long-to-int' instruction.
           if (in.IsRegisterPair()) {
             __ movl(out.As<Register>(), in.AsRegisterPairLow<Register>());
           } else if (in.IsDoubleStackSlot()) {
@@ -1373,7 +1533,7 @@
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          // int-to-long conversion.
+          // Processing a Dex `int-to-long' instruction.
           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
           DCHECK_EQ(in.As<Register>(), EAX);
@@ -1392,10 +1552,72 @@
       }
       break;
 
+    case Primitive::kPrimChar:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-char' instruction.
+          if (in.IsRegister()) {
+            __ movzxw(out.As<Register>(), in.As<Register>());
+          } else if (in.IsStackSlot()) {
+            __ movzxw(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
+            __ movl(out.As<Register>(), Immediate(static_cast<uint16_t>(value)));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-float' instruction.
+          __ cvtsi2ss(out.As<XmmRegister>(), in.As<Register>());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimDouble:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Type conversion from " << input_type
-                 << " to " << result_type << " not yet implemented";
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-double' instruction.
+          __ cvtsi2sd(out.As<XmmRegister>(), in.As<Register>());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimFloat:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
       break;
 
     default:
@@ -1660,50 +1882,23 @@
   }
 }
 
-void LocationsBuilderX86::VisitDiv(HDiv* div) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
-  switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location::RegisterLocation(EAX));
-      locations->SetInAt(1, Location::RequiresRegister());
-      locations->SetOut(Location::SameAsFirstInput());
-      // Intel uses edx:eax as the dividend.
-      locations->AddTemp(Location::RegisterLocation(EDX));
-      break;
-    }
-    case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
-      break;
-    }
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble: {
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, Location::RequiresFpuRegister());
-      locations->SetOut(Location::SameAsFirstInput());
-      break;
-    }
+void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
 
-    default:
-      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
-  }
-}
-
-void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
-  LocationSummary* locations = div->GetLocations();
+  LocationSummary* locations = instruction->GetLocations();
+  Location out = locations->Out();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-  DCHECK(first.Equals(locations->Out()));
+  bool is_div = instruction->IsDiv();
 
-  switch (div->GetResultType()) {
+  switch (instruction->GetResultType()) {
     case Primitive::kPrimInt: {
-      Register first_reg = first.As<Register>();
       Register second_reg = second.As<Register>();
-      DCHECK_EQ(EAX, first_reg);
-      DCHECK_EQ(EDX, locations->GetTemp(0).As<Register>());
+      DCHECK_EQ(EAX, first.As<Register>());
+      DCHECK_EQ(is_div ? EAX : EDX, out.As<Register>());
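+      // idiv takes its dividend in edx:eax and leaves the quotient in EAX
+      // and the remainder in EDX.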
 
       SlowPathCodeX86* slow_path =
-          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86(first_reg);
+          new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.As<Register>(), is_div);
       codegen_->AddSlowPath(slow_path);
 
       // 0x80000000/-1 triggers an arithmetic exception!
@@ -1723,16 +1918,91 @@
     }
 
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      InvokeRuntimeCallingConvention calling_convention;
+      DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
+      DCHECK_EQ(EAX, out.AsRegisterPairLow<Register>());
+      DCHECK_EQ(EDX, out.AsRegisterPairHigh<Register>());
+
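+      // There is no single x86-32 instruction for 64-bit division, so defer
+      // to the pLdiv / pLmod runtime entrypoints.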
+      if (is_div) {
+        __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLdiv)));
+      } else {
+        __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pLmod)));
+      }
+      uint32_t dex_pc = is_div
+          ? instruction->AsDiv()->GetDexPc()
+          : instruction->AsRem()->GetDexPc();
+      codegen_->RecordPcInfo(instruction, dex_pc);
+
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type for GenerateDivRemIntegral " << instruction->GetResultType();
+  }
+}
+
+void LocationsBuilderX86::VisitDiv(HDiv* div) {
+  LocationSummary::CallKind call_kind = div->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind);
+
+  switch (div->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RegisterLocation(EAX));
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      // Intel uses edx:eax as the dividend.
+      locations->AddTemp(Location::RegisterLocation(EDX));
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // Runtime helper puts the result in EAX, EDX.
+      locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected div type " << div->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
+  LocationSummary* locations = div->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  switch (div->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GenerateDivRemIntegral(div);
       break;
     }
 
     case Primitive::kPrimFloat: {
+      DCHECK(first.Equals(out));
       __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
+      DCHECK(first.Equals(out));
       __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
     }
@@ -1742,10 +2012,76 @@
   }
 }
 
+void LocationsBuilderX86::VisitRem(HRem* rem) {
+  LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
+      ? LocationSummary::kCall
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RegisterLocation(EAX));
+      locations->SetInAt(1, Location::RequiresRegister());
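+      // idiv puts the remainder in EDX.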
+      locations->SetOut(Location::RegisterLocation(EDX));
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConvention calling_convention;
+      locations->SetInAt(0, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, Location::RegisterPairLocation(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // Runtime helper puts the result in EAX, EDX.
+      locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GenerateDivRemIntegral(rem);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << type;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
 void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::Any());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::Any());
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
+      if (!instruction->InputAt(0)->IsConstant()) {
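+        // The temporary holds the OR of the two halves of the value under test.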
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
+  }
   if (instruction->HasUses()) {
     locations->SetOut(Location::SameAsFirstInput());
   }
@@ -1758,18 +2094,39 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  if (value.IsRegister()) {
-    __ testl(value.As<Register>(), value.As<Register>());
-  } else if (value.IsStackSlot()) {
-    __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
-  } else {
-    DCHECK(value.IsConstant()) << value;
-    if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
-    __ jmp(slow_path->GetEntryLabel());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ testl(value.As<Register>(), value.As<Register>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsStackSlot()) {
+        __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
     }
-    return;
+    case Primitive::kPrimLong: {
+      if (value.IsRegisterPair()) {
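+        // A 64-bit value is zero iff the OR of its two halves is zero.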
+        Register temp = locations->GetTemp(0).As<Register>();
+        __ movl(temp, value.AsRegisterPairLow<Register>());
+        __ orl(temp, value.AsRegisterPairHigh<Register>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck" << instruction->GetType();
   }
-  __ j(kEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
@@ -1931,7 +2288,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   Primitive::Type field_type = instruction->GetFieldType();
-  bool is_object_type = field_type == Primitive::kPrimNot;
+  bool needs_write_barrier =
+    CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+
   bool is_byte_type = (field_type == Primitive::kPrimBoolean)
       || (field_type == Primitive::kPrimByte);
   // The register allocator does not support multiple
@@ -1943,7 +2302,7 @@
     locations->SetInAt(1, Location::RequiresRegister());
   }
   // Temporary registers for the write barrier.
-  if (is_object_type) {
+  if (needs_write_barrier) {
     locations->AddTemp(Location::RequiresRegister());
     // Ensure the card is in a byte register.
     locations->AddTemp(Location::RegisterLocation(ECX));
@@ -1976,7 +2335,7 @@
       Register value = locations->InAt(1).As<Register>();
       __ movl(Address(obj, offset), value);
 
-      if (field_type == Primitive::kPrimNot) {
+      if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
         Register temp = locations->GetTemp(0).As<Register>();
         Register card = locations->GetTemp(1).As<Register>();
         codegen_->MarkGCCard(temp, card, obj, value);
@@ -2222,11 +2581,20 @@
 
 void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+
+  DCHECK(kFollowsQuickABI);
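+  // With both a non-constant index and a non-constant value there are not
+  // enough registers left on x86 for the two write barrier temporaries, so
+  // such stores are routed through the runtime instead.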
+  bool not_enough_registers = needs_write_barrier
+      && !instruction->GetValue()->IsConstant()
+      && !instruction->GetIndex()->IsConstant();
+  bool needs_runtime_call = instruction->NeedsTypeCheck() || not_enough_registers;
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      value_type == Primitive::kPrimNot ? LocationSummary::kCall : LocationSummary::kNoCall);
+      needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
 
-  if (value_type == Primitive::kPrimNot) {
+  if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -2245,6 +2613,12 @@
     } else {
       locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
     }
+    // Temporary registers for the write barrier.
+    if (needs_write_barrier) {
+      locations->AddTemp(Location::RequiresRegister());
+      // Ensure the card is in a byte register.
+      locations->AddTemp(Location::RegisterLocation(ECX));
+    }
   }
 }
 
@@ -2254,6 +2628,9 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
+  bool needs_runtime_call = locations->WillCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -2302,34 +2679,45 @@
       break;
     }
 
-    case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        if (value.IsRegister()) {
-          __ movl(Address(obj, offset), value.As<Register>());
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      if (!needs_runtime_call) {
+        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          if (value.IsRegister()) {
+            __ movl(Address(obj, offset), value.As<Register>());
+          } else {
+            DCHECK(value.IsConstant()) << value;
+            __ movl(Address(obj, offset),
+                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          }
         } else {
-          __ movl(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          DCHECK(index.IsRegister()) << index;
+          if (value.IsRegister()) {
+            __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset),
+                    value.As<Register>());
+          } else {
+            DCHECK(value.IsConstant()) << value;
+            __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset),
+                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          }
+        }
+
+        if (needs_write_barrier) {
+          Register temp = locations->GetTemp(0).As<Register>();
+          Register card = locations->GetTemp(1).As<Register>();
+          codegen_->MarkGCCard(temp, card, obj, value.As<Register>());
         }
       } else {
-        if (value.IsRegister()) {
-          __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset),
-                  value.As<Register>());
-        } else {
-          __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        DCHECK(!codegen_->IsLeafMethod());
+        __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAputObject)));
+        codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
       }
       break;
     }
 
-    case Primitive::kPrimNot: {
-      DCHECK(!codegen_->IsLeafMethod());
-      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAputObject)));
-      codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
-      break;
-    }
-
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       if (index.IsConstant()) {
@@ -2694,7 +3082,8 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   Primitive::Type field_type = instruction->GetFieldType();
-  bool is_object_type = field_type == Primitive::kPrimNot;
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
   bool is_byte_type = (field_type == Primitive::kPrimBoolean)
       || (field_type == Primitive::kPrimByte);
   // The register allocator does not support multiple
@@ -2706,7 +3095,7 @@
     locations->SetInAt(1, Location::RequiresRegister());
   }
   // Temporary registers for the write barrier.
-  if (is_object_type) {
+  if (needs_write_barrier) {
     locations->AddTemp(Location::RequiresRegister());
     // Ensure the card is in a byte register.
     locations->AddTemp(Location::RegisterLocation(ECX));
@@ -2739,7 +3128,7 @@
       Register value = locations->InAt(1).As<Register>();
       __ movl(Address(cls, offset), value);
 
-      if (field_type == Primitive::kPrimNot) {
+      if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
         Register temp = locations->GetTemp(0).As<Register>();
         Register card = locations->GetTemp(1).As<Register>();
         codegen_->MarkGCCard(temp, card, cls, value);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 841b28b..0aff6cc 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -25,7 +25,8 @@
 namespace art {
 namespace x86 {
 
-static constexpr size_t kX86WordSize = 4;
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kX86WordSize = kX86PointerSize;
 
 class CodeGeneratorX86;
 class SlowPathCodeX86;
@@ -130,6 +131,7 @@
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
   void HandleBitwiseOperation(HBinaryOperation* instruction);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
 
   X86Assembler* const assembler_;
   CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6f3b161..97f5e5c 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -106,19 +106,36 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
 };
 
-class DivMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
+class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
-  explicit DivMinusOneSlowPathX86_64(Register reg) : reg_(reg) {}
+  explicit DivRemMinusOneSlowPathX86_64(Register reg, Primitive::Type type, bool is_div)
+      : cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
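+  // Dividing by -1 yields the negated dividend with a remainder of 0;
+  // handling it here avoids the #DE fault that idiv raises when the most
+  // negative int/long is divided by -1.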
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
-    __ negl(CpuRegister(reg_));
+    if (type_ == Primitive::kPrimInt) {
+      if (is_div_) {
+        __ negl(cpu_reg_);
+      } else {
+        __ movl(cpu_reg_, Immediate(0));
+      }
+    } else {
+      DCHECK_EQ(Primitive::kPrimLong, type_);
+      if (is_div_) {
+        __ negq(cpu_reg_);
+      } else {
+        __ movq(cpu_reg_, Immediate(0));
+      }
+    }
     __ jmp(GetExitLabel());
   }
 
  private:
-  Register reg_;
-  DISALLOW_COPY_AND_ASSIGN(DivMinusOneSlowPathX86_64);
+  const CpuRegister cpu_reg_;
+  const Primitive::Type type_;
+  const bool is_div_;
+  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
 };
 
 class StackOverflowCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
@@ -577,26 +594,34 @@
 void CodeGeneratorX86_64::Move(HInstruction* instruction,
                                Location location,
                                HInstruction* move_for) {
-  if (instruction->IsIntConstant()) {
-    Immediate imm(instruction->AsIntConstant()->GetValue());
-    if (location.IsRegister()) {
-      __ movl(location.As<CpuRegister>(), imm);
-    } else if (location.IsStackSlot()) {
-      __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
-    }
-  } else if (instruction->IsLongConstant()) {
-    int64_t value = instruction->AsLongConstant()->GetValue();
-    if (location.IsRegister()) {
-      __ movq(location.As<CpuRegister>(), Immediate(value));
-    } else if (location.IsDoubleStackSlot()) {
-      __ movq(CpuRegister(TMP), Immediate(value));
-      __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
-    } else {
-      DCHECK(location.IsConstant());
-      DCHECK_EQ(location.GetConstant(), instruction);
+  LocationSummary* locations = instruction->GetLocations();
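+  // Nothing to do if the destination already holds the instruction's output.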
+  if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  }
+
+  if (locations != nullptr && locations->Out().IsConstant()) {
+    HConstant* const_to_move = locations->Out().GetConstant();
+    if (const_to_move->IsIntConstant()) {
+      Immediate imm(const_to_move->AsIntConstant()->GetValue());
+      if (location.IsRegister()) {
+        __ movl(location.As<CpuRegister>(), imm);
+      } else if (location.IsStackSlot()) {
+        __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
+      } else {
+        DCHECK(location.IsConstant());
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
+    } else if (const_to_move->IsLongConstant()) {
+      int64_t value = const_to_move->AsLongConstant()->GetValue();
+      if (location.IsRegister()) {
+        __ movq(location.As<CpuRegister>(), Immediate(value));
+      } else if (location.IsDoubleStackSlot()) {
+        __ movq(CpuRegister(TMP), Immediate(value));
+        __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
+      } else {
+        DCHECK(location.IsConstant());
+        DCHECK_EQ(location.GetConstant(), const_to_move);
+      }
     }
   } else if (instruction->IsLoadLocal()) {
     switch (instruction->GetType()) {
@@ -633,7 +658,7 @@
       case Primitive::kPrimLong:
       case Primitive::kPrimFloat:
       case Primitive::kPrimDouble:
-        Move(location, instruction->GetLocations()->Out());
+        Move(location, locations->Out());
         break;
 
       default:
@@ -1087,7 +1112,8 @@
   // temp = temp[index_in_cache]
   __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
   // (temp + offset_of_quick_compiled_code)()
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86_64WordSize).SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1146,7 +1172,8 @@
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86_64WordSize).SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1181,7 +1208,8 @@
   // temp = temp->GetImtEntryAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
   // call temp->GetEntryPoint();
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86_64WordSize).SizeValue()));
 
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1273,10 +1301,42 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   switch (result_type) {
+    case Primitive::kPrimByte:
+      switch (input_type) {
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-byte' instruction.
+          locations->SetInAt(0, Location::Any());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case Primitive::kPrimShort:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-short' instruction.
+          locations->SetInAt(0, Location::Any());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimInt:
       switch (input_type) {
         case Primitive::kPrimLong:
-          // long-to-int conversion.
+          // Processing a Dex `long-to-int' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
           break;
@@ -1299,7 +1359,7 @@
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          // int-to-long conversion.
+          // Processing a Dex `int-to-long' instruction.
           // TODO: We would benefit from a (to-be-implemented)
           // Location::RegisterOrStackSlot requirement for this input.
           locations->SetInAt(0, Location::RequiresRegister());
@@ -1318,10 +1378,67 @@
       }
       break;
 
+    case Primitive::kPrimChar:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-char' instruction.
+          locations->SetInAt(0, Location::Any());
+          locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimDouble:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Type conversion from " << input_type
-                 << " to " << result_type << " not yet implemented";
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimFloat:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
       break;
 
     default:
@@ -1337,10 +1454,58 @@
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   switch (result_type) {
+    case Primitive::kPrimByte:
+      switch (input_type) {
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-byte' instruction.
+          if (in.IsRegister()) {
+            __ movsxb(out.As<CpuRegister>(), in.As<CpuRegister>());
+          } else if (in.IsStackSlot()) {
+            __ movsxb(out.As<CpuRegister>(),
+                      Address(CpuRegister(RSP), in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            __ movl(out.As<CpuRegister>(),
+                    Immediate(static_cast<int8_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
+    case Primitive::kPrimShort:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-short' instruction.
+          if (in.IsRegister()) {
+            __ movsxw(out.As<CpuRegister>(), in.As<CpuRegister>());
+          } else if (in.IsStackSlot()) {
+            __ movsxw(out.As<CpuRegister>(),
+                      Address(CpuRegister(RSP), in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            __ movl(out.As<CpuRegister>(),
+                    Immediate(static_cast<int16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimInt:
       switch (input_type) {
         case Primitive::kPrimLong:
-          // long-to-int conversion.
+          // Processing a Dex `long-to-int' instruction.
           if (in.IsRegister()) {
             __ movl(out.As<CpuRegister>(), in.As<CpuRegister>());
           } else if (in.IsDoubleStackSlot()) {
@@ -1373,7 +1538,7 @@
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          // int-to-long conversion.
+          // Processing a Dex `int-to-long' instruction.
           DCHECK(in.IsRegister());
           __ movsxd(out.As<CpuRegister>(), in.As<CpuRegister>());
           break;
@@ -1390,10 +1555,73 @@
       }
       break;
 
+    case Primitive::kPrimChar:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-char' instruction.
+          if (in.IsRegister()) {
+            __ movzxw(out.As<CpuRegister>(), in.As<CpuRegister>());
+          } else if (in.IsStackSlot()) {
+            __ movzxw(out.As<CpuRegister>(),
+                      Address(CpuRegister(RSP), in.GetStackIndex()));
+          } else {
+            DCHECK(in.GetConstant()->IsIntConstant());
+            __ movl(out.As<CpuRegister>(),
+                    Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
+          }
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimFloat:
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-float' instruction.
+          __ cvtsi2ss(out.As<XmmRegister>(), in.As<CpuRegister>());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimDouble:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
+      break;
+
     case Primitive::kPrimDouble:
-      LOG(FATAL) << "Type conversion from " << input_type
-                 << " to " << result_type << " not yet implemented";
+      switch (input_type) {
+        case Primitive::kPrimByte:
+        case Primitive::kPrimShort:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+          // Processing a Dex `int-to-double' instruction.
+          __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>());
+          break;
+
+        case Primitive::kPrimLong:
+        case Primitive::kPrimFloat:
+          LOG(FATAL) << "Type conversion from " << input_type
+                     << " to " << result_type << " not yet implemented";
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected type conversion from " << input_type
+                     << " to " << result_type;
+      }
       break;
 
     default:
@@ -1604,11 +1832,53 @@
   }
 }
 
+void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+  DCHECK(instruction->IsDiv() || instruction->IsRem());
+  Primitive::Type type = instruction->GetResultType();
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+  bool is_div = instruction->IsDiv();
+  LocationSummary* locations = instruction->GetLocations();
+
+  CpuRegister out_reg = locations->Out().As<CpuRegister>();
+  CpuRegister second_reg = locations->InAt(1).As<CpuRegister>();
+
+  DCHECK_EQ(RAX, locations->InAt(0).As<CpuRegister>().AsRegister());
+  DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister());
+
+  SlowPathCodeX86_64* slow_path =
+      new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
+          out_reg.AsRegister(), type, is_div);
+  codegen_->AddSlowPath(slow_path);
+
+  // 0x80000000(00000000)/-1 triggers an arithmetic exception!
+  // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000),
+  // so it's safe to just use negl/negq instead of more complex comparisons.
+
+  __ cmpl(second_reg, Immediate(-1));
+  __ j(kEqual, slow_path->GetEntryLabel());
+
+  if (type == Primitive::kPrimInt) {
+    // edx:eax <- sign-extended of eax
+    __ cdq();
+    // eax = quotient, edx = remainder
+    __ idivl(second_reg);
+  } else {
+    // rdx:rax <- sign-extended of rax
+    __ cqo();
+    // rax = quotient, rdx = remainder
+    __ idivq(second_reg);
+  }
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
   switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RegisterLocation(RAX));
       locations->SetInAt(1, Location::RequiresRegister());
       locations->SetOut(Location::SameAsFirstInput());
@@ -1616,10 +1886,7 @@
       locations->AddTemp(Location::RegisterLocation(RDX));
       break;
     }
-    case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
-      break;
-    }
+
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -1639,35 +1906,11 @@
   Location second = locations->InAt(1);
   DCHECK(first.Equals(locations->Out()));
 
-  switch (div->GetResultType()) {
-    case Primitive::kPrimInt: {
-      CpuRegister first_reg = first.As<CpuRegister>();
-      CpuRegister second_reg = second.As<CpuRegister>();
-      DCHECK_EQ(RAX,  first_reg.AsRegister());
-      DCHECK_EQ(RDX, locations->GetTemp(0).As<CpuRegister>().AsRegister());
-
-      SlowPathCodeX86_64* slow_path =
-          new (GetGraph()->GetArena()) DivMinusOneSlowPathX86_64(first_reg.AsRegister());
-      codegen_->AddSlowPath(slow_path);
-
-      // 0x80000000/-1 triggers an arithmetic exception!
-      // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
-      // it's safe to just use negl instead of more complex comparisons.
-
-      __ cmpl(second_reg, Immediate(-1));
-      __ j(kEqual, slow_path->GetEntryLabel());
-
-      // edx:eax <- sign-extended of eax
-      __ cdq();
-      // eax = quotient, edx = remainder
-      __ idivl(second_reg);
-
-      __ Bind(slow_path->GetExitLabel());
-      break;
-    }
-
+  Primitive::Type type = div->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
-      LOG(FATAL) << "Not implemented div type" << div->GetResultType();
+      GenerateDivRemIntegral(div);
       break;
     }
 
@@ -1686,6 +1929,50 @@
   }
 }
 
+void LocationsBuilderX86_64::VisitRem(HRem* rem) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RegisterLocation(RAX));
+      locations->SetInAt(1, Location::RequiresRegister());
+      // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
+      locations->SetOut(Location::RegisterLocation(RDX));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      GenerateDivRemIntegral(rem);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1703,18 +1990,40 @@
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(0);
 
-  if (value.IsRegister()) {
-    __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
-  } else if (value.IsStackSlot()) {
-    __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
-  } else {
-    DCHECK(value.IsConstant()) << value;
-    if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
-      __ jmp(slow_path->GetEntryLabel());
+  switch (instruction->GetType()) {
+    case Primitive::kPrimInt: {
+      if (value.IsRegister()) {
+        __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsStackSlot()) {
+        __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
     }
-    return;
+    case Primitive::kPrimLong: {
+      if (value.IsRegister()) {
+        __ testq(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else if (value.IsDoubleStackSlot()) {
+        __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
+        __ j(kEqual, slow_path->GetEntryLabel());
+      } else {
+        DCHECK(value.IsConstant()) << value;
+        if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
+          __ jmp(slow_path->GetEntryLabel());
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
   }
-  __ j(kEqual, slow_path->GetEntryLabel());
 }
 
 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
@@ -1825,10 +2134,11 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   Primitive::Type field_type = instruction->GetFieldType();
-  bool is_object_type = field_type == Primitive::kPrimNot;
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue());
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  if (is_object_type) {
+  if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
@@ -1860,7 +2170,7 @@
     case Primitive::kPrimNot: {
       CpuRegister value = locations->InAt(1).As<CpuRegister>();
       __ movl(Address(obj, offset), value);
-      if (field_type == Primitive::kPrimNot) {
+      if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
         CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
         CpuRegister card = locations->GetTemp(1).As<CpuRegister>();
         codegen_->MarkGCCard(temp, card, obj, value);
@@ -2110,10 +2420,14 @@
 
 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
   Primitive::Type value_type = instruction->GetComponentType();
-  bool is_object = value_type == Primitive::kPrimNot;
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool needs_runtime_call = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
-  if (is_object) {
+      instruction, needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+  if (needs_runtime_call) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
     locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -2130,6 +2444,12 @@
     } else {
       locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
     }
+
+    if (needs_write_barrier) {
+      // Temporary registers for the write barrier.
+      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
@@ -2139,6 +2459,9 @@
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
+  bool needs_runtime_call = locations->WillCall();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -2171,46 +2494,61 @@
         if (value.IsRegister()) {
           __ movw(Address(obj, offset), value.As<CpuRegister>());
         } else {
+          DCHECK(value.IsConstant()) << value;
           __ movw(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       } else {
+        DCHECK(index.IsRegister()) << index;
         if (value.IsRegister()) {
           __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset),
                   value.As<CpuRegister>());
         } else {
-          __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset),
-                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
-      }
-      break;
-    }
-
-    case Primitive::kPrimInt: {
-      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        if (value.IsRegister()) {
-          __ movl(Address(obj, offset), value.As<CpuRegister>());
-        } else {
-          __ movl(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
-        }
-      } else {
-        if (value.IsRegister()) {
-          __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
-                  value.As<CpuRegister>());
-        } else {
           DCHECK(value.IsConstant()) << value;
-          __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+          __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       }
       break;
     }
 
+    case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), true));
-      DCHECK(!codegen_->IsLeafMethod());
-      codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+      if (!needs_runtime_call) {
+        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          if (value.IsRegister()) {
+            __ movl(Address(obj, offset), value.As<CpuRegister>());
+          } else {
+            DCHECK(value.IsConstant()) << value;
+            __ movl(Address(obj, offset),
+                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          }
+        } else {
+          DCHECK(index.IsRegister()) << index;
+          if (value.IsRegister()) {
+            __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+                    value.As<CpuRegister>());
+          } else {
+            DCHECK(value.IsConstant()) << value;
+            __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+                    Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          }
+        }
+
+        if (needs_write_barrier) {
+          DCHECK_EQ(value_type, Primitive::kPrimNot);
+          CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
+          CpuRegister card = locations->GetTemp(1).As<CpuRegister>();
+          codegen_->MarkGCCard(temp, card, obj, value.As<CpuRegister>());
+        }
+      } else {
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), true));
+        DCHECK(!codegen_->IsLeafMethod());
+        codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+      }
       break;
     }
 
@@ -2692,10 +3030,11 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   Primitive::Type field_type = instruction->GetFieldType();
-  bool is_object_type = field_type == Primitive::kPrimNot;
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue());
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  if (is_object_type) {
+  if (needs_write_barrier) {
     // Temporary registers for the write barrier.
     locations->AddTemp(Location::RequiresRegister());
     locations->AddTemp(Location::RequiresRegister());
@@ -2727,7 +3066,7 @@
     case Primitive::kPrimNot: {
       CpuRegister value = locations->InAt(1).As<CpuRegister>();
       __ movl(Address(cls, offset), value);
-      if (field_type == Primitive::kPrimNot) {
+      if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
         CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
         CpuRegister card = locations->GetTemp(1).As<CpuRegister>();
         codegen_->MarkGCCard(temp, card, cls, value);
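
The needs_write_barrier/needs_runtime_call split above hinges on CodeGenerator::StoreNeedsWriteBarrier, whose body lives in code_generator.h and is not part of this change. A minimal sketch of the predicate it plausibly implements, assuming a card only has to be marked for reference stores and that null is still modeled as the integer constant 0 at this stage (both are assumptions, not confirmed by this diff):

    // Hypothetical sketch, not the actual helper from code_generator.h.
    static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) {
      // Only reference stores can dirty a GC card, and storing null never does.
      bool stores_null =
          value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0;
      return type == Primitive::kPrimNot && !stores_null;
    }

Under that reading, VisitArraySet reserves the two temporaries only when a card may actually be dirtied, while NeedsTypeCheck() independently decides whether the store must be routed through pAputObject.
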
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 4c6e475..29c679d 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -25,7 +25,8 @@
 namespace art {
 namespace x86_64 {
 
-static constexpr size_t kX86_64WordSize = 8;
+// Use a local definition to prevent copying mistakes.
+static constexpr size_t kX86_64WordSize = kX86_64PointerSize;
 
 static constexpr Register kParameterCoreRegisters[] = { RSI, RDX, RCX, R8, R9 };
 static constexpr FloatRegister kParameterFloatRegisters[] =
@@ -134,6 +135,7 @@
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void GenerateDivRemIntegral(HBinaryOperation* instruction);
 
   X86_64Assembler* const assembler_;
   CodeGeneratorX86_64* const codegen_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 9752b1d..fee3ea6 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -16,6 +16,7 @@
 
 #include <functional>
 
+#include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "builder.h"
 #include "code_generator_arm.h"
@@ -25,7 +26,6 @@
 #include "common_compiler_test.h"
 #include "dex_file.h"
 #include "dex_instruction.h"
-#include "instruction_set.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "prepare_for_register_allocation.h"
@@ -39,7 +39,7 @@
 
 class InternalCodeAllocator : public CodeAllocator {
  public:
-  InternalCodeAllocator() { }
+  InternalCodeAllocator() : size_(0) { }
 
   virtual uint8_t* Allocate(size_t size) {
     size_ = size;
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index d2acfa6..ac00824 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -32,10 +32,10 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  HConstantFolding(HGraph* graph, const HGraphVisualizer& visualizer)
-      : HOptimization(graph, true, kConstantFoldingPassName, visualizer) {}
+  explicit HConstantFolding(HGraph* graph)
+      : HOptimization(graph, true, kConstantFoldingPassName) {}
 
-  virtual void Run() OVERRIDE;
+  void Run() OVERRIDE;
 
   static constexpr const char* kConstantFoldingPassName = "constant_folding";
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index 856c516..a56b9d9 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -47,8 +47,7 @@
   ASSERT_EQ(expected_before, actual_before);
 
   x86::CodeGeneratorX86 codegen(graph);
-  HGraphVisualizer visualizer(nullptr, graph, codegen, "");
-  HConstantFolding(graph, visualizer).Run();
+  HConstantFolding(graph).Run();
   SSAChecker ssa_checker(&allocator, graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
@@ -60,7 +59,7 @@
 
   check_after_cf(graph);
 
-  HDeadCodeElimination(graph, visualizer).Run();
+  HDeadCodeElimination(graph).Run();
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
 
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index a4446ae..3db2c3f 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -28,10 +28,10 @@
  */
 class HDeadCodeElimination : public HOptimization {
  public:
-  HDeadCodeElimination(HGraph* graph, const HGraphVisualizer& visualizer)
-      : HOptimization(graph, true, kDeadCodeEliminationPassName, visualizer) {}
+  explicit HDeadCodeElimination(HGraph* graph)
+      : HOptimization(graph, true, kDeadCodeEliminationPassName) {}
 
-  virtual void Run() OVERRIDE;
+  void Run() OVERRIDE;
 
   static constexpr const char* kDeadCodeEliminationPassName =
     "dead_code_elimination";
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index 0c68074..5d4b9cb 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -41,8 +41,7 @@
   ASSERT_EQ(actual_before, expected_before);
 
   x86::CodeGeneratorX86 codegen(graph);
-  HGraphVisualizer visualizer(nullptr, graph, codegen, "");
-  HDeadCodeElimination(graph, visualizer).Run();
+  HDeadCodeElimination(graph).Run();
   SSAChecker ssa_checker(&allocator, graph);
   ssa_checker.Run();
   ASSERT_TRUE(ssa_checker.IsValid());
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index 8d2c774..a841d5f 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_GVN_H_
 
 #include "nodes.h"
+#include "optimization.h"
 
 namespace art {
 
@@ -165,11 +166,11 @@
 /**
  * Optimization phase that removes redundant instructions.
  */
-class GlobalValueNumberer : public ValueObject {
+class GlobalValueNumberer : public HOptimization {
  public:
   GlobalValueNumberer(ArenaAllocator* allocator, HGraph* graph)
-      : allocator_(allocator),
-        graph_(graph),
+      : HOptimization(graph, true, "GVN"),
+        allocator_(allocator),
         block_effects_(allocator, graph->GetBlocks().Size()),
         loop_effects_(allocator, graph->GetBlocks().Size()),
         sets_(allocator, graph->GetBlocks().Size()),
@@ -186,7 +187,7 @@
     }
   }
 
-  void Run();
+  void Run() OVERRIDE;
 
  private:
   // Per-block GVN. Will also update the ValueSet of the dominated and
@@ -202,7 +203,6 @@
   SideEffects GetBlockEffects(HBasicBlock* block) const;
 
   ArenaAllocator* const allocator_;
-  HGraph* const graph_;
 
   // Side effects of individual blocks, that is the union of the side effects
   // of the instructions in the block.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 29eabe7..3d65e9a 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -18,11 +18,22 @@
 
 namespace art {
 
+class InstructionSimplifierVisitor : public HGraphVisitor {
+ public:
+  explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {}
+
+ private:
+  void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
+  void VisitEqual(HEqual* equal) OVERRIDE;
+  void VisitArraySet(HArraySet* instruction) OVERRIDE;
+};
+
 void InstructionSimplifier::Run() {
-  VisitInsertionOrder();
+  InstructionSimplifierVisitor visitor(graph_);
+  visitor.VisitInsertionOrder();
 }
 
-void InstructionSimplifier::VisitSuspendCheck(HSuspendCheck* check) {
+void InstructionSimplifierVisitor::VisitSuspendCheck(HSuspendCheck* check) {
   HBasicBlock* block = check->GetBlock();
   // Currently always keep the suspend check at entry.
   if (block->IsEntryBlock()) return;
@@ -38,7 +49,7 @@
   block->RemoveInstruction(check);
 }
 
-void InstructionSimplifier::VisitEqual(HEqual* equal) {
+void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input1 = equal->InputAt(0);
   HInstruction* input2 = equal->InputAt(1);
   if (input1->GetType() == Primitive::kPrimBoolean && input2->IsIntConstant()) {
@@ -55,4 +66,16 @@
   }
 }
 
+void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
+  HInstruction* value = instruction->GetValue();
+  if (value->GetType() != Primitive::kPrimNot) return;
+
+  if (value->IsArrayGet()) {
+    if (value->AsArrayGet()->GetArray() == instruction->GetArray()) {
+      // If the code is just swapping elements in the array, no need for a type check.
+      instruction->ClearNeedsTypeCheck();
+    }
+  }
+}
+
 }  // namespace art
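
The ClearNeedsTypeCheck call above is sound because a covariant array-store check only rejects values whose dynamic type is not assignable to the array's component type, and a value just loaded from the same array passes that test by construction. A sketch of the invariant, with ArrayStoreAllowed as a hypothetical stand-in for the check the pAputObject entrypoint performs:

    // Hypothetical model of the runtime array-store check that the
    // simplifier proves redundant for the pattern a[i] = a[j].
    bool ArrayStoreAllowed(mirror::Class* component_type, mirror::Object* value) {
      if (value == nullptr) return true;  // Storing null never violates covariance.
      return component_type->IsAssignableFrom(value->GetClass());
    }
    // When `value` was read out of the very array being written, its class is
    // assignable to component_type already, so the check is statically true.
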
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index d74b624..7068c7f 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -18,21 +18,19 @@
 #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_
 
 #include "nodes.h"
+#include "optimization.h"
 
 namespace art {
 
 /**
  * Implements optimizations specific to each instruction.
  */
-class InstructionSimplifier : public HGraphVisitor {
+class InstructionSimplifier : public HOptimization {
  public:
-  explicit InstructionSimplifier(HGraph* graph) : HGraphVisitor(graph) {}
+  explicit InstructionSimplifier(HGraph* graph)
+    : HOptimization(graph, true, "instruction_simplifier") {}
 
-  void Run();
-
- private:
-  virtual void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
-  virtual void VisitEqual(HEqual* equal) OVERRIDE;
+  void Run() OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index d1555d4..e1c8e8e 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -391,6 +391,10 @@
     return (register_set & (1 << reg)) != 0;
   }
 
+  size_t GetNumberOfRegisters() const {
+    return __builtin_popcount(core_registers_) + __builtin_popcount(floating_point_registers_);
+  }
+
  private:
   uint32_t core_registers_;
   uint32_t floating_point_registers_;
@@ -503,6 +507,10 @@
     return &live_registers_;
   }
 
+  size_t GetNumberOfLiveRegisters() const {
+    return live_registers_.GetNumberOfRegisters();
+  }
+
   bool InputOverlapsWithOutputOrTemp(uint32_t input_index, bool is_environment) const {
     if (is_environment) return true;
     if ((input_index == 0)
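
GetNumberOfRegisters relies on __builtin_popcount, the GCC/Clang population-count intrinsic, to count set bits in the core and floating-point masks. For illustration only (the runtime keeps the intrinsic), a portable equivalent of the bit counting:

    #include <cstddef>
    #include <cstdint>

    // Kernighan's trick: each iteration clears the lowest set bit.
    size_t CountSetBits(uint32_t mask) {
      size_t count = 0;
      for (; mask != 0; mask &= mask - 1) {
        ++count;
      }
      return count;
    }
    // A core mask of 0b1011 (three bits set) reports three live registers.
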
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6224a11..7d52d7d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -90,7 +90,7 @@
         maximum_number_of_out_vregs_(0),
         number_of_vregs_(0),
         number_of_in_vregs_(0),
-        number_of_temporaries_(0),
+        temporaries_vreg_slots_(0),
         current_instruction_id_(0) {}
 
   ArenaAllocator* GetArena() const { return arena_; }
@@ -129,12 +129,12 @@
     maximum_number_of_out_vregs_ = std::max(new_value, maximum_number_of_out_vregs_);
   }
 
-  void UpdateNumberOfTemporaries(size_t count) {
-    number_of_temporaries_ = std::max(count, number_of_temporaries_);
+  void UpdateTemporariesVRegSlots(size_t slots) {
+    temporaries_vreg_slots_ = std::max(slots, temporaries_vreg_slots_);
   }
 
-  size_t GetNumberOfTemporaries() const {
-    return number_of_temporaries_;
+  size_t GetTemporariesVRegSlots() const {
+    return temporaries_vreg_slots_;
   }
 
   void SetNumberOfVRegs(uint16_t number_of_vregs) {
@@ -192,8 +192,8 @@
   // The number of virtual registers used by parameters of this method.
   uint16_t number_of_in_vregs_;
 
-  // The number of temporaries that will be needed for the baseline compiler.
-  size_t number_of_temporaries_;
+  // Number of vreg size slots that the temporaries use (used in baseline compiler).
+  size_t temporaries_vreg_slots_;
 
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int current_instruction_id_;
@@ -521,6 +521,7 @@
   M(ParallelMove, Instruction)                                          \
   M(ParameterValue, Instruction)                                        \
   M(Phi, Instruction)                                                   \
+  M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
   M(StaticFieldGet, Instruction)                                        \
@@ -1750,23 +1751,59 @@
 
 class HDiv : public HBinaryOperation {
  public:
-  HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right)
-      : HBinaryOperation(result_type, left, right) {}
+  HDiv(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc)
+      : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {}
 
   virtual int32_t Evaluate(int32_t x, int32_t y) const {
     // Our graph structure ensures we never have 0 for `y` during constant folding.
     DCHECK_NE(y, 0);
-    // Special case -1 to avoid getting a SIGFPE on x86.
+    // Special case -1 to avoid getting a SIGFPE on x86(_64).
     return (y == -1) ? -x : x / y;
   }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const { return x / y; }
+
+  virtual int64_t Evaluate(int64_t x, int64_t y) const {
+    DCHECK_NE(y, 0);
+    // Special case -1 to avoid getting a SIGFPE on x86(_64).
+    return (y == -1) ? -x : x / y;
+  }
+
+  uint32_t GetDexPc() const { return dex_pc_; }
 
   DECLARE_INSTRUCTION(Div);
 
  private:
+  const uint32_t dex_pc_;
+
   DISALLOW_COPY_AND_ASSIGN(HDiv);
 };
 
+class HRem : public HBinaryOperation {
+ public:
+  HRem(Primitive::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc)
+      : HBinaryOperation(result_type, left, right), dex_pc_(dex_pc) {}
+
+  virtual int32_t Evaluate(int32_t x, int32_t y) const {
+    DCHECK_NE(y, 0);
+    // Special case -1 to avoid getting a SIGFPE on x86(_64).
+    return (y == -1) ? 0 : x % y;
+  }
+
+  virtual int64_t Evaluate(int64_t x, int64_t y) const {
+    DCHECK_NE(y, 0);
+    // Special case -1 to avoid getting a SIGFPE on x86(_64).
+    return (y == -1) ? 0 : x % y;
+  }
+
+  uint32_t GetDexPc() const { return dex_pc_; }
+
+  DECLARE_INSTRUCTION(Rem);
+
+ private:
+  const uint32_t dex_pc_;
+
+  DISALLOW_COPY_AND_ASSIGN(HRem);
+};
+
 class HDivZeroCheck : public HExpression<1> {
  public:
   HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
@@ -2030,6 +2067,8 @@
   MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
   Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
 
+  HInstruction* GetValue() const { return InputAt(1); }
+
   DECLARE_INSTRUCTION(InstanceFieldSet);
 
  private:
@@ -2046,13 +2085,16 @@
     SetRawInputAt(1, index);
   }
 
-  virtual bool CanBeMoved() const { return true; }
-  virtual bool InstructionDataEquals(HInstruction* other) const {
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
     UNUSED(other);
     return true;
   }
   void SetType(Primitive::Type type) { type_ = type; }
 
+  HInstruction* GetArray() const { return InputAt(0); }
+  HInstruction* GetIndex() const { return InputAt(1); }
+
   DECLARE_INSTRUCTION(ArrayGet);
 
  private:
@@ -2068,20 +2110,29 @@
             uint32_t dex_pc)
       : HTemplateInstruction(SideEffects::ChangesSomething()),
         dex_pc_(dex_pc),
-        expected_component_type_(expected_component_type) {
+        expected_component_type_(expected_component_type),
+        needs_type_check_(value->GetType() == Primitive::kPrimNot) {
     SetRawInputAt(0, array);
     SetRawInputAt(1, index);
     SetRawInputAt(2, value);
   }
 
-  virtual bool NeedsEnvironment() const {
+  bool NeedsEnvironment() const OVERRIDE {
     // We currently always call a runtime method to catch array store
     // exceptions.
-    return InputAt(2)->GetType() == Primitive::kPrimNot;
+    return needs_type_check_;
   }
 
+  void ClearNeedsTypeCheck() {
+    needs_type_check_ = false;
+  }
+
+  bool NeedsTypeCheck() const { return needs_type_check_; }
+
   uint32_t GetDexPc() const { return dex_pc_; }
 
+  HInstruction* GetArray() const { return InputAt(0); }
+  HInstruction* GetIndex() const { return InputAt(1); }
   HInstruction* GetValue() const { return InputAt(2); }
 
   Primitive::Type GetComponentType() const {
@@ -2100,6 +2151,7 @@
  private:
   const uint32_t dex_pc_;
   const Primitive::Type expected_component_type_;
+  bool needs_type_check_;
 
   DISALLOW_COPY_AND_ASSIGN(HArraySet);
 };
@@ -2158,8 +2210,8 @@
  * Some DEX instructions are folded into multiple HInstructions that need
  * to stay live until the last HInstruction. This class
  * is used as a marker for the baseline compiler to ensure its preceding
- * HInstruction stays live. `index` is the temporary number that is used
- * for knowing the stack offset where to store the instruction.
+ * HInstruction stays live. `index` represents the stack location index of the
+ * instruction (the actual offset is computed as index * vreg_size).
  */
 class HTemporary : public HTemplateInstruction<0> {
  public:
@@ -2368,6 +2420,8 @@
   MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
   Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); }
 
+  HInstruction* GetValue() const { return InputAt(1); }
+
   DECLARE_INSTRUCTION(StaticFieldSet);
 
  private:
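
Both HDiv::Evaluate and the new HRem::Evaluate special-case a divisor of -1 because the x86/x86-64 idiv instruction raises SIGFPE for INT_MIN / -1, even though Java defines both results (INT_MIN for the quotient, 0 for the remainder). A standalone restatement of the guards, assuming the two's-complement wraparound the runtime build relies on:

    #include <cstdint>

    // Java semantics for 32-bit division/remainder without the x86 trap.
    // Callers guarantee y != 0 (HDivZeroCheck throws before this point).
    int32_t JavaDiv32(int32_t x, int32_t y) {
      return (y == -1) ? -x : x / y;  // -INT32_MIN wraps back to INT32_MIN.
    }
    int32_t JavaRem32(int32_t x, int32_t y) {
      return (y == -1) ? 0 : x % y;   // INT32_MIN % -1 is 0 in Java.
    }
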
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index ea98186..d1178d5 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -21,12 +21,6 @@
 
 namespace art {
 
-void HOptimization::Execute() {
-  Run();
-  visualizer_.DumpGraph(pass_name_);
-  Check();
-}
-
 void HOptimization::Check() {
   if (kIsDebugBuild) {
     if (is_in_ssa_form_) {
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index 59683e2..d281248 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -29,25 +29,19 @@
  public:
   HOptimization(HGraph* graph,
                 bool is_in_ssa_form,
-                const char* pass_name,
-                const HGraphVisualizer& visualizer)
+                const char* pass_name)
       : graph_(graph),
         is_in_ssa_form_(is_in_ssa_form),
-        pass_name_(pass_name),
-        visualizer_(visualizer) {}
+        pass_name_(pass_name) {}
 
   virtual ~HOptimization() {}
 
-  // Execute the optimization pass.
-  void Execute();
-
   // Return the name of the pass.
   const char* GetPassName() const { return pass_name_; }
 
  // Perform the analysis itself.
   virtual void Run() = 0;
 
- private:
   // Verify the graph; abort if it is not valid.
   void Check();
 
@@ -59,9 +53,6 @@
   const bool is_in_ssa_form_;
   // Optimization pass name.
   const char* pass_name_;
-  // A graph visualiser invoked after the execution of the optimization
-  // pass if enabled.
-  const HGraphVisualizer& visualizer_;
 
   DISALLOW_COPY_AND_ASSIGN(HOptimization);
 };
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 6e3653a..42ac77d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -26,9 +26,12 @@
 #include "dead_code_elimination.h"
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
+#include "elf_writer_quick.h"
 #include "graph_visualizer.h"
 #include "gvn.h"
 #include "instruction_simplifier.h"
+#include "jni/quick/jni_compiler.h"
+#include "mirror/art_method-inl.h"
 #include "nodes.h"
 #include "prepare_for_register_allocation.h"
 #include "register_allocator.h"
@@ -88,15 +91,6 @@
                           jobject class_loader,
                           const DexFile& dex_file) const OVERRIDE;
 
-  CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
-                             uint32_t access_flags,
-                             InvokeType invoke_type,
-                             uint16_t class_def_idx,
-                             uint32_t method_idx,
-                             jobject class_loader,
-                             const DexFile& dex_file) const;
-
-  // For the following methods we will use the fallback. This is a delegation pattern.
   CompiledMethod* JniCompile(uint32_t access_flags,
                              uint32_t method_idx,
                              const DexFile& dex_file) const OVERRIDE;
@@ -110,13 +104,16 @@
                 const std::string& android_root,
                 bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  Backend* GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const OVERRIDE;
+  Backend* GetCodeGenerator(CompilationUnit* cu ATTRIBUTE_UNUSED,
+                            void* compilation_unit ATTRIBUTE_UNUSED) const OVERRIDE {
+    return nullptr;
+  }
 
-  void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
+  void InitCompilationUnit(CompilationUnit& cu ATTRIBUTE_UNUSED) const OVERRIDE {}
 
-  void Init() const OVERRIDE;
+  void Init() const OVERRIDE {}
 
-  void UnInit() const OVERRIDE;
+  void UnInit() const OVERRIDE {}
 
  private:
   // Whether we should run any optimization or register allocation. If false, will
@@ -128,10 +125,6 @@
 
   std::unique_ptr<std::ostream> visualizer_output_;
 
-  // Delegate to another compiler in case the optimizing compiler cannot compile a method.
-  // Currently the fallback is the quick compiler.
-  std::unique_ptr<Compiler> delegate_;
-
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
 };
 
@@ -143,21 +136,12 @@
           driver->GetCompilerOptions().GetCompilerFilter() != CompilerOptions::kTime),
       total_compiled_methods_(0),
       unoptimized_compiled_methods_(0),
-      optimized_compiled_methods_(0),
-      delegate_(Create(driver, Compiler::Kind::kQuick)) {
+      optimized_compiled_methods_(0) {
   if (kIsVisualizerEnabled) {
     visualizer_output_.reset(new std::ofstream("art.cfg"));
   }
 }
 
-void OptimizingCompiler::Init() const {
-  delegate_->Init();
-}
-
-void OptimizingCompiler::UnInit() const {
-  delegate_->UnInit();
-}
-
 OptimizingCompiler::~OptimizingCompiler() {
   if (total_compiled_methods_ == 0) {
     LOG(INFO) << "Did not compile any method.";
@@ -170,33 +154,28 @@
   }
 }
 
-bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
-                                          CompilationUnit* cu) const {
-  return delegate_->CanCompileMethod(method_idx, dex_file, cu);
+bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED,
+                                          const DexFile& dex_file ATTRIBUTE_UNUSED,
+                                          CompilationUnit* cu ATTRIBUTE_UNUSED) const {
+  return true;
 }
 
 CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
                                                uint32_t method_idx,
                                                const DexFile& dex_file) const {
-  return delegate_->JniCompile(access_flags, method_idx, dex_file);
+  return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
 }
 
 uintptr_t OptimizingCompiler::GetEntryPointOf(mirror::ArtMethod* method) const {
-  return delegate_->GetEntryPointOf(method);
+  return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
+      InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
 }
 
 bool OptimizingCompiler::WriteElf(art::File* file, OatWriter* oat_writer,
                                   const std::vector<const art::DexFile*>& dex_files,
                                   const std::string& android_root, bool is_host) const {
-  return delegate_->WriteElf(file, oat_writer, dex_files, android_root, is_host);
-}
-
-Backend* OptimizingCompiler::GetCodeGenerator(CompilationUnit* cu, void* compilation_unit) const {
-  return delegate_->GetCodeGenerator(cu, compilation_unit);
-}
-
-void OptimizingCompiler::InitCompilationUnit(CompilationUnit& cu) const {
-  delegate_->InitCompilationUnit(cu);
+  return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
+                                       *GetCompilerDriver());
 }
 
 static bool IsInstructionSetSupported(InstructionSet instruction_set) {
@@ -211,13 +190,32 @@
   return code_item.tries_size_ == 0;
 }
 
-CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
-                                               uint32_t access_flags,
-                                               InvokeType invoke_type,
-                                               uint16_t class_def_idx,
-                                               uint32_t method_idx,
-                                               jobject class_loader,
-                                               const DexFile& dex_file) const {
+static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) {
+  HDeadCodeElimination opt1(graph);
+  HConstantFolding opt2(graph);
+  SsaRedundantPhiElimination opt3(graph);
+  SsaDeadPhiElimination opt4(graph);
+  InstructionSimplifier opt5(graph);
+  GlobalValueNumberer opt6(graph->GetArena(), graph);
+  InstructionSimplifier opt7(graph);
+
+  HOptimization* optimizations[] = { &opt1, &opt2, &opt3, &opt4, &opt5, &opt6, &opt7 };
+
+  for (size_t i = 0; i < arraysize(optimizations); ++i) {
+    HOptimization* optimization = optimizations[i];
+    optimization->Run();
+    optimization->Check();
+    visualizer.DumpGraph(optimization->GetPassName());
+  }
+}
+
+CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
+                                            uint32_t access_flags,
+                                            InvokeType invoke_type,
+                                            uint16_t class_def_idx,
+                                            uint32_t method_idx,
+                                            jobject class_loader,
+                                            const DexFile& dex_file) const {
   UNUSED(invoke_type);
   total_compiled_methods_++;
   InstructionSet instruction_set = GetCompilerDriver()->GetInstructionSet();
@@ -278,16 +276,9 @@
     visualizer.DumpGraph("ssa");
     graph->FindNaturalLoops();
 
-    HDeadCodeElimination(graph, visualizer).Execute();
-    HConstantFolding(graph, visualizer).Execute();
+    RunOptimizations(graph, visualizer);
 
-    SsaRedundantPhiElimination(graph).Run();
-    SsaDeadPhiElimination(graph).Run();
-    InstructionSimplifier(graph).Run();
-    GlobalValueNumberer(graph->GetArena(), graph).Run();
-    visualizer.DumpGraph(kGVNPassName);
     PrepareForRegisterAllocation(graph).Run();
-
     SsaLivenessAnalysis liveness(*graph, codegen);
     liveness.Analyze();
     visualizer.DumpGraph(kLivenessPassName);
@@ -360,23 +351,6 @@
   }
 }
 
-CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
-                                            uint32_t access_flags,
-                                            InvokeType invoke_type,
-                                            uint16_t class_def_idx,
-                                            uint32_t method_idx,
-                                            jobject class_loader,
-                                            const DexFile& dex_file) const {
-  CompiledMethod* method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                                      method_idx, class_loader, dex_file);
-  if (method != nullptr) {
-    return method;
-  }
-
-  return delegate_->Compile(code_item, access_flags, invoke_type, class_def_idx, method_idx,
-                            class_loader, dex_file);
-}
-
 Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
   return new OptimizingCompiler(driver);
 }
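
With Execute() gone, HOptimization subclasses only implement Run(), and RunOptimizations owns the run/verify/dump sequencing. Defining a new pass against the slimmed-down interface now looks like this (the pass itself is a hypothetical example):

    // Hypothetical no-op pass illustrating the new HOptimization contract:
    // no HGraphVisualizer parameter, and graph_ is inherited from the base.
    class HExamplePass : public HOptimization {
     public:
      explicit HExamplePass(HGraph* graph)
          : HOptimization(graph, /* is_in_ssa_form */ true, "example_pass") {}

      void Run() OVERRIDE {
        // Mutate graph_ here; Check() and graph dumping are the driver's job.
      }
    };

To join the pipeline, an instance would simply be appended to the optimizations array in RunOptimizations.
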
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 4d6e664..2948496 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -215,9 +215,16 @@
       // By adding the following interval in the algorithm, we can compute this
       // maximum before updating locations.
       LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
-      interval->AddRange(position, position + 1);
-      unhandled_core_intervals_.Add(interval);
-      unhandled_fp_intervals_.Add(interval);
+      // The start of the interval must be after the position of the safepoint, so that
+      // we can just check the number of active registers at that position. Note that this
+      // will include the current interval in the computation of
+      // `maximum_number_of_live_registers`, so we need a better strategy if this becomes
+      // a problem.
+      // TODO: We could put the logic in AddSorted, to ensure the safepoint range is
+      // after all other intervals starting at that same position.
+      interval->AddRange(position + 1, position + 2);
+      AddSorted(&unhandled_core_intervals_, interval);
+      AddSorted(&unhandled_fp_intervals_, interval);
     }
   }
 
@@ -250,6 +257,7 @@
       : unhandled_fp_intervals_;
 
   DCHECK(unhandled.IsEmpty() || current->StartsBeforeOrAt(unhandled.Peek()));
+
   // Some instructions define their output in fixed register/stack slot. We need
   // to ensure we know these locations before doing register allocation. For a
   // given register, we create an interval that covers these locations. The register
@@ -475,6 +483,17 @@
     LiveInterval* current = unhandled_->Pop();
     DCHECK(!current->IsFixed() && !current->HasSpillSlot());
     DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
+
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      // We know that the current active intervals are all live at the
+      // safepoint (modulo the synthesized interval created for the safepoint).
+      maximum_number_of_live_registers_ =
+          std::max(maximum_number_of_live_registers_, active_.Size());
+      continue;
+    }
+
     size_t position = current->GetStart();
 
     // Remember the inactive_ size here since the ones moved to inactive_ from
@@ -515,14 +534,6 @@
       }
     }
 
-    if (current->IsSlowPathSafepoint()) {
-      // Synthesized interval to record the maximum number of live registers
-      // at safepoints. No need to allocate a register for it.
-      maximum_number_of_live_registers_ =
-          std::max(maximum_number_of_live_registers_, active_.Size());
-      continue;
-    }
-
     // (4) Try to find an available register.
     bool success = TryAllocateFreeReg(current);
 
@@ -1062,6 +1073,7 @@
       switch (source.GetKind()) {
         case Location::kRegister: {
           locations->AddLiveRegister(source);
+          DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg());
           }
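
The reordered safepoint handling is easiest to see on a toy model: intervals are processed in start order, and because the synthesized marker now starts at safepoint position + 1, every interval live across the safepoint is already in the active set when the marker is popped. A self-contained simulation (names and positions hypothetical, not ART code):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    struct Interval {
      size_t start, end;         // Half-open live range [start, end).
      bool is_safepoint_marker;  // Synthesized [p + 1, p + 2) interval.
    };

    size_t MaxLiveAtSafepoints(std::vector<Interval> unhandled) {
      std::sort(unhandled.begin(), unhandled.end(),
                [](const Interval& a, const Interval& b) { return a.start < b.start; });
      std::vector<Interval> active;
      size_t max_live = 0;
      for (const Interval& current : unhandled) {
        // Expire intervals that ended at or before the current position.
        active.erase(std::remove_if(active.begin(), active.end(),
                                    [&](const Interval& i) { return i.end <= current.start; }),
                     active.end());
        if (current.is_safepoint_marker) {
          // Everything still active is live across the safepoint.
          max_live = std::max(max_live, active.size());
          continue;  // The marker itself never receives a register.
        }
        active.push_back(current);
      }
      return max_live;
    }
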
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 3d81362..ba4be34 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -713,7 +713,7 @@
   graph->AddBlock(block);
   entry->AddSuccessor(block);
 
-  *div = new (allocator) HDiv(Primitive::kPrimInt, first, second);
+  *div = new (allocator) HDiv(Primitive::kPrimInt, first, second, 0);  // don't care about dex_pc.
   block->AddInstruction(*div);
 
   block->AddInstruction(new (allocator) HExit());
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index fec40f9..b2cc119 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -183,8 +183,7 @@
 static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
   // We place the floating point phi next to this phi.
   HInstruction* next = phi->GetNext();
-  if (next == nullptr
-      || (next->GetType() != Primitive::kPrimDouble && next->GetType() != Primitive::kPrimFloat)) {
+  if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) {
     ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
     HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
     for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
@@ -195,9 +194,7 @@
     phi->GetBlock()->InsertPhiAfter(new_phi, phi);
     return new_phi;
   } else {
-    // If there is already a phi with the expected type, we know it is the floating
-    // point equivalent of this phi.
-    DCHECK_EQ(next->AsPhi()->GetRegNumber(), phi->GetRegNumber());
+    DCHECK_EQ(next->GetType(), type);
     return next->AsPhi();
   }
 }
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index 5274f09..b789971 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_
 
 #include "nodes.h"
+#include "optimization.h"
 
 namespace art {
 
@@ -25,15 +26,15 @@
  * Optimization phase that removes dead phis from the graph. Dead phis are unused
  * phis, or phis only used by other phis.
  */
-class SsaDeadPhiElimination : public ValueObject {
+class SsaDeadPhiElimination : public HOptimization {
  public:
   explicit SsaDeadPhiElimination(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+      : HOptimization(graph, true, "dead_phi_elimination"),
+        worklist_(graph->GetArena(), kDefaultWorklistSize) {}
 
-  void Run();
+  void Run() OVERRIDE;
 
  private:
-  HGraph* const graph_;
   GrowableArray<HPhi*> worklist_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
@@ -47,15 +48,15 @@
  * registers might be updated with the same value, or not updated at all. We can just
  * replace the phi with the value when entering the loop.
  */
-class SsaRedundantPhiElimination : public ValueObject {
+class SsaRedundantPhiElimination : public HOptimization {
  public:
   explicit SsaRedundantPhiElimination(HGraph* graph)
-      : graph_(graph), worklist_(graph->GetArena(), kDefaultWorklistSize) {}
+      : HOptimization(graph, true, "redundant_phi_elimination"),
+        worklist_(graph->GetArena(), kDefaultWorklistSize) {}
 
-  void Run();
+  void Run() OVERRIDE;
 
  private:
-  HGraph* const graph_;
   GrowableArray<HPhi*> worklist_;
 
   static constexpr size_t kDefaultWorklistSize = 8;
diff --git a/compiler/optimizing/ssa_type_propagation.cc b/compiler/optimizing/ssa_type_propagation.cc
index 3828142..cb5ce20 100644
--- a/compiler/optimizing/ssa_type_propagation.cc
+++ b/compiler/optimizing/ssa_type_propagation.cc
@@ -90,10 +90,12 @@
     }
   } else {
     for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HPhi* phi = it.Current()->AsPhi();
-      if (UpdateType(phi)) {
-        AddDependentInstructionsToWorklist(phi);
-      }
+      // Eagerly compute the type of the phi, for quicker convergence. Note
+      // that we don't need to add users to the worklist because we are
+      // doing a reverse post-order visit: the phi's users are either
+      // non-loop phis, which will be visited later in this visit, or loop
+      // phis, which are already in the worklist.
+      UpdateType(it.Current()->AsPhi());
     }
   }
 }
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index dca2ab7..d288b70 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -421,6 +421,12 @@
   virtual void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
   virtual void udiv(Register rd, Register rn, Register rm, Condition cond = AL) = 0;
 
+  // Bit field extract instructions.
+  virtual void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width,
+                    Condition cond = AL) = 0;
+  virtual void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width,
+                    Condition cond = AL) = 0;
+
   // Load/store instructions.
   virtual void ldr(Register rd, const Address& ad, Condition cond = AL) = 0;
   virtual void str(Register rd, const Address& ad, Condition cond = AL) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index c8a57b1..a1594b0 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -208,6 +208,44 @@
 }
 
 
+void Arm32Assembler::sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_LE(lsb, 31U);
+  CHECK(1U <= width && width <= 32U) << width;
+  uint32_t widthminus1 = width - 1;
+
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+      B26 | B25 | B24 | B23 | B21 |
+      (widthminus1 << 16) |
+      (static_cast<uint32_t>(rd) << 12) |
+      (lsb << 7) |
+      B6 | B4 |
+      static_cast<uint32_t>(rn);
+  Emit(encoding);
+}
+
+
+void Arm32Assembler::ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond) {
+  CHECK_NE(rd, kNoRegister);
+  CHECK_NE(rn, kNoRegister);
+  CHECK_NE(cond, kNoCondition);
+  CHECK_LE(lsb, 31U);
+  CHECK(1U <= width && width <= 32U) << width;
+  uint32_t widthminus1 = width - 1;
+
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+      B26 | B25 | B24 | B23 | B22 | B21 |
+      (widthminus1 << 16) |
+      (static_cast<uint32_t>(rd) << 12) |
+      (lsb << 7) |
+      B6 | B4 |
+      static_cast<uint32_t>(rn);
+  Emit(encoding);
+}
+
+
 void Arm32Assembler::ldr(Register rd, const Address& ad, Condition cond) {
   EmitMemOp(cond, true, false, rd, ad);
 }
@@ -1475,10 +1513,8 @@
 
 
 void Arm32Assembler::dmb(DmbOptions flavor) {
-#if ANDROID_SMP != 0
   int32_t encoding = 0xf57ff05f;  // dmb
   Emit(encoding | flavor);
-#endif
 }
 
 
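The new sbfx/ubfx emitters encode ARM's bit field extract instructions: take `width` bits of rn starting at bit `lsb`, then sign- or zero-extend the field into rd. A C++ model of the two operations, assuming an arithmetic right shift on signed values (true for the targets ART supports):

    #include <cstdint>

    // Semantic models only, not ART code. Preconditions mirror the CHECKs
    // above plus the architectural constraint lsb + width <= 32.
    int32_t SbfxModel(int32_t rn, uint32_t lsb, uint32_t width) {
      uint32_t shifted = static_cast<uint32_t>(rn) << (32 - lsb - width);
      return static_cast<int32_t>(shifted) >> (32 - width);  // Sign-extend.
    }

    uint32_t UbfxModel(uint32_t rn, uint32_t lsb, uint32_t width) {
      uint32_t mask = (width == 32u) ? 0xffffffffu : ((1u << width) - 1u);
      return (rn >> lsb) & mask;  // Zero-extend.
    }
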
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index dbabb99..0b009e1 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -96,6 +96,10 @@
   void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
   void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
 
+  // Bit field extract instructions.
+  void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
+  void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
+
   // Load/store instructions.
   void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
new file mode 100644
index 0000000..277a9eb
--- /dev/null
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_arm32.h"
+
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+namespace art {
+
+class AssemblerArm32Test : public AssemblerTest<arm::Arm32Assembler,
+                                                arm::Register, arm::SRegister,
+                                                uint32_t> {
+ protected:
+  std::string GetArchitectureString() OVERRIDE {
+    return "arm";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -marm --no-show-raw-insn";
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.insert(end(registers_),
+                        {  // NOLINT(whitespace/braces)
+                          new arm::Register(arm::R0),
+                          new arm::Register(arm::R1),
+                          new arm::Register(arm::R2),
+                          new arm::Register(arm::R3),
+                          new arm::Register(arm::R4),
+                          new arm::Register(arm::R5),
+                          new arm::Register(arm::R6),
+                          new arm::Register(arm::R7),
+                          new arm::Register(arm::R8),
+                          new arm::Register(arm::R9),
+                          new arm::Register(arm::R10),
+                          new arm::Register(arm::R11),
+                          new arm::Register(arm::R12),
+                          new arm::Register(arm::R13),
+                          new arm::Register(arm::R14),
+                          new arm::Register(arm::R15)
+                        });
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+  }
+
+  std::vector<arm::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+ private:
+  std::vector<arm::Register*> registers_;
+};
+
+
+TEST_F(AssemblerArm32Test, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+
+TEST_F(AssemblerArm32Test, Sbfx) {
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 1);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 8);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 16);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 32);
+
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 1);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 8);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 16);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 24);
+
+  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 1);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 8);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 16);
+
+  GetAssembler()->sbfx(arm::R0, arm::R1, 31, 1);
+
+  const char* expected =
+      "sbfx r0, r1, #0, #1\n"
+      "sbfx r0, r1, #0, #8\n"
+      "sbfx r0, r1, #0, #16\n"
+      "sbfx r0, r1, #0, #32\n"
+
+      "sbfx r0, r1, #8, #1\n"
+      "sbfx r0, r1, #8, #8\n"
+      "sbfx r0, r1, #8, #16\n"
+      "sbfx r0, r1, #8, #24\n"
+
+      "sbfx r0, r1, #16, #1\n"
+      "sbfx r0, r1, #16, #8\n"
+      "sbfx r0, r1, #16, #16\n"
+
+      "sbfx r0, r1, #31, #1\n";
+  DriverStr(expected, "sbfx");
+}
+
+TEST_F(AssemblerArm32Test, Ubfx) {
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 1);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 8);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 16);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 32);
+
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 1);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 8);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 16);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 24);
+
+  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 1);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 8);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 16);
+
+  GetAssembler()->ubfx(arm::R0, arm::R1, 31, 1);
+
+  const char* expected =
+      "ubfx r0, r1, #0, #1\n"
+      "ubfx r0, r1, #0, #8\n"
+      "ubfx r0, r1, #0, #16\n"
+      "ubfx r0, r1, #0, #32\n"
+
+      "ubfx r0, r1, #8, #1\n"
+      "ubfx r0, r1, #8, #8\n"
+      "ubfx r0, r1, #8, #16\n"
+      "ubfx r0, r1, #8, #24\n"
+
+      "ubfx r0, r1, #16, #1\n"
+      "ubfx r0, r1, #16, #8\n"
+      "ubfx r0, r1, #16, #16\n"
+
+      "ubfx r0, r1, #31, #1\n";
+  DriverStr(expected, "ubfx");
+}
+
+}  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 053e843..a349209 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -264,6 +264,48 @@
 }
 
 
+void Thumb2Assembler::sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond) {
+  CheckCondition(cond);
+  CHECK_LE(lsb, 31U);
+  CHECK(1U <= width && width <= 32U) << width;
+  uint32_t widthminus1 = width - 1;
+  uint32_t imm2 = lsb & (B1 | B0);  // Bits 0-1 of `lsb`.
+  uint32_t imm3 = (lsb & (B4 | B3 | B2)) >> 2;  // Bits 2-4 of `lsb`.
+
+  uint32_t op = 20U /* 0b10100 */;
+  int32_t encoding = B31 | B30 | B29 | B28 | B25 |
+      op << 20 |
+      static_cast<uint32_t>(rn) << 16 |
+      imm3 << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      imm2 << 6 |
+      widthminus1;
+
+  Emit32(encoding);
+}
+
+
+void Thumb2Assembler::ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond) {
+  CheckCondition(cond);
+  CHECK_LE(lsb, 31U);
+  CHECK(1U <= width && width <= 32U) << width;
+  uint32_t widthminus1 = width - 1;
+  uint32_t imm2 = lsb & (B1 | B0);  // Bits 0-1 of `lsb`.
+  uint32_t imm3 = (lsb & (B4 | B3 | B2)) >> 2;  // Bits 2-4 of `lsb`.
+
+  uint32_t op = 28U /* 0b11100 */;
+  int32_t encoding = B31 | B30 | B29 | B28 | B25 |
+      op << 20 |
+      static_cast<uint32_t>(rn) << 16 |
+      imm3 << 12 |
+      static_cast<uint32_t>(rd) << 8 |
+      imm2 << 6 |
+      widthminus1;
+
+  Emit32(encoding);
+}
+
+
 void Thumb2Assembler::ldr(Register rd, const Address& ad, Condition cond) {
   EmitLoadStore(cond, true, false, false, false, rd, ad);
 }
@@ -2557,10 +2599,8 @@
 
 
 void Thumb2Assembler::dmb(DmbOptions flavor) {
-#if ANDROID_SMP != 0
   int32_t encoding = 0xf3bf8f50;  // dmb in T1 encoding.
   Emit32(encoding | flavor);
-#endif
 }
 
 
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 9ccdef7..cfa251a 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -118,6 +118,10 @@
   void sdiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
   void udiv(Register rd, Register rn, Register rm, Condition cond = AL) OVERRIDE;
 
+  // Bit field extract instructions.
+  void sbfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
+  void ubfx(Register rd, Register rn, uint32_t lsb, uint32_t width, Condition cond = AL) OVERRIDE;
+
   // Load/store instructions.
   void ldr(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
   void str(Register rd, const Address& ad, Condition cond = AL) OVERRIDE;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
new file mode 100644
index 0000000..65d6d45
--- /dev/null
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "assembler_thumb2.h"
+
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
+
+namespace art {
+
+class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler,
+                                                 arm::Register, arm::SRegister,
+                                                 uint32_t> {
+ protected:
+  std::string GetArchitectureString() OVERRIDE {
+    return "arm";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    return " -mthumb";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -marm --no-show-raw-insn";
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.insert(end(registers_),
+                        {  // NOLINT(whitespace/braces)
+                          new arm::Register(arm::R0),
+                          new arm::Register(arm::R1),
+                          new arm::Register(arm::R2),
+                          new arm::Register(arm::R3),
+                          new arm::Register(arm::R4),
+                          new arm::Register(arm::R5),
+                          new arm::Register(arm::R6),
+                          new arm::Register(arm::R7),
+                          new arm::Register(arm::R8),
+                          new arm::Register(arm::R9),
+                          new arm::Register(arm::R10),
+                          new arm::Register(arm::R11),
+                          new arm::Register(arm::R12),
+                          new arm::Register(arm::R13),
+                          new arm::Register(arm::R14),
+                          new arm::Register(arm::R15)
+                        });
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+  }
+
+  std::vector<arm::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
+    return imm_value;
+  }
+
+ private:
+  std::vector<arm::Register*> registers_;
+};
+
+
+TEST_F(AssemblerThumb2Test, Toolchain) {
+  EXPECT_TRUE(CheckTools());
+}
+
+
+TEST_F(AssemblerThumb2Test, Sbfx) {
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 1);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 8);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 16);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 0, 32);
+
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 1);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 8);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 16);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 8, 24);
+
+  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 1);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 8);
+  GetAssembler()->sbfx(arm::R0, arm::R1, 16, 16);
+
+  GetAssembler()->sbfx(arm::R0, arm::R1, 31, 1);
+
+  const char* expected =
+      "sbfx r0, r1, #0, #1\n"
+      "sbfx r0, r1, #0, #8\n"
+      "sbfx r0, r1, #0, #16\n"
+      "sbfx r0, r1, #0, #32\n"
+
+      "sbfx r0, r1, #8, #1\n"
+      "sbfx r0, r1, #8, #8\n"
+      "sbfx r0, r1, #8, #16\n"
+      "sbfx r0, r1, #8, #24\n"
+
+      "sbfx r0, r1, #16, #1\n"
+      "sbfx r0, r1, #16, #8\n"
+      "sbfx r0, r1, #16, #16\n"
+
+      "sbfx r0, r1, #31, #1\n";
+  DriverStr(expected, "sbfx");
+}
+
+TEST_F(AssemblerThumb2Test, Ubfx) {
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 1);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 8);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 16);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 0, 32);
+
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 1);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 8);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 16);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 8, 24);
+
+  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 1);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 8);
+  GetAssembler()->ubfx(arm::R0, arm::R1, 16, 16);
+
+  GetAssembler()->ubfx(arm::R0, arm::R1, 31, 1);
+
+  const char* expected =
+      "ubfx r0, r1, #0, #1\n"
+      "ubfx r0, r1, #0, #8\n"
+      "ubfx r0, r1, #0, #16\n"
+      "ubfx r0, r1, #0, #32\n"
+
+      "ubfx r0, r1, #8, #1\n"
+      "ubfx r0, r1, #8, #8\n"
+      "ubfx r0, r1, #8, #16\n"
+      "ubfx r0, r1, #8, #24\n"
+
+      "ubfx r0, r1, #16, #1\n"
+      "ubfx r0, r1, #16, #8\n"
+      "ubfx r0, r1, #16, #16\n"
+
+      "ubfx r0, r1, #31, #1\n";
+  DriverStr(expected, "ubfx");
+}
+
+}  // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 02011b8..390f2ea 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -476,9 +476,7 @@
 
 void Arm64Assembler::MemoryBarrier(ManagedRegister m_scratch ATTRIBUTE_UNUSED) {
   // TODO: Should we check that m_scratch is IP? - see arm.
-#if ANDROID_SMP != 0
   ___ Dmb(vixl::InnerShareable, vixl::BarrierAll);
-#endif
 }
 
 void Arm64Assembler::SignExtend(ManagedRegister mreg, size_t size) {
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index ad7e98d..67711e3 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -19,16 +19,16 @@
 
 #include <vector>
 
+#include "arch/instruction_set.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "arm/constants_arm.h"
-#include "mips/constants_mips.h"
-#include "x86/constants_x86.h"
-#include "x86_64/constants_x86_64.h"
-#include "instruction_set.h"
 #include "managed_register.h"
 #include "memory_region.h"
+#include "mips/constants_mips.h"
 #include "offsets.h"
+#include "x86/constants_x86.h"
+#include "x86_64/constants_x86_64.h"
 
 namespace art {
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 91237ae..9d3fa01 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -29,21 +29,31 @@
 
 namespace art {
 
+// Helper for a constexpr string length.
+constexpr size_t ConstexprStrLen(char const* str, size_t count = 0) {
+  return ('\0' == str[0]) ? count : ConstexprStrLen(str+1, count+1);
+}
+
 // Use a global static variable to keep the same name for all test data. Else we'll just spam the
 // temp directory.
 static std::string tmpnam_;
 
-template<typename Ass, typename Reg, typename Imm>
+template<typename Ass, typename Reg, typename FPReg, typename Imm>
 class AssemblerTest : public testing::Test {
  public:
+  enum class RegisterView {  // private
+    kUsePrimaryName,
+    kUseSecondaryName
+  };
+
   Ass* GetAssembler() {
     return assembler_.get();
   }
 
-  typedef std::string (*TestFn)(Ass* assembler);
+  typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
 
   void DriverFn(TestFn f, std::string test_name) {
-    Driver(f(assembler_.get()), test_name);
+    Driver(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
@@ -52,116 +62,114 @@
   }
 
   std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
-    const std::vector<Reg*> registers = GetRegisters();
-    std::string str;
-    for (auto reg : registers) {
-      (assembler_.get()->*f)(*reg);
-      std::string base = fmt;
+    return RepeatTemplatedRegister<Reg>(f,
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
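+
+  // Note: in the Repeat* naming scheme, an upper-case letter selects the primary register view
+  // (e.g., rax on x86-64) and a lower-case letter the secondary view (e.g., eax).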
 
-      size_t reg_index = base.find("{reg}");
-      if (reg_index != std::string::npos) {
-        std::ostringstream sreg;
-        sreg << *reg;
-        std::string reg_string = sreg.str();
-        base.replace(reg_index, 5, reg_string);
-      }
-
-      if (str.size() > 0) {
-        str += "\n";
-      }
-      str += base;
-    }
-    // Add a newline at the end.
-    str += "\n";
-    return str;
+  std::string Repeatr(void (Ass::*f)(Reg), std::string fmt) {
+    return RepeatTemplatedRegister<Reg>(f,
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        fmt);
   }
 
   std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
-    const std::vector<Reg*> registers = GetRegisters();
-    std::string str;
-    for (auto reg1 : registers) {
-      for (auto reg2 : registers) {
-        (assembler_.get()->*f)(*reg1, *reg2);
-        std::string base = fmt;
+    return RepeatTemplatedRegisters<Reg, Reg>(f,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
 
-        size_t reg1_index = base.find("{reg1}");
-        if (reg1_index != std::string::npos) {
-          std::ostringstream sreg;
-          sreg << *reg1;
-          std::string reg_string = sreg.str();
-          base.replace(reg1_index, 6, reg_string);
-        }
+  std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+    return RepeatTemplatedRegisters<Reg, Reg>(f,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        fmt);
+  }
 
-        size_t reg2_index = base.find("{reg2}");
-        if (reg2_index != std::string::npos) {
-          std::ostringstream sreg;
-          sreg << *reg2;
-          std::string reg_string = sreg.str();
-          base.replace(reg2_index, 6, reg_string);
-        }
-
-        if (str.size() > 0) {
-          str += "\n";
-        }
-        str += base;
-      }
-    }
-    // Add a newline at the end.
-    str += "\n";
-    return str;
+  std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+    return RepeatTemplatedRegisters<Reg, Reg>(f,
+        GetRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        fmt);
   }
 
   std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
-    const std::vector<Reg*> registers = GetRegisters();
-    std::string str;
-    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
-    for (auto reg : registers) {
-      for (int64_t imm : imms) {
-        Imm new_imm = CreateImmediate(imm);
-        (assembler_.get()->*f)(*reg, new_imm);
-        std::string base = fmt;
-
-        size_t reg_index = base.find("{reg}");
-        if (reg_index != std::string::npos) {
-          std::ostringstream sreg;
-          sreg << *reg;
-          std::string reg_string = sreg.str();
-          base.replace(reg_index, 5, reg_string);
-        }
-
-        size_t imm_index = base.find("{imm}");
-        if (imm_index != std::string::npos) {
-          std::ostringstream sreg;
-          sreg << imm;
-          std::string imm_string = sreg.str();
-          base.replace(imm_index, 5, imm_string);
-        }
-
-        if (str.size() > 0) {
-          str += "\n";
-        }
-        str += base;
-      }
-    }
-    // Add a newline at the end.
-    str += "\n";
-    return str;
+    return RepeatRegisterImm<RegisterView::kUsePrimaryName>(f, imm_bytes, fmt);
   }
 
-  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+    return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
+  }
+
+  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
+    return RepeatTemplatedRegisters<FPReg, FPReg>(f,
+                                                  GetFPRegisters(),
+                                                  GetFPRegisters(),
+                                                  &AssemblerTest::GetFPRegName,
+                                                  &AssemblerTest::GetFPRegName,
+                                                  fmt);
+  }
+
+  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+    return RepeatTemplatedRegisters<FPReg, Reg>(f,
+        GetFPRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetFPRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
+  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+    return RepeatTemplatedRegisters<FPReg, Reg>(f,
+        GetFPRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetFPRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        fmt);
+  }
+
+  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+    return RepeatTemplatedRegisters<Reg, FPReg>(f,
+        GetRegisters(),
+        GetFPRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetFPRegName,
+        fmt);
+  }
+
+  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+    return RepeatTemplatedRegisters<Reg, FPReg>(f,
+        GetRegisters(),
+        GetFPRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+        &AssemblerTest::GetFPRegName,
+        fmt);
+  }
+
+  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt,
+                      bool as_uint = false) {
     std::string str;
-    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
+    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes, as_uint);
     for (int64_t imm : imms) {
       Imm new_imm = CreateImmediate(imm);
       (assembler_.get()->*f)(new_imm);
       std::string base = fmt;
 
-      size_t imm_index = base.find("{imm}");
+      size_t imm_index = base.find(IMM_TOKEN);
       if (imm_index != std::string::npos) {
         std::ostringstream sreg;
         sreg << imm;
         std::string imm_string = sreg.str();
-        base.replace(imm_index, 5, imm_string);
+        base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
       }
 
       if (str.size() > 0) {
@@ -200,7 +208,24 @@
     return true;
   }
 
+  // The following functions are public so that TestFn can use them...
+
+  virtual std::vector<Reg*> GetRegisters() = 0;
+
+  virtual std::vector<FPReg*> GetFPRegisters() {
+    UNIMPLEMENTED(FATAL) << "Architecture does not support floating-point registers";
+    UNREACHABLE();
+  }
+
+  // Secondary register names give an alternate view of a register, e.g., the 32-bit name of a
+  // 64-bit register on x86-64 (eax for rax).
+  virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
+    UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers";
+    UNREACHABLE();
+  }
+
  protected:
+  explicit AssemblerTest() {}
+
   void SetUp() OVERRIDE {
     assembler_.reset(new Ass());
 
@@ -219,8 +244,6 @@
   // Override this to set up any architecture-specific things, e.g., register vectors.
   virtual void SetUpHelpers() {}
 
-  virtual std::vector<Reg*> GetRegisters() = 0;
-
   // Get the typically used name for this architecture, e.g., aarch64, x86_64, ...
   virtual std::string GetArchitectureString() = 0;
 
@@ -305,23 +328,41 @@
   }
 
   // Create a couple of immediate values up to the number of bytes given.
-  virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes) {
+  virtual std::vector<int64_t> CreateImmediateValues(size_t imm_bytes, bool as_uint = false) {
     std::vector<int64_t> res;
     res.push_back(0);
-    res.push_back(-1);
+    if (!as_uint) {
+      res.push_back(-1);
+    } else {
+      res.push_back(0xFF);
+    }
     res.push_back(0x12);
     if (imm_bytes >= 2) {
       res.push_back(0x1234);
-      res.push_back(-0x1234);
+      if (!as_uint) {
+        res.push_back(-0x1234);
+      } else {
+        res.push_back(0xFFFF);
+      }
       if (imm_bytes >= 4) {
         res.push_back(0x12345678);
-        res.push_back(-0x12345678);
+        if (!as_uint) {
+          res.push_back(-0x12345678);
+        } else {
+          res.push_back(0xFFFFFFFF);
+        }
         if (imm_bytes >= 6) {
           res.push_back(0x123456789ABC);
-          res.push_back(-0x123456789ABC);
+          if (!as_uint) {
+            res.push_back(-0x123456789ABC);
+          }
           if (imm_bytes >= 8) {
             res.push_back(0x123456789ABCDEF0);
-            res.push_back(-0x123456789ABCDEF0);
+            if (!as_uint) {
+              res.push_back(-0x123456789ABCDEF0);
+            } else {
+              res.push_back(0xFFFFFFFFFFFFFFFF);
+            }
           }
         }
       }
@@ -332,7 +373,127 @@
   // Create an immediate from the specific value.
   virtual Imm CreateImmediate(int64_t imm_value) = 0;
 
+  template <typename RegType>
+  std::string RepeatTemplatedRegister(void (Ass::*f)(RegType),
+                                      const std::vector<RegType*> registers,
+                                      std::string (AssemblerTest::*GetName)(const RegType&),
+                                      std::string fmt) {
+    std::string str;
+    for (auto reg : registers) {
+      (assembler_.get()->*f)(*reg);
+      std::string base = fmt;
+
+      std::string reg_string = (this->*GetName)(*reg);
+      size_t reg_index;
+      if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
+        base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
+      }
+
+      if (str.size() > 0) {
+        str += "\n";
+      }
+      str += base;
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
+  template <typename Reg1, typename Reg2>
+  std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2),
+                                       const std::vector<Reg1*> reg1_registers,
+                                       const std::vector<Reg2*> reg2_registers,
+                                       std::string (AssemblerTest::*GetName1)(const Reg1&),
+                                       std::string (AssemblerTest::*GetName2)(const Reg2&),
+                                       std::string fmt) {
+    std::string str;
+    for (auto reg1 : reg1_registers) {
+      for (auto reg2 : reg2_registers) {
+        (assembler_.get()->*f)(*reg1, *reg2);
+        std::string base = fmt;
+
+        std::string reg1_string = (this->*GetName1)(*reg1);
+        size_t reg1_index;
+        while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) {
+          base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string);
+        }
+
+        std::string reg2_string = (this->*GetName2)(*reg2);
+        size_t reg2_index;
+        while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) {
+          base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
  private:
+  template <RegisterView kRegView>
+  std::string GetRegName(const Reg& reg) {
+    std::ostringstream sreg;
+    switch (kRegView) {
+      case RegisterView::kUsePrimaryName:
+        sreg << reg;
+        break;
+
+      case RegisterView::kUseSecondaryName:
+        sreg << GetSecondaryRegisterName(reg);
+        break;
+    }
+    return sreg.str();
+  }
+
+  std::string GetFPRegName(const FPReg& reg) {
+    std::ostringstream sreg;
+    sreg << reg;
+    return sreg.str();
+  }
+
+  template <RegisterView kRegView>
+  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes,
+                                  std::string fmt) {
+    const std::vector<Reg*> registers = GetRegisters();
+    std::string str;
+    std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
+    for (auto reg : registers) {
+      for (int64_t imm : imms) {
+        Imm new_imm = CreateImmediate(imm);
+        (assembler_.get()->*f)(*reg, new_imm);
+        std::string base = fmt;
+
+        std::string reg_string = GetRegName<kRegView>(*reg);
+        size_t reg_index;
+        while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) {
+          base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string);
+        }
+
+        size_t imm_index = base.find(IMM_TOKEN);
+        if (imm_index != std::string::npos) {
+          std::ostringstream sreg;
+          sreg << imm;
+          std::string imm_string = sreg.str();
+          base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
+        }
+
+        if (str.size() > 0) {
+          str += "\n";
+        }
+        str += base;
+      }
+    }
+    // Add a newline at the end.
+    str += "\n";
+    return str;
+  }
+
   // Driver() assembles and compares the results. If the results are not equal and we have a
   // disassembler, disassemble both and check whether they have the same mnemonics (in which case
   // we just warn).
@@ -394,10 +555,19 @@
 
     std::vector<std::string> args;
 
+    // Encapsulate the whole command line in a single string passed to
+    // the shell, so that GetAssemblerCommand() may contain arguments
+    // in addition to the program name.
     args.push_back(GetAssemblerCommand());
     args.push_back("-o");
     args.push_back(to_file);
     args.push_back(from_file);
+    std::string cmd = Join(args, ' ');
+
+    args.clear();
+    args.push_back("/bin/sh");
+    args.push_back("-c");
+    args.push_back(cmd);
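+
+    // E.g., if GetAssemblerCommand() were "as --64" (a hypothetical value), exec'ing it directly
+    // as argv[0] would fail; "/bin/sh -c 'as --64 -o <to> <from>'" lets the shell split it.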
 
     return Exec(args, error_msg);
   }
@@ -414,6 +584,9 @@
     std::string error_msg;
     std::vector<std::string> args;
 
+    // Encapsulate the whole command line in a single string passed to
+    // the shell, so that GetObjdumpCommand() may contain arguments
+    // in addition to the program name.
     args.push_back(GetObjdumpCommand());
     args.push_back(file);
     args.push_back(">");
@@ -477,12 +650,12 @@
 
     bool result = CompareFiles(data_name + ".dis", as_name + ".dis");
 
-    if (result) {
-      std::remove(data_name.c_str());
-      std::remove(as_name.c_str());
-      std::remove((data_name + ".dis").c_str());
-      std::remove((as_name + ".dis").c_str());
-    }
+    // If you want to take a look at the differences between the ART assembler and GCC, comment
+    // out the removal code.
+    std::remove(data_name.c_str());
+    std::remove(as_name.c_str());
+    std::remove((data_name + ".dis").c_str());
+    std::remove((as_name + ".dis").c_str());
 
     return result;
   }
@@ -490,6 +663,9 @@
   bool DisassembleBinary(std::string file, std::string* error_msg) {
     std::vector<std::string> args;
 
+    // Encapsulate the whole command line in a single string passed to
+    // the shell, so that GetDisassembleCommand() may contain arguments
+    // in addition to the program name.
     args.push_back(GetDisassembleCommand());
     args.push_back(file);
     args.push_back("| sed -n \'/<.data>/,$p\' | sed -e \'s/.*://\'");
@@ -686,6 +862,13 @@
     return tmpnam_;
   }
 
+  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
+
+  static constexpr const char* REG_TOKEN = "{reg}";
+  static constexpr const char* REG1_TOKEN = "{reg1}";
+  static constexpr const char* REG2_TOKEN = "{reg2}";
+  static constexpr const char* IMM_TOKEN = "{imm}";
+
   std::unique_ptr<Ass> assembler_;
 
   std::string resolved_assembler_cmd_;
@@ -694,7 +877,7 @@
 
   std::string android_data_;
 
-  static constexpr size_t OBJDUMP_SECTION_LINE_MIN_TOKENS = 6;
+  DISALLOW_COPY_AND_ASSIGN(AssemblerTest);
 };
 
 }  // namespace art
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
index e762f7d..c348f2c 100644
--- a/compiler/utils/stack_checks.h
+++ b/compiler/utils/stack_checks.h
@@ -17,7 +17,7 @@
 #ifndef ART_COMPILER_UTILS_STACK_CHECKS_H_
 #define ART_COMPILER_UTILS_STACK_CHECKS_H_
 
-#include "instruction_set.h"
+#include "arch/instruction_set.h"
 
 namespace art {
 
@@ -34,8 +34,7 @@
 // stack overflow check on method entry.
 //
 // A frame is considered large when it's above kLargeFrameSize.
-static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa) {
-  UNUSED(isa);
+static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa ATTRIBUTE_UNUSED) {
   return size >= kLargeFrameSize;
 }
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 8ebb40e..afa4a3b 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1830,9 +1830,7 @@
 }
 
 void X86Assembler::MemoryBarrier(ManagedRegister) {
-#if ANDROID_SMP != 0
   mfence();
-#endif
 }
 
 void X86Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 5d1c9af..8c428f4 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -345,7 +345,7 @@
 void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
-  EmitOptionalRex32(dst, src);
+  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
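+  // (In the MR form the ModRM reg field holds the source, hence the swapped REX operands.)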
   EmitUint8(0x0F);
   EmitUint8(0x11);
   EmitXmmRegisterOperand(src.LowBits(), dst);
@@ -505,7 +505,7 @@
 void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF2);
-  EmitOptionalRex32(dst, src);
+  EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
   EmitUint8(0x0F);
   EmitUint8(0x11);
   EmitXmmRegisterOperand(src.LowBits(), dst);
@@ -856,17 +856,46 @@
 
 void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(dst, src);
+  // There is a short version for rax.
+  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping don't
+  // work.
+  const bool src_rax = src.AsRegister() == RAX;
+  const bool dst_rax = dst.AsRegister() == RAX;
+  if (src_rax || dst_rax) {
+    EmitOptionalRex32(src_rax ? dst : src);
+    EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
+    return;
+  }
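+  // (E.g., xchgl %eax, %ecx assembles to the single byte 0x91 = 0x90 + 1.)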
+
+  // General case.
+  EmitOptionalRex32(src, dst);
   EmitUint8(0x87);
-  EmitRegisterOperand(dst.LowBits(), src.LowBits());
+  EmitRegisterOperand(src.LowBits(), dst.LowBits());
 }
 
 
 void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitRex64(dst, src);
+  // There is a short version for rax.
+  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping don't
+  // work.
+  const bool src_rax = src.AsRegister() == RAX;
+  const bool dst_rax = dst.AsRegister() == RAX;
+  if (src_rax || dst_rax) {
+    // If src == dst (both rax), emit a nop instead.
+    if (src_rax && dst_rax) {
+      EmitUint8(0x90);
+    } else {
+      EmitRex64(src_rax ? dst : src);
+      EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
+    }
+    return;
+  }
+
+  // General case.
+  EmitRex64(src, dst);
   EmitUint8(0x87);
-  EmitOperand(dst.LowBits(), Operand(src));
+  EmitRegisterOperand(src.LowBits(), dst.LowBits());
 }
 
 
@@ -909,6 +938,21 @@
 }
 
 
+void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(reg, address);
+  EmitUint8(0x39);
+  EmitOperand(reg.LowBits(), address);
+}
+
+
+void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(address);
+  EmitComplex(7, address, imm);
+}
+
+
 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg0, reg1);
@@ -933,6 +977,14 @@
 }
 
 
+void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
+  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(address);
+  EmitComplex(7, address, imm);
+}
+
+
 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -949,21 +1001,6 @@
 }
 
 
-void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(reg, address);
-  EmitUint8(0x39);
-  EmitOperand(reg.LowBits(), address);
-}
-
-
-void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(address);
-  EmitComplex(7, address, imm);
-}
-
-
 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg1, reg2);
@@ -998,6 +1035,14 @@
 }
 
 
+void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg1, reg2);
+  EmitUint8(0x85);
+  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
+}
+
+
 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitRex64(reg);
@@ -1267,6 +1312,13 @@
 }
 
 
+void X86_64Assembler::cqo() {
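+  // Sign-extend rax into rdx:rax; the 64-bit counterpart of cdq, used before idivq.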
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64();
+  EmitUint8(0x99);
+}
+
+
 void X86_64Assembler::idivl(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(reg);
@@ -1275,6 +1327,14 @@
 }
 
 
+void X86_64Assembler::idivq(CpuRegister reg) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0xF7);
+  EmitUint8(0xF8 | reg.LowBits());
+}
+
+
 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -1283,13 +1343,25 @@
   EmitOperand(dst.LowBits(), Operand(src));
 }
 
-
 void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // imull only supports 32b immediate.
+
   EmitOptionalRex32(reg, reg);
-  EmitUint8(0x69);
-  EmitOperand(reg.LowBits(), Operand(reg));
-  EmitImmediate(imm);
+
+  // See whether imm can be represented as a sign-extended 8bit value.
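+  // E.g., $0x7F fits the short 0x6B form (one immediate byte), while $0x40000 needs the 0x69
+  // form with a full 32-bit immediate.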
+  int32_t v32 = static_cast<int32_t>(imm.value());
+  if (IsInt32(8, v32)) {
+    // Sign-extension works.
+    EmitUint8(0x6B);
+    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
+  } else {
+    // Not representable, use full immediate.
+    EmitUint8(0x69);
+    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitImmediate(imm);
+  }
 }
 
 
@@ -1314,10 +1386,22 @@
 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
-  EmitRex64(reg);
-  EmitUint8(0x69);
-  EmitOperand(reg.LowBits(), Operand(reg));
-  EmitImmediate(imm);
+
+  EmitRex64(reg, reg);
+
+  // See whether imm can be represented as a sign-extended 8bit value.
+  int64_t v64 = imm.value();
+  if (IsInt64(8, v64)) {
+    // Sign-extension works.
+    EmitUint8(0x6B);
+    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
+  } else {
+    // Not representable, use full immediate.
+    EmitUint8(0x69);
+    EmitOperand(reg.LowBits(), Operand(reg));
+    EmitImmediate(imm);
+  }
 }
 
 
@@ -1728,6 +1812,8 @@
   CHECK(imm.is_int8());
   if (wide) {
     EmitRex64(reg);
+  } else {
+    EmitOptionalRex32(reg);
   }
   if (imm.value() == 1) {
     EmitUint8(0xD1);
@@ -1745,6 +1831,7 @@
                                        CpuRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter.AsRegister(), RCX);
+  EmitOptionalRex32(operand);
   EmitUint8(0xD3);
   EmitOperand(reg_or_opcode, Operand(operand));
 }
@@ -1820,10 +1907,20 @@
   }
 }
 
+void X86_64Assembler::EmitRex64() {
+  EmitOptionalRex(false, true, false, false, false);
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister reg) {
   EmitOptionalRex(false, true, false, false, reg.NeedsRex());
 }
 
+void X86_64Assembler::EmitRex64(const Operand& operand) {
+  uint8_t rex = operand.rex();
+  rex |= 0x48;  // OR the fixed 0100 REX pattern plus the W bit onto the operand's R/X/B bits.
+  EmitUint8(rex);
+}
+
 void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
 }
@@ -2274,9 +2371,7 @@
 }
 
 void X86_64Assembler::MemoryBarrier(ManagedRegister) {
-#if ANDROID_SMP != 0
   mfence();
-#endif
 }
 
 void X86_64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 285b4cf..4dd70e2 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -328,17 +328,17 @@
   void divsd(XmmRegister dst, XmmRegister src);
   void divsd(XmmRegister dst, const Address& src);
 
-  void cvtsi2ss(XmmRegister dst, CpuRegister src);
-  void cvtsi2sd(XmmRegister dst, CpuRegister src);
+  void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
+  void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
 
-  void cvtss2si(CpuRegister dst, XmmRegister src);
+  void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtss2sd(XmmRegister dst, XmmRegister src);
 
-  void cvtsd2si(CpuRegister dst, XmmRegister src);
+  void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtsd2ss(XmmRegister dst, XmmRegister src);
 
-  void cvttss2si(CpuRegister dst, XmmRegister src);
-  void cvttsd2si(CpuRegister dst, XmmRegister src);
+  void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
+  void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
 
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
@@ -390,10 +390,12 @@
   void cmpq(CpuRegister reg0, CpuRegister reg1);
   void cmpq(CpuRegister reg0, const Immediate& imm);
   void cmpq(CpuRegister reg0, const Address& address);
+  void cmpq(const Address& address, const Immediate& imm);
 
   void testl(CpuRegister reg1, CpuRegister reg2);
   void testl(CpuRegister reg, const Immediate& imm);
 
+  void testq(CpuRegister reg1, CpuRegister reg2);
   void testq(CpuRegister reg, const Address& address);
 
   void andl(CpuRegister dst, const Immediate& imm);
@@ -432,8 +434,10 @@
   void subq(CpuRegister dst, const Address& address);
 
   void cdq();
+  void cqo();
 
   void idivl(CpuRegister reg);
+  void idivq(CpuRegister reg);
 
   void imull(CpuRegister dst, CpuRegister src);
   void imull(CpuRegister reg, const Immediate& imm);
@@ -669,7 +673,9 @@
   void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
 
   // Emit a REX.W prefix plus necessary register bit encodings.
+  void EmitRex64();
   void EmitRex64(CpuRegister reg);
+  void EmitRex64(const Operand& operand);
   void EmitRex64(CpuRegister dst, CpuRegister src);
   void EmitRex64(CpuRegister dst, const Operand& operand);
   void EmitRex64(XmmRegister dst, CpuRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 0e8ea5b..af389e6 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -16,8 +16,13 @@
 
 #include "assembler_x86_64.h"
 
+#include <inttypes.h>
+#include <map>
+#include <random>
+
 #include "base/stl_util.h"
 #include "utils/assembler_test.h"
+#include "utils.h"
 
 namespace art {
 
@@ -30,8 +35,88 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
+#ifdef HAVE_ANDROID_OS
+static constexpr size_t kRandomIterations = 1000;  // Devices might be puny; don't stress them...
+#else
+static constexpr size_t kRandomIterations = 100000;  // Hosts are pretty powerful.
+#endif
+
+TEST(AssemblerX86_64, SignExtension) {
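+  // IsInt32(8, x) / IsInt64(8, x) hold exactly when x fits an 8-bit two's-complement value,
+  // i.e., -128 <= x <= 127; this is what selects the short imm8 imul encodings above.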
+  // 32bit.
+  for (int32_t i = 0; i < 128; i++) {
+    EXPECT_TRUE(IsInt32(8, i)) << i;
+  }
+  for (int32_t i = 128; i <= 255; i++) {
+    EXPECT_FALSE(IsInt32(8, i)) << i;
+  }
+  // Do some higher ones randomly.
+  std::random_device rd;
+  std::default_random_engine e1(rd());
+  std::uniform_int_distribution<int32_t> uniform_dist(256, INT32_MAX);
+  for (size_t i = 0; i < kRandomIterations; i++) {
+    int32_t value = uniform_dist(e1);
+    EXPECT_FALSE(IsInt32(8, value)) << value;
+  }
+
+  // Negative ones.
+  for (int32_t i = -1; i >= -128; i--) {
+    EXPECT_TRUE(IsInt32(8, i)) << i;
+  }
+
+  for (int32_t i = -129; i > -256; i--) {
+    EXPECT_FALSE(IsInt32(8, i)) << i;
+  }
+
+  // Do some lower ones randomly.
+  std::uniform_int_distribution<int32_t> uniform_dist2(INT32_MIN, -256);
+  for (size_t i = 0; i < 100; i++) {
+    int32_t value = uniform_dist2(e1);
+    EXPECT_FALSE(IsInt32(8, value)) << value;
+  }
+
+  // 64bit.
+  for (int64_t i = 0; i < 128; i++) {
+    EXPECT_TRUE(IsInt64(8, i)) << i;
+  }
+  for (int32_t i = 128; i <= 255; i++) {
+    EXPECT_FALSE(IsInt64(8, i)) << i;
+  }
+  // Do some higher ones randomly.
+  std::uniform_int_distribution<int64_t> uniform_dist3(256, INT64_MAX);
+  for (size_t i = 0; i < 100; i++) {
+    int64_t value = uniform_dist3(e1);
+    EXPECT_FALSE(IsInt64(8, value)) << value;
+  }
+
+  // Negative ones.
+  for (int64_t i = -1; i >= -128; i--) {
+    EXPECT_TRUE(IsInt64(8, i)) << i;
+  }
+
+  for (int64_t i = -129; i > -256; i--) {
+    EXPECT_FALSE(IsInt64(8, i)) << i;
+  }
+
+  // Do some lower ones randomly.
+  std::uniform_int_distribution<int64_t> uniform_dist4(INT64_MIN, -256);
+  for (size_t i = 0; i < kRandomIterations; i++) {
+    int64_t value = uniform_dist4(e1);
+    EXPECT_FALSE(IsInt64(8, value)) << value;
+  }
+}
+
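+// std::map needs a strict weak ordering on its key type; CpuRegister has no operator<, so
+// provide a comparator on the underlying register number.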
+struct X86_64CpuRegisterCompare {
+  bool operator()(const x86_64::CpuRegister& a, const x86_64::CpuRegister& b) const {
+    return a.AsRegister() < b.AsRegister();
+  }
+};
+
 class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64::CpuRegister,
-                                                 x86_64::Immediate> {
+                                                 x86_64::XmmRegister, x86_64::Immediate> {
+ public:
+  typedef AssemblerTest<x86_64::X86_64Assembler, x86_64::CpuRegister,
+                        x86_64::XmmRegister, x86_64::Immediate> Base;
+
  protected:
   // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
   std::string GetArchitectureString() OVERRIDE {
@@ -60,24 +145,71 @@
       registers_.push_back(new x86_64::CpuRegister(x86_64::R13));
       registers_.push_back(new x86_64::CpuRegister(x86_64::R14));
       registers_.push_back(new x86_64::CpuRegister(x86_64::R15));
+
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "eax");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "ebx");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "ecx");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDX), "edx");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBP), "ebp");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSP), "esp");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RSI), "esi");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RDI), "edi");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R8), "r8d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R9), "r9d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R10), "r10d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R11), "r11d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R12), "r12d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14d");
+      secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15d");
+
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM3));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM4));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM5));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM6));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM7));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM8));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM9));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM10));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM11));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM12));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM13));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM14));
+      fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM15));
     }
   }
 
   void TearDown() OVERRIDE {
     AssemblerTest::TearDown();
     STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
   }
 
   std::vector<x86_64::CpuRegister*> GetRegisters() OVERRIDE {
     return registers_;
   }
 
+  std::vector<x86_64::XmmRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
   x86_64::Immediate CreateImmediate(int64_t imm_value) OVERRIDE {
     return x86_64::Immediate(imm_value);
   }
 
+  std::string GetSecondaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE {
+    CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end());
+    return secondary_register_names_[reg];
+  }
+
  private:
   std::vector<x86_64::CpuRegister*> registers_;
+  std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_;
+
+  std::vector<x86_64::XmmRegister*> fp_registers_;
 };
 
 
@@ -94,7 +226,6 @@
   DriverStr(RepeatI(&x86_64::X86_64Assembler::pushq, 4U, "pushq ${imm}"), "pushqi");
 }
 
-
 TEST_F(AssemblerX86_64Test, MovqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::movq, "movq %{reg2}, %{reg1}"), "movq");
 }
@@ -103,6 +234,13 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::movq, 8U, "movq ${imm}, %{reg}"), "movqi");
 }
 
+TEST_F(AssemblerX86_64Test, MovlRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::movl, "mov %{reg2}, %{reg1}"), "movl");
+}
+
+TEST_F(AssemblerX86_64Test, MovlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::movl, 4U, "mov ${imm}, %{reg}"), "movli");
+}
 
 TEST_F(AssemblerX86_64Test, AddqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::addq, "addq %{reg2}, %{reg1}"), "addq");
@@ -112,10 +250,36 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::addq, 4U, "addq ${imm}, %{reg}"), "addqi");
 }
 
+TEST_F(AssemblerX86_64Test, AddlRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::addl, "add %{reg2}, %{reg1}"), "addl");
+}
+
+TEST_F(AssemblerX86_64Test, AddlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli");
+}
+
 TEST_F(AssemblerX86_64Test, ImulqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::imulq, "imulq %{reg2}, %{reg1}"), "imulq");
 }
 
+TEST_F(AssemblerX86_64Test, ImulqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::imulq, 4U, "imulq ${imm}, %{reg}, %{reg}"),
+            "imulqi");
+}
+
+TEST_F(AssemblerX86_64Test, ImullRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::imull, "imul %{reg2}, %{reg1}"), "imull");
+}
+
+TEST_F(AssemblerX86_64Test, ImullImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::imull, 4U, "imull ${imm}, %{reg}, %{reg}"),
+            "imulli");
+}
+
+TEST_F(AssemblerX86_64Test, Mull) {
+  DriverStr(Repeatr(&x86_64::X86_64Assembler::mull, "mull %{reg}"), "mull");
+}
+
 TEST_F(AssemblerX86_64Test, SubqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::subq, "subq %{reg2}, %{reg1}"), "subq");
 }
@@ -124,45 +288,178 @@
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::subq, 4U, "subq ${imm}, %{reg}"), "subqi");
 }
 
+TEST_F(AssemblerX86_64Test, SublRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::subl, "sub %{reg2}, %{reg1}"), "subl");
+}
+
+TEST_F(AssemblerX86_64Test, SublImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::subl, 4U, "sub ${imm}, %{reg}"), "subli");
+}
+
+// Shll only allows CL as the shift register.
+std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shll(*reg, shifter);
+    str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShllReg) {
+  DriverFn(&shll_fn, "shll");
+}
+
+TEST_F(AssemblerX86_64Test, ShllImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::shll, 1U, "shll ${imm}, %{reg}"), "shlli");
+}
+
+// Shrl only allows CL as the shift register.
+std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->shrl(*reg, shifter);
+    str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, ShrlReg) {
+  DriverFn(&shrl_fn, "shrl");
+}
+
+TEST_F(AssemblerX86_64Test, ShrlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::shrl, 1U, "shrl ${imm}, %{reg}"), "shrli");
+}
+
+// Sarl only allows CL as the shift register.
+std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
+
+  x86_64::CpuRegister shifter(x86_64::RCX);
+  for (auto reg : registers) {
+    assembler->sarl(*reg, shifter);
+    str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, SarlReg) {
+  DriverFn(&sarl_fn, "sarl");
+}
+
+TEST_F(AssemblerX86_64Test, SarlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::sarl, 1U, "sarl ${imm}, %{reg}"), "sarli");
+}
 
 TEST_F(AssemblerX86_64Test, CmpqRegs) {
   DriverStr(RepeatRR(&x86_64::X86_64Assembler::cmpq, "cmpq %{reg2}, %{reg1}"), "cmpq");
 }
 
+TEST_F(AssemblerX86_64Test, CmpqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::cmpq, 4U  /* cmpq only supports 32b imm */,
+                     "cmpq ${imm}, %{reg}"), "cmpqi");
+}
+
+TEST_F(AssemblerX86_64Test, CmplRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::cmpl, "cmp %{reg2}, %{reg1}"), "cmpl");
+}
+
+TEST_F(AssemblerX86_64Test, CmplImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::cmpl, 4U, "cmpl ${imm}, %{reg}"), "cmpli");
+}
+
+TEST_F(AssemblerX86_64Test, Testl) {
+  // Note: uses a different operand order than usual. This matches what GCC expects and, since
+  // testl is symmetric in its operands, has no impact on functional correctness.
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::testl, "testl %{reg1}, %{reg2}"), "testl");
+}
+
+TEST_F(AssemblerX86_64Test, Negq) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::negq, "negq %{reg}"), "negq");
+}
+
+TEST_F(AssemblerX86_64Test, Negl) {
+  DriverStr(Repeatr(&x86_64::X86_64Assembler::negl, "negl %{reg}"), "negl");
+}
+
+TEST_F(AssemblerX86_64Test, Notq) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::notq, "notq %{reg}"), "notq");
+}
+
+TEST_F(AssemblerX86_64Test, Notl) {
+  DriverStr(Repeatr(&x86_64::X86_64Assembler::notl, "notl %{reg}"), "notl");
+}
+
+TEST_F(AssemblerX86_64Test, AndqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::andq, "andq %{reg2}, %{reg1}"), "andq");
+}
+
+TEST_F(AssemblerX86_64Test, AndqImm) {
+  DriverStr(RepeatRI(&x86_64::X86_64Assembler::andq, 4U  /* andq only supports 32b imm */,
+                     "andq ${imm}, %{reg}"), "andqi");
+}
+
+TEST_F(AssemblerX86_64Test, AndlRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::andl, "andl %{reg2}, %{reg1}"), "andl");
+}
+
+TEST_F(AssemblerX86_64Test, AndlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::andl, 4U, "andl ${imm}, %{reg}"), "andli");
+}
+
+TEST_F(AssemblerX86_64Test, OrqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::orq, "orq %{reg2}, %{reg1}"), "orq");
+}
+
+TEST_F(AssemblerX86_64Test, OrlRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::orl, "orl %{reg2}, %{reg1}"), "orl");
+}
+
+TEST_F(AssemblerX86_64Test, OrlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::orl, 4U, "orl ${imm}, %{reg}"), "orli");
+}
+
+TEST_F(AssemblerX86_64Test, XorqRegs) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::xorq, "xorq %{reg2}, %{reg1}"), "xorq");
+}
 
 TEST_F(AssemblerX86_64Test, XorqImm) {
   DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
 }
 
-TEST_F(AssemblerX86_64Test, Movaps) {
-  GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::XmmRegister(x86_64::XMM8));
-  DriverStr("movaps %xmm8, %xmm0", "movaps");
+TEST_F(AssemblerX86_64Test, XorlRegs) {
+  DriverStr(Repeatrr(&x86_64::X86_64Assembler::xorl, "xor %{reg2}, %{reg1}"), "xorl");
 }
 
-TEST_F(AssemblerX86_64Test, Movd) {
-  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::R11));
-  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM0), x86_64::CpuRegister(x86_64::RAX));
-  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::R11));
-  GetAssembler()->movd(x86_64::XmmRegister(x86_64::XMM8), x86_64::CpuRegister(x86_64::RAX));
-  GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM0));
-  GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM0));
-  GetAssembler()->movd(x86_64::CpuRegister(x86_64::R11), x86_64::XmmRegister(x86_64::XMM8));
-  GetAssembler()->movd(x86_64::CpuRegister(x86_64::RAX), x86_64::XmmRegister(x86_64::XMM8));
-  const char* expected =
-    "movd %r11, %xmm0\n"
-    "movd %rax, %xmm0\n"
-    "movd %r11, %xmm8\n"
-    "movd %rax, %xmm8\n"
-    "movd %xmm0, %r11\n"
-    "movd %xmm0, %rax\n"
-    "movd %xmm8, %r11\n"
-    "movd %xmm8, %rax\n";
-  DriverStr(expected, "movd");
+TEST_F(AssemblerX86_64Test, XorlImm) {
+  DriverStr(Repeatri(&x86_64::X86_64Assembler::xorl, 4U, "xor ${imm}, %{reg}"), "xorli");
+}
+
+TEST_F(AssemblerX86_64Test, Xchgq) {
+  DriverStr(RepeatRR(&x86_64::X86_64Assembler::xchgq, "xchgq %{reg2}, %{reg1}"), "xchgq");
+}
+
+TEST_F(AssemblerX86_64Test, Xchgl) {
+  // Test is disabled because GCC generates 0x87 0xC0 for xchgl eax, eax; all other cases match.
+  // A likely reason: in 64-bit mode the single byte 0x90 is the architectural NOP and does not
+  // zero the upper half of rax, whereas a real xchgl eax, eax must zero-extend, so GCC keeps the
+  // explicit encoding. It does emit 0x90 for xchgq rax, rax, where the semantics coincide.
+  // DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl");
 }
 
 TEST_F(AssemblerX86_64Test, Movl) {
-  GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::CpuRegister(x86_64::R11));
-  GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::CpuRegister(x86_64::R11));
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
       x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
@@ -170,8 +467,6 @@
   GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::Address(
       x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
   const char* expected =
-    "movl %R11d, %R8d\n"
-    "movl %R11d, %EAX\n"
     "movl 0xc(%RDI,%RBX,4), %EAX\n"
     "movl 0xc(%RDI,%R9,4), %EAX\n"
     "movl 0xc(%RDI,%R9,4), %R8d\n";
@@ -186,17 +481,201 @@
   DriverStr(expected, "movw");
 }
 
-TEST_F(AssemblerX86_64Test, IMulImmediate) {
-  GetAssembler()->imull(x86_64::CpuRegister(x86_64::RAX), x86_64::Immediate(0x40000));
-  GetAssembler()->imull(x86_64::CpuRegister(x86_64::R8), x86_64::Immediate(0x40000));
-  const char* expected =
-    "imull $0x40000,%eax,%eax\n"
-    "imull $0x40000,%r8d,%r8d\n";
-  DriverStr(expected, "imul");
+TEST_F(AssemblerX86_64Test, Movsxd) {
+  DriverStr(RepeatRr(&x86_64::X86_64Assembler::movsxd, "movsxd %{reg2}, %{reg1}"), "movsxd");
+}
+
+///////////////////
+// FP Operations //
+///////////////////
+
+TEST_F(AssemblerX86_64Test, Movaps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps");
+}
+
+TEST_F(AssemblerX86_64Test, Movss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::movss, "movss %{reg2}, %{reg1}"), "movss");
+}
+
+TEST_F(AssemblerX86_64Test, Movsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::movsd, "movsd %{reg2}, %{reg1}"), "movsd");
+}
+
+TEST_F(AssemblerX86_64Test, Movd1) {
+  DriverStr(RepeatFR(&x86_64::X86_64Assembler::movd, "movd %{reg2}, %{reg1}"), "movd.1");
+}
+
+TEST_F(AssemblerX86_64Test, Movd2) {
+  DriverStr(RepeatRF(&x86_64::X86_64Assembler::movd, "movd %{reg2}, %{reg1}"), "movd.2");
+}
+
+TEST_F(AssemblerX86_64Test, Addss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::addss, "addss %{reg2}, %{reg1}"), "addss");
+}
+
+TEST_F(AssemblerX86_64Test, Addsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::addsd, "addsd %{reg2}, %{reg1}"), "addsd");
+}
+
+TEST_F(AssemblerX86_64Test, Subss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::subss, "subss %{reg2}, %{reg1}"), "subss");
+}
+
+TEST_F(AssemblerX86_64Test, Subsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::subsd, "subsd %{reg2}, %{reg1}"), "subsd");
+}
+
+TEST_F(AssemblerX86_64Test, Mulss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulss, "mulss %{reg2}, %{reg1}"), "mulss");
+}
+
+TEST_F(AssemblerX86_64Test, Mulsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulsd, "mulsd %{reg2}, %{reg1}"), "mulsd");
+}
+
+TEST_F(AssemblerX86_64Test, Divss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::divss, "divss %{reg2}, %{reg1}"), "divss");
+}
+
+TEST_F(AssemblerX86_64Test, Divsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::divsd, "divsd %{reg2}, %{reg1}"), "divsd");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsi2ss) {
+  DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsi2sd) {
+  DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2sd, "cvtsi2sd %{reg2}, %{reg1}"), "cvtsi2sd");
 }
 
 
-std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) {
+TEST_F(AssemblerX86_64Test, Cvtss2si) {
+  DriverStr(RepeatrF(&x86_64::X86_64Assembler::cvtss2si, "cvtss2si %{reg2}, %{reg1}"), "cvtss2si");
+}
+
+
+TEST_F(AssemblerX86_64Test, Cvtss2sd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtss2sd, "cvtss2sd %{reg2}, %{reg1}"), "cvtss2sd");
+}
+
+
+TEST_F(AssemblerX86_64Test, Cvtsd2si) {
+  DriverStr(RepeatrF(&x86_64::X86_64Assembler::cvtsd2si, "cvtsd2si %{reg2}, %{reg1}"), "cvtsd2si");
+}
+
+TEST_F(AssemblerX86_64Test, Cvttss2si) {
+  DriverStr(RepeatrF(&x86_64::X86_64Assembler::cvttss2si, "cvttss2si %{reg2}, %{reg1}"),
+            "cvttss2si");
+}
+
+TEST_F(AssemblerX86_64Test, Cvttsd2si) {
+  DriverStr(RepeatrF(&x86_64::X86_64Assembler::cvttsd2si, "cvttsd2si %{reg2}, %{reg1}"),
+            "cvttsd2si");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtsd2ss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss");
+}
+
+TEST_F(AssemblerX86_64Test, Cvtdq2pd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
+}
+
+TEST_F(AssemblerX86_64Test, Comiss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::comiss, "comiss %{reg2}, %{reg1}"), "comiss");
+}
+
+TEST_F(AssemblerX86_64Test, Comisd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::comisd, "comisd %{reg2}, %{reg1}"), "comisd");
+}
+
+TEST_F(AssemblerX86_64Test, Sqrtss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtss, "sqrtss %{reg2}, %{reg1}"), "sqrtss");
+}
+
+TEST_F(AssemblerX86_64Test, Sqrtsd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtsd, "sqrtsd %{reg2}, %{reg1}"), "sqrtsd");
+}
+
+TEST_F(AssemblerX86_64Test, Xorps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps");
+}
+
+TEST_F(AssemblerX86_64Test, Xorpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
+}
+
+// X87
+
+std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                   x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  assembler->fincstp();
+  str << "fincstp\n";
+
+  assembler->fsin();
+  str << "fsin\n";
+
+  assembler->fcos();
+  str << "fcos\n";
+
+  assembler->fptan();
+  str << "fptan\n";
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, X87) {
+  DriverFn(&x87_fn, "x87");
+}
+
+////////////////
+// CALL / JMP //
+////////////////
+
+TEST_F(AssemblerX86_64Test, Call) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::call, "call *%{reg}"), "call");
+}
+
+TEST_F(AssemblerX86_64Test, Jmp) {
+  DriverStr(RepeatR(&x86_64::X86_64Assembler::jmp, "jmp *%{reg}"), "jmp");
+}
+
+TEST_F(AssemblerX86_64Test, Enter) {
+  DriverStr(RepeatI(&x86_64::X86_64Assembler::enter, 2U  /* 16b immediate */, "enter ${imm}, $0",
+                    true  /* Only non-negative number */), "enter");
+}
+
+TEST_F(AssemblerX86_64Test, RetImm) {
+  DriverStr(RepeatI(&x86_64::X86_64Assembler::ret, 2U  /* 16b immediate */, "ret ${imm}",
+                    true  /* Only non-negative number */), "reti");
+}
+
+std::string ret_and_leave_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                             x86_64::X86_64Assembler* assembler) {
+  std::ostringstream str;
+
+  assembler->ret();
+  str << "ret\n";
+
+  assembler->leave();
+  str << "leave\n";
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86_64Test, RetAndLeave) {
+  DriverFn(&ret_and_leave_fn, "retleave");
+}
+
+//////////
+// MISC //
+//////////
+
+std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test,
+                          x86_64::X86_64Assembler* assembler) {
   // From Condition
   /*
   kOverflow     =  0,
@@ -218,23 +697,7 @@
   std::string suffixes[15] = { "o", "no", "b", "ae", "e", "ne", "be", "a", "s", "ns", "pe", "po",
                                "l", "ge", "le" };
 
-  std::vector<x86_64::CpuRegister*> registers;
-  registers.push_back(new x86_64::CpuRegister(x86_64::RAX));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RBX));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RCX));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RDX));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RBP));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RSP));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RSI));
-  registers.push_back(new x86_64::CpuRegister(x86_64::RDI));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R8));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R9));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R10));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R11));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R12));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R13));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R14));
-  registers.push_back(new x86_64::CpuRegister(x86_64::R15));
+  std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
 
   std::string byte_regs[16];
   byte_regs[x86_64::RAX] = "al";
@@ -263,7 +726,6 @@
     }
   }
 
-  STLDeleteElements(&registers);
   return str.str();
 }
 
@@ -279,7 +741,8 @@
   return x86_64::X86_64ManagedRegister::FromXmmRegister(r);
 }
 
-std::string buildframe_test_fn(x86_64::X86_64Assembler* assembler) {
+std::string buildframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                               x86_64::X86_64Assembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
@@ -323,7 +786,8 @@
   DriverFn(&buildframe_test_fn, "BuildFrame");
 }
 
-std::string removeframe_test_fn(x86_64::X86_64Assembler* assembler) {
+std::string removeframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                                x86_64::X86_64Assembler* assembler) {
   // TODO: more interesting spill registers / entry spills.
 
   // Two random spill regs.
@@ -351,7 +815,8 @@
   DriverFn(&removeframe_test_fn, "RemoveFrame");
 }
 
-std::string increaseframe_test_fn(x86_64::X86_64Assembler* assembler) {
+std::string increaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                                  x86_64::X86_64Assembler* assembler) {
   assembler->IncreaseFrameSize(0U);
   assembler->IncreaseFrameSize(kStackAlignment);
   assembler->IncreaseFrameSize(10 * kStackAlignment);
@@ -369,7 +834,8 @@
   DriverFn(&increaseframe_test_fn, "IncreaseFrame");
 }
 
-std::string decreaseframe_test_fn(x86_64::X86_64Assembler* assembler) {
+std::string decreaseframe_test_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
+                                  x86_64::X86_64Assembler* assembler) {
   assembler->DecreaseFrameSize(0U);
   assembler->DecreaseFrameSize(kStackAlignment);
   assembler->DecreaseFrameSize(10 * kStackAlignment);
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 4951b1f..927c5f5 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -31,8 +31,9 @@
 #endif
 
 #define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/trace.h"
+#include <cutils/trace.h>
 
+#include "arch/instruction_set_features.h"
 #include "base/dumpable.h"
 #include "base/stl_util.h"
 #include "base/stringpiece.h"
@@ -430,6 +431,8 @@
       image_base_(0U),
       image_classes_zip_filename_(nullptr),
       image_classes_filename_(nullptr),
+      compiled_classes_zip_filename_(nullptr),
+      compiled_classes_filename_(nullptr),
       image_(false),
       is_host_(false),
       dump_stats_(false),
@@ -540,6 +543,10 @@
         image_classes_filename_ = option.substr(strlen("--image-classes=")).data();
       } else if (option.starts_with("--image-classes-zip=")) {
         image_classes_zip_filename_ = option.substr(strlen("--image-classes-zip=")).data();
+      } else if (option.starts_with("--compiled-classes=")) {
+        compiled_classes_filename_ = option.substr(strlen("--compiled-classes=")).data();
+      } else if (option.starts_with("--compiled-classes-zip=")) {
+        compiled_classes_zip_filename_ = option.substr(strlen("--compiled-classes-zip=")).data();
       } else if (option.starts_with("--base=")) {
         const char* image_base_str = option.substr(strlen("--base=")).data();
         char* end;
@@ -571,11 +578,18 @@
         }
       } else if (option.starts_with("--instruction-set-features=")) {
         StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
-        instruction_set_features_.reset(
-            InstructionSetFeatures::FromFeatureString(instruction_set_, str.as_string(),
-                                                      &error_msg));
         if (instruction_set_features_.get() == nullptr) {
-          Usage("%s", error_msg.c_str());
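+          // No features set yet: start from the "default" variant for the target ISA, then
+          // layer the user-requested features on top below.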
+          instruction_set_features_.reset(
+              InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg));
+          if (instruction_set_features_.get() == nullptr) {
+            Usage("Problem initializing default instruction set features variant: %s",
+                  error_msg.c_str());
+          }
+        }
+        instruction_set_features_.reset(
+            instruction_set_features_->AddFeaturesFromString(str.as_string(), &error_msg));
+        if (instruction_set_features_.get() == nullptr) {
+          Usage("Error parsing '%s': %s", option.data(), error_msg.c_str());
         }
       } else if (option.starts_with("--compiler-backend=")) {
         StringPiece backend_str = option.substr(strlen("--compiler-backend=")).data();
@@ -743,6 +757,18 @@
       Usage("--image-classes-zip should be used with --image-classes");
     }
 
+    if (compiled_classes_filename_ != nullptr && !image_) {
+      Usage("--compiled-classes should only be used with --image");
+    }
+
+    if (compiled_classes_filename_ != nullptr && !boot_image_option_.empty()) {
+      Usage("--compiled-classes should not be used with --boot-image");
+    }
+
+    if (compiled_classes_zip_filename_ != nullptr && compiled_classes_filename_ == nullptr) {
+      Usage("--compiled-classes-zip should be used with --compiled-classes");
+    }
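+
+    // Illustrative example (flag usage only): an image compile restricted to a
+    // curated class list would pass --image=<file> --compiled-classes=<list>;
+    // adding --compiled-classes-zip=<zip> makes dex2oat read that list file
+    // out of the named zip, mirroring the --image-classes/--image-classes-zip
+    // pair above.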
+
     if (dex_filenames_.empty() && zip_fd_ == -1) {
       Usage("Input must be supplied with either --dex-file or --zip-fd");
     }
@@ -784,7 +810,11 @@
     // instruction set.
     if (instruction_set_features_.get() == nullptr) {
       instruction_set_features_.reset(
-          InstructionSetFeatures::FromFeatureString(instruction_set_, "default", &error_msg));
+          InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg));
+      if (instruction_set_features_.get() == nullptr) {
+        Usage("Problem initializing default instruction set features variant: %s",
+              error_msg.c_str());
+      }
     }
 
     if (instruction_set_ == kRuntimeISA) {
@@ -912,9 +942,11 @@
         oat_location_ = oat_filename_;
       }
     } else {
-      oat_file_.reset(new File(oat_fd_, oat_location_));
+      oat_file_.reset(new File(oat_fd_, oat_location_, true));
       oat_file_->DisableAutoClose();
-      oat_file_->SetLength(0);
+      if (oat_file_->SetLength(0) != 0) {
+        PLOG(WARNING) << "Truncating oat file " << oat_location_ << " failed.";
+      }
     }
     if (oat_file_.get() == nullptr) {
       PLOG(ERROR) << "Failed to create oat file: " << oat_location_;
@@ -922,6 +954,7 @@
     }
     if (create_file && fchmod(oat_file_->Fd(), 0644) != 0) {
       PLOG(ERROR) << "Failed to make oat file world readable: " << oat_location_;
+      oat_file_->Erase();
       return false;
     }
     return true;
@@ -986,6 +1019,25 @@
     } else if (image_) {
       image_classes_.reset(new std::set<std::string>);
     }
+    // If --compiled-classes was specified, calculate the full list of classes to compile in the
+    // image.
+    if (compiled_classes_filename_ != nullptr) {
+      std::string error_msg;
+      if (compiled_classes_zip_filename_ != nullptr) {
+        compiled_classes_.reset(ReadImageClassesFromZip(compiled_classes_zip_filename_,
+                                                        compiled_classes_filename_,
+                                                        &error_msg));
+      } else {
+        compiled_classes_.reset(ReadImageClassesFromFile(compiled_classes_filename_));
+      }
+      if (compiled_classes_.get() == nullptr) {
+        LOG(ERROR) << "Failed to create list of compiled classes from '"
+                   << compiled_classes_filename_ << "': " << error_msg;
+        return false;
+      }
+    } else if (image_) {
+      compiled_classes_.reset(nullptr);  // By default compile everything.
+    }
 
     if (boot_image_option_.empty()) {
       dex_files_ = Runtime::Current()->GetClassLinker()->GetBootClassPath();
@@ -1026,7 +1078,10 @@
                 << ". Try: adb shell chmod 777 /data/local/tmp";
             continue;
           }
-          tmp_file->WriteFully(dex_file->Begin(), dex_file->Size());
+          // This is just dumping files for debugging. Ignore errors, and leave remnants.
+          UNUSED(tmp_file->WriteFully(dex_file->Begin(), dex_file->Size()));
+          UNUSED(tmp_file->Flush());
+          UNUSED(tmp_file->Close());
           LOG(INFO) << "Wrote input to " << tmp_file_name;
         }
       }
@@ -1089,6 +1144,7 @@
                                      instruction_set_features_.get(),
                                      image_,
                                      image_classes_.release(),
+                                     compiled_classes_.release(),
                                      thread_count_,
                                      dump_stats_,
                                      dump_passes_,
@@ -1216,6 +1272,7 @@
       if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
                              oat_file_.get())) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
+        oat_file_->Erase();
         return false;
       }
     }
@@ -1223,8 +1280,8 @@
     // Flush result to disk.
     {
       TimingLogger::ScopedTiming t2("dex2oat Flush ELF", timings_);
-      if (oat_file_->Flush() != 0) {
-        LOG(ERROR) << "Failed to flush ELF file " << oat_file_->GetPath();
+      if (oat_file_->FlushCloseOrErase() != 0) {
+        PLOG(ERROR) << "Failed to flush ELF file " << oat_file_->GetPath();
         return false;
       }
     }
@@ -1252,7 +1309,13 @@
     // We need to strip after image creation because FixupElf needs to use .strtab.
     if (oat_unstripped_ != oat_stripped_) {
       TimingLogger::ScopedTiming t("dex2oat OatFile copy", timings_);
-      oat_file_.reset();
+      if (kUsePortableCompiler) {
+        if (oat_file_->FlushCloseOrErase() != 0) {
+          PLOG(ERROR) << "Failed to flush and close oat file: " << oat_location_;
+          return false;
+        }
+        oat_file_.reset();
+      }
       std::unique_ptr<File> in(OS::OpenFileForReading(oat_unstripped_.c_str()));
       std::unique_ptr<File> out(OS::CreateEmptyFile(oat_stripped_.c_str()));
       size_t buffer_size = 8192;
@@ -1280,6 +1343,7 @@
         std::string error_msg;
         if (!ElfFile::Strip(oat_file_.get(), &error_msg)) {
           LOG(ERROR) << "Failed to strip elf file: " << error_msg;
+          oat_file_->Erase();
           return false;
         }
 
@@ -1288,8 +1352,20 @@
       } else {
         VLOG(compiler) << "Oat file written successfully without stripping: " << oat_location_;
       }
+      if (oat_file_->FlushCloseOrErase() != 0) {
+        PLOG(ERROR) << "Failed to flush and close oat file: " << oat_location_;
+        return false;
+      }
+      oat_file_.reset(nullptr);
     }
 
+    if (oat_file_.get() != nullptr) {
+      if (oat_file_->FlushCloseOrErase() != 0) {
+        PLOG(ERROR) << "Failed to flush and close oat file: " << oat_location_ << "/"
+                    << oat_filename_;
+        return false;
+      }
+    }
     return true;
   }
 
@@ -1401,18 +1477,24 @@
     // Destroy ImageWriter before doing FixupElf.
     image_writer_.reset();
 
-    std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_unstripped_.c_str()));
-    if (oat_file.get() == nullptr) {
-      PLOG(ERROR) << "Failed to open ELF file: " << oat_unstripped_;
-      return false;
-    }
-
     // Do not fix up the ELF file if we are --compile-pic
     if (!compiler_options_->GetCompilePic()) {
+      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_unstripped_.c_str()));
+      if (oat_file.get() == nullptr) {
+        PLOG(ERROR) << "Failed to open ELF file: " << oat_unstripped_;
+        return false;
+      }
+
       if (!ElfWriter::Fixup(oat_file.get(), oat_data_begin)) {
+        oat_file->Erase();
         LOG(ERROR) << "Failed to fixup ELF file " << oat_file->GetPath();
         return false;
       }
+
+      if (oat_file->FlushCloseOrErase() != 0) {
+        PLOG(ERROR) << "Failed to flush and close fixed ELF file " << oat_file->GetPath();
+        return false;
+      }
     }
 
     return true;
@@ -1514,7 +1596,10 @@
   uintptr_t image_base_;
   const char* image_classes_zip_filename_;
   const char* image_classes_filename_;
+  const char* compiled_classes_zip_filename_;
+  const char* compiled_classes_filename_;
   std::unique_ptr<std::set<std::string>> image_classes_;
+  std::unique_ptr<std::set<std::string>> compiled_classes_;
   bool image_;
   std::unique_ptr<ImageWriter> image_writer_;
   bool is_host_;
diff --git a/disassembler/disassembler.h b/disassembler/disassembler.h
index 487f433..966ee3a 100644
--- a/disassembler/disassembler.h
+++ b/disassembler/disassembler.h
@@ -21,8 +21,8 @@
 
 #include <iosfwd>
 
+#include "arch/instruction_set.h"
 #include "base/macros.h"
-#include "instruction_set.h"
 
 namespace art {
 
@@ -34,8 +34,14 @@
   // Base address for calculating relative code offsets when absolute_addresses_ is false.
   const uint8_t* const base_address_;
 
-  DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address)
-      : absolute_addresses_(absolute_addresses), base_address_(base_address) {}
+  // If set, the disassembler is allowed to look at load targets in literal
+  // pools.
+  const bool can_read_literals_;
+
+  DisassemblerOptions(bool absolute_addresses, const uint8_t* base_address,
+                      bool can_read_literals)
+      : absolute_addresses_(absolute_addresses), base_address_(base_address),
+        can_read_literals_(can_read_literals) {}
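+
+  // Example (illustrative): oatdump builds these options roughly as
+  //   new DisassemblerOptions(absolute_addresses, oat_file.Begin(),
+  //                           true /* can_read_literals */)
+  // so that the ARM64 disassembler may annotate literal loads with their
+  // loaded values.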
 
  private:
   DISALLOW_COPY_AND_ASSIGN(DisassemblerOptions);
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index 229ac97..fe50421 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -27,10 +27,88 @@
 namespace art {
 namespace arm64 {
 
+void CustomDisassembler::AppendRegisterNameToOutput(
+    const vixl::Instruction* instr,
+    const vixl::CPURegister& reg) {
+  USE(instr);
+  if (reg.IsRegister()) {
+    // This enumeration should mirror the declarations in
+    // runtime/arch/arm64/registers_arm64.h. We do not include that file to
+    // avoid a dependency on libart.
+    enum {
+      TR  = 18,
+      ETR = 21,
+      IP0 = 16,
+      IP1 = 17,
+      FP  = 29,
+      LR  = 30
+    };
+    switch (reg.code()) {
+      case IP0: AppendToOutput(reg.Is64Bits() ? "ip0" : "wip0"); return;
+      case IP1: AppendToOutput(reg.Is64Bits() ? "ip1" : "wip1"); return;
+      case TR:  AppendToOutput(reg.Is64Bits() ? "tr"  :  "w18"); return;
+      case ETR: AppendToOutput(reg.Is64Bits() ? "etr" :  "w21"); return;
+      case FP:  AppendToOutput(reg.Is64Bits() ? "fp"  :  "w29"); return;
+      case LR:  AppendToOutput(reg.Is64Bits() ? "lr"  :  "w30"); return;
+      default:
+        // Fall through.
+        break;
+    }
+  }
+  // Print other register names as usual.
+  Disassembler::AppendRegisterNameToOutput(instr, reg);
+}
+
+void CustomDisassembler::VisitLoadLiteral(const vixl::Instruction* instr) {
+  Disassembler::VisitLoadLiteral(instr);
+
+  if (!read_literals_) {
+    return;
+  }
+
+  char* buffer = buffer_;
+  char* buffer_end = buffer_ + buffer_size_;
+
+  // Find the end position in the buffer.
+  while ((buffer < buffer_end) && (*buffer != 0)) {
+    ++buffer;
+  }
+
+  void* data_address = instr->LiteralAddress();
+  ptrdiff_t buf_size_remaining = buffer_end - buffer;
+  vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
+
+  switch (op) {
+    case vixl::LDR_w_lit:
+    case vixl::LDR_x_lit:
+    case vixl::LDRSW_x_lit: {
+      int64_t data = (op == vixl::LDR_x_lit) ? *reinterpret_cast<int64_t*>(data_address)
+                                             : *reinterpret_cast<int32_t*>(data_address);
+      snprintf(buffer, buf_size_remaining, " (0x%" PRIx64 " / %" PRId64 ")", data, data);
+      break;
+    }
+    case vixl::LDR_s_lit:
+    case vixl::LDR_d_lit: {
+      double data = (op == vixl::LDR_s_lit) ? *reinterpret_cast<float*>(data_address)
+                                            : *reinterpret_cast<double*>(data_address);
+      snprintf(buffer, buf_size_remaining, " (%g)", data);
+      break;
+    }
+    default:
+      break;
+  }
+}
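+
+// For example, with read_literals_ set, a literal load such as
+//   0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0)
+// gains a value annotation, here " (3.40282e+38)" for a float literal.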
+
 size_t DisassemblerArm64::Dump(std::ostream& os, const uint8_t* begin) {
   const vixl::Instruction* instr = reinterpret_cast<const vixl::Instruction*>(begin);
   decoder.Decode(instr);
-  os << FormatInstructionPointer(begin)
+  // TODO: Use FormatInstructionPointer() once VIXL provides the appropriate
+  // features.
+  // VIXL does not yet allow remapping the addresses it disassembles. Using
+  // FormatInstructionPointer() would show inconsistencies between the
+  // instruction location addresses and the target addresses disassembled by
+  // VIXL (e.g. for branch instructions).
+  os << StringPrintf("%p", instr)
      << StringPrintf(": %08x\t%s\n", instr->InstructionBits(), disasm.GetOutput());
   return vixl::kInstructionSize;
 }
diff --git a/disassembler/disassembler_arm64.h b/disassembler/disassembler_arm64.h
index e56fe4f..a370b8d 100644
--- a/disassembler/disassembler_arm64.h
+++ b/disassembler/disassembler_arm64.h
@@ -28,9 +28,35 @@
 namespace art {
 namespace arm64 {
 
+class CustomDisassembler FINAL : public vixl::Disassembler {
+ public:
+  explicit CustomDisassembler(bool read_literals) :
+      vixl::Disassembler(), read_literals_(read_literals) {}
+
+  // Use register aliases in the disassembly.
+  virtual void AppendRegisterNameToOutput(const vixl::Instruction* instr,
+                                          const vixl::CPURegister& reg) OVERRIDE;
+
+  // Improve the disassembly of literal load instructions.
+  virtual void VisitLoadLiteral(const vixl::Instruction* instr) OVERRIDE;
+
+ private:
+  // Indicate if the disassembler should read data loaded from literal pools.
+  // This should only be enabled if reading the target of literal loads is safe.
+  // Here are possible outputs when the option is on or off:
+  // read_literals_ | disassembly
+  //           true | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0) (3.40282e+38)
+  //          false | 0x72681558: 1c000acb  ldr s11, pc+344 (addr 0x726816b0)
+  const bool read_literals_;
+};
+
 class DisassemblerArm64 FINAL : public Disassembler {
  public:
-  explicit DisassemblerArm64(DisassemblerOptions* options) : Disassembler(options) {
+  // TODO: Update this code once VIXL provides the ability to map code addresses
+  // to disassemble as a different address (the way FormatInstructionPointer()
+  // does).
+  explicit DisassemblerArm64(DisassemblerOptions* options) :
+      Disassembler(options), disasm(options->can_read_literals_) {
     decoder.AppendVisitor(&disasm);
   }
 
@@ -39,7 +65,7 @@
 
  private:
   vixl::Decoder decoder;
-  vixl::Disassembler disasm;
+  CustomDisassembler disasm;
 
   DISALLOW_COPY_AND_ASSIGN(DisassemblerArm64);
 };
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index cdf48c3..feee598 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -23,6 +23,7 @@
 #include <unordered_map>
 #include <vector>
 
+#include "arch/instruction_set_features.h"
 #include "base/stringpiece.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
@@ -107,6 +108,10 @@
           "  --no-disassemble may be used to disable disassembly.\n"
           "      Example: --no-disassemble\n"
           "\n");
+  fprintf(stderr,
+          "  --method-filter=<method name>: only dumps methods that contain the filter.\n"
+          "      Example: --method-filter=foo\n"
+          "\n");
 }
 
 const char* image_roots_descriptions_[] = {
@@ -177,8 +182,8 @@
 
     bool result = builder_->Write();
 
-    elf_output_->Flush();
-    elf_output_->Close();
+    // Ignore I/O errors.
+    UNUSED(elf_output_->FlushClose());
 
     return result;
   }
@@ -356,12 +361,14 @@
                    bool dump_vmap,
                    bool disassemble_code,
                    bool absolute_addresses,
+                   const char* method_filter,
                    Handle<mirror::ClassLoader>* class_loader)
     : dump_raw_mapping_table_(dump_raw_mapping_table),
       dump_raw_gc_map_(dump_raw_gc_map),
       dump_vmap_(dump_vmap),
       disassemble_code_(disassemble_code),
       absolute_addresses_(absolute_addresses),
+      method_filter_(method_filter),
       class_loader_(class_loader) {}
 
   const bool dump_raw_mapping_table_;
@@ -369,6 +376,7 @@
   const bool dump_vmap_;
   const bool disassemble_code_;
   const bool absolute_addresses_;
+  const char* const method_filter_;
   Handle<mirror::ClassLoader>* class_loader_;
 };
 
@@ -380,7 +388,8 @@
       options_(options),
       disassembler_(Disassembler::Create(oat_file_.GetOatHeader().GetInstructionSet(),
                                          new DisassemblerOptions(options_->absolute_addresses_,
-                                                                 oat_file.Begin()))) {
+                                                                 oat_file.Begin(),
+                                                                 true /* can_read_literals_ */))) {
     CHECK(options_->class_loader_ != nullptr);
     AddAllOffsets();
   }
@@ -686,8 +695,13 @@
                      uint32_t dex_method_idx, const DexFile::CodeItem* code_item,
                      uint32_t method_access_flags) {
     bool success = true;
+    std::string pretty_method = PrettyMethod(dex_method_idx, dex_file, true);
+    if (pretty_method.find(options_->method_filter_) == std::string::npos) {
+      return success;
+    }
+
     os << StringPrintf("%d: %s (dex_method_idx=%d)\n",
-                       class_method_index, PrettyMethod(dex_method_idx, dex_file, true).c_str(),
+                       class_method_index, pretty_method.c_str(),
                        dex_method_idx);
     Indenter indent1_filter(os.rdbuf(), kIndentChar, kIndentBy1Count);
     std::unique_ptr<std::ostream> indent1_os(new std::ostream(&indent1_filter));
@@ -2179,6 +2193,8 @@
       } else if (option.starts_with("--symbolize=")) {
         oat_filename_ = option.substr(strlen("--symbolize=")).data();
         symbolize_ = true;
+      } else if (option.starts_with("--method-filter=")) {
+        method_filter_ = option.substr(strlen("--method-filter=")).data();
       } else {
         fprintf(stderr, "Unknown argument %s\n", option.data());
         usage();
@@ -2200,6 +2216,7 @@
   }
 
   const char* oat_filename_ = nullptr;
+  const char* method_filter_ = "";
   const char* image_location_ = nullptr;
   const char* boot_image_location_ = nullptr;
   InstructionSet instruction_set_ = kRuntimeISA;
@@ -2231,6 +2248,7 @@
       args.dump_vmap_,
       args.disassemble_code_,
       absolute_addresses,
+      args.method_filter_,
       nullptr));
 
   std::unique_ptr<Runtime> runtime;
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 75160ca..b15c712 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -34,12 +34,8 @@
 #include "elf_file_impl.h"
 #include "gc/space/image_space.h"
 #include "image.h"
-#include "instruction_set.h"
-#include "mirror/art_field.h"
 #include "mirror/art_field-inl.h"
-#include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
-#include "mirror/object.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference.h"
 #include "noop_compiler_callbacks.h"
@@ -179,7 +175,7 @@
   }
   gc::space::ImageSpace* ispc = Runtime::Current()->GetHeap()->GetImageSpace();
 
-  PatchOat p(image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
+  PatchOat p(isa, image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
              delta, timings);
   t.NewTiming("Patching files");
   if (!p.PatchImage()) {
@@ -301,7 +297,7 @@
     CHECK(is_oat_pic == NOT_PIC);
   }
 
-  PatchOat p(elf.release(), image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
+  PatchOat p(isa, elf.release(), image.release(), ispc->GetLiveBitmap(), ispc->GetMemMap(),
              delta, timings);
   t.NewTiming("Patching files");
   if (!skip_patching_oat && !p.PatchElf()) {
@@ -536,39 +532,44 @@
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<true, kVerifyNone>(visitor, visitor);
   if (object->IsArtMethod<kVerifyNone>()) {
-    FixupMethod(static_cast<mirror::ArtMethod*>(object),
-                static_cast<mirror::ArtMethod*>(copy));
+    FixupMethod(down_cast<mirror::ArtMethod*>(object), down_cast<mirror::ArtMethod*>(copy));
   }
 }
 
 void PatchOat::FixupMethod(mirror::ArtMethod* object, mirror::ArtMethod* copy) {
+  const size_t pointer_size = InstructionSetPointerSize(isa_);
   // Just update the entry points if it looks like we should.
   // TODO: sanity check all the pointers' values
   uintptr_t portable = reinterpret_cast<uintptr_t>(
-      object->GetEntryPointFromPortableCompiledCode<kVerifyNone>());
+      object->GetEntryPointFromPortableCompiledCodePtrSize<kVerifyNone>(pointer_size));
   if (portable != 0) {
-    copy->SetEntryPointFromPortableCompiledCode(reinterpret_cast<void*>(portable + delta_));
+    copy->SetEntryPointFromPortableCompiledCodePtrSize(reinterpret_cast<void*>(portable + delta_),
+                                                       pointer_size);
   }
   uintptr_t quick = reinterpret_cast<uintptr_t>(
-      object->GetEntryPointFromQuickCompiledCode<kVerifyNone>());
+      object->GetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(pointer_size));
   if (quick != 0) {
-    copy->SetEntryPointFromQuickCompiledCode(reinterpret_cast<void*>(quick + delta_));
+    copy->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(quick + delta_),
+                                                    pointer_size);
   }
   uintptr_t interpreter = reinterpret_cast<uintptr_t>(
-      object->GetEntryPointFromInterpreter<kVerifyNone>());
+      object->GetEntryPointFromInterpreterPtrSize<kVerifyNone>(pointer_size));
   if (interpreter != 0) {
-    copy->SetEntryPointFromInterpreter(
-        reinterpret_cast<mirror::EntryPointFromInterpreter*>(interpreter + delta_));
+    copy->SetEntryPointFromInterpreterPtrSize(
+        reinterpret_cast<mirror::EntryPointFromInterpreter*>(interpreter + delta_), pointer_size);
   }
 
-  uintptr_t native_method = reinterpret_cast<uintptr_t>(object->GetNativeMethod());
+  uintptr_t native_method = reinterpret_cast<uintptr_t>(
+      object->GetEntryPointFromJniPtrSize(pointer_size));
   if (native_method != 0) {
-    copy->SetNativeMethod(reinterpret_cast<void*>(native_method + delta_));
+    copy->SetEntryPointFromJniPtrSize(reinterpret_cast<void*>(native_method + delta_),
+                                      pointer_size);
   }
 
-  uintptr_t native_gc_map = reinterpret_cast<uintptr_t>(object->GetNativeGcMap());
+  uintptr_t native_gc_map = reinterpret_cast<uintptr_t>(
+      object->GetNativeGcMapPtrSize(pointer_size));
   if (native_gc_map != 0) {
-    copy->SetNativeGcMap(reinterpret_cast<uint8_t*>(native_gc_map + delta_));
+    copy->SetNativeGcMapPtrSize(reinterpret_cast<uint8_t*>(native_gc_map + delta_), pointer_size);
   }
 }
 
@@ -903,6 +904,20 @@
   }
 }
 
+// Either try to close the file (close=true), or erase it.
+static bool FinishFile(File* file, bool close) {
+  if (close) {
+    if (file->FlushCloseOrErase() != 0) {
+      PLOG(ERROR) << "Failed to flush and close file.";
+      return false;
+    }
+    return true;
+  } else {
+    file->Erase();
+    return false;
+  }
+}
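+
+// So FinishFile(file, /* close */ true) attempts a flush-and-close and reports
+// whether it succeeded, while FinishFile(file, /* close */ false) erases the
+// file and always returns false, propagating an earlier patching failure.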
+
 static int patchoat(int argc, char **argv) {
   InitLogging(argv);
   MemMap::Init();
@@ -1174,7 +1189,7 @@
       if (output_image_filename.empty()) {
         output_image_filename = "output-image-file";
       }
-      output_image.reset(new File(output_image_fd, output_image_filename));
+      output_image.reset(new File(output_image_fd, output_image_filename, true));
     } else {
       CHECK(!output_image_filename.empty());
       output_image.reset(CreateOrOpen(output_image_filename.c_str(), &new_image_out));
@@ -1188,7 +1203,7 @@
       if (input_oat_filename.empty()) {
         input_oat_filename = "input-oat-file";
       }
-      input_oat.reset(new File(input_oat_fd, input_oat_filename));
+      input_oat.reset(new File(input_oat_fd, input_oat_filename, false));
       if (input_oat == nullptr) {
         // Unlikely, but ensure exhaustive logging in non-0 exit code case
         LOG(ERROR) << "Failed to open input oat file by its FD" << input_oat_fd;
@@ -1207,7 +1222,7 @@
       if (output_oat_filename.empty()) {
         output_oat_filename = "output-oat-file";
       }
-      output_oat.reset(new File(output_oat_fd, output_oat_filename));
+      output_oat.reset(new File(output_oat_fd, output_oat_filename, true));
       if (output_oat == nullptr) {
         // Unlikely, but ensure exhaustive logging in non-0 exit code case
         LOG(ERROR) << "Failed to open output oat file by its FD" << output_oat_fd;
@@ -1280,14 +1295,20 @@
                           output_oat.get(), output_image.get(), isa, &timings,
                           output_oat_fd >= 0,  // was it opened from FD?
                           new_oat_out);
+    // The order here doesn't matter. If the first one is successfully saved and the second one
+    // erased, ImageSpace will still detect a problem and not use the files.
+    ret = FinishFile(output_image.get(), ret) && ret;
+    ret = FinishFile(output_oat.get(), ret) && ret;
   } else if (have_oat_files) {
     TimingLogger::ScopedTiming pt("patch oat", &timings);
     ret = PatchOat::Patch(input_oat.get(), base_delta, output_oat.get(), &timings,
                           output_oat_fd >= 0,  // was it opened from FD?
                           new_oat_out);
+    ret = FinishFile(output_oat.get(), ret) && ret;
   } else if (have_image_files) {
     TimingLogger::ScopedTiming pt("patch image", &timings);
     ret = PatchOat::Patch(input_image_location, base_delta, output_image.get(), isa, &timings);
+    ret = FinishFile(output_image.get(), ret) && ret;
   } else {
     CHECK(false);
     ret = true;
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index 0ceef64..578df3a 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -17,14 +17,14 @@
 #ifndef ART_PATCHOAT_PATCHOAT_H_
 #define ART_PATCHOAT_PATCHOAT_H_
 
+#include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "instruction_set.h"
-#include "os.h"
 #include "elf_file.h"
 #include "elf_utils.h"
 #include "gc/accounting/space_bitmap.h"
 #include "gc/heap.h"
+#include "os.h"
 #include "utils.h"
 
 namespace art {
@@ -61,15 +61,16 @@
   // Takes ownership only of the ElfFile. All other pointers are only borrowed.
   PatchOat(ElfFile* oat_file, off_t delta, TimingLogger* timings)
       : oat_file_(oat_file), image_(nullptr), bitmap_(nullptr), heap_(nullptr), delta_(delta),
-        timings_(timings) {}
-  PatchOat(MemMap* image, gc::accounting::ContinuousSpaceBitmap* bitmap,
+        isa_(kNone), timings_(timings) {}
+  PatchOat(InstructionSet isa, MemMap* image, gc::accounting::ContinuousSpaceBitmap* bitmap,
            MemMap* heap, off_t delta, TimingLogger* timings)
       : image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), timings_(timings) {}
-  PatchOat(ElfFile* oat_file, MemMap* image, gc::accounting::ContinuousSpaceBitmap* bitmap,
-           MemMap* heap, off_t delta, TimingLogger* timings)
+        delta_(delta), isa_(isa), timings_(timings) {}
+  PatchOat(InstructionSet isa, ElfFile* oat_file, MemMap* image,
+           gc::accounting::ContinuousSpaceBitmap* bitmap, MemMap* heap, off_t delta,
+           TimingLogger* timings)
       : oat_file_(oat_file), image_(image), bitmap_(bitmap), heap_(heap),
-        delta_(delta), timings_(timings) {}
+        delta_(delta), isa_(isa), timings_(timings) {}
   ~PatchOat() {}
 
   // Was the .art image at image_path made with --compile-pic ?
@@ -156,8 +157,10 @@
   const MemMap* const heap_;
   // The amount we are changing the offset by.
   const off_t delta_;
-  // Timing splits.
-  TimingLogger* const timings_;
+  // Active instruction set, used to know the entry point pointer size.
+  const InstructionSet isa_;
+
+  // Timing splits.
+  TimingLogger* timings_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat);
 };
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 082e8dd..58f7940 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -73,7 +73,6 @@
   hprof/hprof.cc \
   image.cc \
   indirect_reference_table.cc \
-  instruction_set.cc \
   instrumentation.cc \
   intern_table.cc \
   interpreter/interpreter.cc \
@@ -165,11 +164,18 @@
 
 LIBART_COMMON_SRC_FILES += \
   arch/context.cc \
+  arch/instruction_set.cc \
+  arch/instruction_set_features.cc \
   arch/memcmp16.cc \
+  arch/arm/instruction_set_features_arm.cc \
   arch/arm/registers_arm.cc \
+  arch/arm64/instruction_set_features_arm64.cc \
   arch/arm64/registers_arm64.cc \
-  arch/x86/registers_x86.cc \
+  arch/mips/instruction_set_features_mips.cc \
   arch/mips/registers_mips.cc \
+  arch/x86/instruction_set_features_x86.cc \
+  arch/x86/registers_x86.cc \
+  arch/x86_64/registers_x86_64.cc \
   entrypoints/entrypoint_utils.cc \
   entrypoints/interpreter/interpreter_entrypoints.cc \
   entrypoints/jni/jni_entrypoints.cc \
@@ -216,7 +222,7 @@
 LIBART_TARGET_SRC_FILES_arm := \
   arch/arm/context_arm.cc.arm \
   arch/arm/entrypoints_init_arm.cc \
-  arch/arm/instruction_set_features_arm.S \
+  arch/arm/instruction_set_features_assembly_tests.S \
   arch/arm/jni_entrypoints_arm.S \
   arch/arm/memcmp16_arm.S \
   arch/arm/portable_entrypoints_arm.S \
@@ -292,10 +298,11 @@
   $(LIBART_SRC_FILES_x86_64)
 
 LIBART_ENUM_OPERATOR_OUT_HEADER_FILES := \
-  arch/x86_64/registers_x86_64.h \
+  arch/instruction_set.h \
   base/allocator.h \
   base/mutex.h \
+  base/unix_file/fd_file.h \
   debugger.h \
   dex_file.h \
   dex_instruction.h \
   gc/allocator/rosalloc.h \
@@ -306,7 +313,6 @@
   gc/heap.h \
   instrumentation.h \
   indirect_reference_table.h \
-  instruction_set.h \
   invoke_type.h \
   jdwp/jdwp.h \
   jdwp/jdwp_constants.h \
@@ -339,7 +345,7 @@
 2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := default
 ifeq ($(DEX2OAT_TARGET_ARCH),arm)
   ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
-    LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := lpae,div
+    LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
   else
     ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
       LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
@@ -348,7 +354,7 @@
 endif
 ifeq ($(2ND_DEX2OAT_TARGET_ARCH),arm)
   ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a15 krait denver))
-    2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := lpae,div
+    2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := atomic_ldrd_strd,div
   else
     ifneq (,$(filter $(DEX2OAT_TARGET_CPU_VARIANT),cortex-a7))
       2ND_LIBART_TARGET_DEFAULT_INSTRUCTION_SET_FEATURES := div
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 480190a..325b283 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -23,7 +23,6 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
-#include "instruction_set.h"
 #include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "thread.h"
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
new file mode 100644
index 0000000..f8590d3
--- /dev/null
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_arm.h"
+
+#if defined(HAVE_ANDROID_OS) && defined(__arm__)
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+#endif
+
+#include "signal.h"
+#include <fstream>
+
+#include "base/stringprintf.h"
+#include "utils.h"  // For Trim.
+
+#if defined(__arm__)
+extern "C" bool artCheckForArmSdivInstruction();
+#endif
+
+namespace art {
+
+const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromVariant(
+    const std::string& variant, std::string* error_msg) {
+  // Assume all ARM processors are SMP.
+  // TODO: set the SMP support based on variant.
+  const bool smp = true;
+
+  // Look for variants that have divide support.
+  static const char* arm_variants_with_div[] = {
+          "cortex-a7", "cortex-a12", "cortex-a15", "cortex-a17", "cortex-a53", "cortex-a57",
+          "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
+          "cyclone", "denver", "krait", "swift"};
+
+  bool has_div = FindVariantInArray(arm_variants_with_div, arraysize(arm_variants_with_div),
+                                    variant);
+
+  // Look for variants that have LPAE support.
+  static const char* arm_variants_with_lpae[] = {
+      "cortex-a7", "cortex-a15", "krait", "denver"
+  };
+  bool has_lpae = FindVariantInArray(arm_variants_with_lpae, arraysize(arm_variants_with_lpae),
+                                     variant);
+
+  if (!has_div && !has_lpae) {
+    // Avoid unsupported variants.
+    static const char* unsupported_arm_variants[] = {
+        // ARM processors that aren't ARMv7 compatible aren't supported.
+        "arm2", "arm250", "arm3", "arm6", "arm60", "arm600", "arm610", "arm620",
+        "cortex-m0", "cortex-m0plus", "cortex-m1",
+        "fa526", "fa626", "fa606te", "fa626te", "fmp626", "fa726te",
+        "iwmmxt", "iwmmxt2",
+        "strongarm", "strongarm110", "strongarm1100", "strongarm1110",
+        "xscale"
+    };
+    if (FindVariantInArray(unsupported_arm_variants, arraysize(unsupported_arm_variants),
+                           variant)) {
+      *error_msg = StringPrintf("Attempt to use unsupported ARM variant: %s", variant.c_str());
+      return nullptr;
+    }
+    // Warn if the variant is unknown.
+    // TODO: some of the variants below may have feature support, but that support is currently
+    //       unknown so we'll choose conservative (sub-optimal) defaults without warning.
+    // TODO: some of the architectures may not support all features required by ART and should be
+    //       moved to unsupported_arm_variants[] above.
+    static const char* arm_variants_without_known_features[] = {
+        "default",
+        "arm7", "arm7m", "arm7d", "arm7dm", "arm7di", "arm7dmi", "arm70", "arm700", "arm700i",
+        "arm710", "arm710c", "arm7100", "arm720", "arm7500", "arm7500fe", "arm7tdmi", "arm7tdmi-s",
+        "arm710t", "arm720t", "arm740t",
+        "arm8", "arm810",
+        "arm9", "arm9e", "arm920", "arm920t", "arm922t", "arm946e-s", "arm966e-s", "arm968e-s",
+        "arm926ej-s", "arm940t", "arm9tdmi",
+        "arm10tdmi", "arm1020t", "arm1026ej-s", "arm10e", "arm1020e", "arm1022e",
+        "arm1136j-s", "arm1136jf-s",
+        "arm1156t2-s", "arm1156t2f-s", "arm1176jz-s", "arm1176jzf-s",
+        "cortex-a5", "cortex-a8", "cortex-a9", "cortex-a9-mp", "cortex-r4f",
+        "marvell-pj4", "mpcore", "mpcorenovfp"
+    };
+    if (!FindVariantInArray(arm_variants_without_known_features,
+                            arraysize(arm_variants_without_known_features),
+                            variant)) {
+      LOG(WARNING) << "Unknown instruction set features for ARM CPU variant (" << variant
+          << ") using conservative defaults";
+    }
+  }
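+  // LPAE is used as the signal for has_atomic_ldrd_strd: 64-bit ldrd/strd
+  // accesses are commonly atomic when LPAE is present (see
+  // HasAtomicLdrdAndStrd in the header).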
+  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+}
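+
+// A typical call site (compare instruction_set_features_arm_test.cc):
+//   std::string error_msg;
+//   std::unique_ptr<const ArmInstructionSetFeatures> features(
+//       ArmInstructionSetFeatures::FromVariant("krait", &error_msg));
+//   CHECK(features.get() != nullptr) << error_msg;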
+
+const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+  bool smp = (bitmap & kSmpBitfield) != 0;
+  bool has_div = (bitmap & kDivBitfield) != 0;
+  bool has_atomic_ldrd_strd = (bitmap & kAtomicLdrdStrdBitfield) != 0;
+  return new ArmInstructionSetFeatures(smp, has_div, has_atomic_ldrd_strd);
+}
+
+const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromCppDefines() {
+  const bool smp = true;
+#if defined(__ARM_ARCH_EXT_IDIV__)
+  const bool has_div = true;
+#else
+  const bool has_div = false;
+#endif
+#if defined(__ARM_FEATURE_LPAE)
+  const bool has_lpae = true;
+#else
+  const bool has_lpae = false;
+#endif
+  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+}
+
+const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromCpuInfo() {
+  // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+  // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+  bool smp = false;
+  bool has_lpae = false;
+  bool has_div = false;
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    while (!in.eof()) {
+      std::string line;
+      std::getline(in, line);
+      if (!in.eof()) {
+        LOG(INFO) << "cpuinfo line: " << line;
+        if (line.find("Features") != std::string::npos) {
+          LOG(INFO) << "found features";
+          if (line.find("idivt") != std::string::npos) {
+            // We always expect both ARM and Thumb divide instructions to be available or not
+            // available.
+            CHECK_NE(line.find("idiva"), std::string::npos);
+            has_div = true;
+          }
+          if (line.find("lpae") != std::string::npos) {
+            has_lpae = true;
+          }
+        } else if (line.find("processor") != std::string::npos &&
+            line.find(": 1") != std::string::npos) {
+          smp = true;
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
+  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+}
+
+const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromHwcap() {
+  bool smp = sysconf(_SC_NPROCESSORS_CONF) > 1;
+
+  bool has_div = false;
+  bool has_lpae = false;
+
+#if defined(HAVE_ANDROID_OS) && defined(__arm__)
+  uint64_t hwcaps = getauxval(AT_HWCAP);
+  LOG(INFO) << "hwcaps=" << hwcaps;
+  if ((hwcaps & HWCAP_IDIVT) != 0) {
+    // We always expect both ARM and Thumb divide instructions to be available or not
+    // available.
+    CHECK_NE(hwcaps & HWCAP_IDIVA, 0U);
+    has_div = true;
+  }
+  if ((hwcaps & HWCAP_LPAE) != 0) {
+    has_lpae = true;
+  }
+#endif
+
+  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+}
+
+// A signal handler called by a fault for an illegal instruction.  We record the fact in r0
+// and then increment the PC in the signal context to return to the next instruction.  We know the
+// instruction is an sdiv (4 bytes long).
+static void bad_divide_inst_handle(int signo ATTRIBUTE_UNUSED, siginfo_t* si ATTRIBUTE_UNUSED,
+                                   void* data) {
+#if defined(__arm__)
+  struct ucontext *uc = (struct ucontext *)data;
+  struct sigcontext *sc = &uc->uc_mcontext;
+  sc->arm_r0 = 0;     // Set R0 to #0 to signal error.
+  sc->arm_pc += 4;    // Skip offending instruction.
+#else
+  UNUSED(data);
+#endif
+}
+
+const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromAssembly() {
+  const bool smp = true;
+
+  // See if we have an sdiv instruction. Register a signal handler and try to execute an sdiv
+  // instruction. If we get a SIGILL then it's not supported.
+  struct sigaction sa, osa;
+  sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
+  sa.sa_sigaction = bad_divide_inst_handle;
+  sigemptyset(&sa.sa_mask);  // Avoid an uninitialized signal mask.
+  sigaction(SIGILL, &sa, &osa);
+
+  bool has_div = false;
+#if defined(__arm__)
+  if (artCheckForArmSdivInstruction()) {
+    has_div = true;
+  }
+#endif
+
+  // Restore the signal handler.
+  sigaction(SIGILL, &osa, nullptr);
+
+  // Use compile time features to "detect" LPAE support.
+  // TODO: write an assembly LPAE support test.
+#if defined(__ARM_FEATURE_LPAE)
+  const bool has_lpae = true;
+#else
+  const bool has_lpae = false;
+#endif
+  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+}
+
+bool ArmInstructionSetFeatures::Equals(const InstructionSetFeatures* other) const {
+  if (kArm != other->GetInstructionSet()) {
+    return false;
+  }
+  const ArmInstructionSetFeatures* other_as_arm = other->AsArmInstructionSetFeatures();
+  return IsSmp() == other_as_arm->IsSmp() &&
+      has_div_ == other_as_arm->has_div_ &&
+      has_atomic_ldrd_strd_ == other_as_arm->has_atomic_ldrd_strd_;
+}
+
+uint32_t ArmInstructionSetFeatures::AsBitmap() const {
+  return (IsSmp() ? kSmpBitfield : 0) |
+      (has_div_ ? kDivBitfield : 0) |
+      (has_atomic_ldrd_strd_ ? kAtomicLdrdStrdBitfield : 0);
+}
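+
+// For example, a feature set with smp, div and atomic ldrd/strd all enabled
+// encodes as kSmpBitfield | kDivBitfield | kAtomicLdrdStrdBitfield == 7, the
+// value the krait expectations in instruction_set_features_arm_test.cc check
+// against.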
+
+std::string ArmInstructionSetFeatures::GetFeatureString() const {
+  std::string result;
+  if (IsSmp()) {
+    result += "smp";
+  } else {
+    result += "-smp";
+  }
+  if (has_div_) {
+    result += ",div";
+  } else {
+    result += ",-div";
+  }
+  if (has_atomic_ldrd_strd_) {
+    result += ",atomic_ldrd_strd";
+  } else {
+    result += ",-atomic_ldrd_strd";
+  }
+  return result;
+}
+
+const InstructionSetFeatures* ArmInstructionSetFeatures::AddFeaturesFromSplitString(
+    const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
+  bool has_atomic_ldrd_strd = has_atomic_ldrd_strd_;
+  bool has_div = has_div_;
+  for (auto i = features.begin(); i != features.end(); i++) {
+    std::string feature = Trim(*i);
+    if (feature == "div") {
+      has_div = true;
+    } else if (feature == "-div") {
+      has_div = false;
+    } else if (feature == "atomic_ldrd_strd") {
+      has_atomic_ldrd_strd = true;
+    } else if (feature == "-atomic_ldrd_strd") {
+      has_atomic_ldrd_strd = false;
+    } else {
+      *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
+      return nullptr;
+    }
+  }
+  return new ArmInstructionSetFeatures(smp, has_div, has_atomic_ldrd_strd);
+}
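+
+// For example, adding "atomic_ldrd_strd,div" to an arm7 base yields a feature
+// set equal to the krait defaults, while "-div" would mask divide support back
+// off (see ArmAddFeaturesFromString in instruction_set_features_arm_test.cc).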
+
+}  // namespace art
diff --git a/runtime/arch/arm/instruction_set_features_arm.h b/runtime/arch/arm/instruction_set_features_arm.h
new file mode 100644
index 0000000..221bf1f
--- /dev/null
+++ b/runtime/arch/arm/instruction_set_features_arm.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM_INSTRUCTION_SET_FEATURES_ARM_H_
+#define ART_RUNTIME_ARCH_ARM_INSTRUCTION_SET_FEATURES_ARM_H_
+
+#include "arch/instruction_set_features.h"
+
+namespace art {
+
+// Instruction set features relevant to the ARM architecture.
+class ArmInstructionSetFeatures FINAL : public InstructionSetFeatures {
+ public:
+  // Process a CPU variant string like "krait" or "cortex-a15" and create InstructionSetFeatures.
+  static const ArmInstructionSetFeatures* FromVariant(const std::string& variant,
+                                                      std::string* error_msg);
+
+  // Parse a bitmap and create an InstructionSetFeatures.
+  static const ArmInstructionSetFeatures* FromBitmap(uint32_t bitmap);
+
+  // Turn C pre-processor #defines into the equivalent instruction set features.
+  static const ArmInstructionSetFeatures* FromCppDefines();
+
+  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
+  static const ArmInstructionSetFeatures* FromCpuInfo();
+
+  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
+  // InstructionSetFeatures.
+  static const ArmInstructionSetFeatures* FromHwcap();
+
+  // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
+  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
+  static const ArmInstructionSetFeatures* FromAssembly();
+
+  bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
+
+  InstructionSet GetInstructionSet() const OVERRIDE {
+    return kArm;
+  }
+
+  uint32_t AsBitmap() const OVERRIDE;
+
+  // Return a string of the form "div,lpae" or "none".
+  std::string GetFeatureString() const OVERRIDE;
+
+  // Is the divide instruction feature enabled?
+  bool HasDivideInstruction() const {
+    return has_div_;
+  }
+
+  // Are the ldrd and strd instructions atomic? This is commonly true when the Large Physical
+  // Address Extension (LPAE) is present.
+  bool HasAtomicLdrdAndStrd() const {
+    return has_atomic_ldrd_strd_;
+  }
+
+  virtual ~ArmInstructionSetFeatures() {}
+
+ protected:
+  // Parse a vector of the form "div", "lpae" adding these to a new ArmInstructionSetFeatures.
+  const InstructionSetFeatures*
+      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+                                 std::string* error_msg) const OVERRIDE;
+
+ private:
+  ArmInstructionSetFeatures(bool smp, bool has_div, bool has_atomic_ldrd_strd)
+      : InstructionSetFeatures(smp),
+        has_div_(has_div), has_atomic_ldrd_strd_(has_atomic_ldrd_strd) {
+  }
+
+  // Bitmap positions for encoding features as a bitmap.
+  enum {
+    kSmpBitfield = 1,
+    kDivBitfield = 2,
+    kAtomicLdrdStrdBitfield = 4,
+  };
+
+  const bool has_div_;
+  const bool has_atomic_ldrd_strd_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArmInstructionSetFeatures);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_ARM_INSTRUCTION_SET_FEATURES_ARM_H_
diff --git a/runtime/arch/arm/instruction_set_features_arm_test.cc b/runtime/arch/arm/instruction_set_features_arm_test.cc
new file mode 100644
index 0000000..44b1640
--- /dev/null
+++ b/runtime/arch/arm/instruction_set_features_arm_test.cc
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_arm.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+TEST(ArmInstructionSetFeaturesTest, ArmFeaturesFromVariant) {
+  // Build features for a 32-bit ARM krait processor.
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> krait_features(
+      InstructionSetFeatures::FromVariant(kArm, "krait", &error_msg));
+  ASSERT_TRUE(krait_features.get() != nullptr) << error_msg;
+
+  ASSERT_EQ(krait_features->GetInstructionSet(), kArm);
+  EXPECT_TRUE(krait_features->Equals(krait_features.get()));
+  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("smp,div,atomic_ldrd_strd", krait_features->GetFeatureString().c_str());
+  EXPECT_EQ(krait_features->AsBitmap(), 7U);
+
+  // Build features for a 32-bit ARM denver processor.
+  std::unique_ptr<const InstructionSetFeatures> denver_features(
+      InstructionSetFeatures::FromVariant(kArm, "denver", &error_msg));
+  ASSERT_TRUE(denver_features.get() != nullptr) << error_msg;
+
+  EXPECT_TRUE(denver_features->Equals(denver_features.get()));
+  EXPECT_TRUE(denver_features->Equals(krait_features.get()));
+  EXPECT_TRUE(krait_features->Equals(denver_features.get()));
+  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("smp,div,atomic_ldrd_strd", denver_features->GetFeatureString().c_str());
+  EXPECT_EQ(denver_features->AsBitmap(), 7U);
+
+  // Build features for a 32-bit ARM7 processor (no div, no atomic ldrd/strd).
+  std::unique_ptr<const InstructionSetFeatures> arm7_features(
+      InstructionSetFeatures::FromVariant(kArm, "arm7", &error_msg));
+  ASSERT_TRUE(arm7_features.get() != nullptr) << error_msg;
+
+  EXPECT_TRUE(arm7_features->Equals(arm7_features.get()));
+  EXPECT_FALSE(arm7_features->Equals(krait_features.get()));
+  EXPECT_FALSE(krait_features->Equals(arm7_features.get()));
+  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("smp,-div,-atomic_ldrd_strd", arm7_features->GetFeatureString().c_str());
+  EXPECT_EQ(arm7_features->AsBitmap(), 1U);
+
+  // ARM6 is not a supported architecture variant.
+  std::unique_ptr<const InstructionSetFeatures> arm6_features(
+      InstructionSetFeatures::FromVariant(kArm, "arm6", &error_msg));
+  EXPECT_TRUE(arm6_features.get() == nullptr);
+  EXPECT_NE(error_msg.size(), 0U);
+}
+
+TEST(ArmInstructionSetFeaturesTest, ArmAddFeaturesFromString) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> base_features(
+      InstructionSetFeatures::FromVariant(kArm, "arm7", &error_msg));
+  ASSERT_TRUE(base_features.get() != nullptr) << error_msg;
+
+  // Build features for a 32-bit ARM processor with LPAE and div.
+  std::unique_ptr<const InstructionSetFeatures> krait_features(
+      base_features->AddFeaturesFromString("atomic_ldrd_strd,div", &error_msg));
+  ASSERT_TRUE(krait_features.get() != nullptr) << error_msg;
+
+  ASSERT_EQ(krait_features->GetInstructionSet(), kArm);
+  EXPECT_TRUE(krait_features->Equals(krait_features.get()));
+  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("smp,div,atomic_ldrd_strd", krait_features->GetFeatureString().c_str());
+  EXPECT_EQ(krait_features->AsBitmap(), 7U);
+
+  // Build features for a 32-bit ARM processor, specifying div and LPAE in the opposite order.
+  std::unique_ptr<const InstructionSetFeatures> denver_features(
+      base_features->AddFeaturesFromString("div,atomic_ldrd_strd", &error_msg));
+  ASSERT_TRUE(denver_features.get() != nullptr) << error_msg;
+
+  EXPECT_TRUE(denver_features->Equals(denver_features.get()));
+  EXPECT_TRUE(denver_features->Equals(krait_features.get()));
+  EXPECT_TRUE(krait_features->Equals(denver_features.get()));
+  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("smp,div,atomic_ldrd_strd", denver_features->GetFeatureString().c_str());
+  EXPECT_EQ(denver_features->AsBitmap(), 7U);
+
+  // Build features for a 32-bit default ARM processor.
+  std::unique_ptr<const InstructionSetFeatures> arm7_features(
+      base_features->AddFeaturesFromString("default", &error_msg));
+  ASSERT_TRUE(arm7_features.get() != nullptr) << error_msg;
+
+  EXPECT_TRUE(arm7_features->Equals(arm7_features.get()));
+  EXPECT_FALSE(arm7_features->Equals(krait_features.get()));
+  EXPECT_FALSE(krait_features->Equals(arm7_features.get()));
+  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("smp,-div,-atomic_ldrd_strd", arm7_features->GetFeatureString().c_str());
+  EXPECT_EQ(arm7_features->AsBitmap(), 1U);
+}
+
+}  // namespace art
diff --git a/runtime/arch/arm/instruction_set_features_arm.S b/runtime/arch/arm/instruction_set_features_assembly_tests.S
similarity index 94%
rename from runtime/arch/arm/instruction_set_features_arm.S
rename to runtime/arch/arm/instruction_set_features_assembly_tests.S
index c26f2cd..c1086df 100644
--- a/runtime/arch/arm/instruction_set_features_arm.S
+++ b/runtime/arch/arm/instruction_set_features_assembly_tests.S
@@ -23,7 +23,7 @@
 // caller must arrange for the signal handler to set the r0
 // register to 0 and move the pc forward by 4 bytes (to skip
 // the invalid instruction).
-ENTRY artCheckForARMSDIVInstruction
+ENTRY artCheckForArmSdivInstruction
   mov r1,#1
   // depending on the architecture, the assembler will not allow an
   // sdiv instruction, so we will have to output the bytes directly.
@@ -35,4 +35,4 @@
   // It will have 0 otherwise (set by the signal handler)
   // the value is just returned from this function.
   bx lr
-END artCheckForARMSDIVInstruction
+END artCheckForArmSdivInstruction
diff --git a/runtime/arch/arm/portable_entrypoints_arm.S b/runtime/arch/arm/portable_entrypoints_arm.S
index d37e760..89ac1f7 100644
--- a/runtime/arch/arm/portable_entrypoints_arm.S
+++ b/runtime/arch/arm/portable_entrypoints_arm.S
@@ -53,7 +53,7 @@
     mov    ip, #0                          @ set ip to 0
     str    ip, [sp]                        @ store NULL for method* at bottom of frame
     add    sp, #16                         @ first 4 args are not passed on stack for portable
-    ldr    ip, [r0, #MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET]  @ get pointer to the code
+    ldr    ip, [r0, #MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32]  @ get pointer to the code
     blx    ip                              @ call the method
     mov    sp, r11                         @ restore the stack pointer
     ldr    ip, [sp, #24]                   @ load the result pointer
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 632b414..1782db5 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -422,7 +422,7 @@
     mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
 #endif
 
-    ldr    ip, [r0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET]  @ get pointer to the code
+    ldr    ip, [r0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
     blx    ip                              @ call the method
 
     mov    sp, r11                         @ restore the stack pointer
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
new file mode 100644
index 0000000..a1270dc
--- /dev/null
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_arm64.h"
+
+#include <unistd.h>
+
+#include <fstream>
+#include <sstream>
+
+#include "base/stringprintf.h"
+#include "utils.h"  // For Trim.
+
+namespace art {
+
+const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromVariant(
+    const std::string& variant, std::string* error_msg) {
+  const bool smp = true;  // Conservative default.
+
+  // Look for variants that need a fix for a53 erratum 835769.
+  static const char* arm64_variants_with_a53_835769_bug[] = {
+      "default", "generic"  // Pessimistically assume all generic ARM64s are A53s.
+  };
+  bool needs_a53_835769_fix = FindVariantInArray(arm64_variants_with_a53_835769_bug,
+                                                 arraysize(arm64_variants_with_a53_835769_bug),
+                                                 variant);
+
+  if (!needs_a53_835769_fix) {
+    // Check to see if this is an expected variant.
+    static const char* arm64_known_variants[] = {
+        "denver64"
+    };
+    if (!FindVariantInArray(arm64_known_variants, arraysize(arm64_known_variants), variant)) {
+      std::ostringstream os;
+      os << "Unexpected CPU variant for Arm64: " << variant;
+      *error_msg = os.str();
+      return nullptr;
+    }
+  }
+  return new Arm64InstructionSetFeatures(smp, needs_a53_835769_fix);
+}
+
+const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+  bool smp = (bitmap & kSmpBitfield) != 0;
+  bool is_a53 = (bitmap & kA53Bitfield) != 0;
+  return new Arm64InstructionSetFeatures(smp, is_a53);
+}
+
+const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromCppDefines() {
+  const bool smp = true;
+  const bool is_a53 = true;  // Pessimistically assume all ARM64s are A53s.
+  return new Arm64InstructionSetFeatures(smp, is_a53);
+}
+
+const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromCpuInfo() {
+  // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+  // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+  bool smp = false;
+  const bool is_a53 = true;  // Conservative default.
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    while (!in.eof()) {
+      std::string line;
+      std::getline(in, line);
+      if (!in.eof()) {
+        LOG(INFO) << "cpuinfo line: " << line;
+        if (line.find("processor") != std::string::npos && line.find(": 1") != std::string::npos) {
+          smp = true;
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
+  return new Arm64InstructionSetFeatures(smp, is_a53);
+}
+
+const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromHwcap() {
+  bool smp = sysconf(_SC_NPROCESSORS_CONF) > 1;
+  const bool is_a53 = true;  // Pessimistically assume all ARM64s are A53s.
+  return new Arm64InstructionSetFeatures(smp, is_a53);
+}
+
+const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromAssembly() {
+  UNIMPLEMENTED(WARNING);
+  return FromCppDefines();
+}
+
+bool Arm64InstructionSetFeatures::Equals(const InstructionSetFeatures* other) const {
+  if (kArm64 != other->GetInstructionSet()) {
+    return false;
+  }
+  const Arm64InstructionSetFeatures* other_as_arm64 = other->AsArm64InstructionSetFeatures();
+  return IsSmp() == other->IsSmp() &&
+      fix_cortex_a53_835769_ == other_as_arm64->fix_cortex_a53_835769_;
+}
+
+uint32_t Arm64InstructionSetFeatures::AsBitmap() const {
+  return (IsSmp() ? kSmpBitfield : 0) | (fix_cortex_a53_835769_ ? kA53Bitfield : 0);
+}
+
+std::string Arm64InstructionSetFeatures::GetFeatureString() const {
+  std::string result;
+  if (IsSmp()) {
+    result += "smp";
+  } else {
+    result += "-smp";
+  }
+  if (fix_cortex_a53_835769_) {
+    result += ",a53";
+  } else {
+    result += ",-a53";
+  }
+  return result;
+}
+
+const InstructionSetFeatures* Arm64InstructionSetFeatures::AddFeaturesFromSplitString(
+    const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
+  bool is_a53 = fix_cortex_a53_835769_;
+  for (auto i = features.begin(); i != features.end(); i++) {
+    std::string feature = Trim(*i);
+    if (feature == "a53") {
+      is_a53 = true;
+    } else if (feature == "-a53") {
+      is_a53 = false;
+    } else {
+      *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
+      return nullptr;
+    }
+  }
+  return new Arm64InstructionSetFeatures(smp, is_a53);
+}
+
+}  // namespace art
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
new file mode 100644
index 0000000..b0c66b3
--- /dev/null
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM64_INSTRUCTION_SET_FEATURES_ARM64_H_
+#define ART_RUNTIME_ARCH_ARM64_INSTRUCTION_SET_FEATURES_ARM64_H_
+
+#include "arch/instruction_set_features.h"
+
+namespace art {
+
+// Instruction set features relevant to the ARM64 architecture.
+class Arm64InstructionSetFeatures FINAL : public InstructionSetFeatures {
+ public:
+  // Process a CPU variant string like "krait" or "cortex-a15" and create InstructionSetFeatures.
+  static const Arm64InstructionSetFeatures* FromVariant(const std::string& variant,
+                                                        std::string* error_msg);
+
+  // Parse a bitmap and create an InstructionSetFeatures.
+  static const Arm64InstructionSetFeatures* FromBitmap(uint32_t bitmap);
+
+  // Turn C pre-processor #defines into the equivalent instruction set features.
+  static const Arm64InstructionSetFeatures* FromCppDefines();
+
+  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
+  static const Arm64InstructionSetFeatures* FromCpuInfo();
+
+  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
+  // InstructionSetFeatures.
+  static const Arm64InstructionSetFeatures* FromHwcap();
+
+  // Use assembly tests of the current runtime (i.e. kRuntimeISA) to determine the
+  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
+  static const Arm64InstructionSetFeatures* FromAssembly();
+
+  bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
+
+  InstructionSet GetInstructionSet() const OVERRIDE {
+    return kArm64;
+  }
+
+  uint32_t AsBitmap() const OVERRIDE;
+
+  // Return a string of the form "smp,a53" or "-smp,-a53".
+  std::string GetFeatureString() const OVERRIDE;
+
+  // Generate code addressing Cortex-A53 erratum 835769?
+  bool NeedFixCortexA53_835769() const {
+    return fix_cortex_a53_835769_;
+  }
+
+  virtual ~Arm64InstructionSetFeatures() {}
+
+ protected:
+  // Parse a vector of the form "a53" adding these to a new Arm64InstructionSetFeatures.
+  const InstructionSetFeatures*
+      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+                                 std::string* error_msg) const OVERRIDE;
+
+ private:
+  explicit Arm64InstructionSetFeatures(bool smp, bool needs_a53_835769_fix)
+      : InstructionSetFeatures(smp), fix_cortex_a53_835769_(needs_a53_835769_fix) {
+  }
+
+  // Bitmap positions for encoding features as a bitmap.
+  enum {
+    kSmpBitfield = 1,
+    kA53Bitfield = 2,
+  };
+
+  const bool fix_cortex_a53_835769_;
+
+  DISALLOW_COPY_AND_ASSIGN(Arm64InstructionSetFeatures);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_ARM64_INSTRUCTION_SET_FEATURES_ARM64_H_
diff --git a/runtime/arch/arm64/instruction_set_features_arm64_test.cc b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
new file mode 100644
index 0000000..027e59c
--- /dev/null
+++ b/runtime/arch/arm64/instruction_set_features_arm64_test.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_arm64.h"
+
+#include <gtest/gtest.h>
+
+namespace art {
+
+TEST(Arm64InstructionSetFeaturesTest, Arm64Features) {
+  // Build features for an ARM64 processor.
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> arm64_features(
+      InstructionSetFeatures::FromVariant(kArm64, "default", &error_msg));
+  ASSERT_TRUE(arm64_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(arm64_features->GetInstructionSet(), kArm64);
+  EXPECT_TRUE(arm64_features->Equals(arm64_features.get()));
+  EXPECT_STREQ("smp,a53", arm64_features->GetFeatureString().c_str());
+  EXPECT_EQ(arm64_features->AsBitmap(), 3U);
+}
+
+}  // namespace art
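For reference, the expected bitmap of 3U in the test above is just the OR of the two ARM64 feature bits. A minimal standalone sketch, assuming only the kSmpBitfield/kA53Bitfield values from the header; AsBitmap here is a hand-copied stand-in, not the ART implementation:

#include <cassert>
#include <cstdint>

enum { kSmpBitfield = 1, kA53Bitfield = 2 };

// Stand-in mirroring Arm64InstructionSetFeatures::AsBitmap().
uint32_t AsBitmap(bool smp, bool fix_a53) {
  return (smp ? kSmpBitfield : 0) | (fix_a53 ? kA53Bitfield : 0);
}

int main() {
  assert(AsBitmap(true, true) == 3u);  // "smp,a53" <=> kSmpBitfield | kA53Bitfield.
  return 0;
}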
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 147d434..4415935 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -564,7 +564,7 @@
 .macro INVOKE_STUB_CALL_AND_RETURN
 
     // load method-> METHOD_QUICK_CODE_OFFSET
-    ldr x9, [x0 , #MIRROR_ART_METHOD_QUICK_CODE_OFFSET]
+    ldr x9, [x0 , #MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64]
     // Branch to method.
     blr x9
 
diff --git a/runtime/arch/instruction_set.cc b/runtime/arch/instruction_set.cc
new file mode 100644
index 0000000..92fa727
--- /dev/null
+++ b/runtime/arch/instruction_set.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set.h"
+
+#include "globals.h"
+
+namespace art {
+
+const char* GetInstructionSetString(const InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      return "arm";
+    case kArm64:
+      return "arm64";
+    case kX86:
+      return "x86";
+    case kX86_64:
+      return "x86_64";
+    case kMips:
+      return "mips";
+    case kMips64:
+      return "mips64";
+    case kNone:
+      return "none";
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      UNREACHABLE();
+  }
+}
+
+InstructionSet GetInstructionSetFromString(const char* isa_str) {
+  CHECK(isa_str != nullptr);
+
+  if (strcmp("arm", isa_str) == 0) {
+    return kArm;
+  } else if (strcmp("arm64", isa_str) == 0) {
+    return kArm64;
+  } else if (strcmp("x86", isa_str) == 0) {
+    return kX86;
+  } else if (strcmp("x86_64", isa_str) == 0) {
+    return kX86_64;
+  } else if (strcmp("mips", isa_str) == 0) {
+    return kMips;
+  } else if (strcmp("mips64", isa_str) == 0) {
+    return kMips64;
+  }
+
+  return kNone;
+}
+
+size_t GetInstructionSetAlignment(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return kArmAlignment;
+    case kArm64:
+      return kArm64Alignment;
+    case kX86:
+      // Fall-through.
+    case kX86_64:
+      return kX86Alignment;
+    case kMips:
+      return kMipsAlignment;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have alignment.";
+      UNREACHABLE();
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      UNREACHABLE();
+  }
+}
+
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+static constexpr size_t kArmStackOverflowReservedBytes =    8 * KB;
+static constexpr size_t kArm64StackOverflowReservedBytes =  8 * KB;
+static constexpr size_t kX86StackOverflowReservedBytes =    8 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB;
+
+size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+  switch (isa) {
+    case kArm:      // Intentional fall-through.
+    case kThumb2:
+      return kArmStackOverflowReservedBytes;
+
+    case kArm64:
+      return kArm64StackOverflowReservedBytes;
+
+    case kMips:
+      return kMipsStackOverflowReservedBytes;
+
+    case kX86:
+      return kX86StackOverflowReservedBytes;
+
+    case kX86_64:
+      return kX86_64StackOverflowReservedBytes;
+
+    case kNone:
+      LOG(FATAL) << "kNone has no stack overflow size";
+      UNREACHABLE();
+
+    default:
+      LOG(FATAL) << "Unknown instruction set" << isa;
+      UNREACHABLE();
+  }
+}
+
+}  // namespace art
diff --git a/runtime/arch/instruction_set.h b/runtime/arch/instruction_set.h
new file mode 100644
index 0000000..e413880
--- /dev/null
+++ b/runtime/arch/instruction_set.h
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_INSTRUCTION_SET_H_
+#define ART_RUNTIME_ARCH_INSTRUCTION_SET_H_
+
+#include <iosfwd>
+#include <string>
+
+#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
+
+namespace art {
+
+enum InstructionSet {
+  kNone,
+  kArm,
+  kArm64,
+  kThumb2,
+  kX86,
+  kX86_64,
+  kMips,
+  kMips64
+};
+std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
+
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
+// Architecture-specific pointer sizes
+static constexpr size_t kArmPointerSize = 4;
+static constexpr size_t kArm64PointerSize = 8;
+static constexpr size_t kMipsPointerSize = 4;
+static constexpr size_t kMips64PointerSize = 8;
+static constexpr size_t kX86PointerSize = 4;
+static constexpr size_t kX86_64PointerSize = 8;
+
+// ARM instruction alignment. ARM processors require code to be 4-byte aligned,
+// but ARM ELF requires 8.
+static constexpr size_t kArmAlignment = 8;
+
+// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kArm64Alignment = 16;
+
+// MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
+// TODO: Can this be 4?
+static constexpr size_t kMipsAlignment = 8;
+
+// X86 instruction alignment. This is the recommended alignment for maximum performance.
+static constexpr size_t kX86Alignment = 16;
+
+const char* GetInstructionSetString(InstructionSet isa);
+
+// Note: Returns kNone when the string cannot be parsed to a known value.
+InstructionSet GetInstructionSetFromString(const char* instruction_set);
+
+static inline size_t GetInstructionSetPointerSize(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return kArmPointerSize;
+    case kArm64:
+      return kArm64PointerSize;
+    case kX86:
+      return kX86PointerSize;
+    case kX86_64:
+      return kX86_64PointerSize;
+    case kMips:
+      return kMipsPointerSize;
+    case kMips64:
+      return kMips64PointerSize;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have pointer size.";
+      UNREACHABLE();
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      UNREACHABLE();
+  }
+}
+
+size_t GetInstructionSetAlignment(InstructionSet isa);
+
+static inline bool Is64BitInstructionSet(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+    case kX86:
+    case kMips:
+      return false;
+
+    case kArm64:
+    case kX86_64:
+    case kMips64:
+      return true;
+
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have bit width.";
+      UNREACHABLE();
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      UNREACHABLE();
+  }
+}
+
+static inline size_t InstructionSetPointerSize(InstructionSet isa) {
+  return Is64BitInstructionSet(isa) ? 8U : 4U;
+}
+
+static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return 4;
+    case kArm64:
+      return 8;
+    case kX86:
+      return 4;
+    case kX86_64:
+      return 8;
+    case kMips:
+      return 4;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have spills.";
+      UNREACHABLE();
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      UNREACHABLE();
+  }
+}
+
+static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return 4;
+    case kArm64:
+      return 8;
+    case kX86:
+      return 8;
+    case kX86_64:
+      return 8;
+    case kMips:
+      return 4;
+    case kNone:
+      LOG(FATAL) << "ISA kNone does not have spills.";
+      UNREACHABLE();
+    default:
+      LOG(FATAL) << "Unknown ISA " << isa;
+      UNREACHABLE();
+  }
+}
+
+size_t GetStackOverflowReservedBytes(InstructionSet isa);
+
+// The following definitions create return types for two word-sized entities that will be passed
+// in registers so that memory operations for the interface trampolines can be avoided. The entities
+// are the resolved method and the pointer to the code to be invoked.
+//
+// On x86, ARM32 and MIPS, this is given for a *scalar* 64-bit value. The definition thus *must* be
+// uint64_t or long long int.
+//
+// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a struct of two
+// size_t-sized values.
+//
+// We need two operations:
+//
+// 1) A flag value that signals failure. The assembly stubs expect the lower part to be "0".
+//    GetTwoWordFailureValue() will return a value that has lower part == 0.
+//
+// 2) A value that combines two word-sized values.
+//    GetTwoWordSuccessValue() constructs this.
+//
+// IMPORTANT: If you use this to transfer object pointers, it is your responsibility to ensure
+//            that the object does not move or the value is updated. Simple use of this is NOT SAFE
+//            when the garbage collector can move objects concurrently. Ensure that required locks
+//            are held when using!
+
+#if defined(__i386__) || defined(__arm__) || defined(__mips__)
+typedef uint64_t TwoWordReturn;
+
+// Encodes method_ptr==nullptr and code_ptr==nullptr
+static inline constexpr TwoWordReturn GetTwoWordFailureValue() {
+  return 0;
+}
+
+// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
+static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+  static_assert(sizeof(uint32_t) == sizeof(uintptr_t), "Unexpected size difference");
+  uint32_t lo32 = lo;
+  uint64_t hi64 = static_cast<uint64_t>(hi);
+  return ((hi64 << 32) | lo32);
+}
+
+#elif defined(__x86_64__) || defined(__aarch64__)
+struct TwoWordReturn {
+  uintptr_t lo;
+  uintptr_t hi;
+};
+
+// Encodes method_ptr==nullptr. Leaves a random value in the code pointer.
+static inline TwoWordReturn GetTwoWordFailureValue() {
+  TwoWordReturn ret;
+  ret.lo = 0;
+  return ret;
+}
+
+// Write values into their respective members.
+static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+  TwoWordReturn ret;
+  ret.lo = lo;
+  ret.hi = hi;
+  return ret;
+}
+#else
+#error "Unsupported architecture"
+#endif
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_INSTRUCTION_SET_H_
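As a cross-check on the comment block above, a self-contained sketch of the 32-bit scalar TwoWordReturn encoding (the x86_64/ARM64 struct variant needs no packing); Pack is a hypothetical stand-in for GetTwoWordSuccessValue with uint32_t inputs:

#include <cassert>
#include <cstdint>

typedef uint64_t TwoWordReturn;

// Lower 32 bits carry the method pointer, upper 32 bits the code pointer.
TwoWordReturn Pack(uint32_t hi, uint32_t lo) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

int main() {
  TwoWordReturn r = Pack(0xCAFEu, 0xBEEFu);
  assert(static_cast<uint32_t>(r) == 0xBEEFu);        // Lower part; 0 signals failure.
  assert(static_cast<uint32_t>(r >> 32) == 0xCAFEu);  // Upper part.
  return 0;
}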
diff --git a/runtime/arch/instruction_set_features.cc b/runtime/arch/instruction_set_features.cc
new file mode 100644
index 0000000..1072562
--- /dev/null
+++ b/runtime/arch/instruction_set_features.cc
@@ -0,0 +1,275 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features.h"
+
+#include "base/casts.h"
+#include "utils.h"
+
+#include "arm/instruction_set_features_arm.h"
+#include "arm64/instruction_set_features_arm64.h"
+#include "mips/instruction_set_features_mips.h"
+#include "x86/instruction_set_features_x86.h"
+#include "x86_64/instruction_set_features_x86_64.h"
+
+namespace art {
+
+const InstructionSetFeatures* InstructionSetFeatures::FromVariant(InstructionSet isa,
+                                                                  const std::string& variant,
+                                                                  std::string* error_msg) {
+  const InstructionSetFeatures* result;
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      result = ArmInstructionSetFeatures::FromVariant(variant, error_msg);
+      break;
+    case kArm64:
+      result = Arm64InstructionSetFeatures::FromVariant(variant, error_msg);
+      break;
+    case kMips:
+      result = MipsInstructionSetFeatures::FromVariant(variant, error_msg);
+      break;
+    case kX86:
+      result = X86InstructionSetFeatures::FromVariant(variant, error_msg);
+      break;
+    case kX86_64:
+      result = X86_64InstructionSetFeatures::FromVariant(variant, error_msg);
+      break;
+    default:
+      UNIMPLEMENTED(FATAL) << isa;
+      UNREACHABLE();
+  }
+  CHECK_EQ(result == nullptr, error_msg->size() != 0);
+  return result;
+}
+
+const InstructionSetFeatures* InstructionSetFeatures::FromBitmap(InstructionSet isa,
+                                                                 uint32_t bitmap) {
+  const InstructionSetFeatures* result;
+  switch (isa) {
+    case kArm:
+    case kThumb2:
+      result = ArmInstructionSetFeatures::FromBitmap(bitmap);
+      break;
+    case kArm64:
+      result = Arm64InstructionSetFeatures::FromBitmap(bitmap);
+      break;
+    case kMips:
+      result = MipsInstructionSetFeatures::FromBitmap(bitmap);
+      break;
+    case kX86:
+      result = X86InstructionSetFeatures::FromBitmap(bitmap);
+      break;
+    case kX86_64:
+      result = X86_64InstructionSetFeatures::FromBitmap(bitmap);
+      break;
+    default:
+      UNIMPLEMENTED(FATAL) << isa;
+      UNREACHABLE();
+  }
+  CHECK_EQ(bitmap, result->AsBitmap());
+  return result;
+}
+
+const InstructionSetFeatures* InstructionSetFeatures::FromCppDefines() {
+  const InstructionSetFeatures* result;
+  switch (kRuntimeISA) {
+    case kArm:
+    case kThumb2:
+      result = ArmInstructionSetFeatures::FromCppDefines();
+      break;
+    case kArm64:
+      result = Arm64InstructionSetFeatures::FromCppDefines();
+      break;
+    case kMips:
+      result = MipsInstructionSetFeatures::FromCppDefines();
+      break;
+    case kX86:
+      result = X86InstructionSetFeatures::FromCppDefines();
+      break;
+    case kX86_64:
+      result = X86_64InstructionSetFeatures::FromCppDefines();
+      break;
+    default:
+      UNIMPLEMENTED(FATAL) << kRuntimeISA;
+      UNREACHABLE();
+  }
+  return result;
+}
+
+const InstructionSetFeatures* InstructionSetFeatures::FromCpuInfo() {
+  const InstructionSetFeatures* result;
+  switch (kRuntimeISA) {
+    case kArm:
+    case kThumb2:
+      result = ArmInstructionSetFeatures::FromCpuInfo();
+      break;
+    case kArm64:
+      result = Arm64InstructionSetFeatures::FromCpuInfo();
+      break;
+    case kMips:
+      result = MipsInstructionSetFeatures::FromCpuInfo();
+      break;
+    case kX86:
+      result = X86InstructionSetFeatures::FromCpuInfo();
+      break;
+    case kX86_64:
+      result = X86_64InstructionSetFeatures::FromCpuInfo();
+      break;
+    default:
+      UNIMPLEMENTED(FATAL) << kRuntimeISA;
+      UNREACHABLE();
+  }
+  return result;
+}
+
+const InstructionSetFeatures* InstructionSetFeatures::FromHwcap() {
+  const InstructionSetFeatures* result;
+  switch (kRuntimeISA) {
+    case kArm:
+    case kThumb2:
+      result = ArmInstructionSetFeatures::FromHwcap();
+      break;
+    case kArm64:
+      result = Arm64InstructionSetFeatures::FromHwcap();
+      break;
+    case kMips:
+      result = MipsInstructionSetFeatures::FromHwcap();
+      break;
+    case kX86:
+      result = X86InstructionSetFeatures::FromHwcap();
+      break;
+    case kX86_64:
+      result = X86_64InstructionSetFeatures::FromHwcap();
+      break;
+    default:
+      UNIMPLEMENTED(FATAL) << kRuntimeISA;
+      UNREACHABLE();
+  }
+  return result;
+}
+
+const InstructionSetFeatures* InstructionSetFeatures::FromAssembly() {
+  const InstructionSetFeatures* result;
+  switch (kRuntimeISA) {
+    case kArm:
+    case kThumb2:
+      result = ArmInstructionSetFeatures::FromAssembly();
+      break;
+    case kArm64:
+      result = Arm64InstructionSetFeatures::FromAssembly();
+      break;
+    case kMips:
+      result = MipsInstructionSetFeatures::FromAssembly();
+      break;
+    case kX86:
+      result = X86InstructionSetFeatures::FromAssembly();
+      break;
+    case kX86_64:
+      result = X86_64InstructionSetFeatures::FromAssembly();
+      break;
+    default:
+      UNIMPLEMENTED(FATAL) << kRuntimeISA;
+      UNREACHABLE();
+  }
+  return result;
+}
+
+const InstructionSetFeatures* InstructionSetFeatures::AddFeaturesFromString(
+    const std::string& feature_list, std::string* error_msg) const {
+  if (feature_list.empty()) {
+    *error_msg = "No instruction set features specified";
+    return nullptr;
+  }
+  std::vector<std::string> features;
+  Split(feature_list, ',', &features);
+  bool smp = smp_;
+  bool use_default = false;  // Have we seen the 'default' feature?
+  bool first = false;  // Becomes true once the first feature has been processed.
+  for (auto it = features.begin(); it != features.end();) {
+    if (use_default) {
+      *error_msg = "Unexpected instruction set features after 'default'";
+      return nullptr;
+    }
+    std::string feature = Trim(*it);
+    bool erase = false;
+    if (feature == "default") {
+      if (!first) {
+        use_default = true;
+        erase = true;
+      } else {
+        *error_msg = "Unexpected instruction set features before 'default'";
+        return nullptr;
+      }
+    } else if (feature == "smp") {
+      smp = true;
+      erase = true;
+    } else if (feature == "-smp") {
+      smp = false;
+      erase = true;
+    }
+    // Erase the 'default' and smp features once they have been processed.
+    if (!erase) {
+      ++it;
+    } else {
+      it = features.erase(it);
+    }
+    first = true;
+  }
+  DCHECK_EQ(use_default, features.empty());
+  return AddFeaturesFromSplitString(smp, features, error_msg);
+}
+
+const ArmInstructionSetFeatures* InstructionSetFeatures::AsArmInstructionSetFeatures() const {
+  DCHECK_EQ(kArm, GetInstructionSet());
+  return down_cast<const ArmInstructionSetFeatures*>(this);
+}
+
+const Arm64InstructionSetFeatures* InstructionSetFeatures::AsArm64InstructionSetFeatures() const {
+  DCHECK_EQ(kArm64, GetInstructionSet());
+  return down_cast<const Arm64InstructionSetFeatures*>(this);
+}
+
+const MipsInstructionSetFeatures* InstructionSetFeatures::AsMipsInstructionSetFeatures() const {
+  DCHECK_EQ(kMips, GetInstructionSet());
+  return down_cast<const MipsInstructionSetFeatures*>(this);
+}
+
+const X86InstructionSetFeatures* InstructionSetFeatures::AsX86InstructionSetFeatures() const {
+  DCHECK(kX86 == GetInstructionSet() || kX86_64 == GetInstructionSet());
+  return down_cast<const X86InstructionSetFeatures*>(this);
+}
+
+const X86_64InstructionSetFeatures* InstructionSetFeatures::AsX86_64InstructionSetFeatures() const {
+  DCHECK_EQ(kX86_64, GetInstructionSet());
+  return down_cast<const X86_64InstructionSetFeatures*>(this);
+}
+
+bool InstructionSetFeatures::FindVariantInArray(const char* variants[], size_t num_variants,
+                                                const std::string& variant) {
+  const char** begin = variants;
+  const char** end = begin + num_variants;
+  return std::find(begin, end, variant) != end;
+}
+
+std::ostream& operator<<(std::ostream& os, const InstructionSetFeatures& rhs) {
+  os << "ISA: " << rhs.GetInstructionSet() << " Feature string: " << rhs.GetFeatureString();
+  return os;
+}
+
+}  // namespace art
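AddFeaturesFromString() above strips the generic smp/-smp tokens (and the 'default' marker) before handing the remainder to the ISA-specific AddFeaturesFromSplitString(). A simplified standalone sketch of that pre-pass, omitting the 'default' handling; Split is a local stand-in for the art/utils helper:

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

// Local stand-in for art's Split(); Trim() is skipped since the input below
// carries no stray whitespace.
std::vector<std::string> Split(const std::string& s, char sep) {
  std::vector<std::string> out;
  std::stringstream ss(s);
  std::string item;
  while (std::getline(ss, item, sep)) {
    out.push_back(item);
  }
  return out;
}

int main() {
  bool smp = true;  // Inherited from the current features (smp_).
  std::vector<std::string> remaining;
  for (const std::string& f : Split("-smp,a53", ',')) {
    if (f == "smp") {
      smp = true;
    } else if (f == "-smp") {
      smp = false;
    } else {
      remaining.push_back(f);  // Left for the ISA-specific parser.
    }
  }
  assert(!smp);
  assert(remaining.size() == 1 && remaining[0] == "a53");
  return 0;
}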
diff --git a/runtime/arch/instruction_set_features.h b/runtime/arch/instruction_set_features.h
new file mode 100644
index 0000000..2c6e699
--- /dev/null
+++ b/runtime/arch/instruction_set_features.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_INSTRUCTION_SET_FEATURES_H_
+#define ART_RUNTIME_ARCH_INSTRUCTION_SET_FEATURES_H_
+
+#include <ostream>
+#include <vector>
+
+#include "base/macros.h"
+#include "instruction_set.h"
+
+namespace art {
+
+class ArmInstructionSetFeatures;
+class Arm64InstructionSetFeatures;
+class MipsInstructionSetFeatures;
+class X86InstructionSetFeatures;
+class X86_64InstructionSetFeatures;
+
+// Abstraction used to describe the features of different instruction sets.
+class InstructionSetFeatures {
+ public:
+  // Process a CPU variant string for the given ISA and create an InstructionSetFeatures.
+  static const InstructionSetFeatures* FromVariant(InstructionSet isa,
+                                                   const std::string& variant,
+                                                   std::string* error_msg);
+
+  // Parse a bitmap for the given isa and create an InstructionSetFeatures.
+  static const InstructionSetFeatures* FromBitmap(InstructionSet isa, uint32_t bitmap);
+
+  // Turn C pre-processor #defines into the equivalent instruction set features for kRuntimeISA.
+  static const InstructionSetFeatures* FromCppDefines();
+
+  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
+  static const InstructionSetFeatures* FromCpuInfo();
+
+  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
+  // InstructionSetFeatures.
+  static const InstructionSetFeatures* FromHwcap();
+
+  // Use assembly tests of the current runtime (i.e. kRuntimeISA) to determine the
+  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
+  static const InstructionSetFeatures* FromAssembly();
+
+  // Parse a string of the form "div,-atomic_ldrd_strd" adding and removing these features to
+  // create a new InstructionSetFeatures.
+  const InstructionSetFeatures* AddFeaturesFromString(const std::string& feature_list,
+                                                      std::string* error_msg) const WARN_UNUSED;
+
+  // Are these features the same as the other given features?
+  virtual bool Equals(const InstructionSetFeatures* other) const = 0;
+
+  // Return the ISA these features relate to.
+  virtual InstructionSet GetInstructionSet() const = 0;
+
+  // Return a bitmap that represents the features. ISA specific.
+  virtual uint32_t AsBitmap() const = 0;
+
+  // Return a string of the form "div,lpae" or "none".
+  virtual std::string GetFeatureString() const = 0;
+
+  // Does the instruction set variant require instructions for correctness with SMP?
+  bool IsSmp() const {
+    return smp_;
+  }
+
+  // Down cast this to ArmInstructionSetFeatures.
+  const ArmInstructionSetFeatures* AsArmInstructionSetFeatures() const;
+
+  // Down cast this to Arm64InstructionSetFeatures.
+  const Arm64InstructionSetFeatures* AsArm64InstructionSetFeatures() const;
+
+  // Down cast this to MipsInstructionSetFeatures.
+  const MipsInstructionSetFeatures* AsMipsInstructionSetFeatures() const;
+
+  // Down cast this to X86InstructionSetFeatures.
+  const X86InstructionSetFeatures* AsX86InstructionSetFeatures() const;
+
+  // Down cast this to X86_64InstructionSetFeatures.
+  const X86_64InstructionSetFeatures* AsX86_64InstructionSetFeatures() const;
+
+  virtual ~InstructionSetFeatures() {}
+
+ protected:
+  explicit InstructionSetFeatures(bool smp) : smp_(smp) {}
+
+  // Returns true if variant appears in the array variants.
+  static bool FindVariantInArray(const char* variants[], size_t num_variants,
+                                 const std::string& variant);
+
+  // Add architecture specific features in sub-classes.
+  virtual const InstructionSetFeatures*
+      AddFeaturesFromSplitString(bool smp, const std::vector<std::string>& features,
+                                 std::string* error_msg) const = 0;
+
+ private:
+  const bool smp_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionSetFeatures);
+};
+std::ostream& operator<<(std::ostream& os, const InstructionSetFeatures& rhs);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_INSTRUCTION_SET_FEATURES_H_
diff --git a/runtime/arch/instruction_set_features_test.cc b/runtime/arch/instruction_set_features_test.cc
new file mode 100644
index 0000000..e6f4e7a
--- /dev/null
+++ b/runtime/arch/instruction_set_features_test.cc
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features.h"
+
+#include <gtest/gtest.h>
+
+#ifdef HAVE_ANDROID_OS
+#include "cutils/properties.h"
+#endif
+
+#include "base/stringprintf.h"
+
+namespace art {
+
+#ifdef HAVE_ANDROID_OS
+#if defined(__aarch64__)
+TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromSystemPropertyVariant) {
+  LOG(WARNING) << "Test disabled due to no CPP define for A53 erratum 835769";
+#else
+TEST(InstructionSetFeaturesTest, FeaturesFromSystemPropertyVariant) {
+#endif
+  // Take the default set of instruction features from the build.
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
+      InstructionSetFeatures::FromCppDefines());
+
+  // Read the variant property.
+  std::string key = StringPrintf("dalvik.vm.isa.%s.variant", GetInstructionSetString(kRuntimeISA));
+  char dex2oat_isa_variant[PROPERTY_VALUE_MAX];
+  if (property_get(key.c_str(), dex2oat_isa_variant, nullptr) > 0) {
+    // Use features from property to build InstructionSetFeatures and check against build's
+    // features.
+    std::string error_msg;
+    std::unique_ptr<const InstructionSetFeatures> property_features(
+        InstructionSetFeatures::FromVariant(kRuntimeISA, dex2oat_isa_variant, &error_msg));
+    ASSERT_TRUE(property_features.get() != nullptr) << error_msg;
+
+    EXPECT_TRUE(property_features->Equals(instruction_set_features.get()))
+      << "System property features: " << *property_features.get()
+      << "\nFeatures from build: " << *instruction_set_features.get();
+  }
+}
+
+#if defined(__aarch64__)
+TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromSystemPropertyString) {
+  LOG(WARNING) << "Test disabled due to no CPP define for A53 erratum 835769";
+#else
+TEST(InstructionSetFeaturesTest, FeaturesFromSystemPropertyString) {
+#endif
+  // Take the default set of instruction features from the build.
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
+      InstructionSetFeatures::FromCppDefines());
+
+  // Read the variant property.
+  std::string variant_key = StringPrintf("dalvik.vm.isa.%s.variant",
+                                         GetInstructionSetString(kRuntimeISA));
+  char dex2oat_isa_variant[PROPERTY_VALUE_MAX];
+  if (property_get(variant_key.c_str(), dex2oat_isa_variant, nullptr) > 0) {
+    // Read the features property.
+    std::string features_key = StringPrintf("dalvik.vm.isa.%s.features",
+                                            GetInstructionSetString(kRuntimeISA));
+    char dex2oat_isa_features[PROPERTY_VALUE_MAX];
+    if (property_get(features_key.c_str(), dex2oat_isa_features, nullptr) > 0) {
+      // Use features from property to build InstructionSetFeatures and check against build's
+      // features.
+      std::string error_msg;
+      std::unique_ptr<const InstructionSetFeatures> base_features(
+          InstructionSetFeatures::FromVariant(kRuntimeISA, dex2oat_isa_variant, &error_msg));
+      ASSERT_TRUE(base_features.get() != nullptr) << error_msg;
+
+      std::unique_ptr<const InstructionSetFeatures> property_features(
+          base_features->AddFeaturesFromString(dex2oat_isa_features, &error_msg));
+      ASSERT_TRUE(property_features.get() != nullptr) << error_msg;
+
+      EXPECT_TRUE(property_features->Equals(instruction_set_features.get()))
+          << "System property features: " << *property_features.get()
+          << "\nFeatures from build: " << *instruction_set_features.get();
+    }
+  }
+}
+
+#if defined(__arm__)
+TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromCpuInfo) {
+  LOG(WARNING) << "Test disabled due to buggy ARM kernels";
+#else
+TEST(InstructionSetFeaturesTest, FeaturesFromCpuInfo) {
+#endif
+  // Take the default set of instruction features from the build.
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
+      InstructionSetFeatures::FromCppDefines());
+
+  // Check we get the same instruction set features using /proc/cpuinfo.
+  std::unique_ptr<const InstructionSetFeatures> cpuinfo_features(
+      InstructionSetFeatures::FromCpuInfo());
+  EXPECT_TRUE(cpuinfo_features->Equals(instruction_set_features.get()))
+      << "CPU Info features: " << *cpuinfo_features.get()
+      << "\nFeatures from build: " << *instruction_set_features.get();
+}
+#endif
+
+#ifndef HAVE_ANDROID_OS
+TEST(InstructionSetFeaturesTest, HostFeaturesFromCppDefines) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> default_features(
+      InstructionSetFeatures::FromVariant(kRuntimeISA, "default", &error_msg));
+  ASSERT_TRUE(error_msg.empty());
+
+  std::unique_ptr<const InstructionSetFeatures> cpp_features(
+      InstructionSetFeatures::FromCppDefines());
+  EXPECT_TRUE(default_features->Equals(cpp_features.get()))
+      << "Default variant features: " << *default_features.get()
+      << "\nFeatures from build: " << *cpp_features.get();
+}
+#endif
+
+#if defined(__arm__)
+TEST(InstructionSetFeaturesTest, DISABLED_FeaturesFromHwcap) {
+  LOG(WARNING) << "Test disabled due to buggy ARM kernels";
+#else
+TEST(InstructionSetFeaturesTest, FeaturesFromHwcap) {
+#endif
+  // Take the default set of instruction features from the build.
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
+      InstructionSetFeatures::FromCppDefines());
+
+  // Check we get the same instruction set features using AT_HWCAP.
+  std::unique_ptr<const InstructionSetFeatures> hwcap_features(
+      InstructionSetFeatures::FromHwcap());
+  EXPECT_TRUE(hwcap_features->Equals(instruction_set_features.get()))
+      << "Hwcap features: " << *hwcap_features.get()
+      << "\nFeatures from build: " << *instruction_set_features.get();
+}
+
+TEST(InstructionSetFeaturesTest, FeaturesFromAssembly) {
+  // Take the default set of instruction features from the build.
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
+      InstructionSetFeatures::FromCppDefines());
+
+  // Check we get the same instruction set features using assembly tests.
+  std::unique_ptr<const InstructionSetFeatures> assembly_features(
+      InstructionSetFeatures::FromAssembly());
+  EXPECT_TRUE(assembly_features->Equals(instruction_set_features.get()))
+      << "Assembly features: " << *assembly_features.get()
+      << "\nFeatures from build: " << *instruction_set_features.get();
+}
+
+}  // namespace art
diff --git a/runtime/arch/instruction_set_test.cc b/runtime/arch/instruction_set_test.cc
new file mode 100644
index 0000000..932ef32
--- /dev/null
+++ b/runtime/arch/instruction_set_test.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set.h"
+
+#include <gtest/gtest.h>
+
+#include "base/stringprintf.h"
+
+namespace art {
+
+TEST(InstructionSetTest, GetInstructionSetFromString) {
+  EXPECT_EQ(kArm, GetInstructionSetFromString("arm"));
+  EXPECT_EQ(kArm64, GetInstructionSetFromString("arm64"));
+  EXPECT_EQ(kX86, GetInstructionSetFromString("x86"));
+  EXPECT_EQ(kX86_64, GetInstructionSetFromString("x86_64"));
+  EXPECT_EQ(kMips, GetInstructionSetFromString("mips"));
+  EXPECT_EQ(kNone, GetInstructionSetFromString("none"));
+  EXPECT_EQ(kNone, GetInstructionSetFromString("random-string"));
+}
+
+TEST(InstructionSetTest, GetInstructionSetString) {
+  EXPECT_STREQ("arm", GetInstructionSetString(kArm));
+  EXPECT_STREQ("arm", GetInstructionSetString(kThumb2));
+  EXPECT_STREQ("arm64", GetInstructionSetString(kArm64));
+  EXPECT_STREQ("x86", GetInstructionSetString(kX86));
+  EXPECT_STREQ("x86_64", GetInstructionSetString(kX86_64));
+  EXPECT_STREQ("mips", GetInstructionSetString(kMips));
+  EXPECT_STREQ("none", GetInstructionSetString(kNone));
+}
+
+TEST(InstructionSetTest, TestRoundTrip) {
+  EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA)));
+}
+
+TEST(InstructionSetTest, PointerSize) {
+  EXPECT_EQ(sizeof(void*), GetInstructionSetPointerSize(kRuntimeISA));
+}
+
+}  // namespace art
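One subtlety in TestRoundTrip above: kThumb2 prints as "arm" and parses back as kArm, so string round-tripping collapses kThumb2 to kArm. The test still holds because kRuntimeISA is never kThumb2. A reduced sketch of just that asymmetry, with hand-copied two-value mirrors of the mapping functions:

#include <cassert>
#include <cstring>

enum InstructionSet { kNone, kArm, kThumb2 };

const char* Str(InstructionSet isa) { return (isa == kArm || isa == kThumb2) ? "arm" : "none"; }
InstructionSet Parse(const char* s) { return strcmp(s, "arm") == 0 ? kArm : kNone; }

int main() {
  assert(Parse(Str(kArm)) == kArm);     // Round-trips.
  assert(Parse(Str(kThumb2)) == kArm);  // Collapses to kArm, not kThumb2.
  return 0;
}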
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
new file mode 100644
index 0000000..11be2a8
--- /dev/null
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_mips.h"
+
+#include <fstream>
+#include <sstream>
+
+#include "base/stringprintf.h"
+#include "utils.h"  // For Trim.
+
+namespace art {
+
+const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant(
+    const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
+  if (variant != "default") {
+    LOG(WARNING) << "Unexpected CPU variant for Mips, using defaults: " << variant;
+  }
+  bool smp = true;  // Conservative default.
+  bool fpu_32bit = true;
+  bool mips_isa_gte2 = true;
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+}
+
+const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+  bool smp = (bitmap & kSmpBitfield) != 0;
+  bool fpu_32bit = (bitmap & kFpu32Bitfield) != 0;
+  bool mips_isa_gte2 = (bitmap & kIsaRevGte2Bitfield) != 0;
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+}
+
+const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCppDefines() {
+  const bool smp = true;
+
+  // TODO: here we assume the FPU is always 32-bit.
+  const bool fpu_32bit = true;
+
+#if __mips_isa_rev >= 2
+  const bool mips_isa_gte2 = true;
+#else
+  const bool mips_isa_gte2 = false;
+#endif
+
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+}
+
+const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCpuInfo() {
+  // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+  // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+  bool smp = false;
+
+  // TODO: here we assume the FPU is always 32-bit.
+  const bool fpu_32bit = true;
+
+  // TODO: here we assume all MIPS processors are >= v2.
+#if __mips_isa_rev >= 2
+  const bool mips_isa_gte2 = true;
+#else
+  const bool mips_isa_gte2 = false;
+#endif
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    while (!in.eof()) {
+      std::string line;
+      std::getline(in, line);
+      if (!in.eof()) {
+        LOG(INFO) << "cpuinfo line: " << line;
+        if (line.find("processor") != std::string::npos && line.find(": 1") != std::string::npos) {
+          smp = true;
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+}
+
+const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromHwcap() {
+  UNIMPLEMENTED(WARNING);
+  return FromCppDefines();
+}
+
+const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromAssembly() {
+  UNIMPLEMENTED(WARNING);
+  return FromCppDefines();
+}
+
+bool MipsInstructionSetFeatures::Equals(const InstructionSetFeatures* other) const {
+  if (kMips != other->GetInstructionSet()) {
+    return false;
+  }
+  const MipsInstructionSetFeatures* other_as_mips = other->AsMipsInstructionSetFeatures();
+  return (IsSmp() == other->IsSmp()) &&
+      (fpu_32bit_ == other_as_mips->fpu_32bit_) &&
+      (mips_isa_gte2_ == other_as_mips->mips_isa_gte2_);
+}
+
+uint32_t MipsInstructionSetFeatures::AsBitmap() const {
+  return (IsSmp() ? kSmpBitfield : 0) |
+      (fpu_32bit_ ? kFpu32Bitfield : 0) |
+      (mips_isa_gte2_ ? kIsaRevGte2Bitfield : 0);
+}
+
+std::string MipsInstructionSetFeatures::GetFeatureString() const {
+  std::string result;
+  if (IsSmp()) {
+    result += "smp";
+  } else {
+    result += "-smp";
+  }
+  if (fpu_32bit_) {
+    result += ",fpu32";
+  } else {
+    result += ",-fpu32";
+  }
+  if (mips_isa_gte2_) {
+    result += ",mips2";
+  } else {
+    result += ",-mips2";
+  }
+  return result;
+}
+
+const InstructionSetFeatures* MipsInstructionSetFeatures::AddFeaturesFromSplitString(
+    const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
+  bool fpu_32bit = fpu_32bit_;
+  bool mips_isa_gte2 = mips_isa_gte2_;
+  for (auto i = features.begin(); i != features.end(); i++) {
+    std::string feature = Trim(*i);
+    if (feature == "fpu32") {
+      fpu_32bit = true;
+    } else if (feature == "-fpu32") {
+      fpu_32bit = false;
+    } else if (feature == "mips2") {
+      mips_isa_gte2 = true;
+    } else if (feature == "-mips2") {
+      mips_isa_gte2 = false;
+    } else {
+      *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
+      return nullptr;
+    }
+  }
+  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2);
+}
+
+}  // namespace art
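The CHECK_EQ(bitmap, result->AsBitmap()) in InstructionSetFeatures::FromBitmap() relies on AsBitmap() inverting FromBitmap(). A minimal sketch of that round-trip for the three MIPS bits, assuming the enum values from the header that follows:

#include <cassert>
#include <cstdint>

enum { kSmpBitfield = 1, kFpu32Bitfield = 2, kIsaRevGte2Bitfield = 4 };

struct MipsFlags { bool smp; bool fpu_32bit; bool gte2; };

MipsFlags FromBitmap(uint32_t b) {
  return { (b & kSmpBitfield) != 0, (b & kFpu32Bitfield) != 0, (b & kIsaRevGte2Bitfield) != 0 };
}

uint32_t AsBitmap(const MipsFlags& f) {
  return (f.smp ? kSmpBitfield : 0) | (f.fpu_32bit ? kFpu32Bitfield : 0) |
         (f.gte2 ? kIsaRevGte2Bitfield : 0);
}

int main() {
  for (uint32_t b = 0; b < 8; ++b) {
    assert(AsBitmap(FromBitmap(b)) == b);  // Encoding round-trips over all 3 bits.
  }
  return 0;
}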
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
new file mode 100644
index 0000000..f7c64fe
--- /dev/null
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MIPS_INSTRUCTION_SET_FEATURES_MIPS_H_
+#define ART_RUNTIME_ARCH_MIPS_INSTRUCTION_SET_FEATURES_MIPS_H_
+
+#include "arch/instruction_set_features.h"
+
+namespace art {
+
+// Instruction set features relevant to the MIPS architecture.
+class MipsInstructionSetFeatures FINAL : public InstructionSetFeatures {
+ public:
+  // Process a CPU variant string like "r4000" and create InstructionSetFeatures.
+  static const MipsInstructionSetFeatures* FromVariant(const std::string& variant,
+                                                        std::string* error_msg);
+
+  // Parse a bitmap and create an InstructionSetFeatures.
+  static const MipsInstructionSetFeatures* FromBitmap(uint32_t bitmap);
+
+  // Turn C pre-processor #defines into the equivalent instruction set features.
+  static const MipsInstructionSetFeatures* FromCppDefines();
+
+  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
+  static const MipsInstructionSetFeatures* FromCpuInfo();
+
+  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
+  // InstructionSetFeatures.
+  static const MipsInstructionSetFeatures* FromHwcap();
+
+  // Use assembly tests of the current runtime (i.e. kRuntimeISA) to determine the
+  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
+  static const MipsInstructionSetFeatures* FromAssembly();
+
+  bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
+
+  InstructionSet GetInstructionSet() const OVERRIDE {
+    return kMips;
+  }
+
+  uint32_t AsBitmap() const OVERRIDE;
+
+  std::string GetFeatureString() const OVERRIDE;
+
+  // Is this an ISA revision greater than or equal to 2, opening up new opcodes?
+  bool IsMipsIsaRevGreaterThanEqual2() const {
+    return mips_isa_gte2_;
+  }
+
+  // Floating point double registers are encoded differently based on whether the Status.FR bit is
+  // set. When the FR bit is 0 the FPU is 32-bit; when it is 1, the FPU is 64-bit. Return true if
+  // the code should be generated assuming Status.FR is 0.
+  bool Is32BitFloatingPoint() const {
+    return fpu_32bit_;
+  }
+
+  virtual ~MipsInstructionSetFeatures() {}
+
+ protected:
+  // Parse a vector of the form "fpu32", "mips2" adding these to a new MipsInstructionSetFeatures.
+  virtual const InstructionSetFeatures*
+      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+                                 std::string* error_msg) const OVERRIDE;
+
+ private:
+  MipsInstructionSetFeatures(bool smp, bool fpu_32bit, bool mips_isa_gte2)
+      : InstructionSetFeatures(smp), fpu_32bit_(fpu_32bit), mips_isa_gte2_(mips_isa_gte2) {
+  }
+
+  // Bitmap positions for encoding features as a bitmap.
+  enum {
+    kSmpBitfield = 1,
+    kFpu32Bitfield = 2,
+    kIsaRevGte2Bitfield = 4,
+  };
+
+  const bool fpu_32bit_;
+  const bool mips_isa_gte2_;
+
+  DISALLOW_COPY_AND_ASSIGN(MipsInstructionSetFeatures);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_MIPS_INSTRUCTION_SET_FEATURES_MIPS_H_
diff --git a/runtime/arch/mips/instruction_set_features_mips_test.cc b/runtime/arch/mips/instruction_set_features_mips_test.cc
new file mode 100644
index 0000000..9b81ce2
--- /dev/null
+++ b/runtime/arch/mips/instruction_set_features_mips_test.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_mips.h"
+
+#include <gtest/gtest.h>
+
+namespace art {
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeatures) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips_features->Equals(mips_features.get()));
+  EXPECT_STREQ("smp,fpu32,mips2", mips_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips_features->AsBitmap(), 7U);
+}
+
+}  // namespace art
diff --git a/runtime/arch/mips/portable_entrypoints_mips.S b/runtime/arch/mips/portable_entrypoints_mips.S
index d7e7a8e..8d418e8 100644
--- a/runtime/arch/mips/portable_entrypoints_mips.S
+++ b/runtime/arch/mips/portable_entrypoints_mips.S
@@ -98,7 +98,7 @@
     lw    $a1, 4($sp)           # copy arg value for a1
     lw    $a2, 8($sp)           # copy arg value for a2
     lw    $a3, 12($sp)          # copy arg value for a3
-    lw    $t9, MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET($a0)  # get pointer to the code
+    lw    $t9, MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32($a0)  # get pointer to the code
     jalr  $t9                   # call the method
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index e878ef7..4824857 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -512,7 +512,7 @@
     lw    $a1, 4($sp)           # copy arg value for a1
     lw    $a2, 8($sp)           # copy arg value for a2
     lw    $a3, 12($sp)          # copy arg value for a3
-    lw    $t9, MIRROR_ART_METHOD_QUICK_CODE_OFFSET($a0)  # get pointer to the code
+    lw    $t9, MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32($a0)  # get pointer to the code
     jalr  $t9                   # call the method
     sw    $zero, 0($sp)         # store NULL for method* at bottom of frame
     move  $sp, $fp              # restore the stack
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
new file mode 100644
index 0000000..a12773d
--- /dev/null
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_x86.h"
+
+#include <fstream>
+#include <sstream>
+
+#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "base/stringprintf.h"
+#include "utils.h"  // For Trim.
+
+namespace art {
+
+const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
+    const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED, bool x86_64) {
+  bool known_variant = false;
+  bool smp = true;  // Conservative default.
+  static const char* x86_variants_with_ssse3[] = {
+      "atom"
+  };
+  bool has_SSSE3 = FindVariantInArray(x86_variants_with_ssse3, arraysize(x86_variants_with_ssse3),
+                                      variant);
+  known_variant = has_SSSE3;  // "atom" is currently the only recognized x86 variant.
+  bool has_SSE4_1 = false;
+  bool has_SSE4_2 = false;
+  bool has_AVX = false;
+  bool has_AVX2 = false;
+  if (!known_variant && variant != "default") {
+    LOG(WARNING) << "Unexpected CPU variant for X86, using defaults: " << variant;
+  }
+
+  if (x86_64) {
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2);
+  } else {
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                         has_AVX2);
+  }
+}
+
+const X86InstructionSetFeatures* X86InstructionSetFeatures::FromBitmap(uint32_t bitmap,
+                                                                       bool x86_64) {
+  bool smp = (bitmap & kSmpBitfield) != 0;
+  bool has_SSSE3 = (bitmap & kSsse3Bitfield) != 0;
+  bool has_SSE4_1 = (bitmap & kSse4_1Bitfield) != 0;
+  bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
+  bool has_AVX = (bitmap & kAvxBitfield) != 0;
+  bool has_AVX2 = (bitmap & kAvx2Bitfield) != 0;
+  if (x86_64) {
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2);
+  } else {
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                         has_AVX2);
+  }
+}
+
+const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCppDefines(bool x86_64) {
+  const bool smp = true;
+
+#ifndef __SSSE3__
+  const bool has_SSSE3 = false;
+#else
+  const bool has_SSSE3 = true;
+#endif
+
+#ifndef __SSE4_1__
+  const bool has_SSE4_1 = false;
+#else
+  const bool has_SSE4_1 = true;
+#endif
+
+#ifndef __SSE4_2__
+  const bool has_SSE4_2 = false;
+#else
+  const bool has_SSE4_2 = true;
+#endif
+
+#ifndef __AVX__
+  const bool has_AVX = false;
+#else
+  const bool has_AVX = true;
+#endif
+
+#ifndef __AVX2__
+  const bool has_AVX2 = false;
+#else
+  const bool has_AVX2 = true;
+#endif
+
+  if (x86_64) {
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2);
+  } else {
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                         has_AVX2);
+  }
+}
+
+const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCpuInfo(bool x86_64) {
+  // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+  // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+  bool smp = false;
+  bool has_SSSE3 = false;
+  bool has_SSE4_1 = false;
+  bool has_SSE4_2 = false;
+  bool has_AVX = false;
+  bool has_AVX2 = false;
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    while (!in.eof()) {
+      std::string line;
+      std::getline(in, line);
+      if (!in.eof()) {
+        LOG(INFO) << "cpuinfo line: " << line;
+        if (line.find("flags") != std::string::npos) {
+          LOG(INFO) << "found flags";
+          if (line.find("ssse3") != std::string::npos) {
+            has_SSSE3 = true;
+          }
+          if (line.find("sse4_1") != std::string::npos) {
+            has_SSE4_1 = true;
+          }
+          if (line.find("sse4_2") != std::string::npos) {
+            has_SSE4_2 = true;
+          }
+          if (line.find("avx") != std::string::npos) {
+            has_AVX = true;
+          }
+          if (line.find("avx2") != std::string::npos) {
+            has_AVX2 = true;
+          }
+        } else if (line.find("processor") != std::string::npos &&
+            line.find(": 1") != std::string::npos) {
+          // A "processor : 1" line means there is at least a second CPU, hence SMP.
+          smp = true;
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
+  if (x86_64) {
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2);
+  } else {
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                         has_AVX2);
+  }
+}
+
+const X86InstructionSetFeatures* X86InstructionSetFeatures::FromHwcap(bool x86_64) {
+  UNIMPLEMENTED(WARNING);
+  return FromCppDefines(x86_64);
+}
+
+const X86InstructionSetFeatures* X86InstructionSetFeatures::FromAssembly(bool x86_64) {
+  UNIMPLEMENTED(WARNING);
+  return FromCppDefines(x86_64);
+}
+
+bool X86InstructionSetFeatures::Equals(const InstructionSetFeatures* other) const {
+  if (GetInstructionSet() != other->GetInstructionSet()) {
+    return false;
+  }
+  const X86InstructionSetFeatures* other_as_x86 = other->AsX86InstructionSetFeatures();
+  return (IsSmp() == other->IsSmp()) &&
+      (has_SSSE3_ == other_as_x86->has_SSSE3_) &&
+      (has_SSE4_1_ == other_as_x86->has_SSE4_1_) &&
+      (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
+      (has_AVX_ == other_as_x86->has_AVX_) &&
+      (has_AVX2_ == other_as_x86->has_AVX2_);
+}
+
+uint32_t X86InstructionSetFeatures::AsBitmap() const {
+  return (IsSmp() ? kSmpBitfield : 0) |
+      (has_SSSE3_ ? kSsse3Bitfield : 0) |
+      (has_SSE4_1_ ? kSse4_1Bitfield : 0) |
+      (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
+      (has_AVX_ ? kAvxBitfield : 0) |
+      (has_AVX2_ ? kAvx2Bitfield : 0);
+}
+
+std::string X86InstructionSetFeatures::GetFeatureString() const {
+  std::string result;
+  if (IsSmp()) {
+    result += "smp";
+  } else {
+    result += "-smp";
+  }
+  if (has_SSSE3_) {
+    result += ",ssse3";
+  } else {
+    result += ",-ssse3";
+  }
+  if (has_SSE4_1_) {
+    result += ",sse4.1";
+  } else {
+    result += ",-sse4.1";
+  }
+  if (has_SSE4_2_) {
+    result += ",sse4.2";
+  } else {
+    result += ",-sse4.2";
+  }
+  if (has_AVX_) {
+    result += ",avx";
+  } else {
+    result += ",-avx";
+  }
+  if (has_AVX2_) {
+    result += ",avx2";
+  } else {
+    result += ",-avx2";
+  }
+  return result;
+}
+
+const InstructionSetFeatures* X86InstructionSetFeatures::AddFeaturesFromSplitString(
+    const bool smp, const std::vector<std::string>& features, bool x86_64,
+    std::string* error_msg) const {
+  bool has_SSSE3 = has_SSSE3_;
+  bool has_SSE4_1 = has_SSE4_1_;
+  bool has_SSE4_2 = has_SSE4_2_;
+  bool has_AVX = has_AVX_;
+  bool has_AVX2 = has_AVX2_;
+  for (auto i = features.begin(); i != features.end(); i++) {
+    std::string feature = Trim(*i);
+    if (feature == "ssse3") {
+      has_SSSE3 = true;
+    } else if (feature == "-ssse3") {
+      has_SSSE3 = false;
+    } else if (feature == "sse4.1") {
+      has_SSE4_1 = true;
+    } else if (feature == "-sse4.1") {
+      has_SSE4_1 = false;
+    } else if (feature == "sse4.2") {
+      has_SSE4_2 = true;
+    } else if (feature == "-sse4.2") {
+      has_SSE4_2 = false;
+    } else if (feature == "avx") {
+      has_AVX = true;
+    } else if (feature == "-avx") {
+      has_AVX = false;
+    } else if (feature == "avx2") {
+      has_AVX2 = true;
+    } else if (feature == "-avx2") {
+      has_AVX2 = false;
+    } else {
+      *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
+      return nullptr;
+    }
+  }
+  if (x86_64) {
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2);
+  } else {
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                         has_AVX2);
+  }
+}
+
+}  // namespace art
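AddFeaturesFromSplitString is only reachable through the base class, which splits a comma-separated feature string and delegates here. A sketch of that path, assuming a base-class AddFeaturesFromString entry point that splits on ',' (not shown in this patch):

    std::string error_msg;
    std::unique_ptr<const InstructionSetFeatures> base(
        InstructionSetFeatures::FromVariant(kX86, "default", &error_msg));
    std::unique_ptr<const InstructionSetFeatures> modified(
        base->AddFeaturesFromString("sse4.1,-avx2", &error_msg));
    CHECK(modified.get() != nullptr) << error_msg;
    // An unknown name falls through to the error branch above and yields nullptr:
    CHECK(base->AddFeaturesFromString("sse5", &error_msg) == nullptr);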
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
new file mode 100644
index 0000000..926fabb
--- /dev/null
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_INSTRUCTION_SET_FEATURES_X86_H_
+#define ART_RUNTIME_ARCH_X86_INSTRUCTION_SET_FEATURES_X86_H_
+
+#include "arch/instruction_set_features.h"
+
+namespace art {
+
+// Instruction set features relevant to the X86 architecture.
+class X86InstructionSetFeatures : public InstructionSetFeatures {
+ public:
+  // Process a CPU variant string like "atom" or "nehalem" and create InstructionSetFeatures.
+  static const X86InstructionSetFeatures* FromVariant(const std::string& variant,
+                                                      std::string* error_msg,
+                                                      bool x86_64 = false);
+
+  // Parse a bitmap and create an InstructionSetFeatures.
+  static const X86InstructionSetFeatures* FromBitmap(uint32_t bitmap, bool x86_64 = false);
+
+  // Turn C pre-processor #defines into the equivalent instruction set features.
+  static const X86InstructionSetFeatures* FromCppDefines(bool x86_64 = false);
+
+  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
+  static const X86InstructionSetFeatures* FromCpuInfo(bool x86_64 = false);
+
+  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
+  // InstructionSetFeatures.
+  static const X86InstructionSetFeatures* FromHwcap(bool x86_64 = false);
+
+  // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
+  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
+  static const X86InstructionSetFeatures* FromAssembly(bool x86_64 = false);
+
+  bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
+
+  InstructionSet GetInstructionSet() const OVERRIDE {
+    return kX86;
+  }
+
+  uint32_t AsBitmap() const OVERRIDE;
+
+  std::string GetFeatureString() const OVERRIDE;
+
+  virtual ~X86InstructionSetFeatures() {}
+
+ protected:
+  // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
+  const InstructionSetFeatures*
+      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+                                 std::string* error_msg) const OVERRIDE {
+    return AddFeaturesFromSplitString(smp, features, false, error_msg);
+  }
+
+  const InstructionSetFeatures*
+      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+                                 bool x86_64, std::string* error_msg) const;
+
+  X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
+                            bool has_AVX, bool has_AVX2)
+      : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
+        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2) {
+  }
+
+ private:
+  // Bitfield masks used to encode features as a bitmap.
+  enum {
+    kSmpBitfield = 1,
+    kSsse3Bitfield = 2,
+    kSse4_1Bitfield = 4,
+    kSse4_2Bitfield = 8,
+    kAvxBitfield = 16,
+    kAvx2Bitfield = 32,
+  };
+
+  const bool has_SSSE3_;   // x86 128bit SIMD - Supplemental SSE.
+  const bool has_SSE4_1_;  // x86 128bit SIMD SSE4.1.
+  const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
+  const bool has_AVX_;     // x86 256bit SIMD AVX.
+  const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
+
+  DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_INSTRUCTION_SET_FEATURES_X86_H_
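The factories differ mainly in their data source; a hedged sketch of how a caller might choose between them (the use_build_defaults flag is invented for illustration):

    const X86InstructionSetFeatures* features = use_build_defaults
        ? X86InstructionSetFeatures::FromCppDefines()  // trust compile-time defines
        : X86InstructionSetFeatures::FromCpuInfo();    // probe the running hardware

Note that FromHwcap and FromAssembly currently just fall back to FromCppDefines, as seen in the .cc above.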
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
new file mode 100644
index 0000000..d231beb
--- /dev/null
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_x86.h"
+
+#include <gtest/gtest.h>
+
+namespace art {
+
+TEST(X86InstructionSetFeaturesTest, X86FeaturesFromDefaultVariant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> x86_features(
+      InstructionSetFeatures::FromVariant(kX86, "default", &error_msg));
+  ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_features->Equals(x86_features.get()));
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 1U);
+}
+
+TEST(X86InstructionSetFeaturesTest, X86FeaturesFromAtomVariant) {
+  // Build features for a 32-bit x86 atom processor.
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> x86_features(
+      InstructionSetFeatures::FromVariant(kX86, "atom", &error_msg));
+  ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_features->Equals(x86_features.get()));
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 3U);
+
+  // Build features for a 32-bit x86 default processor.
+  std::unique_ptr<const InstructionSetFeatures> x86_default_features(
+      InstructionSetFeatures::FromVariant(kX86, "default", &error_msg));
+  ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+               x86_default_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
+
+  // Build features for a 64-bit x86-64 atom processor.
+  std::unique_ptr<const InstructionSetFeatures> x86_64_features(
+      InstructionSetFeatures::FromVariant(kX86_64, "atom", &error_msg));
+  ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
+  EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+               x86_64_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
+
+  EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
+  EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
+  EXPECT_FALSE(x86_features->Equals(x86_default_features.get()));
+}
+
+}  // namespace art
diff --git a/runtime/arch/x86/portable_entrypoints_x86.S b/runtime/arch/x86/portable_entrypoints_x86.S
index a7c4124..1f0900e 100644
--- a/runtime/arch/x86/portable_entrypoints_x86.S
+++ b/runtime/arch/x86/portable_entrypoints_x86.S
@@ -46,7 +46,7 @@
     addl LITERAL(12), %esp        // pop arguments to memcpy
     mov 12(%ebp), %eax            // move method pointer into eax
     mov %eax, (%esp)              // push method pointer onto stack
-    call *MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET(%eax) // call the method
+    call *MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     POP ebx                       // pop ebx
     POP ebp                       // pop ebp
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 0109a7c..1ce01c4 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -316,7 +316,7 @@
     mov 4(%esp), %ecx             // copy arg1 into ecx
     mov 8(%esp), %edx             // copy arg2 into edx
     mov 12(%esp), %ebx            // copy arg3 into ebx
-    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET(%eax) // call the method
+    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32(%eax) // call the method
     mov %ebp, %esp                // restore stack pointer
     CFI_DEF_CFA_REGISTER(esp)
     POP ebx                       // pop ebx
@@ -1117,7 +1117,7 @@
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
     subl LITERAL(12), %esp        // Align stack.
     CFI_ADJUST_CFA_OFFSET(12)
-    pushl 40(%esp)                // Pass LR.
+    pushl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-4+16(%esp)  // Pass LR.
     CFI_ADJUST_CFA_OFFSET(4)
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
new file mode 100644
index 0000000..3280177
--- /dev/null
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_X86_64_INSTRUCTION_SET_FEATURES_X86_64_H_
+#define ART_RUNTIME_ARCH_X86_64_INSTRUCTION_SET_FEATURES_X86_64_H_
+
+#include "arch/x86/instruction_set_features_x86.h"
+
+namespace art {
+
+// Instruction set features relevant to the X86_64 architecture.
+class X86_64InstructionSetFeatures FINAL : public X86InstructionSetFeatures {
+ public:
+  // Process a CPU variant string like "atom" or "nehalem" and create InstructionSetFeatures.
+  static const X86_64InstructionSetFeatures* FromVariant(const std::string& variant,
+                                                         std::string* error_msg) {
+    return X86InstructionSetFeatures::FromVariant(variant, error_msg, true)
+        ->AsX86_64InstructionSetFeatures();
+  }
+
+  // Parse a bitmap and create an InstructionSetFeatures.
+  static const X86_64InstructionSetFeatures* FromBitmap(uint32_t bitmap) {
+    return X86InstructionSetFeatures::FromBitmap(bitmap, true)->AsX86_64InstructionSetFeatures();
+  }
+
+  // Turn C pre-processor #defines into the equivalent instruction set features.
+  static const X86_64InstructionSetFeatures* FromCppDefines() {
+    return X86InstructionSetFeatures::FromCppDefines(true)->AsX86_64InstructionSetFeatures();
+  }
+
+  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
+  static const X86_64InstructionSetFeatures* FromCpuInfo() {
+    return X86InstructionSetFeatures::FromCpuInfo(true)->AsX86_64InstructionSetFeatures();
+  }
+
+  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
+  // InstructionSetFeatures.
+  static const X86_64InstructionSetFeatures* FromHwcap() {
+    return X86InstructionSetFeatures::FromHwcap(true)->AsX86_64InstructionSetFeatures();
+  }
+
+  // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
+  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
+  static const X86_64InstructionSetFeatures* FromAssembly() {
+    return X86InstructionSetFeatures::FromAssembly(true)->AsX86_64InstructionSetFeatures();
+  }
+
+  InstructionSet GetInstructionSet() const OVERRIDE {
+    return kX86_64;
+  }
+
+  virtual ~X86_64InstructionSetFeatures() {}
+
+ protected:
+  // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
+  const InstructionSetFeatures*
+      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+                                 std::string* error_msg) const OVERRIDE {
+    return X86InstructionSetFeatures::AddFeaturesFromSplitString(smp, features, true, error_msg);
+  }
+
+ private:
+  X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
+                               bool has_AVX, bool has_AVX2)
+      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2) {
+  }
+
+  friend class X86InstructionSetFeatures;
+
+  DISALLOW_COPY_AND_ASSIGN(X86_64InstructionSetFeatures);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ARCH_X86_64_INSTRUCTION_SET_FEATURES_X86_64_H_
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
new file mode 100644
index 0000000..5171080
--- /dev/null
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_set_features_x86_64.h"
+
+#include <gtest/gtest.h>
+
+namespace art {
+
+TEST(X86_64InstructionSetFeaturesTest, X86_64Features) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> x86_64_features(
+      InstructionSetFeatures::FromVariant(kX86_64, "default", &error_msg));
+  ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
+  EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+               x86_64_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
+}
+
+}  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index bed7238..a80e7d2 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -518,7 +518,7 @@
     LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
     LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
 .Lgpr_setup_finished:
-    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
+    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
     movq %rbp, %rsp               // Restore stack pointer.
     CFI_DEF_CFA_REGISTER(rsp)
     POP r9                        // Pop r9 - shorty*.
@@ -601,7 +601,7 @@
     LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
     LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
 .Lgpr_setup_finished2:
-    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
+    call *MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
     movq %rbp, %rsp               // Restore stack pointer.
     CFI_DEF_CFA_REGISTER(rsp)
     POP r9                        // Pop r9 - shorty*.
@@ -1446,7 +1446,7 @@
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %r8   // Pass return PC.
+    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.
 
     call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 26df045..4b4c8855 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -148,13 +148,21 @@
 ADD_TEST_EQ(MIRROR_ART_METHOD_DEX_CACHE_METHODS_OFFSET,
             art::mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())
 
-#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET     (32 + MIRROR_OBJECT_HEADER_SIZE)
-ADD_TEST_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET,
-            art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset().Int32Value())
+#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32     (48 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32,
+            art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset(4).Int32Value())
 
-#define MIRROR_ART_METHOD_QUICK_CODE_OFFSET        (40 + MIRROR_OBJECT_HEADER_SIZE)
-ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET,
-            art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().Int32Value())
+#define MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32        (40 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32,
+            art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value())
+
+#define MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_64     (64 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_64,
+            art::mirror::ArtMethod::EntryPointFromPortableCompiledCodeOffset(8).Int32Value())
+
+#define MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64        (48 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64,
+            art::mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value())
 
 #if defined(__cplusplus)
 }  // End of CheckAsmSupportOffsets.
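The _32/_64 macro split exists because ArtMethod's compiled-code entry points are now pointer-sized, so their byte offsets depend on the image pointer width. A hypothetical sketch of that dependence (kFixedFieldsSize and the index are invented, not the real ArtMethod layout):

    // Fixed-width fields first, then pointer-sized entry points: the offset of the
    // i-th entry point scales with the pointer size, which is why the 32-bit and
    // 64-bit constants above differ.
    static constexpr size_t EntryPointOffset(size_t pointer_size, size_t i) {
      return kFixedFieldsSize + i * pointer_size;
    }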
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index bf091d0..0e93eee 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -27,6 +27,9 @@
 
 bool ScopedFlock::Init(const char* filename, std::string* error_msg) {
   while (true) {
+    if (file_.get() != nullptr) {
+      UNUSED(file_->FlushCloseOrErase());  // Ignore result.
+    }
     file_.reset(OS::OpenFileWithFlags(filename, O_CREAT | O_RDWR));
     if (file_.get() == NULL) {
       *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno));
@@ -59,7 +62,7 @@
 }
 
 bool ScopedFlock::Init(File* file, std::string* error_msg) {
-  file_.reset(new File(dup(file->Fd())));
+  file_.reset(new File(dup(file->Fd()), true));
   if (file_->Fd() == -1) {
     file_.reset();
     *error_msg = StringPrintf("Failed to duplicate open file '%s': %s",
@@ -89,6 +92,9 @@
   if (file_.get() != NULL) {
     int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
     CHECK_EQ(0, flock_result);
+    if (file_->FlushCloseOrErase() != 0) {
+      PLOG(WARNING) << "Could not close scoped file lock file.";
+    }
   }
 }
 
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index f29a7ec..6e5e7a1 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -14,28 +14,68 @@
  * limitations under the License.
  */
 
-#include "base/logging.h"
 #include "base/unix_file/fd_file.h"
+
 #include <errno.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "base/logging.h"
+
 namespace unix_file {
 
-FdFile::FdFile() : fd_(-1), auto_close_(true) {
+FdFile::FdFile() : guard_state_(GuardState::kClosed), fd_(-1), auto_close_(true) {
 }
 
-FdFile::FdFile(int fd) : fd_(fd), auto_close_(true) {
+FdFile::FdFile(int fd, bool check_usage)
+    : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
+      fd_(fd), auto_close_(true) {
 }
 
-FdFile::FdFile(int fd, const std::string& path) : fd_(fd), file_path_(path), auto_close_(true) {
+FdFile::FdFile(int fd, const std::string& path, bool check_usage)
+    : guard_state_(check_usage ? GuardState::kBase : GuardState::kNoCheck),
+      fd_(fd), file_path_(path), auto_close_(true) {
   CHECK_NE(0U, path.size());
 }
 
 FdFile::~FdFile() {
+  if (kCheckSafeUsage && (guard_state_ < GuardState::kNoCheck)) {
+    if (guard_state_ < GuardState::kFlushed) {
+      LOG(::art::ERROR) << "File " << file_path_ << " wasn't explicitly flushed before destruction.";
+    }
+    if (guard_state_ < GuardState::kClosed) {
+      LOG(::art::ERROR) << "File " << file_path_ << " wasn't explicitly closed before destruction.";
+    }
+    CHECK_GE(guard_state_, GuardState::kClosed);
+  }
   if (auto_close_ && fd_ != -1) {
-    Close();
+    if (Close() != 0) {
+      PLOG(::art::WARNING) << "Failed to close file " << file_path_;
+    }
+  }
+}
+
+void FdFile::moveTo(GuardState target, GuardState warn_threshold, const char* warning) {
+  if (kCheckSafeUsage) {
+    if (guard_state_ < GuardState::kNoCheck) {
+      if (warn_threshold < GuardState::kNoCheck && guard_state_ >= warn_threshold) {
+        LOG(::art::ERROR) << warning;
+      }
+      guard_state_ = target;
+    }
+  }
+}
+
+void FdFile::moveUp(GuardState target, const char* warning) {
+  if (kCheckSafeUsage) {
+    if (guard_state_ < GuardState::kNoCheck) {
+      if (guard_state_ < target) {
+        guard_state_ = target;
+      } else if (target < guard_state_) {
+        LOG(::art::ERROR) << warning;
+      }
+    }
   }
 }
 
@@ -54,11 +94,28 @@
     return false;
   }
   file_path_ = path;
+  static_assert(O_RDONLY == 0, "Readonly flag has unexpected value.");
+  if (kCheckSafeUsage && (flags & (O_RDWR | O_CREAT | O_WRONLY)) != 0) {
+    // Start in the base state (not flushed, not closed).
+    guard_state_ = GuardState::kBase;
+  } else {
+    // We are not concerned with read-only files. In that case, proper flushing and closing is
+    // not important.
+    guard_state_ = GuardState::kNoCheck;
+  }
   return true;
 }
 
 int FdFile::Close() {
   int result = TEMP_FAILURE_RETRY(close(fd_));
+
+  // Do the check after the close, so the file descriptor is not leaked if the check aborts.
+  if (kCheckSafeUsage) {
+    CHECK_GE(guard_state_, GuardState::kFlushed) << "File " << file_path_
+        << " has not been flushed before closing.";
+    moveUp(GuardState::kClosed, nullptr);
+  }
+
   if (result == -1) {
     return -errno;
   } else {
@@ -74,6 +131,7 @@
 #else
   int rc = TEMP_FAILURE_RETRY(fsync(fd_));
 #endif
+  moveUp(GuardState::kFlushed, "Flushing closed file.");
   return (rc == -1) ? -errno : rc;
 }
 
@@ -92,6 +150,7 @@
 #else
   int rc = TEMP_FAILURE_RETRY(ftruncate(fd_, new_length));
 #endif
+  moveTo(GuardState::kBase, GuardState::kClosed, "Truncating closed file.");
   return (rc == -1) ? -errno : rc;
 }
 
@@ -107,6 +166,7 @@
 #else
   int rc = TEMP_FAILURE_RETRY(pwrite(fd_, buf, byte_count, offset));
 #endif
+  moveTo(GuardState::kBase, GuardState::kClosed, "Writing into closed file.");
   return (rc == -1) ? -errno : rc;
 }
 
@@ -135,6 +195,7 @@
 
 bool FdFile::WriteFully(const void* buffer, size_t byte_count) {
   const char* ptr = static_cast<const char*>(buffer);
+  moveTo(GuardState::kBase, GuardState::kClosed, "Writing into closed file.");
   while (byte_count > 0) {
     ssize_t bytes_written = TEMP_FAILURE_RETRY(write(fd_, ptr, byte_count));
     if (bytes_written == -1) {
@@ -146,4 +207,38 @@
   return true;
 }
 
+void FdFile::Erase() {
+  TEMP_FAILURE_RETRY(SetLength(0));
+  TEMP_FAILURE_RETRY(Flush());
+  TEMP_FAILURE_RETRY(Close());
+}
+
+int FdFile::FlushCloseOrErase() {
+  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  if (flush_result != 0) {
+    LOG(::art::ERROR) << "CloseOrErase failed while flushing a file.";
+    Erase();
+    return flush_result;
+  }
+  int close_result = TEMP_FAILURE_RETRY(Close());
+  if (close_result != 0) {
+    LOG(::art::ERROR) << "CloseOrErase failed while closing a file.";
+    Erase();
+    return close_result;
+  }
+  return 0;
+}
+
+int FdFile::FlushClose() {
+  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  if (flush_result != 0) {
+    LOG(::art::ERROR) << "FlushClose failed while flushing a file.";
+  }
+  int close_result = TEMP_FAILURE_RETRY(Close());
+  if (close_result != 0) {
+    LOG(::art::ERROR) << "FlushClose failed while closing a file.";
+  }
+  return (flush_result != 0) ? flush_result : close_result;
+}
+
 }  // namespace unix_file
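With checking enabled, the guard state turns a missing flush or close into a hard error in the destructor. A usage sketch (path and payload invented):

    unix_file::FdFile file(open("/tmp/example.tmp", O_CREAT | O_RDWR, 0600), true);
    if (!file.WriteFully("data", 4)) {
      PLOG(::art::ERROR) << "Write failed";
    }
    // Without this, the destructor would LOG and CHECK-fail under kCheckSafeUsage.
    if (file.FlushCloseOrErase() != 0) {
      LOG(::art::ERROR) << "Flush-and-close failed; file was erased.";
    }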
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index 01f4ca2..8db2ee4 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -24,6 +24,9 @@
 
 namespace unix_file {
 
+// If true, check whether Flush and Close are called before destruction.
+static constexpr bool kCheckSafeUsage = true;
+
 // A RandomAccessFile implementation backed by a file descriptor.
 //
 // Not thread safe.
@@ -32,8 +35,8 @@
   FdFile();
   // Creates an FdFile using the given file descriptor. Takes ownership of the
   // file descriptor. (Use DisableAutoClose to retain ownership.)
-  explicit FdFile(int fd);
-  explicit FdFile(int fd, const std::string& path);
+  explicit FdFile(int fd, bool check_usage);
+  explicit FdFile(int fd, const std::string& path, bool check_usage);
 
   // Destroys an FdFile, closing the file descriptor if Close hasn't already
   // been called. (If you care about the return value of Close, call it
@@ -47,12 +50,21 @@
   bool Open(const std::string& file_path, int flags, mode_t mode);
 
   // RandomAccessFile API.
-  virtual int Close();
-  virtual int64_t Read(char* buf, int64_t byte_count, int64_t offset) const;
-  virtual int SetLength(int64_t new_length);
+  virtual int Close() WARN_UNUSED;
+  virtual int64_t Read(char* buf, int64_t byte_count, int64_t offset) const WARN_UNUSED;
+  virtual int SetLength(int64_t new_length) WARN_UNUSED;
   virtual int64_t GetLength() const;
-  virtual int64_t Write(const char* buf, int64_t byte_count, int64_t offset);
-  virtual int Flush();
+  virtual int64_t Write(const char* buf, int64_t byte_count, int64_t offset) WARN_UNUSED;
+  virtual int Flush() WARN_UNUSED;
+
+  // Short for SetLength(0); Flush(); Close();
+  void Erase();
+
+  // Try to Flush(), then try to Close(). If either fails, call Erase().
+  int FlushCloseOrErase() WARN_UNUSED;
+
+  // Try to Flush() and Close(). Attempts both, but returns the first error.
+  int FlushClose() WARN_UNUSED;
 
   // Bonus API.
   int Fd() const;
@@ -61,8 +73,35 @@
     return file_path_;
   }
   void DisableAutoClose();
-  bool ReadFully(void* buffer, size_t byte_count);
-  bool WriteFully(const void* buffer, size_t byte_count);
+  bool ReadFully(void* buffer, size_t byte_count) WARN_UNUSED;
+  bool WriteFully(const void* buffer, size_t byte_count) WARN_UNUSED;
+
+  // This enum is public so that we can define the << operator over it.
+  enum class GuardState {
+    kBase,           // Base, file has not been flushed or closed.
+    kFlushed,        // File has been flushed, but not closed.
+    kClosed,         // File has been flushed and closed.
+    kNoCheck         // Do not check for the current file instance.
+  };
+
+ protected:
+  // If the guard state indicates checking (< kNoCheck), go to the target state "target". Print
+  // the given warning if the current state is at or above warn_threshold.
+  void moveTo(GuardState target, GuardState warn_threshold, const char* warning);
+
+  // If the guard state indicates checking (< kNoCheck) and is below the target state "target",
+  // go to "target". If the current state is higher (excluding kNoCheck) than the target state,
+  // print the warning.
+  void moveUp(GuardState target, const char* warning);
+
+  // Forcefully sets the state to the given one. This can overwrite kNoCheck.
+  void resetGuard(GuardState new_state) {
+    if (kCheckSafeUsage) {
+      guard_state_ = new_state;
+    }
+  }
+
+  GuardState guard_state_;
 
  private:
   int fd_;
@@ -72,6 +111,8 @@
   DISALLOW_COPY_AND_ASSIGN(FdFile);
 };
 
+std::ostream& operator<<(std::ostream& os, const FdFile::GuardState& kind);
+
 }  // namespace unix_file
 
 #endif  // ART_RUNTIME_BASE_UNIX_FILE_FD_FILE_H_
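Because the mutating operations are now WARN_UNUSED, every caller must consume the result, either by handling it or by explicitly discarding it, as scoped_flock.cc does above. A sketch:

    if (file->SetLength(0) != 0) {
      PLOG(WARNING) << "Failed to truncate " << file->GetPath();
    }
    // ...or deliberately ignore the result:
    UNUSED(file->FlushCloseOrErase());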
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 3481f2f..a7e5b96 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -24,7 +24,7 @@
 class FdFileTest : public RandomAccessFileTest {
  protected:
   virtual RandomAccessFile* MakeTestFile() {
-    return new FdFile(fileno(tmpfile()));
+    return new FdFile(fileno(tmpfile()), false);
   }
 };
 
@@ -53,6 +53,7 @@
   ASSERT_TRUE(file.Open(good_path, O_CREAT | O_WRONLY));
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
+  EXPECT_EQ(0, file.Flush());
   EXPECT_EQ(0, file.Close());
   EXPECT_EQ(-1, file.Fd());
   EXPECT_FALSE(file.IsOpened());
@@ -60,7 +61,7 @@
   EXPECT_GE(file.Fd(), 0);
   EXPECT_TRUE(file.IsOpened());
 
-  file.Close();
+  ASSERT_EQ(file.Close(), 0);
   ASSERT_EQ(unlink(good_path.c_str()), 0);
 }
 
diff --git a/runtime/base/unix_file/random_access_file_test.h b/runtime/base/unix_file/random_access_file_test.h
index 0002433..e7ace4c 100644
--- a/runtime/base/unix_file/random_access_file_test.h
+++ b/runtime/base/unix_file/random_access_file_test.h
@@ -76,6 +76,8 @@
     ASSERT_EQ(content.size(), static_cast<uint64_t>(file->Write(content.data(), content.size(), 0)));
 
     TestReadContent(content, file.get());
+
+    CleanUp(file.get());
   }
 
   void TestReadContent(const std::string& content, RandomAccessFile* file) {
@@ -131,6 +133,8 @@
     ASSERT_EQ(new_length, file->GetLength());
     ASSERT_TRUE(ReadString(file.get(), &new_content));
     ASSERT_EQ('\0', new_content[new_length - 1]);
+
+    CleanUp(file.get());
   }
 
   void TestWrite() {
@@ -163,6 +167,11 @@
     ASSERT_EQ(file->GetLength(), new_length);
     ASSERT_TRUE(ReadString(file.get(), &new_content));
     ASSERT_EQ(std::string("hello\0hello", new_length), new_content);
+
+    CleanUp(file.get());
+  }
+
+  virtual void CleanUp(RandomAccessFile* file ATTRIBUTE_UNUSED) {
   }
 
  protected:
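The new CleanUp hook lets subclasses flush and close guarded files before destruction; a sketch of an override for a checked FdFile (the class name is invented for illustration):

    class GuardedFdFileTest : public RandomAccessFileTest {
     protected:
      virtual RandomAccessFile* MakeTestFile() {
        return new FdFile(fileno(tmpfile()), true);  // check_usage enabled
      }
      virtual void CleanUp(RandomAccessFile* file) {
        ASSERT_EQ(0, static_cast<FdFile*>(file)->FlushClose());
      }
    };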
diff --git a/runtime/base/unix_file/random_access_file_utils_test.cc b/runtime/base/unix_file/random_access_file_utils_test.cc
index 6317922..9457d22 100644
--- a/runtime/base/unix_file/random_access_file_utils_test.cc
+++ b/runtime/base/unix_file/random_access_file_utils_test.cc
@@ -37,14 +37,14 @@
 }
 
 TEST_F(RandomAccessFileUtilsTest, BadSrc) {
-  FdFile src(-1);
+  FdFile src(-1, false);
   StringFile dst;
   ASSERT_FALSE(CopyFile(src, &dst));
 }
 
 TEST_F(RandomAccessFileUtilsTest, BadDst) {
   StringFile src;
-  FdFile dst(-1);
+  FdFile dst(-1, false);
 
   // We need some source content to trigger a write.
   // Copying an empty file is a no-op.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index fb90b91..e1b79c9 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -185,15 +185,13 @@
 }
 // Shuffle fields forward, making use of gaps whenever possible.
 template<int n>
-static void ShuffleForward(const size_t num_fields, size_t* current_field_idx,
+static void ShuffleForward(size_t* current_field_idx,
                            MemberOffset* field_offset,
-                           mirror::ObjectArray<mirror::ArtField>* fields,
                            std::deque<mirror::ArtField*>* grouped_and_sorted_fields,
                            FieldGaps* gaps)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(current_field_idx != nullptr);
   DCHECK(grouped_and_sorted_fields != nullptr);
-  DCHECK(fields != nullptr || (num_fields == 0 && grouped_and_sorted_fields->empty()));
   DCHECK(gaps != nullptr);
   DCHECK(field_offset != nullptr);
 
@@ -211,7 +209,6 @@
     }
     CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
     grouped_and_sorted_fields->pop_front();
-    fields->Set<false>(*current_field_idx, field);
     if (!gaps->empty() && gaps->top().size >= n) {
       FieldGap gap = gaps->top();
       gaps->pop();
@@ -246,7 +243,8 @@
       portable_imt_conflict_trampoline_(nullptr),
       quick_imt_conflict_trampoline_(nullptr),
       quick_generic_jni_trampoline_(nullptr),
-      quick_to_interpreter_bridge_trampoline_(nullptr) {
+      quick_to_interpreter_bridge_trampoline_(nullptr),
+      image_pointer_size_(sizeof(void*)) {
   memset(find_array_class_cache_, 0, kFindArrayCacheSize * sizeof(mirror::Class*));
 }
 
@@ -381,10 +379,9 @@
   Handle<mirror::Class> java_lang_reflect_ArtMethod(hs.NewHandle(
     AllocClass(self, java_lang_Class.Get(), mirror::ArtMethod::ClassSize())));
   CHECK(java_lang_reflect_ArtMethod.Get() != nullptr);
-  java_lang_reflect_ArtMethod->SetObjectSize(mirror::ArtMethod::InstanceSize());
+  java_lang_reflect_ArtMethod->SetObjectSize(mirror::ArtMethod::InstanceSize(sizeof(void*)));
   SetClassRoot(kJavaLangReflectArtMethod, java_lang_reflect_ArtMethod.Get());
   java_lang_reflect_ArtMethod->SetStatus(mirror::Class::kStatusResolved, self);
-
   mirror::ArtMethod::SetClass(java_lang_reflect_ArtMethod.Get());
 
   // Set up array classes for string, field, method
@@ -410,8 +407,7 @@
   // DexCache instances. Needs to be after String, Field, Method arrays since AllocDexCache uses
   // these roots.
   CHECK_NE(0U, boot_class_path.size());
-  for (size_t i = 0; i != boot_class_path.size(); ++i) {
-    const DexFile* dex_file = boot_class_path[i];
+  for (const DexFile* dex_file : boot_class_path) {
     CHECK(dex_file != nullptr);
     AppendToBootClassPath(self, *dex_file);
   }
@@ -1685,6 +1681,20 @@
   // Set classes on AbstractMethod early so that IsMethod tests can be performed during the live
   // bitmap walk.
   mirror::ArtMethod::SetClass(GetClassRoot(kJavaLangReflectArtMethod));
+  size_t art_method_object_size = mirror::ArtMethod::GetJavaLangReflectArtMethod()->GetObjectSize();
+  if (!Runtime::Current()->IsCompiler()) {
+    // The compiler supports having an image with a different pointer size than the runtime. This
+    // happens on the host when compiling 32-bit tests, since we use a 64-bit libart compiler. We
+    // may also use a 32-bit dex2oat on a system with 64-bit apps.
+    CHECK_EQ(art_method_object_size, mirror::ArtMethod::InstanceSize(sizeof(void*)))
+        << sizeof(void*);
+  }
+  if (art_method_object_size == mirror::ArtMethod::InstanceSize(4)) {
+    image_pointer_size_ = 4;
+  } else {
+    CHECK_EQ(art_method_object_size, mirror::ArtMethod::InstanceSize(8));
+    image_pointer_size_ = 8;
+  }
 
   // Set entry point to interpreter if in InterpretOnly mode.
   if (Runtime::Current()->GetInstrumentation()->InterpretOnly()) {
@@ -1698,7 +1708,7 @@
 
   // reinit array_iftable_ from any array class instance, they should be ==
   array_iftable_ = GcRoot<mirror::IfTable>(GetClassRoot(kObjectArrayClass)->GetIfTable());
-  DCHECK(array_iftable_.Read() == GetClassRoot(kBooleanArrayClass)->GetIfTable());
+  DCHECK_EQ(array_iftable_.Read(), GetClassRoot(kBooleanArrayClass)->GetIfTable());
   // String class root was set above
   mirror::Reference::SetClass(GetClassRoot(kJavaLangRefReference));
   mirror::ArtField::SetClass(GetClassRoot(kJavaLangReflectArtField));
@@ -2050,6 +2060,7 @@
                                                        Thread* self, const char* descriptor,
                                                        size_t hash,
                                                        Handle<mirror::ClassLoader> class_loader) {
+  // Can we special-case a well-understood PathClassLoader with the BootClassLoader as its parent?
   if (class_loader->GetClass() !=
       soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_system_PathClassLoader) ||
       class_loader->GetParent()->GetClass() !=
@@ -2061,17 +2072,21 @@
   if (pair.second != nullptr) {
     mirror::Class* klass = LookupClass(self, descriptor, hash, nullptr);
     if (klass != nullptr) {
-      return EnsureResolved(self, descriptor, klass);
+      // May return null if resolution on another thread fails.
+      klass = EnsureResolved(self, descriptor, klass);
+    } else {
+      // May OOME.
+      klass = DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(), *pair.first,
+                          *pair.second);
     }
-    klass = DefineClass(self, descriptor, hash, NullHandle<mirror::ClassLoader>(), *pair.first,
-                        *pair.second);
-    if (klass != nullptr) {
-      return klass;
+    if (klass == nullptr) {
+      CHECK(self->IsExceptionPending()) << descriptor;
+      self->ClearException();
     }
-    CHECK(self->IsExceptionPending()) << descriptor;
-    self->ClearException();
+    return klass;
   } else {
-    // RegisterDexFile may allocate dex caches (and cause thread suspension).
+    // Handle as if this is the child PathClassLoader.
+    // Note: RegisterDexFile may allocate dex caches (and cause thread suspension).
     StackHandleScope<3> hs(self);
     // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
     // We need to get the DexPathList and loop through it.
@@ -2128,8 +2143,9 @@
         }
       }
     }
+    self->AssertNoPendingException();
+    return nullptr;
   }
-  return nullptr;
 }
 
 mirror::Class* ClassLinker::FindClass(Thread* self, const char* descriptor,
@@ -5202,20 +5218,27 @@
     Primitive::Type type1 = field1->GetTypeAsPrimitiveType();
     Primitive::Type type2 = field2->GetTypeAsPrimitiveType();
     if (type1 != type2) {
-      bool is_primitive1 = type1 != Primitive::kPrimNot;
-      bool is_primitive2 = type2 != Primitive::kPrimNot;
-      if (type1 != type2) {
-        if (is_primitive1 && is_primitive2) {
-          // Larger primitive types go first.
-          return Primitive::ComponentSize(type1) > Primitive::ComponentSize(type2);
-        } else {
-          // Reference always goes first.
-          return !is_primitive1;
-        }
+      if (type1 == Primitive::kPrimNot) {
+        // Reference always goes first.
+        return true;
       }
+      if (type2 == Primitive::kPrimNot) {
+        // Reference always goes first.
+        return false;
+      }
+      size_t size1 = Primitive::ComponentSize(type1);
+      size_t size2 = Primitive::ComponentSize(type2);
+      if (size1 != size2) {
+        // Larger primitive types go first.
+        return size1 > size2;
+      }
+      // Primitive types differ but sizes match. Arbitrarily order by primitive type.
+      return type1 < type2;
     }
-    // same basic group? then sort by string.
-    return strcmp(field1->GetName(), field2->GetName()) < 0;
+    // Same basic group? Then sort by dex field index. This is guaranteed to be sorted
+    // by name and for equal names by type id index.
+    // NOTE: This works also for proxies. Their static fields are assigned appropriate indexes.
+    return field1->GetDexFieldIndex() < field2->GetDexFieldIndex();
   }
 };
 
@@ -5231,13 +5254,7 @@
   // Initialize field_offset
   MemberOffset field_offset(0);
   if (is_static) {
-    uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
-    if (klass->ShouldHaveEmbeddedImtAndVTable()) {
-      // Static fields come after the embedded tables.
-      base = mirror::Class::ComputeClassSize(true, klass->GetVTableDuringLinking()->GetLength(),
-                                             0, 0, 0, 0, 0);
-    }
-    field_offset = MemberOffset(base);
+    field_offset = klass->GetFirstReferenceStaticFieldOffsetDuringLinking();
   } else {
     mirror::Class* super_class = klass->GetSuperClass();
     if (super_class != nullptr) {
@@ -5274,28 +5291,25 @@
     if (isPrimitive) {
       break;  // past last reference, move on to the next phase
     }
-    if (UNLIKELY(!IsAligned<4>(field_offset.Uint32Value()))) {
+    if (UNLIKELY(!IsAligned<sizeof(mirror::HeapReference<mirror::Object>)>(
+        field_offset.Uint32Value()))) {
       MemberOffset old_offset = field_offset;
       field_offset = MemberOffset(RoundUp(field_offset.Uint32Value(), 4));
       AddFieldGap(old_offset.Uint32Value(), field_offset.Uint32Value(), &gaps);
     }
-    DCHECK(IsAligned<4>(field_offset.Uint32Value()));
+    DCHECK(IsAligned<sizeof(mirror::HeapReference<mirror::Object>)>(field_offset.Uint32Value()));
     grouped_and_sorted_fields.pop_front();
     num_reference_fields++;
-    fields->Set<false>(current_field, field);
     field->SetOffset(field_offset);
-    field_offset = MemberOffset(field_offset.Uint32Value() + sizeof(uint32_t));
+    field_offset = MemberOffset(field_offset.Uint32Value() +
+                                sizeof(mirror::HeapReference<mirror::Object>));
   }
   // Gaps are stored as a max heap which means that we must shuffle from largest to smallest
   // otherwise we could end up with suboptimal gap fills.
-  ShuffleForward<8>(num_fields, &current_field, &field_offset,
-                    fields, &grouped_and_sorted_fields, &gaps);
-  ShuffleForward<4>(num_fields, &current_field, &field_offset,
-                    fields, &grouped_and_sorted_fields, &gaps);
-  ShuffleForward<2>(num_fields, &current_field, &field_offset,
-                    fields, &grouped_and_sorted_fields, &gaps);
-  ShuffleForward<1>(num_fields, &current_field, &field_offset,
-                    fields, &grouped_and_sorted_fields, &gaps);
+  ShuffleForward<8>(&current_field, &field_offset, &grouped_and_sorted_fields, &gaps);
+  ShuffleForward<4>(&current_field, &field_offset, &grouped_and_sorted_fields, &gaps);
+  ShuffleForward<2>(&current_field, &field_offset, &grouped_and_sorted_fields, &gaps);
+  ShuffleForward<1>(&current_field, &field_offset, &grouped_and_sorted_fields, &gaps);
   CHECK(grouped_and_sorted_fields.empty()) << "Missed " << grouped_and_sorted_fields.size() <<
       " fields.";
   self->EndAssertNoThreadSuspension(old_no_suspend_cause);
@@ -5309,10 +5323,39 @@
     --num_reference_fields;
   }
 
+  size_t size = field_offset.Uint32Value();
+  // Update klass
+  if (is_static) {
+    klass->SetNumReferenceStaticFields(num_reference_fields);
+    *class_size = size;
+  } else {
+    klass->SetNumReferenceInstanceFields(num_reference_fields);
+    if (!klass->IsVariableSize()) {
+      if (klass->DescriptorEquals("Ljava/lang/reflect/ArtMethod;")) {
+        klass->SetObjectSize(mirror::ArtMethod::InstanceSize(sizeof(void*)));
+      } else {
+        std::string temp;
+        DCHECK_GE(size, sizeof(mirror::Object)) << klass->GetDescriptor(&temp);
+        size_t previous_size = klass->GetObjectSize();
+        if (previous_size != 0) {
+          // Make sure that we didn't originally have an incorrect size.
+          CHECK_EQ(previous_size, size) << klass->GetDescriptor(&temp);
+        }
+        klass->SetObjectSize(size);
+      }
+    }
+  }
+
   if (kIsDebugBuild) {
-    // Make sure that all reference fields appear before
-    // non-reference fields, and all double-wide fields are aligned.
-    bool seen_non_ref = false;
+    // Make sure that the fields array is ordered by name but all reference
+    // offsets are at the beginning as far as alignment allows.
+    MemberOffset start_ref_offset = is_static
+        ? klass->GetFirstReferenceStaticFieldOffsetDuringLinking()
+        : klass->GetFirstReferenceInstanceFieldOffset();
+    MemberOffset end_ref_offset(start_ref_offset.Uint32Value() +
+                                num_reference_fields *
+                                    sizeof(mirror::HeapReference<mirror::Object>));
+    MemberOffset current_ref_offset = start_ref_offset;
     for (size_t i = 0; i < num_fields; i++) {
       mirror::ArtField* field = fields->Get(i);
       if ((false)) {  // enable to debug field layout
@@ -5322,49 +5365,40 @@
                     << " offset="
                     << field->GetField32(mirror::ArtField::OffsetOffset());
       }
+      if (i != 0) {
+        mirror::ArtField* prev_field = fields->Get(i - 1u);
+        // NOTE: The field names can be the same. This is not possible in the Java language
+        // but it's valid Java/dex bytecode and, for example, ProGuard can generate such bytecode.
+        CHECK_LE(strcmp(prev_field->GetName(), field->GetName()), 0);
+      }
       Primitive::Type type = field->GetTypeAsPrimitiveType();
       bool is_primitive = type != Primitive::kPrimNot;
       if (klass->DescriptorEquals("Ljava/lang/ref/Reference;") &&
           strcmp("referent", field->GetName()) == 0) {
         is_primitive = true;  // We lied above, so we have to expect a lie here.
       }
+      MemberOffset offset = field->GetOffsetDuringLinking();
       if (is_primitive) {
-        if (!seen_non_ref) {
-          seen_non_ref = true;
-          DCHECK_EQ(num_reference_fields, i) << PrettyField(field);
+        if (offset.Uint32Value() < end_ref_offset.Uint32Value()) {
+          // Shuffled before references.
+          size_t type_size = Primitive::ComponentSize(type);
+          CHECK_LT(type_size, sizeof(mirror::HeapReference<mirror::Object>));
+          CHECK_LT(offset.Uint32Value(), start_ref_offset.Uint32Value());
+          CHECK_LE(offset.Uint32Value() + type_size, start_ref_offset.Uint32Value());
+          CHECK(!IsAligned<sizeof(mirror::HeapReference<mirror::Object>)>(offset.Uint32Value()));
         }
       } else {
-        DCHECK(!seen_non_ref) << PrettyField(field);
+        CHECK_EQ(current_ref_offset.Uint32Value(), offset.Uint32Value());
+        current_ref_offset = MemberOffset(current_ref_offset.Uint32Value() +
+                                          sizeof(mirror::HeapReference<mirror::Object>));
       }
     }
-    if (!seen_non_ref) {
-      DCHECK_EQ(num_fields, num_reference_fields) << PrettyClass(klass.Get());
-    }
-  }
-
-  size_t size = field_offset.Uint32Value();
-  // Update klass
-  if (is_static) {
-    klass->SetNumReferenceStaticFields(num_reference_fields);
-    *class_size = size;
-  } else {
-    klass->SetNumReferenceInstanceFields(num_reference_fields);
-    if (!klass->IsVariableSize()) {
-      std::string temp;
-      DCHECK_GE(size, sizeof(mirror::Object)) << klass->GetDescriptor(&temp);
-      size_t previous_size = klass->GetObjectSize();
-      if (previous_size != 0) {
-        // Make sure that we didn't originally have an incorrect size.
-        CHECK_EQ(previous_size, size) << klass->GetDescriptor(&temp);
-      }
-      klass->SetObjectSize(size);
-    }
+    CHECK_EQ(current_ref_offset.Uint32Value(), end_ref_offset.Uint32Value());
   }
   return true;
 }
 
-//  Set the bitmap of reference offsets, refOffsets, from the ifields
-//  list.
+//  Set the bitmap of reference instance field offsets.
 void ClassLinker::CreateReferenceInstanceOffsets(Handle<mirror::Class> klass) {
   uint32_t reference_offsets = 0;
   mirror::Class* super_class = klass->GetSuperClass();
@@ -5374,23 +5408,18 @@
     // Compute reference offsets unless our superclass overflowed.
     if (reference_offsets != mirror::Class::kClassWalkSuper) {
       size_t num_reference_fields = klass->NumReferenceInstanceFieldsDuringLinking();
-      mirror::ObjectArray<mirror::ArtField>* fields = klass->GetIFields();
-      // All of the fields that contain object references are guaranteed
-      // to be at the beginning of the fields list.
-      for (size_t i = 0; i < num_reference_fields; ++i) {
-        // Note that byte_offset is the offset from the beginning of
-        // object, not the offset into instance data
-        mirror::ArtField* field = fields->Get(i);
-        MemberOffset byte_offset = field->GetOffsetDuringLinking();
-        uint32_t displaced_bitmap_position =
-            (byte_offset.Uint32Value() - mirror::kObjectHeaderSize) /
+      if (num_reference_fields != 0u) {
+        // All of the fields that contain object references are guaranteed be grouped in memory
+        // starting at an appropriately aligned address after super class object data.
+        uint32_t start_offset = RoundUp(super_class->GetObjectSize(),
+                                        sizeof(mirror::HeapReference<mirror::Object>));
+        uint32_t start_bit = (start_offset - mirror::kObjectHeaderSize) /
             sizeof(mirror::HeapReference<mirror::Object>);
-        if (displaced_bitmap_position >= 32) {
-          // Can't encode offset so fall back on slow-path.
+        if (start_bit + num_reference_fields > 32) {
           reference_offsets = mirror::Class::kClassWalkSuper;
-          break;
         } else {
-          reference_offsets |= (1 << displaced_bitmap_position);
+          reference_offsets |= (0xffffffffu << start_bit) &
+                               (0xffffffffu >> (32 - (start_bit + num_reference_fields)));
         }
       }
     }
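
Editor's note: the mask expression above sets num_reference_fields consecutive
bits starting at start_bit, replacing the old one-bit-per-field loop. A
self-contained sketch of the same arithmetic (0xffffffffu stands in for
mirror::Class::kClassWalkSuper):

    #include <cassert>
    #include <cstdint>

    uint32_t ReferenceBitmap(uint32_t start_bit, uint32_t num_reference_fields) {
      if (num_reference_fields == 0u) {
        return 0u;
      }
      if (start_bit + num_reference_fields > 32u) {
        return 0xffffffffu;  // block doesn't fit: fall back to walking the hierarchy
      }
      // A run of ones from start_bit (inclusive) to start_bit + count (exclusive).
      return (0xffffffffu << start_bit) &
             (0xffffffffu >> (32u - (start_bit + num_reference_fields)));
    }

    int main() {
      assert(ReferenceBitmap(3u, 2u) == 0x18u);         // bits 3 and 4
      assert(ReferenceBitmap(30u, 3u) == 0xffffffffu);  // overflow: sentinel value
      return 0;
    }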
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 385f135..b78d0b5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -117,8 +117,8 @@
                            Handle<mirror::ClassLoader> class_loader)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Find a class in the path class loader, loading it if necessary. Hash function is supposed to
-  // be ComputeModifiedUtf8Hash(descriptor).
+  // Find a class in the path class loader, loading it if necessary without using JNI. Hash
+  // function is supposed to be ComputeModifiedUtf8Hash(descriptor).
   mirror::Class* FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
                                             Thread* self, const char* descriptor, size_t hash,
                                             Handle<mirror::ClassLoader> class_loader)
@@ -805,6 +805,9 @@
   const void* quick_generic_jni_trampoline_;
   const void* quick_to_interpreter_bridge_trampoline_;
 
+  // Image pointer size.
+  size_t image_pointer_size_;
+
   friend class ImageWriter;  // for GetClassRoots
   friend class ImageDumper;  // for FindOpenedOatFileFromOatLocation
   friend class ElfPatcher;  // for FindOpenedOatFileForDexFile & FindOpenedOatFileFromOatLocation
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index b257343..0c86761 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -275,31 +275,42 @@
       EXPECT_TRUE(field->IsStatic());
     }
 
-    // Confirm that all instances fields are packed together at the start
+    // Confirm that all instance field offsets are packed together at the start.
     EXPECT_GE(klass->NumInstanceFields(), klass->NumReferenceInstanceFields());
     StackHandleScope<1> hs(Thread::Current());
     MutableHandle<mirror::ArtField> fhandle = hs.NewHandle<mirror::ArtField>(nullptr);
-    for (size_t i = 0; i < klass->NumReferenceInstanceFields(); i++) {
-      mirror::ArtField* field = klass->GetInstanceField(i);
-      fhandle.Assign(field);
-      FieldHelper fh(fhandle);
-      ASSERT_TRUE(!field->IsPrimitiveType());
-      mirror::Class* field_type = fh.GetType();
-      ASSERT_TRUE(field_type != nullptr);
-      ASSERT_TRUE(!field_type->IsPrimitive());
-    }
-    for (size_t i = klass->NumReferenceInstanceFields(); i < klass->NumInstanceFields(); i++) {
+    MemberOffset start_ref_offset = klass->GetFirstReferenceInstanceFieldOffset();
+    MemberOffset end_ref_offset(start_ref_offset.Uint32Value() +
+                                klass->NumReferenceInstanceFields() *
+                                    sizeof(mirror::HeapReference<mirror::Object>));
+    MemberOffset current_ref_offset = start_ref_offset;
+    for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
       mirror::ArtField* field = klass->GetInstanceField(i);
       fhandle.Assign(field);
       FieldHelper fh(fhandle);
       mirror::Class* field_type = fh.GetType();
       ASSERT_TRUE(field_type != nullptr);
-      if (!fh.GetField()->IsPrimitiveType() || !field_type->IsPrimitive()) {
-        // While Reference.referent is not primitive, the ClassLinker
-        // treats it as such so that the garbage collector won't scan it.
-        EXPECT_EQ(PrettyField(fh.GetField()), "java.lang.Object java.lang.ref.Reference.referent");
+      if (!field->IsPrimitiveType()) {
+        ASSERT_TRUE(!field_type->IsPrimitive());
+        ASSERT_EQ(current_ref_offset.Uint32Value(), field->GetOffset().Uint32Value());
+        if (current_ref_offset.Uint32Value() == end_ref_offset.Uint32Value()) {
+          // While Reference.referent is not primitive, the ClassLinker
+          // treats it as such so that the garbage collector won't scan it.
+          EXPECT_EQ(PrettyField(fh.GetField()),
+                    "java.lang.Object java.lang.ref.Reference.referent");
+        } else {
+          current_ref_offset = MemberOffset(current_ref_offset.Uint32Value() +
+                                            sizeof(mirror::HeapReference<mirror::Object>));
+        }
+      } else {
+        if (field->GetOffset().Uint32Value() < end_ref_offset.Uint32Value()) {
+          // Shuffled before references.
+          ASSERT_LT(field->GetOffset().Uint32Value(), start_ref_offset.Uint32Value());
+          CHECK(!IsAligned<4>(field->GetOffset().Uint32Value()));
+        }
       }
     }
+    ASSERT_EQ(end_ref_offset.Uint32Value(), current_ref_offset.Uint32Value());
 
     uint32_t total_num_reference_instance_fields = 0;
     mirror::Class* k = klass.Get();
@@ -461,10 +472,7 @@
 
 struct ObjectOffsets : public CheckOffsets<mirror::Object> {
   ObjectOffsets() : CheckOffsets<mirror::Object>(false, "Ljava/lang/Object;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Object, klass_),   "shadow$_klass_"));
-
-    // alphabetical 32-bit
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Object, monitor_), "shadow$_monitor_"));
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Object, x_rb_ptr_), "shadow$_x_rb_ptr_"));
@@ -475,11 +483,8 @@
 
 struct ArtFieldOffsets : public CheckOffsets<mirror::ArtField> {
   ArtFieldOffsets() : CheckOffsets<mirror::ArtField>(false, "Ljava/lang/reflect/ArtField;") {
-    // alphabetical references
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, declaring_class_), "declaringClass"));
-
-    // alphabetical 32-bit
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, access_flags_),    "accessFlags"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, declaring_class_), "declaringClass"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, field_dex_idx_),   "fieldDexIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtField, offset_),          "offset"));
   };
@@ -487,21 +492,11 @@
 
 struct ArtMethodOffsets : public CheckOffsets<mirror::ArtMethod> {
   ArtMethodOffsets() : CheckOffsets<mirror::ArtMethod>(false, "Ljava/lang/reflect/ArtMethod;") {
-    // alphabetical references
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, access_flags_),                   "accessFlags"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, declaring_class_),                      "declaringClass"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_resolved_methods_),           "dexCacheResolvedMethods"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_resolved_types_),             "dexCacheResolvedTypes"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_cache_strings_),                    "dexCacheStrings"));
-
-    // alphabetical 64-bit
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_interpreter_),            "entryPointFromInterpreter"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_jni_),                    "entryPointFromJni"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_portable_compiled_code_), "entryPointFromPortableCompiledCode"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, entry_point_from_quick_compiled_code_),    "entryPointFromQuickCompiledCode"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, gc_map_),                                  "gcMap"));
-
-    // alphabetical 32-bit
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, access_flags_),                   "accessFlags"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_code_item_offset_),           "dexCodeItemOffset"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, dex_method_index_),               "dexMethodIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ArtMethod, method_index_),                   "methodIndex"));
@@ -510,50 +505,43 @@
 
 struct ClassOffsets : public CheckOffsets<mirror::Class> {
   ClassOffsets() : CheckOffsets<mirror::Class>(false, "Ljava/lang/Class;") {
-    // alphabetical references
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, access_flags_),                  "accessFlags"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, class_loader_),                  "classLoader"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, class_size_),                    "classSize"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, clinit_thread_id_),              "clinitThreadId"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, component_type_),                "componentType"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_),                     "dexCache"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_),             "dexClassDefIndex"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_),                  "dexTypeIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, direct_methods_),                "directMethods"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, ifields_),                       "iFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, iftable_),                       "ifTable"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, name_),                          "name"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, sfields_),                       "sFields"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, super_class_),                   "superClass"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_class_),            "verifyErrorClass"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_),               "virtualMethods"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, vtable_),                        "vtable"));
-
-    // alphabetical 32-bit
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, access_flags_),                  "accessFlags"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, class_size_),                    "classSize"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, clinit_thread_id_),              "clinitThreadId"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_),             "dexClassDefIndex"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_),                  "dexTypeIndex"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_instance_fields_), "numReferenceInstanceFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, num_reference_static_fields_),   "numReferenceStaticFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, object_size_),                   "objectSize"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, primitive_type_),                "primitiveType"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, reference_instance_offsets_),    "referenceInstanceOffsets"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, sfields_),                       "sFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, status_),                        "status"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, super_class_),                   "superClass"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_class_),            "verifyErrorClass"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_),               "virtualMethods"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Class, vtable_),                        "vtable"));
   };
 };
 
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
-    // alphabetical references
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::String, array_),     "value"));
-
-    // alphabetical 32-bit
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::String, count_),     "count"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::String, hash_code_), "hashCode"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::String, offset_),    "offset"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::String, array_),     "value"));
   };
 };
 
 struct ThrowableOffsets : public CheckOffsets<mirror::Throwable> {
   ThrowableOffsets() : CheckOffsets<mirror::Throwable>(false, "Ljava/lang/Throwable;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Throwable, cause_),                 "cause"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Throwable, detail_message_),        "detailMessage"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Throwable, stack_state_),           "stackState"));
@@ -564,17 +552,15 @@
 
 struct StackTraceElementOffsets : public CheckOffsets<mirror::StackTraceElement> {
   StackTraceElementOffsets() : CheckOffsets<mirror::StackTraceElement>(false, "Ljava/lang/StackTraceElement;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StackTraceElement, declaring_class_), "declaringClass"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StackTraceElement, file_name_),       "fileName"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StackTraceElement, method_name_),     "methodName"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StackTraceElement, line_number_),     "lineNumber"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::StackTraceElement, method_name_),     "methodName"));
   };
 };
 
 struct ClassLoaderOffsets : public CheckOffsets<mirror::ClassLoader> {
   ClassLoaderOffsets() : CheckOffsets<mirror::ClassLoader>(false, "Ljava/lang/ClassLoader;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ClassLoader, packages_),   "packages"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ClassLoader, parent_),     "parent"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::ClassLoader, proxyCache_), "proxyCache"));
@@ -583,27 +569,24 @@
 
 struct ProxyOffsets : public CheckOffsets<mirror::Proxy> {
   ProxyOffsets() : CheckOffsets<mirror::Proxy>(false, "Ljava/lang/reflect/Proxy;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Proxy, h_), "h"));
   };
 };
 
 struct DexCacheOffsets : public CheckOffsets<mirror::DexCache> {
   DexCacheOffsets() : CheckOffsets<mirror::DexCache>(false, "Ljava/lang/DexCache;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_),                        "dex"));
+    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_file_),                   "dexFile"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, location_),                   "location"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, resolved_fields_),            "resolvedFields"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, resolved_methods_),           "resolvedMethods"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, resolved_types_),             "resolvedTypes"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, strings_),                    "strings"));
-    offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_file_),                   "dexFile"));
   };
 };
 
 struct ReferenceOffsets : public CheckOffsets<mirror::Reference> {
   ReferenceOffsets() : CheckOffsets<mirror::Reference>(false, "Ljava/lang/ref/Reference;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Reference, pending_next_),  "pendingNext"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Reference, queue_),         "queue"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::Reference, queue_next_),    "queueNext"));
@@ -613,7 +596,6 @@
 
 struct FinalizerReferenceOffsets : public CheckOffsets<mirror::FinalizerReference> {
   FinalizerReferenceOffsets() : CheckOffsets<mirror::FinalizerReference>(false, "Ljava/lang/ref/FinalizerReference;") {
-    // alphabetical references
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::FinalizerReference, next_),   "next"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::FinalizerReference, prev_),   "prev"));
     offsets.push_back(CheckOffset(OFFSETOF_MEMBER(mirror::FinalizerReference, zombie_), "zombie"));
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 6e3ebc2..03b33e9 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -59,7 +59,7 @@
   filename_ += "/TmpFile-XXXXXX";
   int fd = mkstemp(&filename_[0]);
   CHECK_NE(-1, fd);
-  file_.reset(new File(fd, GetFilename()));
+  file_.reset(new File(fd, GetFilename(), true));
 }
 
 ScratchFile::ScratchFile(const ScratchFile& other, const char* suffix) {
@@ -67,7 +67,7 @@
   filename_ += suffix;
   int fd = open(filename_.c_str(), O_RDWR | O_CREAT, 0666);
   CHECK_NE(-1, fd);
-  file_.reset(new File(fd, GetFilename()));
+  file_.reset(new File(fd, GetFilename(), true));
 }
 
 ScratchFile::ScratchFile(File* file) {
@@ -88,6 +88,11 @@
   if (!OS::FileExists(filename_.c_str())) {
     return;
   }
+  if (file_.get() != nullptr) {
+    if (file_->FlushCloseOrErase() != 0) {
+      PLOG(WARNING) << "Error closing scratch file.";
+    }
+  }
   int unlink_result = unlink(filename_.c_str());
   CHECK_EQ(0, unlink_result);
 }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index e2f6085..ef5db2d 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -183,16 +183,20 @@
 
 class Breakpoint {
  public:
-  Breakpoint(mirror::ArtMethod* method, uint32_t dex_pc, bool need_full_deoptimization)
+  Breakpoint(mirror::ArtMethod* method, uint32_t dex_pc,
+             DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-    : method_(nullptr), dex_pc_(dex_pc), need_full_deoptimization_(need_full_deoptimization) {
+    : method_(nullptr), dex_pc_(dex_pc), deoptimization_kind_(deoptimization_kind) {
+    CHECK(deoptimization_kind_ == DeoptimizationRequest::kNothing ||
+          deoptimization_kind_ == DeoptimizationRequest::kSelectiveDeoptimization ||
+          deoptimization_kind_ == DeoptimizationRequest::kFullDeoptimization);
     ScopedObjectAccessUnchecked soa(Thread::Current());
     method_ = soa.EncodeMethod(method);
   }
 
   Breakpoint(const Breakpoint& other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
     : method_(nullptr), dex_pc_(other.dex_pc_),
-      need_full_deoptimization_(other.need_full_deoptimization_) {
+      deoptimization_kind_(other.deoptimization_kind_) {
     ScopedObjectAccessUnchecked soa(Thread::Current());
     method_ = soa.EncodeMethod(other.Method());
   }
@@ -206,8 +210,8 @@
     return dex_pc_;
   }
 
-  bool NeedFullDeoptimization() const {
-    return need_full_deoptimization_;
+  DeoptimizationRequest::Kind GetDeoptimizationKind() const {
+    return deoptimization_kind_;
   }
 
  private:
@@ -216,7 +220,7 @@
   uint32_t dex_pc_;
 
   // Indicates whether the breakpoint needs full, selective, or no deoptimization.
-  bool need_full_deoptimization_;
+  DeoptimizationRequest::Kind deoptimization_kind_;
 };
 
 static std::ostream& operator<<(std::ostream& os, const Breakpoint& rhs)
@@ -736,6 +740,12 @@
   return gDisposed;
 }
 
+bool Dbg::RequiresDeoptimization() {
+  // We don't need deoptimization if everything runs with interpreter after
+  // enabling -Xint mode.
+  return !Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly();
+}
+
 void Dbg::GoActive() {
   // Enable all debugging features, including scans for breakpoints.
   // This is a no-op if we're already active.
@@ -768,7 +778,9 @@
   Thread* self = Thread::Current();
   ThreadState old_state = self->SetStateUnsafe(kRunnable);
   CHECK_NE(old_state, kRunnable);
-  runtime->GetInstrumentation()->EnableDeoptimization();
+  if (RequiresDeoptimization()) {
+    runtime->GetInstrumentation()->EnableDeoptimization();
+  }
   instrumentation_events_ = 0;
   gDebuggerActive = true;
   CHECK_EQ(self->SetStateUnsafe(old_state), kRunnable);
@@ -806,7 +818,9 @@
                                                     instrumentation_events_);
       instrumentation_events_ = 0;
     }
-    runtime->GetInstrumentation()->DisableDeoptimization();
+    if (RequiresDeoptimization()) {
+      runtime->GetInstrumentation()->DisableDeoptimization();
+    }
     gDebuggerActive = false;
   }
   gRegistry->Clear();
@@ -3035,9 +3049,11 @@
 }
 
 void Dbg::DelayFullUndeoptimization() {
-  MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
-  ++delayed_full_undeoptimization_count_;
-  DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
+  if (RequiresDeoptimization()) {
+    MutexLock mu(Thread::Current(), *Locks::deoptimization_lock_);
+    ++delayed_full_undeoptimization_count_;
+    DCHECK_LE(delayed_full_undeoptimization_count_, full_deoptimization_event_count_);
+  }
 }
 
 void Dbg::ProcessDelayedFullUndeoptimizations() {
@@ -3196,20 +3212,78 @@
 }
 
 // Sanity checks all existing breakpoints on the same method.
-static void SanityCheckExistingBreakpoints(mirror::ArtMethod* m, bool need_full_deoptimization)
+static void SanityCheckExistingBreakpoints(mirror::ArtMethod* m,
+                                           DeoptimizationRequest::Kind deoptimization_kind)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::breakpoint_lock_) {
   for (const Breakpoint& breakpoint : gBreakpoints) {
-    CHECK_EQ(need_full_deoptimization, breakpoint.NeedFullDeoptimization());
+    if (breakpoint.Method() == m) {
+      CHECK_EQ(deoptimization_kind, breakpoint.GetDeoptimizationKind());
+    }
   }
-  if (need_full_deoptimization) {
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (deoptimization_kind == DeoptimizationRequest::kFullDeoptimization) {
     // We should have deoptimized everything but not "selectively" deoptimized this method.
-    CHECK(Runtime::Current()->GetInstrumentation()->AreAllMethodsDeoptimized());
-    CHECK(!Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
-  } else {
+    CHECK(instrumentation->AreAllMethodsDeoptimized());
+    CHECK(!instrumentation->IsDeoptimized(m));
+  } else if (deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization) {
     // We should have "selectively" deoptimized this method.
     // Note: while we have not deoptimized everything for this method, we may have done it for
     // another event.
-    CHECK(Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
+    CHECK(instrumentation->IsDeoptimized(m));
+  } else {
+    // This method does not require deoptimization.
+    CHECK_EQ(deoptimization_kind, DeoptimizationRequest::kNothing);
+    CHECK(!instrumentation->IsDeoptimized(m));
+  }
+}
+
+static DeoptimizationRequest::Kind GetRequiredDeoptimizationKind(Thread* self,
+                                                                 mirror::ArtMethod* m)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (!Dbg::RequiresDeoptimization()) {
+    // We already run in interpreter-only mode so we don't need to deoptimize anything.
+    VLOG(jdwp) << "No need for deoptimization when fully running with interpreter for method "
+               << PrettyMethod(m);
+    return DeoptimizationRequest::kNothing;
+  }
+  const Breakpoint* existing_breakpoint;
+  {
+    ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
+    existing_breakpoint = FindFirstBreakpointForMethod(m);
+  }
+  if (existing_breakpoint == nullptr) {
+    // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
+    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
+    // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
+    // Therefore we must not hold any lock when we call it.
+    bool need_full_deoptimization = IsMethodPossiblyInlined(self, m);
+    if (need_full_deoptimization) {
+      VLOG(jdwp) << "Need full deoptimization because of possible inlining of method "
+                 << PrettyMethod(m);
+      return DeoptimizationRequest::kFullDeoptimization;
+    } else {
+      // We don't need to deoptimize if the method has not been compiled.
+      ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+      const bool is_compiled = class_linker->GetOatMethodQuickCodeFor(m) != nullptr;
+      if (is_compiled) {
+        VLOG(jdwp) << "Need selective deoptimization for compiled method " << PrettyMethod(m);
+        return DeoptimizationRequest::kSelectiveDeoptimization;
+      } else {
+        // Method is not compiled: we don't need to deoptimize.
+        VLOG(jdwp) << "No need for deoptimization for non-compiled method " << PrettyMethod(m);
+        return DeoptimizationRequest::kNothing;
+      }
+    }
+  } else {
+    // There is at least one breakpoint for this method: we don't need to deoptimize.
+    // Let's check that all breakpoints are configured the same way for deoptimization.
+    VLOG(jdwp) << "Breakpoint already set: no deoptimization is required";
+    DeoptimizationRequest::Kind deoptimization_kind = existing_breakpoint->GetDeoptimizationKind();
+    if (kIsDebugBuild) {
+      ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
+      SanityCheckExistingBreakpoints(m, deoptimization_kind);
+    }
+    return DeoptimizationRequest::kNothing;
   }
 }
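
Editor's note: GetRequiredDeoptimizationKind above reduces to a small decision
tree. A hedged sketch with the runtime queries replaced by plain booleans; the
names here are illustrative, not ART API:

    #include <cassert>

    enum class DeoptKind { kNothing, kSelective, kFull };

    // interpret_only:   Dbg::RequiresDeoptimization() returned false (-Xint mode).
    // has_breakpoint:   another breakpoint is already set on the method.
    // possibly_inlined: IsMethodPossiblyInlined() says callers may inline it.
    // compiled:         the method has quick-compiled code.
    DeoptKind RequiredDeoptKind(bool interpret_only, bool has_breakpoint,
                                bool possibly_inlined, bool compiled) {
      if (interpret_only || has_breakpoint) {
        return DeoptKind::kNothing;  // interpreter-only, or deoptimization already arranged
      }
      if (possibly_inlined) {
        return DeoptKind::kFull;     // inlined copies can't be caught selectively
      }
      return compiled ? DeoptKind::kSelective : DeoptKind::kNothing;
    }

    int main() {
      assert(RequiredDeoptKind(false, false, true, true) == DeoptKind::kFull);
      assert(RequiredDeoptKind(false, false, false, true) == DeoptKind::kSelective);
      assert(RequiredDeoptKind(true, false, true, true) == DeoptKind::kNothing);
      return 0;
    }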
 
@@ -3220,40 +3294,19 @@
   mirror::ArtMethod* m = FromMethodId(location->method_id);
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
 
-  const Breakpoint* existing_breakpoint;
-  {
-    ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
-    existing_breakpoint = FindFirstBreakpointForMethod(m);
-  }
-  bool need_full_deoptimization;
-  if (existing_breakpoint == nullptr) {
-    // There is no breakpoint on this method yet: we need to deoptimize. If this method may be
-    // inlined, we deoptimize everything; otherwise we deoptimize only this method.
-    // Note: IsMethodPossiblyInlined goes into the method verifier and may cause thread suspension.
-    // Therefore we must not hold any lock when we call it.
-    need_full_deoptimization = IsMethodPossiblyInlined(self, m);
-    if (need_full_deoptimization) {
-      req->SetKind(DeoptimizationRequest::kFullDeoptimization);
-      req->SetMethod(nullptr);
-    } else {
-      req->SetKind(DeoptimizationRequest::kSelectiveDeoptimization);
-      req->SetMethod(m);
-    }
+  const DeoptimizationRequest::Kind deoptimization_kind = GetRequiredDeoptimizationKind(self, m);
+  req->SetKind(deoptimization_kind);
+  if (deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization) {
+    req->SetMethod(m);
   } else {
-    // There is at least one breakpoint for this method: we don't need to deoptimize.
-    req->SetKind(DeoptimizationRequest::kNothing);
+    CHECK(deoptimization_kind == DeoptimizationRequest::kNothing ||
+          deoptimization_kind == DeoptimizationRequest::kFullDeoptimization);
     req->SetMethod(nullptr);
-
-    need_full_deoptimization = existing_breakpoint->NeedFullDeoptimization();
-    if (kIsDebugBuild) {
-      ReaderMutexLock mu(self, *Locks::breakpoint_lock_);
-      SanityCheckExistingBreakpoints(m, need_full_deoptimization);
-    }
   }
 
   {
     WriterMutexLock mu(self, *Locks::breakpoint_lock_);
-    gBreakpoints.push_back(Breakpoint(m, location->dex_pc, need_full_deoptimization));
+    gBreakpoints.push_back(Breakpoint(m, location->dex_pc, deoptimization_kind));
     VLOG(jdwp) << "Set breakpoint #" << (gBreakpoints.size() - 1) << ": "
                << gBreakpoints[gBreakpoints.size() - 1];
   }
@@ -3265,12 +3318,13 @@
   WriterMutexLock mu(Thread::Current(), *Locks::breakpoint_lock_);
   mirror::ArtMethod* m = FromMethodId(location->method_id);
   DCHECK(m != nullptr) << "No method for method id " << location->method_id;
-  bool need_full_deoptimization = false;
+  DeoptimizationRequest::Kind deoptimization_kind = DeoptimizationRequest::kNothing;
   for (size_t i = 0, e = gBreakpoints.size(); i < e; ++i) {
     if (gBreakpoints[i].DexPc() == location->dex_pc && gBreakpoints[i].Method() == m) {
       VLOG(jdwp) << "Removed breakpoint #" << i << ": " << gBreakpoints[i];
-      need_full_deoptimization = gBreakpoints[i].NeedFullDeoptimization();
-      DCHECK_NE(need_full_deoptimization, Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
+      deoptimization_kind = gBreakpoints[i].GetDeoptimizationKind();
+      DCHECK_EQ(deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization,
+                Runtime::Current()->GetInstrumentation()->IsDeoptimized(m));
       gBreakpoints.erase(gBreakpoints.begin() + i);
       break;
     }
@@ -3278,21 +3332,26 @@
   const Breakpoint* const existing_breakpoint = FindFirstBreakpointForMethod(m);
   if (existing_breakpoint == nullptr) {
     // There is no more breakpoint on this method: we need to undeoptimize.
-    if (need_full_deoptimization) {
+    if (deoptimization_kind == DeoptimizationRequest::kFullDeoptimization) {
       // This method required full deoptimization: we need to undeoptimize everything.
       req->SetKind(DeoptimizationRequest::kFullUndeoptimization);
       req->SetMethod(nullptr);
-    } else {
+    } else if (deoptimization_kind == DeoptimizationRequest::kSelectiveDeoptimization) {
       // This method required selective deoptimization: we need to undeoptimize only that method.
       req->SetKind(DeoptimizationRequest::kSelectiveUndeoptimization);
       req->SetMethod(m);
+    } else {
+      // This method had no need for deoptimization: do nothing.
+      CHECK_EQ(deoptimization_kind, DeoptimizationRequest::kNothing);
+      req->SetKind(DeoptimizationRequest::kNothing);
+      req->SetMethod(nullptr);
     }
   } else {
     // There is at least one breakpoint for this method: we don't need to undeoptimize.
     req->SetKind(DeoptimizationRequest::kNothing);
     req->SetMethod(nullptr);
     if (kIsDebugBuild) {
-      SanityCheckExistingBreakpoints(m, need_full_deoptimization);
+      SanityCheckExistingBreakpoints(m, deoptimization_kind);
     }
   }
 }
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 488ba7f..9203163 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -523,6 +523,9 @@
       LOCKS_EXCLUDED(Locks::breakpoint_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Indicates whether we need deoptimization for debugging.
+  static bool RequiresDeoptimization();
+
   // Records deoptimization request in the queue.
   static void RequestDeoptimization(const DeoptimizationRequest& req)
       LOCKS_EXCLUDED(Locks::deoptimization_lock_)
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 134e284..b304779 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -146,6 +146,9 @@
   if (!file->WriteFully(dex_bytes.get(), length)) {
     PLOG(FATAL) << "Failed to write base64 as dex file";
   }
+  if (file->FlushCloseOrErase() != 0) {
+    PLOG(FATAL) << "Could not flush and close test file.";
+  }
   file.reset();
 
   // read dex file
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index addd948..ec1e5f0 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -115,6 +115,9 @@
   if (!file->WriteFully(dex_bytes.get(), length)) {
     PLOG(FATAL) << "Failed to write base64 as dex file";
   }
+  if (file->FlushCloseOrErase() != 0) {
+    PLOG(FATAL) << "Could not flush and close test file.";
+  }
   file.reset();
 
   // read dex file
@@ -177,6 +180,9 @@
   if (!file->WriteFully(bytes, length)) {
     PLOG(FATAL) << "Failed to write base64 as dex file";
   }
+  if (file->FlushCloseOrErase() != 0) {
+    PLOG(FATAL) << "Could not flush and close test file.";
+  }
   file.reset();
 
   // read dex file
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index b6cf921..37c5f9c 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -20,6 +20,7 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include "arch/instruction_set.h"
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "base/stl_util.h"
@@ -29,7 +30,6 @@
 #include "elf_utils.h"
 #include "leb128.h"
 #include "utils.h"
-#include "instruction_set.h"
 
 namespace art {
 
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index 49357ad..9ffd199 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -17,8 +17,8 @@
 #ifndef ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
+#include "arch/instruction_set.h"
 #include "base/mutex.h"
-#include "instruction_set.h"
 #include "runtime.h"
 #include "thread-inl.h"
 
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index f78273f..d5493bd 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -16,7 +16,6 @@
 
 #include "callee_save_frame.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
-#include "instruction_set.h"
 #include "instrumentation.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
@@ -37,8 +36,8 @@
     result = GetQuickToInterpreterBridge();
   } else {
     result = instrumentation->GetQuickCodeFor(method);
+    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
   }
-  DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
   bool interpreter_entry = (result == GetQuickToInterpreterBridge());
   instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? nullptr : this_object,
                                                  method, lr, interpreter_entry);
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 4f61707..0b7d382 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -21,7 +21,6 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
-#include "instruction_set.h"
 #include "interpreter/interpreter.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
@@ -1660,7 +1659,7 @@
   *(sp32 - 1) = cookie;
 
   // Retrieve the stored native code.
-  const void* nativeCode = called->GetNativeMethod();
+  void* nativeCode = called->GetEntryPointFromJni();
 
   // There are two cases for the content of nativeCode:
   // 1) Pointer to the native function.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 0e67978..0cceaa4 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -96,6 +96,8 @@
 static const char* kDlMallocSpaceName[2] = {"main dlmalloc space", "main dlmalloc space 1"};
 static const char* kRosAllocSpaceName[2] = {"main rosalloc space", "main rosalloc space 1"};
 static const char* kMemMapSpaceName[2] = {"main space", "main space 1"};
+static const char* kNonMovingSpaceName = "non moving space";
+static const char* kZygoteSpaceName = "zygote space";
 static constexpr size_t kGSSBumpPointerSpaceCapacity = 32 * MB;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
@@ -232,9 +234,13 @@
                                      +-main alloc space2 / bump space 2 (capacity_)+-
                                      +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
   */
+  // We don't have hspace compaction enabled with GSS.
+  if (foreground_collector_type_ == kCollectorTypeGSS) {
+    use_homogeneous_space_compaction_for_oom_ = false;
+  }
   bool support_homogeneous_space_compaction =
       background_collector_type_ == gc::kCollectorTypeHomogeneousSpaceCompact ||
-      use_homogeneous_space_compaction_for_oom;
+      use_homogeneous_space_compaction_for_oom_;
   // We may use the same space as the main space for the non moving space if we don't need to compact
   // from the main space.
   // This is not the case if we support homogeneous compaction or have a moving background
@@ -254,10 +260,14 @@
   std::string error_str;
   std::unique_ptr<MemMap> non_moving_space_mem_map;
   if (separate_non_moving_space) {
+    // If we are the zygote, the non moving space becomes the zygote space when we run
+    // PreZygoteFork the first time. In this case, call the map "zygote space" since we can't
+    // rename the mem map later.
+    const char* space_name = is_zygote ? kZygoteSpaceName : kNonMovingSpaceName;
     // Reserve the non moving mem map before the other two since it needs to be at a specific
     // address.
     non_moving_space_mem_map.reset(
-        MemMap::MapAnonymous("non moving space", requested_alloc_space_begin,
+        MemMap::MapAnonymous(space_name, requested_alloc_space_begin,
                              non_moving_space_capacity, PROT_READ | PROT_WRITE, true, &error_str));
     CHECK(non_moving_space_mem_map != nullptr) << error_str;
     // Try to reserve virtual memory at a lower address if we have a separate non moving space.
@@ -1972,7 +1982,8 @@
     // from this point on.
     RemoveRememberedSet(old_alloc_space);
   }
-  zygote_space_ = old_alloc_space->CreateZygoteSpace("alloc space", low_memory_mode_,
+  // Remaining space becomes the new non moving space.
+  zygote_space_ = old_alloc_space->CreateZygoteSpace(kNonMovingSpaceName, low_memory_mode_,
                                                      &non_moving_space_);
   CHECK(!non_moving_space_->CanMoveObjects());
   if (same_space) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index cf7352e..69a573e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "allocator_type.h"
+#include "arch/instruction_set.h"
 #include "atomic.h"
 #include "base/timing_logger.h"
 #include "gc/accounting/atomic_stack.h"
@@ -32,7 +33,6 @@
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "globals.h"
-#include "instruction_set.h"
 #include "jni.h"
 #include "object_callbacks.h"
 #include "offsets.h"
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index b232128..071997f 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -74,17 +74,17 @@
 // out-of-date. We also don't really care if this fails since it is just a convenience.
 // Adapted from prune_dex_cache(const char* subdir) in frameworks/native/cmds/installd/commands.c
 // Note this should only be used during first boot.
-static void RealPruneDexCache(const std::string& cache_dir_path);
+static void RealPruneDalvikCache(const std::string& cache_dir_path);
 
-static void PruneDexCache(InstructionSet isa) {
+static void PruneDalvikCache(InstructionSet isa) {
   CHECK_NE(isa, kNone);
   // Prune the base /data/dalvik-cache.
-  RealPruneDexCache(GetDalvikCacheOrDie(".", false));
+  RealPruneDalvikCache(GetDalvikCacheOrDie(".", false));
   // Prune /data/dalvik-cache/<isa>.
-  RealPruneDexCache(GetDalvikCacheOrDie(GetInstructionSetString(isa), false));
+  RealPruneDalvikCache(GetDalvikCacheOrDie(GetInstructionSetString(isa), false));
 }
 
-static void RealPruneDexCache(const std::string& cache_dir_path) {
+static void RealPruneDalvikCache(const std::string& cache_dir_path) {
   if (!OS::DirectoryExists(cache_dir_path.c_str())) {
     return;
   }
@@ -118,6 +118,28 @@
   CHECK_EQ(0, TEMP_FAILURE_RETRY(closedir(cache_dir))) << "Unable to close directory.";
 }
 
+// We write out an empty file to the zygote's ISA specific cache dir at the start of
+// every zygote boot and delete it when the boot completes. If we find a file already
+// present, it usually means the boot didn't complete. We wipe the entire dalvik
+// cache if that's the case.
+static void MarkZygoteStart(const InstructionSet isa) {
+  const std::string isa_subdir = GetDalvikCacheOrDie(GetInstructionSetString(isa), false);
+  const std::string boot_marker = isa_subdir + "/.booting";
+
+  if (OS::FileExists(boot_marker.c_str())) {
+    LOG(WARNING) << "Incomplete boot detected. Pruning dalvik cache";
+    RealPruneDalvikCache(isa_subdir);
+  }
+
+  VLOG(startup) << "Creating boot start marker: " << boot_marker;
+  std::unique_ptr<File> f(OS::CreateEmptyFile(boot_marker.c_str()));
+  if (f.get() != nullptr) {
+    if (f->FlushCloseOrErase() != 0) {
+      PLOG(WARNING) << "Failed to write boot marker.";
+    }
+  }
+}
+
 static bool GenerateImage(const std::string& image_filename, InstructionSet image_isa,
                           std::string* error_msg) {
   const std::string boot_class_path_string(Runtime::Current()->GetBootClassPathString());
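
Editor's note: a sketch of the boot-marker protocol that MarkZygoteStart
implements, written with plain POSIX calls instead of ART's OS and File
helpers; PruneCacheDir is a hypothetical placeholder for the pruning step:

    #include <fcntl.h>
    #include <unistd.h>

    #include <cstdio>
    #include <string>

    void MarkBootStart(const std::string& isa_cache_dir) {
      const std::string marker = isa_cache_dir + "/.booting";
      // A surviving marker means the previous boot never completed, so the
      // cache contents are suspect and should be pruned.
      if (access(marker.c_str(), F_OK) == 0) {
        std::fprintf(stderr, "Incomplete boot detected, pruning %s\n",
                     isa_cache_dir.c_str());
        // PruneCacheDir(isa_cache_dir);  // hypothetical: wipe stale cache entries
      }
      int fd = open(marker.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644);
      if (fd >= 0) {
        close(fd);  // an empty file suffices; it is deleted when boot completes
      }
    }

    int main() {
      MarkBootStart("/tmp/example-dalvik-cache");  // illustrative path only
      return 0;
    }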
@@ -130,7 +152,7 @@
   // We should clean up so we are more likely to have room for the image.
   if (Runtime::Current()->IsZygote()) {
     LOG(INFO) << "Pruning dalvik-cache since we are generating an image and will need to recompile";
-    PruneDexCache(image_isa);
+    PruneDalvikCache(image_isa);
   }
 
   std::vector<std::string> arg_vector;
@@ -232,7 +254,7 @@
   // We should clean up so we are more likely to have room for the image.
   if (Runtime::Current()->IsZygote()) {
     LOG(INFO) << "Pruning dalvik-cache since we are relocating an image and will need to recompile";
-    PruneDexCache(isa);
+    PruneDalvikCache(isa);
   }
 
   std::string patchoat(Runtime::Current()->GetPatchoatExecutable());
@@ -427,6 +449,10 @@
                                              &has_system, &cache_filename, &dalvik_cache_exists,
                                              &has_cache, &is_global_cache);
 
+  if (Runtime::Current()->IsZygote()) {
+    MarkZygoteStart(image_isa);
+  }
+
   ImageSpace* space;
   bool relocate = Runtime::Current()->ShouldRelocate();
   bool can_compile = Runtime::Current()->IsImageDex2OatEnabled();
@@ -475,7 +501,7 @@
             // Since ImageCreationAllowed was true above, we are the zygote
             // and therefore the only process expected to generate these for
             // the device.
-            PruneDexCache(image_isa);
+            PruneDalvikCache(image_isa);
             return nullptr;
           }
         }
@@ -530,7 +556,7 @@
                                 "but image failed to load: %s",
                                 image_location, cache_filename.c_str(), system_filename.c_str(),
                                 error_msg->c_str());
-      PruneDexCache(image_isa);
+      PruneDalvikCache(image_isa);
       return nullptr;
     } else if (is_system) {
       // If the /system file exists, it should be up-to-date, don't try to generate it.
@@ -558,13 +584,13 @@
     // Since ImageCreationAllowed was true above, we are the zygote
     // and therefore the only process expected to generate these for
     // the device.
-    PruneDexCache(image_isa);
+    PruneDalvikCache(image_isa);
     return nullptr;
   } else {
     // Check whether there is enough space left over after we have generated the image.
     if (!CheckSpace(cache_filename, error_msg)) {
       // No. Delete the generated image and try to run out of the dex files.
-      PruneDexCache(image_isa);
+      PruneDalvikCache(image_isa);
       return nullptr;
     }
 
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 14d7432..3069581 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -475,9 +475,14 @@
         }
       }
 
-      std::unique_ptr<File> file(new File(out_fd, filename_));
+      std::unique_ptr<File> file(new File(out_fd, filename_, true));
       okay = file->WriteFully(header_data_ptr_, header_data_size_) &&
-          file->WriteFully(body_data_ptr_, body_data_size_);
+             file->WriteFully(body_data_ptr_, body_data_size_);
+      if (okay) {
+        okay = file->FlushCloseOrErase() == 0;
+      } else {
+        file->Erase();
+      }
       if (!okay) {
         std::string msg(StringPrintf("Couldn't dump heap; writing \"%s\" failed: %s",
                                      filename_.c_str(), strerror(errno)));
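
Editor's note: the hprof change above shows the write discipline this patch
adopts across the tree: write fully, then FlushCloseOrErase(), which by the
call sites in this diff returns 0 on a successful flush-and-close and erases
the partially written file otherwise. A sketch of a caller under that assumed
contract, with a mock standing in for ART's File class:

    #include <cstddef>
    #include <cstdio>

    // Mock of the assumed interface; the real art::File does the actual I/O.
    struct MockFile {
      bool WriteFully(const void* /*data*/, size_t /*length*/) { return true; }
      int FlushCloseOrErase() { return 0; }  // 0 on success, erases on failure
      int Erase() { return 0; }              // discard a partially written file
    };

    template <typename FileT>
    bool DumpToFile(FileT* file, const void* data, size_t length) {
      if (!file->WriteFully(data, length)) {
        file->Erase();  // never leave a truncated file behind
        return false;
      }
      return file->FlushCloseOrErase() == 0;
    }

    int main() {
      MockFile f;
      const char payload[] = "hprof";
      std::printf("%s\n", DumpToFile(&f, payload, sizeof(payload)) ? "ok" : "failed");
      return 0;
    }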
diff --git a/runtime/image.cc b/runtime/image.cc
index aee84bc3..b83eeb1 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -24,7 +24,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '2', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '3', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
deleted file mode 100644
index e165a75..0000000
--- a/runtime/instruction_set.cc
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "instruction_set.h"
-
-#include <signal.h>
-#include <fstream>
-
-#include "base/casts.h"
-#include "base/stringprintf.h"
-#include "utils.h"
-
-namespace art {
-
-const char* GetInstructionSetString(const InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-      return "arm";
-    case kArm64:
-      return "arm64";
-    case kX86:
-      return "x86";
-    case kX86_64:
-      return "x86_64";
-    case kMips:
-      return "mips";
-    case kNone:
-      return "none";
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
-  }
-}
-
-InstructionSet GetInstructionSetFromString(const char* isa_str) {
-  CHECK(isa_str != nullptr);
-
-  if (strcmp("arm", isa_str) == 0) {
-    return kArm;
-  } else if (strcmp("arm64", isa_str) == 0) {
-    return kArm64;
-  } else if (strcmp("x86", isa_str) == 0) {
-    return kX86;
-  } else if (strcmp("x86_64", isa_str) == 0) {
-    return kX86_64;
-  } else if (strcmp("mips", isa_str) == 0) {
-    return kMips;
-  }
-
-  return kNone;
-}
-
-size_t GetInstructionSetAlignment(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return kArmAlignment;
-    case kArm64:
-      return kArm64Alignment;
-    case kX86:
-      // Fall-through.
-    case kX86_64:
-      return kX86Alignment;
-    case kMips:
-      return kMipsAlignment;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have alignment.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-
-static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
-static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
-
-static constexpr size_t kArmStackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kArm64StackOverflowReservedBytes =  8 * KB;
-static constexpr size_t kX86StackOverflowReservedBytes =    8 * KB;
-static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB;
-
-size_t GetStackOverflowReservedBytes(InstructionSet isa) {
-  switch (isa) {
-    case kArm:      // Intentional fall-through.
-    case kThumb2:
-      return kArmStackOverflowReservedBytes;
-
-    case kArm64:
-      return kArm64StackOverflowReservedBytes;
-
-    case kMips:
-      return kMipsStackOverflowReservedBytes;
-
-    case kX86:
-      return kX86StackOverflowReservedBytes;
-
-    case kX86_64:
-      return kX86_64StackOverflowReservedBytes;
-
-    case kNone:
-      LOG(FATAL) << "kNone has no stack overflow size";
-      return 0;
-
-    default:
-      LOG(FATAL) << "Unknown instruction set" << isa;
-      return 0;
-  }
-}
-
-const InstructionSetFeatures* InstructionSetFeatures::FromVariant(InstructionSet isa,
-                                                                  const std::string& variant,
-                                                                  std::string* error_msg) {
-  const InstructionSetFeatures* result;
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromVariant(variant, error_msg);
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(isa);
-      break;
-  }
-  CHECK_EQ(result == nullptr, error_msg->size() != 0);
-  return result;
-}
-
-const InstructionSetFeatures* InstructionSetFeatures::FromFeatureString(InstructionSet isa,
-                                                                        const std::string& feature_list,
-                                                                        std::string* error_msg) {
-  const InstructionSetFeatures* result;
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromFeatureString(feature_list, error_msg);
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(isa);
-      break;
-  }
-  // TODO: warn if feature_list doesn't agree with result's GetFeatureList().
-  CHECK_EQ(result == nullptr, error_msg->size() != 0);
-  return result;
-}
-
-const InstructionSetFeatures* InstructionSetFeatures::FromBitmap(InstructionSet isa,
-                                                                 uint32_t bitmap) {
-  const InstructionSetFeatures* result;
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromBitmap(bitmap);
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(isa);
-      break;
-  }
-  CHECK_EQ(bitmap, result->AsBitmap());
-  return result;
-}
-
-const InstructionSetFeatures* InstructionSetFeatures::FromCppDefines() {
-  const InstructionSetFeatures* result;
-  switch (kRuntimeISA) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromCppDefines();
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(kRuntimeISA);
-      break;
-  }
-  return result;
-}
-
-
-const InstructionSetFeatures* InstructionSetFeatures::FromCpuInfo() {
-  const InstructionSetFeatures* result;
-  switch (kRuntimeISA) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromCpuInfo();
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(kRuntimeISA);
-      break;
-  }
-  return result;
-}
-
-const InstructionSetFeatures* InstructionSetFeatures::FromHwcap() {
-  const InstructionSetFeatures* result;
-  switch (kRuntimeISA) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromHwcap();
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(kRuntimeISA);
-      break;
-  }
-  return result;
-}
-
-const InstructionSetFeatures* InstructionSetFeatures::FromAssembly() {
-  const InstructionSetFeatures* result;
-  switch (kRuntimeISA) {
-    case kArm:
-    case kThumb2:
-      result = ArmInstructionSetFeatures::FromAssembly();
-      break;
-    default:
-      result = UnknownInstructionSetFeatures::Unknown(kRuntimeISA);
-      break;
-  }
-  return result;
-}
-
-const ArmInstructionSetFeatures* InstructionSetFeatures::AsArmInstructionSetFeatures() const {
-  DCHECK_EQ(kArm, GetInstructionSet());
-  return down_cast<const ArmInstructionSetFeatures*>(this);
-}
-
-std::ostream& operator<<(std::ostream& os, const InstructionSetFeatures& rhs) {
-  os << "ISA: " << rhs.GetInstructionSet() << " Feature string: " << rhs.GetFeatureString();
-  return os;
-}
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromFeatureString(
-    const std::string& feature_list, std::string* error_msg) {
-  std::vector<std::string> features;
-  Split(feature_list, ',', &features);
-  bool has_lpae = false;
-  bool has_div = false;
-  for (auto i = features.begin(); i != features.end(); i++) {
-    std::string feature = Trim(*i);
-    if (feature == "default" || feature == "none") {
-      // Nothing to do.
-    } else if (feature == "div") {
-      has_div = true;
-    } else if (feature == "nodiv") {
-      has_div = false;
-    } else if (feature == "lpae") {
-      has_lpae = true;
-    } else if (feature == "nolpae") {
-      has_lpae = false;
-    } else {
-      *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
-      return nullptr;
-    }
-  }
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromVariant(
-    const std::string& variant, std::string* error_msg) {
-  // Look for variants that have divide support.
-  bool has_div = false;
-  {
-    static const char* arm_variants_with_div[] = {
-        "cortex-a7", "cortex-a12", "cortex-a15", "cortex-a17", "cortex-a53", "cortex-a57",
-        "cortex-m3", "cortex-m4", "cortex-r4", "cortex-r5",
-        "cyclone", "denver", "krait", "swift"
-    };
-    for (const char* div_variant : arm_variants_with_div) {
-      if (variant == div_variant) {
-        has_div = true;
-        break;
-      }
-    }
-  }
-  // Look for variants that have LPAE support.
-  bool has_lpae = false;
-  {
-    static const char* arm_variants_with_lpae[] = {
-        "cortex-a7", "cortex-a15", "krait", "denver"
-    };
-    for (const char* lpae_variant : arm_variants_with_lpae) {
-      if (variant == lpae_variant) {
-        has_lpae = true;
-        break;
-      }
-    }
-  }
-  if (has_div == false && has_lpae == false) {
-    // Avoid unsupported variants.
-    static const char* unsupported_arm_variants[] = {
-        // ARM processors that aren't ARMv7 compatible aren't supported.
-        "arm2", "arm250", "arm3", "arm6", "arm60", "arm600", "arm610", "arm620",
-        "cortex-m0", "cortex-m0plus", "cortex-m1",
-        "fa526", "fa626", "fa606te", "fa626te", "fmp626", "fa726te",
-        "iwmmxt", "iwmmxt2",
-        "strongarm", "strongarm110", "strongarm1100", "strongarm1110",
-        "xscale"
-    };
-    for (const char* us_variant : unsupported_arm_variants) {
-      if (variant == us_variant) {
-        *error_msg = StringPrintf("Attempt to use unsupported ARM variant: %s", us_variant);
-        return nullptr;
-      }
-    }
-    // Warn if the variant is unknown.
-    // TODO: some of the variants below may have feature support, but that support is currently
-    //       unknown so we'll choose conservative (sub-optimal) defaults without warning.
-    // TODO: some of the architectures may not support all features required by ART and should be
-    //       moved to unsupported_arm_variants[] above.
-    static const char* arm_variants_without_known_features[] = {
-        "arm7", "arm7m", "arm7d", "arm7dm", "arm7di", "arm7dmi", "arm70", "arm700", "arm700i",
-        "arm710", "arm710c", "arm7100", "arm720", "arm7500", "arm7500fe", "arm7tdmi", "arm7tdmi-s",
-        "arm710t", "arm720t", "arm740t",
-        "arm8", "arm810",
-        "arm9", "arm9e", "arm920", "arm920t", "arm922t", "arm946e-s", "arm966e-s", "arm968e-s",
-        "arm926ej-s", "arm940t", "arm9tdmi",
-        "arm10tdmi", "arm1020t", "arm1026ej-s", "arm10e", "arm1020e", "arm1022e",
-        "arm1136j-s", "arm1136jf-s",
-        "arm1156t2-s", "arm1156t2f-s", "arm1176jz-s", "arm1176jzf-s",
-        "cortex-a5", "cortex-a8", "cortex-a9", "cortex-a9-mp", "cortex-r4f",
-        "marvell-pj4", "mpcore", "mpcorenovfp"
-    };
-    bool found = false;
-    for (const char* ff_variant : arm_variants_without_known_features) {
-      if (variant == ff_variant) {
-        found = true;
-        break;
-      }
-    }
-    if (!found) {
-      LOG(WARNING) << "Unknown instruction set features for ARM CPU variant (" << variant
-          << ") using conservative defaults";
-    }
-  }
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
-  bool has_lpae = (bitmap & kLpaeBitfield) != 0;
-  bool has_div = (bitmap & kDivBitfield) != 0;
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromCppDefines() {
-#if defined(__ARM_ARCH_EXT_IDIV__)
-  bool has_div = true;
-#else
-  bool has_div = false;
-#endif
-#if defined(__ARM_FEATURE_LPAE)
-  bool has_lpae = true;
-#else
-  bool has_lpae = false;
-#endif
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromCpuInfo() {
-  // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
-  // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
-  bool has_lpae = false;
-  bool has_div = false;
-
-  std::ifstream in("/proc/cpuinfo");
-  if (!in.fail()) {
-    while (!in.eof()) {
-      std::string line;
-      std::getline(in, line);
-      if (!in.eof()) {
-        LOG(INFO) << "cpuinfo line: " << line;
-        if (line.find("Features") != std::string::npos) {
-          LOG(INFO) << "found features";
-          if (line.find("idivt") != std::string::npos) {
-            // We always expect both ARM and Thumb divide instructions to be available or not
-            // available.
-            CHECK_NE(line.find("idiva"), std::string::npos);
-            has_div = true;
-          }
-          if (line.find("lpae") != std::string::npos) {
-            has_lpae = true;
-          }
-        }
-      }
-    }
-    in.close();
-  } else {
-    LOG(INFO) << "Failed to open /proc/cpuinfo";
-  }
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
-
-#if defined(HAVE_ANDROID_OS) && defined(__arm__)
-#include <sys/auxv.h>
-#include <asm/hwcap.h>
-#endif
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromHwcap() {
-  bool has_lpae = false;
-  bool has_div = false;
-
-#if defined(HAVE_ANDROID_OS) && defined(__arm__)
-  uint64_t hwcaps = getauxval(AT_HWCAP);
-  LOG(INFO) << "hwcaps=" << hwcaps;
-  if ((hwcaps & HWCAP_IDIVT) != 0) {
-    // We always expect both ARM and Thumb divide instructions to be available or not
-    // available.
-    CHECK_NE(hwcaps & HWCAP_IDIVA, 0U);
-    has_div = true;
-  }
-  if ((hwcaps & HWCAP_LPAE) != 0) {
-    has_lpae = true;
-  }
-#endif
-
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
-
-// A signal handler invoked on an illegal-instruction fault.  We record the failure in r0 and then
-// advance the PC in the signal context so execution resumes at the next instruction.  We know the
-// offending instruction is an sdiv (4 bytes long).
-static void bad_divide_inst_handle(int signo ATTRIBUTE_UNUSED, siginfo_t* si ATTRIBUTE_UNUSED,
-                                   void* data) {
-#if defined(__arm__)
-  struct ucontext *uc = (struct ucontext *)data;
-  struct sigcontext *sc = &uc->uc_mcontext;
-  sc->arm_r0 = 0;     // Set R0 to #0 to signal error.
-  sc->arm_pc += 4;    // Skip offending instruction.
-#else
-  UNUSED(data);
-#endif
-}
-
-#if defined(__arm__)
-extern "C" bool artCheckForARMSDIVInstruction();
-#endif
-
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromAssembly() {
-  // See if we have an sdiv instruction.  Register a signal handler and try to execute an sdiv
-  // instruction.  If we get a SIGILL then it's not supported.
-  struct sigaction sa, osa;
-  sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
-  sa.sa_sigaction = bad_divide_inst_handle;
-  sigaction(SIGILL, &sa, &osa);
-
-  bool has_div = false;
-#if defined(__arm__)
-  if (artCheckForARMSDIVInstruction()) {
-    has_div = true;
-  }
-#endif
-
-  // Restore the signal handler.
-  sigaction(SIGILL, &osa, nullptr);
-
-  // Use compile time features to "detect" LPAE support.
-  // TODO: write an assembly LPAE support test.
-#if defined(__ARM_FEATURE_LPAE)
-  bool has_lpae = true;
-#else
-  bool has_lpae = false;
-#endif
-  return new ArmInstructionSetFeatures(has_lpae, has_div);
-}
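The probe above generalizes to any optional instruction: install a SIGILL handler, attempt the instruction, and restore the handler. A self-contained sketch using sigsetjmp instead of ART's context-patching handler (the try_insn callback is hypothetical):

#include <csetjmp>
#include <csignal>

static sigjmp_buf g_probe_env;

static void ProbeSigillHandler(int) {
  siglongjmp(g_probe_env, 1);  // Unwind out of the faulting instruction.
}

// Returns true if the probed instruction executed without raising SIGILL.
static bool ProbeInstruction(void (*try_insn)()) {
  struct sigaction sa, osa;
  sa.sa_flags = 0;
  sigemptyset(&sa.sa_mask);
  sa.sa_handler = ProbeSigillHandler;
  sigaction(SIGILL, &sa, &osa);
  bool supported = false;
  if (sigsetjmp(g_probe_env, 1) == 0) {
    try_insn();       // May fault with SIGILL on older cores.
    supported = true;
  }
  sigaction(SIGILL, &osa, nullptr);  // Restore the previous handler.
  return supported;
}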
-
-
-bool ArmInstructionSetFeatures::Equals(const InstructionSetFeatures* other) const {
-  if (kArm != other->GetInstructionSet()) {
-    return false;
-  }
-  const ArmInstructionSetFeatures* other_as_arm = other->AsArmInstructionSetFeatures();
-  return has_lpae_ == other_as_arm->has_lpae_ && has_div_ == other_as_arm->has_div_;
-}
-
-uint32_t ArmInstructionSetFeatures::AsBitmap() const {
-  return (has_lpae_ ? kLpaeBitfield : 0) | (has_div_ ? kDivBitfield : 0);
-}
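Since kDivBitfield is 1 and kLpaeBitfield is 2, AsBitmap and FromBitmap round-trip; a quick illustrative check:

// Illustrative: "div,lpae" corresponds to the bitmap value 3 (1 | 2).
std::unique_ptr<const ArmInstructionSetFeatures> f(
    ArmInstructionSetFeatures::FromBitmap(3u));
CHECK(f->HasDivideInstruction());
CHECK(f->HasLpae());
CHECK_EQ(f->AsBitmap(), 3u);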
-
-std::string ArmInstructionSetFeatures::GetFeatureString() const {
-  std::string result;
-  if (has_div_) {
-    result += ",div";
-  }
-  if (has_lpae_) {
-    result += ",lpae";
-  }
-  if (result.size() == 0) {
-    return "none";
-  } else {
-    // Strip leading comma.
-    return result.substr(1, result.size());
-  }
-}
-
-}  // namespace art
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
deleted file mode 100644
index 84a3e80..0000000
--- a/runtime/instruction_set.h
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_INSTRUCTION_SET_H_
-#define ART_RUNTIME_INSTRUCTION_SET_H_
-
-#include <iosfwd>
-#include <string>
-
-#include "base/logging.h"  // Logging is required for FATAL in the helper functions.
-#include "base/macros.h"
-#include "base/value_object.h"
-#include "globals.h"       // For KB.
-
-namespace art {
-
-enum InstructionSet {
-  kNone,
-  kArm,
-  kArm64,
-  kThumb2,
-  kX86,
-  kX86_64,
-  kMips,
-  kMips64
-};
-std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
-
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
-
-// Architecture-specific pointer sizes
-static constexpr size_t kArmPointerSize = 4;
-static constexpr size_t kArm64PointerSize = 8;
-static constexpr size_t kMipsPointerSize = 4;
-static constexpr size_t kMips64PointerSize = 8;
-static constexpr size_t kX86PointerSize = 4;
-static constexpr size_t kX86_64PointerSize = 8;
-
-// ARM instruction alignment. ARM processors require code to be 4-byte aligned,
-// but ARM ELF requires 8.
-static constexpr size_t kArmAlignment = 8;
-
-// ARM64 instruction alignment. This is the recommended alignment for maximum performance.
-static constexpr size_t kArm64Alignment = 16;
-
-// MIPS instruction alignment.  MIPS processors require code to be 4-byte aligned.
-// TODO: Can this be 4?
-static constexpr size_t kMipsAlignment = 8;
-
-// X86 instruction alignment. This is the recommended alignment for maximum performance.
-static constexpr size_t kX86Alignment = 16;
-
-
-const char* GetInstructionSetString(InstructionSet isa);
-
-// Note: Returns kNone when the string cannot be parsed to a known value.
-InstructionSet GetInstructionSetFromString(const char* instruction_set);
-
-static inline size_t GetInstructionSetPointerSize(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return kArmPointerSize;
-    case kArm64:
-      return kArm64PointerSize;
-    case kX86:
-      return kX86PointerSize;
-    case kX86_64:
-      return kX86_64PointerSize;
-    case kMips:
-      return kMipsPointerSize;
-    case kMips64:
-      return kMips64PointerSize;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have pointer size.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-size_t GetInstructionSetAlignment(InstructionSet isa);
-
-static inline bool Is64BitInstructionSet(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-    case kThumb2:
-    case kX86:
-    case kMips:
-      return false;
-
-    case kArm64:
-    case kX86_64:
-    case kMips64:
-      return true;
-
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have bit width.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return 4;
-    case kArm64:
-      return 8;
-    case kX86:
-      return 4;
-    case kX86_64:
-      return 8;
-    case kMips:
-      return 4;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have spills.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
-  switch (isa) {
-    case kArm:
-      // Fall-through.
-    case kThumb2:
-      return 4;
-    case kArm64:
-      return 8;
-    case kX86:
-      return 8;
-    case kX86_64:
-      return 8;
-    case kMips:
-      return 4;
-    case kNone:
-      LOG(FATAL) << "ISA kNone does not have spills.";
-      return 0;
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      return 0;
-  }
-}
-
-size_t GetStackOverflowReservedBytes(InstructionSet isa);
-
-class ArmInstructionSetFeatures;
-
-// Abstraction used to describe the features of different instruction sets.
-class InstructionSetFeatures {
- public:
-  // Process a CPU variant string for the given ISA and create an InstructionSetFeatures.
-  static const InstructionSetFeatures* FromVariant(InstructionSet isa,
-                                                   const std::string& variant,
-                                                   std::string* error_msg);
-
-  // Parse a string of the form "div,lpae" and create an InstructionSetFeatures.
-  static const InstructionSetFeatures* FromFeatureString(InstructionSet isa,
-                                                         const std::string& feature_list,
-                                                         std::string* error_msg);
-
-  // Parse a bitmap for the given isa and create an InstructionSetFeatures.
-  static const InstructionSetFeatures* FromBitmap(InstructionSet isa, uint32_t bitmap);
-
-  // Turn C pre-processor #defines into the equivalent instruction set features for kRuntimeISA.
-  static const InstructionSetFeatures* FromCppDefines();
-
-  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const InstructionSetFeatures* FromCpuInfo();
-
-  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
-  // InstructionSetFeatures.
-  static const InstructionSetFeatures* FromHwcap();
-
-  // Use assembly tests of the current runtime (i.e. kRuntimeISA) to determine the
-  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const InstructionSetFeatures* FromAssembly();
-
-  // Are these features the same as the other given features?
-  virtual bool Equals(const InstructionSetFeatures* other) const = 0;
-
-  // Return the ISA these features relate to.
-  virtual InstructionSet GetInstructionSet() const = 0;
-
-  // Return a bitmap that represents the features. ISA specific.
-  virtual uint32_t AsBitmap() const = 0;
-
-  // Return a string of the form "div,lpae" or "none".
-  virtual std::string GetFeatureString() const = 0;
-
-  // Down cast this to an ArmInstructionSetFeatures.
-  const ArmInstructionSetFeatures* AsArmInstructionSetFeatures() const;
-
-  virtual ~InstructionSetFeatures() {}
-
- protected:
-  InstructionSetFeatures() {}
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(InstructionSetFeatures);
-};
-std::ostream& operator<<(std::ostream& os, const InstructionSetFeatures& rhs);
-
-// Instruction set features relevant to the ARM architecture.
-class ArmInstructionSetFeatures FINAL : public InstructionSetFeatures {
- public:
-  // Process a CPU variant string like "krait" or "cortex-a15" and create InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromVariant(const std::string& variant,
-                                                      std::string* error_msg);
-
-  // Parse a string of the form "div,lpae" and create an InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromFeatureString(const std::string& feature_list,
-                                                            std::string* error_msg);
-
-  // Parse a bitmap and create an InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromBitmap(uint32_t bitmap);
-
-  // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const ArmInstructionSetFeatures* FromCppDefines();
-
-  // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromCpuInfo();
-
-  // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
-  // InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromHwcap();
-
-  // Use assembly tests of the current runtime (i.e. kRuntimeISA) to determine the
-  // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const ArmInstructionSetFeatures* FromAssembly();
-
-  bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
-
-  InstructionSet GetInstructionSet() const OVERRIDE {
-    return kArm;
-  }
-
-  uint32_t AsBitmap() const OVERRIDE;
-
-  // Return a string of the form "div,lpae" or "none".
-  std::string GetFeatureString() const OVERRIDE;
-
-  // Is the divide instruction feature enabled?
-  bool HasDivideInstruction() const {
-      return has_div_;
-  }
-
-  // Is the Large Physical Address Extension (LPAE) instruction feature enabled? When true, code
-  // can be generated that assumes double register loads and stores (ldrd, strd) don't tear.
-  bool HasLpae() const {
-    return has_lpae_;
-  }
-
-  virtual ~ArmInstructionSetFeatures() {}
-
- private:
-  ArmInstructionSetFeatures(bool has_lpae, bool has_div)
-      : has_lpae_(has_lpae), has_div_(has_div) {
-  }
-
-  // Bitmap positions for encoding features as a bitmap.
-  enum {
-    kDivBitfield = 1,
-    kLpaeBitfield = 2,
-  };
-
-  const bool has_lpae_;
-  const bool has_div_;
-
-  DISALLOW_COPY_AND_ASSIGN(ArmInstructionSetFeatures);
-};
-
-// A class used for instruction set features on ISAs that don't yet have any features defined.
-class UnknownInstructionSetFeatures FINAL : public InstructionSetFeatures {
- public:
-  static const UnknownInstructionSetFeatures* Unknown(InstructionSet isa) {
-    return new UnknownInstructionSetFeatures(isa);
-  }
-
-  bool Equals(const InstructionSetFeatures* other) const OVERRIDE {
-    return isa_ == other->GetInstructionSet();
-  }
-
-  InstructionSet GetInstructionSet() const OVERRIDE {
-    return isa_;
-  }
-
-  uint32_t AsBitmap() const OVERRIDE {
-    return 0;
-  }
-
-  std::string GetFeatureString() const OVERRIDE {
-    return "none";
-  }
-
-  virtual ~UnknownInstructionSetFeatures() {}
-
- private:
-  explicit UnknownInstructionSetFeatures(InstructionSet isa) : isa_(isa) {}
-
-  const InstructionSet isa_;
-
-  DISALLOW_COPY_AND_ASSIGN(UnknownInstructionSetFeatures);
-};
-
-// The following definitions create return types for two word-sized entities that will be passed
-// in registers so that memory operations for the interface trampolines can be avoided. The entities
-// are the resolved method and the pointer to the code to be invoked.
-//
-// On x86, ARM32 and MIPS, this is given as a *scalar* 64-bit value. The definition thus *must* be
-// uint64_t or long long int.
-//
-// On x86_64 and ARM64, structs are decomposed for allocation, so we can create a struct of two
-// size_t-sized values.
-//
-// We need two operations:
-//
-// 1) A flag value that signals failure. The assembly stubs expect the lower part to be "0".
-//    GetTwoWordFailureValue() will return a value that has lower part == 0.
-//
-// 2) A value that combines two word-sized values.
-//    GetTwoWordSuccessValue() constructs this.
-//
-// IMPORTANT: If you use this to transfer object pointers, it is your responsibility to ensure
-//            that the object does not move or the value is updated. Simple use of this is NOT SAFE
-//            when the garbage collector can move objects concurrently. Ensure that required locks
-//            are held when using!
-
-#if defined(__i386__) || defined(__arm__) || defined(__mips__)
-typedef uint64_t TwoWordReturn;
-
-// Encodes method_ptr==nullptr and code_ptr==nullptr
-static inline constexpr TwoWordReturn GetTwoWordFailureValue() {
-  return 0;
-}
-
-// Use the lower 32b for the method pointer and the upper 32b for the code pointer.
-static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
-  static_assert(sizeof(uint32_t) == sizeof(uintptr_t), "Unexpected size difference");
-  uint32_t lo32 = lo;
-  uint64_t hi64 = static_cast<uint64_t>(hi);
-  return ((hi64 << 32) | lo32);
-}
-
-#elif defined(__x86_64__) || defined(__aarch64__)
-struct TwoWordReturn {
-  uintptr_t lo;
-  uintptr_t hi;
-};
-
-// Encodes method_ptr==nullptr. Leaves random value in code pointer.
-static inline TwoWordReturn GetTwoWordFailureValue() {
-  TwoWordReturn ret;
-  ret.lo = 0;
-  return ret;
-}
-
-// Write values into their respective members.
-static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
-  TwoWordReturn ret;
-  ret.lo = lo;
-  ret.hi = hi;
-  return ret;
-}
-#else
-#error "Unsupported architecture"
-#endif
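A sketch of both sides of this convention; the extraction below is written in C++ for illustration, though the real consumers are assembly trampolines:

// Illustrative producer/consumer of TwoWordReturn.
TwoWordReturn ret = GetTwoWordSuccessValue(/* hi= */ 0x1000, /* lo= */ 0x2000);
#if defined(__i386__) || defined(__arm__) || defined(__mips__)
uintptr_t method_ptr = static_cast<uintptr_t>(ret & 0xFFFFFFFF);  // Low 32 bits.
uintptr_t code_ptr = static_cast<uintptr_t>(ret >> 32);           // High 32 bits.
#else
uintptr_t method_ptr = ret.lo;
uintptr_t code_ptr = ret.hi;
#endif
bool failed = (method_ptr == 0);  // GetTwoWordFailureValue() sets the low part to 0.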
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_INSTRUCTION_SET_H_
diff --git a/runtime/instruction_set_test.cc b/runtime/instruction_set_test.cc
deleted file mode 100644
index 3f2d16b..0000000
--- a/runtime/instruction_set_test.cc
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "instruction_set.h"
-
-#include "base/stringprintf.h"
-#include "common_runtime_test.h"
-
-namespace art {
-
-class InstructionSetTest : public CommonRuntimeTest {};
-
-TEST_F(InstructionSetTest, GetInstructionSetFromString) {
-  EXPECT_EQ(kArm, GetInstructionSetFromString("arm"));
-  EXPECT_EQ(kArm64, GetInstructionSetFromString("arm64"));
-  EXPECT_EQ(kX86, GetInstructionSetFromString("x86"));
-  EXPECT_EQ(kX86_64, GetInstructionSetFromString("x86_64"));
-  EXPECT_EQ(kMips, GetInstructionSetFromString("mips"));
-  EXPECT_EQ(kNone, GetInstructionSetFromString("none"));
-  EXPECT_EQ(kNone, GetInstructionSetFromString("random-string"));
-}
-
-TEST_F(InstructionSetTest, GetInstructionSetString) {
-  EXPECT_STREQ("arm", GetInstructionSetString(kArm));
-  EXPECT_STREQ("arm", GetInstructionSetString(kThumb2));
-  EXPECT_STREQ("arm64", GetInstructionSetString(kArm64));
-  EXPECT_STREQ("x86", GetInstructionSetString(kX86));
-  EXPECT_STREQ("x86_64", GetInstructionSetString(kX86_64));
-  EXPECT_STREQ("mips", GetInstructionSetString(kMips));
-  EXPECT_STREQ("none", GetInstructionSetString(kNone));
-}
-
-TEST_F(InstructionSetTest, TestRoundTrip) {
-  EXPECT_EQ(kRuntimeISA, GetInstructionSetFromString(GetInstructionSetString(kRuntimeISA)));
-}
-
-TEST_F(InstructionSetTest, PointerSize) {
-  EXPECT_EQ(sizeof(void*), GetInstructionSetPointerSize(kRuntimeISA));
-}
-
-TEST_F(InstructionSetTest, X86Features) {
-  // Build features for a 32-bit x86 atom processor.
-  std::string error_msg;
-  std::unique_ptr<const InstructionSetFeatures> x86_features(
-      InstructionSetFeatures::FromVariant(kX86, "atom", &error_msg));
-  ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
-  EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
-  EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("none", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 0U);
-
-  // Build features for a 32-bit x86 default processor.
-  std::unique_ptr<const InstructionSetFeatures> x86_default_features(
-      InstructionSetFeatures::FromFeatureString(kX86, "default", &error_msg));
-  ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
-  EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
-  EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("none", x86_default_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_default_features->AsBitmap(), 0U);
-
-  // Build features for a 64-bit x86-64 atom processor.
-  std::unique_ptr<const InstructionSetFeatures> x86_64_features(
-      InstructionSetFeatures::FromVariant(kX86_64, "atom", &error_msg));
-  ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
-  EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
-  EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("none", x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 0U);
-
-  EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
-  EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
-  EXPECT_TRUE(x86_features->Equals(x86_default_features.get()));
-}
-
-TEST_F(InstructionSetTest, ArmFeaturesFromVariant) {
-  // Build features for a 32-bit ARM krait processor.
-  std::string error_msg;
-  std::unique_ptr<const InstructionSetFeatures> krait_features(
-      InstructionSetFeatures::FromVariant(kArm, "krait", &error_msg));
-  ASSERT_TRUE(krait_features.get() != nullptr) << error_msg;
-
-  ASSERT_EQ(krait_features->GetInstructionSet(), kArm);
-  EXPECT_TRUE(krait_features->Equals(krait_features.get()));
-  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
-  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasLpae());
-  EXPECT_STREQ("div,lpae", krait_features->GetFeatureString().c_str());
-  EXPECT_EQ(krait_features->AsBitmap(), 3U);
-
-  // Build features for a 32-bit ARM denver processor.
-  std::unique_ptr<const InstructionSetFeatures> denver_features(
-      InstructionSetFeatures::FromVariant(kArm, "denver", &error_msg));
-  ASSERT_TRUE(denver_features.get() != nullptr) << error_msg;
-
-  EXPECT_TRUE(denver_features->Equals(denver_features.get()));
-  EXPECT_TRUE(denver_features->Equals(krait_features.get()));
-  EXPECT_TRUE(krait_features->Equals(denver_features.get()));
-  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
-  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasLpae());
-  EXPECT_STREQ("div,lpae", denver_features->GetFeatureString().c_str());
-  EXPECT_EQ(denver_features->AsBitmap(), 3U);
-
-  // Build features for a 32-bit ARMv7 processor.
-  std::unique_ptr<const InstructionSetFeatures> arm7_features(
-      InstructionSetFeatures::FromVariant(kArm, "arm7", &error_msg));
-  ASSERT_TRUE(arm7_features.get() != nullptr) << error_msg;
-
-  EXPECT_TRUE(arm7_features->Equals(arm7_features.get()));
-  EXPECT_FALSE(arm7_features->Equals(krait_features.get()));
-  EXPECT_FALSE(krait_features->Equals(arm7_features.get()));
-  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
-  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasLpae());
-  EXPECT_STREQ("none", arm7_features->GetFeatureString().c_str());
-  EXPECT_EQ(arm7_features->AsBitmap(), 0U);
-
-  // ARM6 is not a supported architecture variant.
-  std::unique_ptr<const InstructionSetFeatures> arm6_features(
-      InstructionSetFeatures::FromVariant(kArm, "arm6", &error_msg));
-  EXPECT_TRUE(arm6_features.get() == nullptr);
-  EXPECT_NE(error_msg.size(), 0U);
-}
-
-TEST_F(InstructionSetTest, ArmFeaturesFromString) {
-  // Build features for a 32-bit ARM with LPAE and div processor.
-  std::string error_msg;
-  std::unique_ptr<const InstructionSetFeatures> krait_features(
-      InstructionSetFeatures::FromFeatureString(kArm, "lpae,div", &error_msg));
-  ASSERT_TRUE(krait_features.get() != nullptr) << error_msg;
-
-  ASSERT_EQ(krait_features->GetInstructionSet(), kArm);
-  EXPECT_TRUE(krait_features->Equals(krait_features.get()));
-  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
-  EXPECT_TRUE(krait_features->AsArmInstructionSetFeatures()->HasLpae());
-  EXPECT_STREQ("div,lpae", krait_features->GetFeatureString().c_str());
-  EXPECT_EQ(krait_features->AsBitmap(), 3U);
-
-  // Build features for a 32-bit ARM processor with LPAE and div flipped.
-  std::unique_ptr<const InstructionSetFeatures> denver_features(
-      InstructionSetFeatures::FromFeatureString(kArm, "div,lpae", &error_msg));
-  ASSERT_TRUE(denver_features.get() != nullptr) << error_msg;
-
-  EXPECT_TRUE(denver_features->Equals(denver_features.get()));
-  EXPECT_TRUE(denver_features->Equals(krait_features.get()));
-  EXPECT_TRUE(krait_features->Equals(denver_features.get()));
-  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
-  EXPECT_TRUE(denver_features->AsArmInstructionSetFeatures()->HasLpae());
-  EXPECT_STREQ("div,lpae", denver_features->GetFeatureString().c_str());
-  EXPECT_EQ(denver_features->AsBitmap(), 3U);
-
-  // Build features for a 32-bit default ARM processor.
-  std::unique_ptr<const InstructionSetFeatures> arm7_features(
-      InstructionSetFeatures::FromFeatureString(kArm, "default", &error_msg));
-  ASSERT_TRUE(arm7_features.get() != nullptr) << error_msg;
-
-  EXPECT_TRUE(arm7_features->Equals(arm7_features.get()));
-  EXPECT_FALSE(arm7_features->Equals(krait_features.get()));
-  EXPECT_FALSE(krait_features->Equals(arm7_features.get()));
-  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
-  EXPECT_FALSE(arm7_features->AsArmInstructionSetFeatures()->HasLpae());
-  EXPECT_STREQ("none", arm7_features->GetFeatureString().c_str());
-  EXPECT_EQ(arm7_features->AsBitmap(), 0U);
-}
-
-#ifdef HAVE_ANDROID_OS
-#include "cutils/properties.h"
-
-TEST_F(InstructionSetTest, FeaturesFromSystemPropertyVariant) {
-  // Take the default set of instruction features from the build.
-  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
-      InstructionSetFeatures::FromCppDefines());
-
-  // Read the features property.
-  std::string key = StringPrintf("dalvik.vm.isa.%s.variant", GetInstructionSetString(kRuntimeISA));
-  char dex2oat_isa_variant[PROPERTY_VALUE_MAX];
-  if (property_get(key.c_str(), dex2oat_isa_variant, nullptr) > 0) {
-    // Use features from property to build InstructionSetFeatures and check against build's
-    // features.
-    std::string error_msg;
-    std::unique_ptr<const InstructionSetFeatures> property_features(
-        InstructionSetFeatures::FromVariant(kRuntimeISA, dex2oat_isa_variant, &error_msg));
-    ASSERT_TRUE(property_features.get() != nullptr) << error_msg;
-
-    EXPECT_TRUE(property_features->Equals(instruction_set_features.get()))
-      << "System property features: " << *property_features.get()
-      << "\nFeatures from build: " << *instruction_set_features.get();
-  }
-}
-
-TEST_F(InstructionSetTest, FeaturesFromSystemPropertyString) {
-  // Take the default set of instruction features from the build.
-  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
-      InstructionSetFeatures::FromCppDefines());
-
-  // Read the features property.
-  std::string key = StringPrintf("dalvik.vm.isa.%s.features", GetInstructionSetString(kRuntimeISA));
-  char dex2oat_isa_features[PROPERTY_VALUE_MAX];
-  if (property_get(key.c_str(), dex2oat_isa_features, nullptr) > 0) {
-    // Use features from property to build InstructionSetFeatures and check against build's
-    // features.
-    std::string error_msg;
-    std::unique_ptr<const InstructionSetFeatures> property_features(
-        InstructionSetFeatures::FromFeatureString(kRuntimeISA, dex2oat_isa_features, &error_msg));
-    ASSERT_TRUE(property_features.get() != nullptr) << error_msg;
-
-    EXPECT_TRUE(property_features->Equals(instruction_set_features.get()))
-      << "System property features: " << *property_features.get()
-      << "\nFeatures from build: " << *instruction_set_features.get();
-  }
-}
-#endif
-
-#if defined(__arm__)
-TEST_F(InstructionSetTest, DISABLED_FeaturesFromCpuInfo) {
-  LOG(WARNING) << "Test disabled due to buggy ARM kernels";
-#else
-TEST_F(InstructionSetTest, FeaturesFromCpuInfo) {
-#endif
-  // Take the default set of instruction features from the build.
-  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
-      InstructionSetFeatures::FromCppDefines());
-
-  // Check we get the same instruction set features using /proc/cpuinfo.
-  std::unique_ptr<const InstructionSetFeatures> cpuinfo_features(
-      InstructionSetFeatures::FromCpuInfo());
-  EXPECT_TRUE(cpuinfo_features->Equals(instruction_set_features.get()))
-      << "CPU Info features: " << *cpuinfo_features.get()
-      << "\nFeatures from build: " << *instruction_set_features.get();
-}
-
-#if defined(__arm__)
-TEST_F(InstructionSetTest, DISABLED_FeaturesFromHwcap) {
-  LOG(WARNING) << "Test disabled due to buggy ARM kernels";
-#else
-TEST_F(InstructionSetTest, FeaturesFromHwcap) {
-#endif
-  // Take the default set of instruction features from the build.
-  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
-      InstructionSetFeatures::FromCppDefines());
-
-  // Check we get the same instruction set features using AT_HWCAP.
-  std::unique_ptr<const InstructionSetFeatures> hwcap_features(
-      InstructionSetFeatures::FromHwcap());
-  EXPECT_TRUE(hwcap_features->Equals(instruction_set_features.get()))
-      << "Hwcap features: " << *hwcap_features.get()
-      << "\nFeatures from build: " << *instruction_set_features.get();
-}
-
-
-#if defined(__arm__)
-TEST_F(InstructionSetTest, DISABLED_FeaturesFromAssembly) {
-  LOG(WARNING) << "Test disabled due to buggy ARM kernels";
-#else
-TEST_F(InstructionSetTest, FeaturesFromAssembly) {
-#endif
-  // Take the default set of instruction features from the build.
-  std::unique_ptr<const InstructionSetFeatures> instruction_set_features(
-      InstructionSetFeatures::FromCppDefines());
-
-  // Check we get the same instruction set features using assembly tests.
-  std::unique_ptr<const InstructionSetFeatures> assembly_features(
-      InstructionSetFeatures::FromAssembly());
-  EXPECT_TRUE(assembly_features->Equals(instruction_set_features.get()))
-      << "Assembly features: " << *assembly_features.get()
-      << "\nFeatures from build: " << *instruction_set_features.get();
-}
-
-}  // namespace art
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 646c7ae..369039d 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -21,8 +21,8 @@
 #include <list>
 #include <map>
 
+#include "arch/instruction_set.h"
 #include "atomic.h"
-#include "instruction_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 56a6d2c..7ecb58e 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -148,6 +148,7 @@
 }
 
 void InternTable::AddImageStringsToTable(gc::space::ImageSpace* image_space) {
+  CHECK(image_space != nullptr);
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   if (!image_added_to_intern_table_) {
     mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 18de133..b17f303 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -138,7 +138,7 @@
   if (method->IsStatic()) {
     if (shorty == "L") {
       typedef jobject (fntype)(JNIEnv*, jclass);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       jobject jresult;
@@ -149,35 +149,35 @@
       result->SetL(soa.Decode<Object*>(jresult));
     } else if (shorty == "V") {
       typedef void (fntype)(JNIEnv*, jclass);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       fn(soa.Env(), klass.get());
     } else if (shorty == "Z") {
       typedef jboolean (fntype)(JNIEnv*, jclass);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetZ(fn(soa.Env(), klass.get()));
     } else if (shorty == "BI") {
       typedef jbyte (fntype)(JNIEnv*, jclass, jint);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetB(fn(soa.Env(), klass.get(), args[0]));
     } else if (shorty == "II") {
       typedef jint (fntype)(JNIEnv*, jclass, jint);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetI(fn(soa.Env(), klass.get(), args[0]));
     } else if (shorty == "LL") {
       typedef jobject (fntype)(JNIEnv*, jclass, jobject);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
@@ -190,14 +190,15 @@
       result->SetL(soa.Decode<Object*>(jresult));
     } else if (shorty == "IIZ") {
       typedef jint (fntype)(JNIEnv*, jclass, jint, jboolean);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetI(fn(soa.Env(), klass.get(), args[0], args[1]));
     } else if (shorty == "ILI") {
       typedef jint (fntype)(JNIEnv*, jclass, jobject, jint);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(
+          method->GetEntryPointFromJni()));
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
@@ -206,21 +207,21 @@
       result->SetI(fn(soa.Env(), klass.get(), arg0.get(), args[1]));
     } else if (shorty == "SIZ") {
       typedef jshort (fntype)(JNIEnv*, jclass, jint, jboolean);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetEntryPointFromJni()));
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetS(fn(soa.Env(), klass.get(), args[0], args[1]));
     } else if (shorty == "VIZ") {
       typedef void (fntype)(JNIEnv*, jclass, jint, jboolean);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedThreadStateChange tsc(self, kNative);
       fn(soa.Env(), klass.get(), args[0], args[1]);
     } else if (shorty == "ZLL") {
       typedef jboolean (fntype)(JNIEnv*, jclass, jobject, jobject);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
@@ -231,7 +232,7 @@
       result->SetZ(fn(soa.Env(), klass.get(), arg0.get(), arg1.get()));
     } else if (shorty == "ZILL") {
       typedef jboolean (fntype)(JNIEnv*, jclass, jint, jobject, jobject);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg1(soa.Env(),
@@ -242,7 +243,7 @@
       result->SetZ(fn(soa.Env(), klass.get(), args[0], arg1.get(), arg2.get()));
     } else if (shorty == "VILII") {
       typedef void (fntype)(JNIEnv*, jclass, jint, jobject, jint, jint);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg1(soa.Env(),
@@ -251,7 +252,7 @@
       fn(soa.Env(), klass.get(), args[0], arg1.get(), args[2], args[3]);
     } else if (shorty == "VLILII") {
       typedef void (fntype)(JNIEnv*, jclass, jobject, jint, jobject, jint, jint);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
@@ -267,7 +268,7 @@
   } else {
     if (shorty == "L") {
       typedef jobject (fntype)(JNIEnv*, jobject);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       jobject jresult;
@@ -278,14 +279,14 @@
       result->SetL(soa.Decode<Object*>(jresult));
     } else if (shorty == "V") {
       typedef void (fntype)(JNIEnv*, jobject);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       ScopedThreadStateChange tsc(self, kNative);
       fn(soa.Env(), rcvr.get());
     } else if (shorty == "LL") {
       typedef jobject (fntype)(JNIEnv*, jobject, jobject);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       ScopedLocalRef<jobject> arg0(soa.Env(),
@@ -299,7 +300,7 @@
       ScopedThreadStateChange tsc(self, kNative);
     } else if (shorty == "III") {
       typedef jint (fntype)(JNIEnv*, jobject, jint, jint);
-      fntype* const fn = reinterpret_cast<fntype*>(const_cast<void*>(method->GetNativeMethod()));
+      fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       ScopedThreadStateChange tsc(self, kNative);
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 44f713c..1e0a2d2 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -125,6 +125,10 @@
 };
 
 static bool NeedsFullDeoptimization(JdwpEventKind eventKind) {
+  if (!Dbg::RequiresDeoptimization()) {
+    // We don't need deoptimization for debugging.
+    return false;
+  }
   switch (eventKind) {
       case EK_METHOD_ENTRY:
       case EK_METHOD_EXIT:
diff --git a/runtime/jdwp/jdwp_socket.cc b/runtime/jdwp/jdwp_socket.cc
index e8c0856..7119ce5 100644
--- a/runtime/jdwp/jdwp_socket.cc
+++ b/runtime/jdwp/jdwp_socket.cc
@@ -272,7 +272,7 @@
   /*
    * Start by resolving the host name.
    */
-#ifdef HAVE_GETHOSTBYNAME_R
+#if defined(__linux__)
   hostent he;
   char auxBuf[128];
   int error;
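The guard swap keys the reentrant-resolver path off the platform instead of a build-system HAVE_ macro. For reference, a minimal sketch of the glibc gethostbyname_r calling convention this path relies on (standalone, not ART code):

#include <netdb.h>

// Illustrative use of the Linux/glibc reentrant resolver.
hostent he;
hostent* result = nullptr;
char aux_buf[512];
int h_error = 0;
int rc = gethostbyname_r("localhost", &he, aux_buf, sizeof(aux_buf),
                         &result, &h_error);
if (rc != 0 || result == nullptr) {
  // Lookup failed; h_error holds the resolver error code.
}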
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 3b0656e..1dcfcab 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -360,7 +360,7 @@
     ScopedObjectAccess soa(env);
     mirror::ArtMethod* m = soa.DecodeMethod(mid);
     CHECK(!kMovingMethods);
-    jobject art_method = soa.AddLocalReference<jobject>(m);
+    ScopedLocalRef<jobject> art_method(env, soa.AddLocalReference<jobject>(m));
     jobject reflect_method;
     if (m->IsConstructor()) {
       reflect_method = env->AllocObject(WellKnownClasses::java_lang_reflect_Constructor);
@@ -371,7 +371,7 @@
       return nullptr;
     }
     SetObjectField(env, reflect_method,
-                   WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod, art_method);
+                   WellKnownClasses::java_lang_reflect_AbstractMethod_artMethod, art_method.get());
     return reflect_method;
   }
 
@@ -379,13 +379,13 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     mirror::ArtField* f = soa.DecodeField(fid);
-    jobject art_field = soa.AddLocalReference<jobject>(f);
+    ScopedLocalRef<jobject> art_field(env, soa.AddLocalReference<jobject>(f));
     jobject reflect_field = env->AllocObject(WellKnownClasses::java_lang_reflect_Field);
     if (env->ExceptionCheck()) {
       return nullptr;
     }
     SetObjectField(env, reflect_field,
-                   WellKnownClasses::java_lang_reflect_Field_artField, art_field);
+                   WellKnownClasses::java_lang_reflect_Field_artField, art_field.get());
     return reflect_field;
   }
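Both hunks above apply the same fix: the intermediate jobject from AddLocalReference is now owned by a ScopedLocalRef, so it is released when the scope exits rather than accumulating in the local reference table. The idiom in general form (UseObject is a hypothetical consumer):

{
  // ScopedLocalRef owns exactly one JNI local reference.
  ScopedLocalRef<jobject> obj(env, env->NewStringUTF("temp"));
  UseObject(obj.get());  // Borrow the reference without taking ownership.
}  // Destructor calls env->DeleteLocalRef, freeing the table slot.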
 
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index ccad137..62b6b34 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -390,59 +390,72 @@
   void ReleasePrimitiveArrayElementsOfWrongType(bool check_jni) {
     bool old_check_jni = vm_->SetCheckJniEnabled(check_jni);
     CheckJniAbortCatcher jni_abort_catcher;
+    {
+      jbooleanArray array = env_->NewBooleanArray(10);
+      ASSERT_TRUE(array != nullptr);
+      jboolean is_copy;
+      jboolean* elements = env_->GetBooleanArrayElements(array, &is_copy);
+      ASSERT_TRUE(elements != nullptr);
+      env_->ReleaseByteArrayElements(reinterpret_cast<jbyteArray>(array),
+                                     reinterpret_cast<jbyte*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected byte[]"
+              : "attempt to release byte primitive array elements with an object of type boolean[]");
+      env_->ReleaseShortArrayElements(reinterpret_cast<jshortArray>(array),
+                                      reinterpret_cast<jshort*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected short[]"
+              : "attempt to release short primitive array elements with an object of type boolean[]");
+      env_->ReleaseCharArrayElements(reinterpret_cast<jcharArray>(array),
+                                     reinterpret_cast<jchar*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected char[]"
+              : "attempt to release char primitive array elements with an object of type boolean[]");
+      env_->ReleaseIntArrayElements(reinterpret_cast<jintArray>(array),
+                                    reinterpret_cast<jint*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected int[]"
+              : "attempt to release int primitive array elements with an object of type boolean[]");
+      env_->ReleaseLongArrayElements(reinterpret_cast<jlongArray>(array),
+                                     reinterpret_cast<jlong*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected long[]"
+              : "attempt to release long primitive array elements with an object of type boolean[]");
+      env_->ReleaseFloatArrayElements(reinterpret_cast<jfloatArray>(array),
+                                      reinterpret_cast<jfloat*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected float[]"
+              : "attempt to release float primitive array elements with an object of type boolean[]");
+      env_->ReleaseDoubleArrayElements(reinterpret_cast<jdoubleArray>(array),
+                                       reinterpret_cast<jdouble*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type boolean[] expected double[]"
+              : "attempt to release double primitive array elements with an object of type boolean[]");
 
-    jbooleanArray array = env_->NewBooleanArray(10);
-    ASSERT_TRUE(array != nullptr);
-    jboolean is_copy;
-    jboolean* elements = env_->GetBooleanArrayElements(array, &is_copy);
-    ASSERT_TRUE(elements != nullptr);
-    env_->ReleaseByteArrayElements(reinterpret_cast<jbyteArray>(array),
-                                   reinterpret_cast<jbyte*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected byte[]"
-            : "attempt to release byte primitive array elements with an object of type boolean[]");
-    env_->ReleaseShortArrayElements(reinterpret_cast<jshortArray>(array),
-                                    reinterpret_cast<jshort*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected short[]"
-            : "attempt to release short primitive array elements with an object of type boolean[]");
-    env_->ReleaseCharArrayElements(reinterpret_cast<jcharArray>(array),
-                                   reinterpret_cast<jchar*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected char[]"
-            : "attempt to release char primitive array elements with an object of type boolean[]");
-    env_->ReleaseIntArrayElements(reinterpret_cast<jintArray>(array),
-                                  reinterpret_cast<jint*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected int[]"
-            : "attempt to release int primitive array elements with an object of type boolean[]");
-    env_->ReleaseLongArrayElements(reinterpret_cast<jlongArray>(array),
-                                   reinterpret_cast<jlong*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected long[]"
-            : "attempt to release long primitive array elements with an object of type boolean[]");
-    env_->ReleaseFloatArrayElements(reinterpret_cast<jfloatArray>(array),
-                                    reinterpret_cast<jfloat*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected float[]"
-            : "attempt to release float primitive array elements with an object of type boolean[]");
-    env_->ReleaseDoubleArrayElements(reinterpret_cast<jdoubleArray>(array),
-                                     reinterpret_cast<jdouble*>(elements), 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type boolean[] expected double[]"
-            : "attempt to release double primitive array elements with an object of type boolean[]");
-    jbyteArray array2 = env_->NewByteArray(10);
-    env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(array2), elements, 0);
-    jni_abort_catcher.Check(
-        check_jni ? "incompatible array type byte[] expected boolean[]"
-            : "attempt to release boolean primitive array elements with an object of type byte[]");
-    jobject object = env_->NewStringUTF("Test String");
-    env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(object), elements, 0);
-    jni_abort_catcher.Check(
-        check_jni ? "jarray argument has non-array type: java.lang.String"
-            : "attempt to release boolean primitive array elements with an object of type "
+      // Don't leak the elements array.
+      env_->ReleaseBooleanArrayElements(array, elements, 0);
+    }
+    {
+      jbyteArray array = env_->NewByteArray(10);
+      jboolean is_copy;
+      jbyte* elements = env_->GetByteArrayElements(array, &is_copy);
+
+      env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(array),
+                                        reinterpret_cast<jboolean*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "incompatible array type byte[] expected boolean[]"
+              : "attempt to release boolean primitive array elements with an object of type byte[]");
+      jobject object = env_->NewStringUTF("Test String");
+      env_->ReleaseBooleanArrayElements(reinterpret_cast<jbooleanArray>(object),
+                                        reinterpret_cast<jboolean*>(elements), 0);
+      jni_abort_catcher.Check(
+          check_jni ? "jarray argument has non-array type: java.lang.String"
+              : "attempt to release boolean primitive array elements with an object of type "
               "java.lang.String");
 
+      // Don't leak the elements array.
+      env_->ReleaseByteArrayElements(array, elements, 0);
+    }
     EXPECT_EQ(check_jni, vm_->SetCheckJniEnabled(old_check_jni));
   }
 
@@ -799,6 +812,11 @@
   ASSERT_NE(fid, nullptr);
   // Turn the fid into a java.lang.reflect.Field...
   jobject field = env_->ToReflectedField(c, fid, JNI_FALSE);
+  for (size_t i = 0; i <= kLocalsMax; ++i) {
+    // Regression test for b/18396311: ToReflectedField leaking local refs caused the local
+    // reference table to overflow once it held 512 references to ArtField.
+    env_->DeleteLocalRef(env_->ToReflectedField(c, fid, JNI_FALSE));
+  }
   ASSERT_NE(c, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(field, jlrField));
   // ...and back again.
@@ -825,6 +843,11 @@
   ASSERT_NE(mid, nullptr);
   // Turn the mid into a java.lang.reflect.Constructor...
   jobject method = env_->ToReflectedMethod(c, mid, JNI_FALSE);
+  for (size_t i = 0; i <= kLocalsMax; ++i) {
+    // Regression test for b/18396311: ToReflectedMethod leaking local refs caused the local
+    // reference table to overflow once it held 512 references to ArtMethod.
+    env_->DeleteLocalRef(env_->ToReflectedMethod(c, mid, JNI_FALSE));
+  }
   ASSERT_NE(method, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(method, jlrConstructor));
   // ...and back again.
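The kLocalsMax + 1 iteration count is what makes these loops a leak detector: one hidden leaked local per call would overflow the 512-slot local reference table before the loop completes. The pattern in general form (ApiUnderTest is hypothetical):

// Call the API more times than the local reference table can hold, deleting
// the one reference we knowingly receive each iteration; any additional
// leaked local would overflow the table mid-loop and abort the test.
for (size_t i = 0; i <= kLocalsMax; ++i) {
  env_->DeleteLocalRef(ApiUnderTest(env_));
}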
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 494fa2f..62d17ab 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -307,12 +307,6 @@
   return pc - reinterpret_cast<uintptr_t>(code);
 }
 
-template<VerifyObjectFlags kVerifyFlags>
-inline void ArtMethod::SetNativeMethod(const void* native_method) {
-  SetFieldPtr<false, true, kVerifyFlags>(
-      OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_), native_method);
-}
-
 inline QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo(const void* code_pointer) {
   DCHECK(code_pointer != nullptr);
   DCHECK_EQ(code_pointer, GetQuickOatCodePointer());
@@ -485,6 +479,12 @@
   return type;
 }
 
+inline void ArtMethod::CheckObjectSizeEqualsMirrorSize() {
+  // Using the default, check the class object size to make sure it matches the size of the
+  // object.
+  DCHECK_EQ(GetClass()->GetObjectSize(), sizeof(*this));
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index a742aaa..3b4d5f3 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -483,7 +483,7 @@
   if (is_fast) {
     SetAccessFlags(GetAccessFlags() | kAccFastNative);
   }
-  SetNativeMethod(native_method);
+  SetEntryPointFromJni(native_method);
 }
 
 void ArtMethod::UnregisterNative() {
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index d92d00a..4a7831f 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -48,11 +48,6 @@
   // Size of java.lang.reflect.ArtMethod.class.
   static uint32_t ClassSize();
 
-  // Size of an instance of java.lang.reflect.ArtMethod not including its value array.
-  static constexpr uint32_t InstanceSize() {
-    return sizeof(ArtMethod);
-  }
-
   static ArtMethod* FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                         jobject jlr_method)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -258,49 +253,92 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   EntryPointFromInterpreter* GetEntryPointFromInterpreter()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldPtr<EntryPointFromInterpreter*, kVerifyFlags>(
-        OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_));
+    CheckObjectSizeEqualsMirrorSize();
+    return GetEntryPointFromInterpreterPtrSize(sizeof(void*));
+  }
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  EntryPointFromInterpreter* GetEntryPointFromInterpreterPtrSize(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<EntryPointFromInterpreter*, kVerifyFlags>(
+        EntryPointFromInterpreterOffset(pointer_size), pointer_size);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetEntryPointFromInterpreter(EntryPointFromInterpreter* entry_point_from_interpreter)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetFieldPtr<false, true, kVerifyFlags>(
-        OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_interpreter_),
-        entry_point_from_interpreter);
+    CheckObjectSizeEqualsMirrorSize();
+    SetEntryPointFromInterpreterPtrSize(entry_point_from_interpreter, sizeof(void*));
+  }
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  void SetEntryPointFromInterpreterPtrSize(EntryPointFromInterpreter* entry_point_from_interpreter,
+                                           size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<false, true, kVerifyFlags>(
+        EntryPointFromInterpreterOffset(pointer_size), entry_point_from_interpreter, pointer_size);
   }
 
-  static MemberOffset EntryPointFromPortableCompiledCodeOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_portable_compiled_code_));
+  ALWAYS_INLINE static MemberOffset EntryPointFromPortableCompiledCodeOffset(size_t pointer_size) {
+    return MemberOffset(PtrSizedFieldsOffset() + OFFSETOF_MEMBER(
+        PtrSizedFields, entry_point_from_portable_compiled_code_) / sizeof(void*) * pointer_size);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  const void* GetEntryPointFromPortableCompiledCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldPtr<const void*, kVerifyFlags>(
-        EntryPointFromPortableCompiledCodeOffset());
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  const void* GetEntryPointFromPortableCompiledCode()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CheckObjectSizeEqualsMirrorSize();
+    return GetEntryPointFromPortableCompiledCodePtrSize(sizeof(void*));
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE const void* GetEntryPointFromPortableCompiledCodePtrSize(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<const void*, kVerifyFlags>(
+        EntryPointFromPortableCompiledCodeOffset(pointer_size), pointer_size);
+  }
+
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetEntryPointFromPortableCompiledCode(const void* entry_point_from_portable_compiled_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetFieldPtr<false, true, kVerifyFlags>(
-        EntryPointFromPortableCompiledCodeOffset(), entry_point_from_portable_compiled_code);
+    CheckObjectSizeEqualsMirrorSize();
+    return SetEntryPointFromPortableCompiledCodePtrSize(entry_point_from_portable_compiled_code,
+                                                        sizeof(void*));
   }
 
-  static MemberOffset EntryPointFromQuickCompiledCodeOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, entry_point_from_quick_compiled_code_));
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  void SetEntryPointFromPortableCompiledCodePtrSize(
+      const void* entry_point_from_portable_compiled_code, size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<false, true, kVerifyFlags>(
+        EntryPointFromPortableCompiledCodeOffset(pointer_size),
+        entry_point_from_portable_compiled_code, pointer_size);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   const void* GetEntryPointFromQuickCompiledCode() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldPtr<const void*, kVerifyFlags>(EntryPointFromQuickCompiledCodeOffset());
+    CheckObjectSizeEqualsMirrorSize();
+    return GetEntryPointFromQuickCompiledCodePtrSize(sizeof(void*));
+  }
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE const void* GetEntryPointFromQuickCompiledCodePtrSize(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<const void*, kVerifyFlags>(
+        EntryPointFromQuickCompiledCodeOffset(pointer_size), pointer_size);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetEntryPointFromQuickCompiledCode(const void* entry_point_from_quick_compiled_code)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetFieldPtr<false, true, kVerifyFlags>(
-        EntryPointFromQuickCompiledCodeOffset(), entry_point_from_quick_compiled_code);
+    CheckObjectSizeEqualsMirrorSize();
+    SetEntryPointFromQuickCompiledCodePtrSize(entry_point_from_quick_compiled_code,
+                                              sizeof(void*));
+  }
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE void SetEntryPointFromQuickCompiledCodePtrSize(
+      const void* entry_point_from_quick_compiled_code, size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<false, true, kVerifyFlags>(
+        EntryPointFromQuickCompiledCodeOffset(pointer_size), entry_point_from_quick_compiled_code,
+        pointer_size);
   }
 
   uint32_t GetCodeSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -365,11 +403,23 @@
   CodeInfo GetOptimizedCodeInfo() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   const uint8_t* GetNativeGcMap() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldPtr<uint8_t*>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_));
+    CheckObjectSizeEqualsMirrorSize();
+    return GetNativeGcMapPtrSize(sizeof(void*));
   }
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE const uint8_t* GetNativeGcMapPtrSize(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<uint8_t*>(GcMapOffset(pointer_size), pointer_size);
+  }
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   void SetNativeGcMap(const uint8_t* data) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    SetFieldPtr<false, true, kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(ArtMethod, gc_map_), data);
+    CheckObjectSizeEqualsMirrorSize();
+    SetNativeGcMapPtrSize(data, sizeof(void*));
+  }
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE void SetNativeGcMapPtrSize(const uint8_t* data, size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<false, true, kVerifyFlags>(GcMapOffset(pointer_size), data,
+                                                   pointer_size);
   }
 
   // When building the oat file we need a convenient place to stuff the offset of the native GC map.
@@ -409,16 +459,46 @@
 
   void UnregisterNative() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  static MemberOffset NativeMethodOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, entry_point_from_jni_);
+  static MemberOffset EntryPointFromInterpreterOffset(size_t pointer_size) {
+    return MemberOffset(PtrSizedFieldsOffset() + OFFSETOF_MEMBER(
+        PtrSizedFields, entry_point_from_interpreter_) / sizeof(void*) * pointer_size);
   }
 
-  const void* GetNativeMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetFieldPtr<const void*>(NativeMethodOffset());
+  static MemberOffset EntryPointFromJniOffset(size_t pointer_size) {
+    return MemberOffset(PtrSizedFieldsOffset() + OFFSETOF_MEMBER(
+        PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size);
   }
 
-  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetNativeMethod(const void*) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static MemberOffset EntryPointFromQuickCompiledCodeOffset(size_t pointer_size) {
+    return MemberOffset(PtrSizedFieldsOffset() + OFFSETOF_MEMBER(
+        PtrSizedFields, entry_point_from_quick_compiled_code_) / sizeof(void*) * pointer_size);
+  }
+
+  static MemberOffset GcMapOffset(size_t pointer_size) {
+    return MemberOffset(PtrSizedFieldsOffset() + OFFSETOF_MEMBER(
+        PtrSizedFields, gc_map_) / sizeof(void*) * pointer_size);
+  }
+
+  void* GetEntryPointFromJni() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CheckObjectSizeEqualsMirrorSize();
+    return GetEntryPointFromJniPtrSize(sizeof(void*));
+  }
+  ALWAYS_INLINE void* GetEntryPointFromJniPtrSize(size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return GetFieldPtrWithSize<void*>(EntryPointFromJniOffset(pointer_size), pointer_size);
+  }
+
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  void SetEntryPointFromJni(const void* entrypoint) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    CheckObjectSizeEqualsMirrorSize();
+    SetEntryPointFromJniPtrSize<kVerifyFlags>(entrypoint, sizeof(void*));
+  }
+  template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE void SetEntryPointFromJniPtrSize(const void* entrypoint, size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    SetFieldPtrWithSize<false, true, kVerifyFlags>(
+        EntryPointFromJniOffset(pointer_size), entrypoint, pointer_size);
+  }
 
   static MemberOffset GetMethodIndexOffset() {
     return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
@@ -521,7 +601,16 @@
 
   ALWAYS_INLINE ArtMethod* GetInterfaceMethodIfProxy() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
- private:
+  static size_t SizeWithoutPointerFields() {
+    return sizeof(ArtMethod) - sizeof(PtrSizedFields);
+  }
+
+  // Size of an instance of java.lang.reflect.ArtMethod not including its value array.
+  static size_t InstanceSize(size_t pointer_size) {
+    return SizeWithoutPointerFields() + (sizeof(PtrSizedFields) / sizeof(void*)) * pointer_size;
+  }
+
+ protected:
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   // The class we are a part of.
   HeapReference<Class> declaring_class_;
@@ -535,26 +624,6 @@
   // Short cuts to declaring_class_->dex_cache_ member for fast compiled code access.
   HeapReference<ObjectArray<String>> dex_cache_strings_;
 
-  // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
-  // compiled code.
-  uint64_t entry_point_from_interpreter_;
-
-  // Pointer to JNI function registered to this method, or a function to resolve the JNI function.
-  uint64_t entry_point_from_jni_;
-
-  // Method dispatch from portable compiled code invokes this pointer which may cause bridging into
-  // quick compiled code or the interpreter.
-  uint64_t entry_point_from_portable_compiled_code_;
-
-  // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
-  // portable compiled code or the interpreter.
-  uint64_t entry_point_from_quick_compiled_code_;
-
-  // Pointer to a data structure created by the compiler and used by the garbage collector to
-  // determine which registers hold live references to objects within the heap. Keyed by native PC
-  // offsets for the quick compiler and dex PCs for the portable.
-  uint64_t gc_map_;
-
   // Access flags; low 16 bits are defined by spec.
   uint32_t access_flags_;
 
@@ -573,15 +642,46 @@
   // ifTable.
   uint32_t method_index_;
 
+  // Add alignment word here if necessary.
+
+  // Must be the last fields in the method.
+  struct PACKED(4) PtrSizedFields {
+    // Method dispatch from the interpreter invokes this pointer which may cause a bridge into
+    // compiled code.
+    void* entry_point_from_interpreter_;
+
+    // Pointer to JNI function registered to this method, or a function to resolve the JNI function.
+    void* entry_point_from_jni_;
+
+    // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
+    // portable compiled code or the interpreter.
+    void* entry_point_from_quick_compiled_code_;
+
+    // Pointer to a data structure created by the compiler and used by the garbage collector to
+    // determine which registers hold live references to objects within the heap. Keyed by native PC
+    // offsets for the quick compiler and dex PCs for the portable.
+    void* gc_map_;
+
+    // Method dispatch from portable compiled code invokes this pointer which may cause bridging
+    // into quick compiled code or the interpreter. Last to simplify entrypoint logic.
+    void* entry_point_from_portable_compiled_code_;
+  } ptr_sized_fields_;
+
   static GcRoot<Class> java_lang_reflect_ArtMethod_;
 
  private:
+  ALWAYS_INLINE void CheckObjectSizeEqualsMirrorSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   ALWAYS_INLINE ObjectArray<ArtMethod>* GetDexCacheResolvedMethods()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ObjectArray<Class>* GetDexCacheResolvedTypes()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  static size_t PtrSizedFieldsOffset() {
+    return OFFSETOF_MEMBER(ArtMethod, ptr_sized_fields_);
+  }
+
   friend struct art::ArtMethodOffsets;  // for verifying offset information
   DISALLOW_IMPLICIT_CONSTRUCTORS(ArtMethod);
 };
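
The offset helpers above are the heart of the change: OFFSETOF_MEMBER into PtrSizedFields, divided by sizeof(void*), yields a slot index that is independent of the compiling toolchain's pointer width, and rescaling by the requested pointer size rebuilds the field offset for a 32-bit or 64-bit image. A compilable sketch of the arithmetic (the 32-byte start offset is made up, not ART's real layout):

#include <cstddef>
#include <cstdio>

// Model of PtrSizedFields: offsetof()/sizeof(void*) gives the slot index,
// so a 32-bit and a 64-bit compiler compute the same index, and multiplying
// by the target pointer size rebuilds the offset for either image.
struct PtrSizedFields {
  void* entry_point_from_interpreter_;
  void* entry_point_from_jni_;
  void* entry_point_from_quick_compiled_code_;
};

static const size_t kPtrSizedFieldsOffset = 32;  // Hypothetical start offset.

static size_t EntryPointFromJniOffset(size_t pointer_size) {
  return kPtrSizedFieldsOffset +
         offsetof(PtrSizedFields, entry_point_from_jni_) / sizeof(void*) * pointer_size;
}

int main() {
  printf("32-bit image: %zu\n", EntryPointFromJniOffset(4));  // 32 + 1 * 4 = 36
  printf("64-bit image: %zu\n", EntryPointFromJniOffset(8));  // 32 + 1 * 8 = 40
  return 0;
}
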
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5f72dbe..a69d37e 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -402,6 +402,36 @@
   return GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_));
 }
 
+inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
+  Class* super_class = GetSuperClass();
+  return (super_class != nullptr)
+      ? MemberOffset(RoundUp(super_class->GetObjectSize(),
+                             sizeof(mirror::HeapReference<mirror::Object>)))
+      : ClassOffset();
+}
+
+inline MemberOffset Class::GetFirstReferenceStaticFieldOffset() {
+  DCHECK(IsResolved());
+  uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
+  if (ShouldHaveEmbeddedImtAndVTable()) {
+    // Static fields come after the embedded tables.
+    base = mirror::Class::ComputeClassSize(true, GetEmbeddedVTableLength(),
+                                           0, 0, 0, 0, 0);
+  }
+  return MemberOffset(base);
+}
+
+inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking() {
+  DCHECK(IsLoaded());
+  uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
+  if (ShouldHaveEmbeddedImtAndVTable()) {
+    // Static fields come after the embedded tables.
+    base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
+                                           0, 0, 0, 0, 0);
+  }
+  return MemberOffset(base);
+}
+
 inline void Class::SetIFields(ObjectArray<ArtField>* new_ifields)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(NULL == GetFieldObject<ObjectArray<ArtField>>(OFFSET_OF_OBJECT_MEMBER(Class, ifields_)));
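
GetFirstReferenceInstanceFieldOffset() banks on the class linker packing reference instance fields contiguously, starting right after the superclass's instance data rounded up to reference alignment. The arithmetic, sketched with made-up sizes:

#include <cstdint>
#include <cstdio>

// Round x up to a power-of-two multiple, as ART's RoundUp() does.
static uint32_t RoundUp(uint32_t x, uint32_t n) {
  return (x + n - 1) & ~(n - 1);
}

int main() {
  const uint32_t kHeapReferenceSize = 4;  // Compressed 32-bit references.
  uint32_t super_object_size = 22;        // Hypothetical superclass size.
  uint32_t first_ref = RoundUp(super_object_size, kHeapReferenceSize);
  printf("first reference field at offset %u\n", first_ref);  // Prints 24.
  return 0;
}
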
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 4f1af44..82425b5 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -528,6 +528,13 @@
     return SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size);
   }
 
+  void SetObjectSizeWithoutChecks(uint32_t new_object_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Not called within a transaction.
+    return SetField32<false, false, kVerifyNone>(
+        OFFSET_OF_OBJECT_MEMBER(Class, object_size_), new_object_size);
+  }
+
   // Returns true if this class is in the same packages as that class.
   bool IsInSamePackage(Class* that) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -837,6 +844,9 @@
   void SetReferenceInstanceOffsets(uint32_t new_reference_offsets)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Get the offset of the first reference instance field. Other reference instance fields follow.
+  MemberOffset GetFirstReferenceInstanceFieldOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Returns the number of static fields containing reference types.
   uint32_t NumReferenceStaticFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(IsResolved() || IsErroneous());
@@ -853,6 +863,13 @@
     SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, num_reference_static_fields_), new_num);
   }
 
+  // Get the offset of the first reference static field. Other reference static fields follow.
+  MemberOffset GetFirstReferenceStaticFieldOffset() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Get the offset of the first reference static field. Other reference static fields follow.
+  MemberOffset GetFirstReferenceStaticFieldOffsetDuringLinking()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // Gets the static fields of the class.
   ObjectArray<ArtField>* GetSFields() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index c451764..121947d 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -404,8 +404,7 @@
   }
   DCHECK_GE(result, sizeof(Object))
       << " class=" << PrettyTypeOf(GetClass<kNewFlags, kReadBarrierOption>());
-  DCHECK(!(IsArtField<kNewFlags, kReadBarrierOption>())  || result == sizeof(ArtField));
-  DCHECK(!(IsArtMethod<kNewFlags, kReadBarrierOption>()) || result == sizeof(ArtMethod));
+  DCHECK(!(IsArtField<kNewFlags, kReadBarrierOption>()) || result == sizeof(ArtField));
   return result;
 }
 
@@ -911,13 +910,19 @@
         klass = kIsStatic ? nullptr : klass->GetSuperClass()) {
       size_t num_reference_fields =
           kIsStatic ? klass->NumReferenceStaticFields() : klass->NumReferenceInstanceFields();
+      if (num_reference_fields == 0u) {
+        continue;
+      }
+      MemberOffset field_offset = kIsStatic
+          ? klass->GetFirstReferenceStaticFieldOffset()
+          : klass->GetFirstReferenceInstanceFieldOffset();
       for (size_t i = 0; i < num_reference_fields; ++i) {
-        mirror::ArtField* field = kIsStatic ? klass->GetStaticField(i) : klass->GetInstanceField(i);
-        MemberOffset field_offset = field->GetOffset();
         // TODO: Do a simpler check?
         if (kVisitClass || field_offset.Uint32Value() != ClassOffset().Uint32Value()) {
           visitor(this, field_offset, kIsStatic);
         }
+        field_offset = MemberOffset(field_offset.Uint32Value() +
+                                    sizeof(mirror::HeapReference<mirror::Object>));
       }
     }
   }
@@ -956,7 +961,6 @@
     }
   }
 }
-
 }  // namespace mirror
 }  // namespace art
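
With those helpers, the visitation loop above no longer loads an ArtField per field: it fetches the count and the first offset once, then strides by the size of a heap reference. Roughly, with a print standing in for the visitor:

#include <cstdio>

int main() {
  // Hypothetical class: 3 reference fields starting at offset 8, with
  // 4-byte compressed heap references.
  const unsigned kHeapReferenceSize = 4;
  unsigned field_offset = 8;
  for (unsigned i = 0; i < 3; ++i) {
    printf("visitor(obj, offset=%u)\n", field_offset);  // Stand-in for visitor(this, ...).
    field_offset += kHeapReferenceSize;
  }
  return 0;
}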
 
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 4227723..fa1f226 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -200,10 +200,11 @@
   for (Class* cur = c; cur != NULL; cur = cur->GetSuperClass()) {
     ObjectArray<ArtField>* fields = cur->GetIFields();
     if (fields != NULL) {
-      size_t num_ref_ifields = cur->NumReferenceInstanceFields();
-      for (size_t i = 0; i < num_ref_ifields; ++i) {
+      size_t num_ifields = fields->GetLength();
+      for (size_t i = 0; i < num_ifields; ++i) {
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
+          CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
           StackHandleScope<1> hs(Thread::Current());
           FieldHelper fh(hs.NewHandle(field));
           CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
@@ -219,10 +220,11 @@
   if (IsClass()) {
     ObjectArray<ArtField>* fields = AsClass()->GetSFields();
     if (fields != NULL) {
-      size_t num_ref_sfields = AsClass()->NumReferenceStaticFields();
-      for (size_t i = 0; i < num_ref_sfields; ++i) {
+      size_t num_sfields = fields->GetLength();
+      for (size_t i = 0; i < num_sfields; ++i) {
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
+          CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
           StackHandleScope<1> hs(Thread::Current());
           FieldHelper fh(hs.NewHandle(field));
           CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 0ce5231..221feca 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -392,15 +392,26 @@
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
   void SetFieldPtr(MemberOffset field_offset, T new_value)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifndef __LP64__
-    SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-        field_offset, reinterpret_cast<int32_t>(new_value));
-#else
-    SetField64<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-        field_offset, reinterpret_cast<int64_t>(new_value));
-#endif
+    SetFieldPtrWithSize<kTransactionActive, kCheckTransaction, kVerifyFlags>(
+        field_offset, new_value, sizeof(void*));
   }
 
+  template<bool kTransactionActive, bool kCheckTransaction = true,
+      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, typename T>
+  ALWAYS_INLINE void SetFieldPtrWithSize(MemberOffset field_offset, T new_value,
+                                         size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
+    if (pointer_size == 4) {
+      intptr_t ptr = reinterpret_cast<intptr_t>(new_value);
+      DCHECK_EQ(static_cast<int32_t>(ptr), ptr);  // Check that we don't lose any non-zero bits.
+      SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags>(
+          field_offset, static_cast<int32_t>(ptr));
+    } else {
+      SetField64<kTransactionActive, kCheckTransaction, kVerifyFlags>(
+          field_offset, static_cast<int64_t>(reinterpret_cast<intptr_t>(new_value)));
+    }
+  }
   // TODO fix thread safety analysis broken by the use of templates. This should be
   // SHARED_LOCKS_REQUIRED(Locks::mutator_lock_).
   template <const bool kVisitClass, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -413,11 +424,21 @@
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
   T GetFieldPtr(MemberOffset field_offset)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-#ifndef __LP64__
-    return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
-#else
-    return reinterpret_cast<T>(GetField64<kVerifyFlags, kIsVolatile>(field_offset));
-#endif
+    return GetFieldPtrWithSize<T, kVerifyFlags, kIsVolatile>(field_offset, sizeof(void*));
+  }
+
+  template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
+  ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, size_t pointer_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(pointer_size == 4 || pointer_size == 8) << pointer_size;
+    if (pointer_size == 4) {
+      return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
+    } else {
+      int64_t v = GetField64<kVerifyFlags, kIsVolatile>(field_offset);
+      // Check that we don't lose any non-zero bits.
+      DCHECK_EQ(reinterpret_cast<int64_t>(reinterpret_cast<T>(v)), v);
+      return reinterpret_cast<T>(v);
+    }
   }
 
   // TODO: Fix this when annotalysis works with visitors.
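
SetFieldPtrWithSize()/GetFieldPtrWithSize() replace the compile-time __LP64__ switch with a runtime width, so a 64-bit compiler (e.g. dex2oat targeting a 32-bit image) can access 32-bit pointer fields. The narrowing direction must verify that no address bits are dropped; a self-contained sketch of the store path, using a raw buffer as the field:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Store a native pointer into a field of explicit width, mirroring the
// SetFieldPtrWithSize narrowing check (the field is a raw buffer here).
static void StorePtr(uint8_t* field, size_t pointer_size, void* value) {
  assert(pointer_size == 4u || pointer_size == 8u);
  intptr_t ptr = reinterpret_cast<intptr_t>(value);
  if (pointer_size == 4u) {
    int32_t narrow = static_cast<int32_t>(ptr);
    assert(static_cast<intptr_t>(narrow) == ptr);  // No high bits lost.
    std::memcpy(field, &narrow, sizeof(narrow));
  } else {
    int64_t wide = static_cast<int64_t>(ptr);
    std::memcpy(field, &wide, sizeof(wide));
  }
}

int main() {
  uint8_t field[8] = {};
  int local = 42;
  StorePtr(field, sizeof(void*), &local);
  return 0;
}
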
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index a0aaa9e..4402031 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -78,6 +78,14 @@
 TEST_F(ObjectTest, Constants) {
   EXPECT_EQ(kObjectReferenceSize, sizeof(HeapReference<Object>));
   EXPECT_EQ(kObjectHeaderSize, sizeof(Object));
+  EXPECT_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_32,
+            ArtMethod::EntryPointFromPortableCompiledCodeOffset(4).Int32Value());
+  EXPECT_EQ(MIRROR_ART_METHOD_PORTABLE_CODE_OFFSET_64,
+            ArtMethod::EntryPointFromPortableCompiledCodeOffset(8).Int32Value());
+  EXPECT_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_32,
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(4).Int32Value());
+  EXPECT_EQ(MIRROR_ART_METHOD_QUICK_CODE_OFFSET_64,
+            ArtMethod::EntryPointFromQuickCompiledCodeOffset(8).Int32Value());
 }
 
 TEST_F(ObjectTest, IsInSamePackage) {
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index 64408a6..30b8aa3 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -109,6 +109,14 @@
 
   int32_t CompareTo(String* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Offset is only used during testing so use non-transactional mode.
+    DCHECK_LE(0, new_offset);
+    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
+  }
+
+  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   static Class* GetJavaLangString() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     DCHECK(!java_lang_String_.IsNull());
     return java_lang_String_.Read();
@@ -134,21 +142,12 @@
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
   }
 
-  void SetOffset(int32_t new_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // Offset is only used during testing so use non-transactional mode.
-    DCHECK_LE(0, new_offset);
-    DCHECK_GE(GetLength(), new_offset);
-    SetField32<false>(OFFSET_OF_OBJECT_MEMBER(String, offset_), new_offset);
-  }
-
   static String* Alloc(Thread* self, int32_t utf16_length)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static String* Alloc(Thread* self, Handle<CharArray> array)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  void SetArray(CharArray* new_array) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<CharArray> array_;
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index f6e2b21..d40d64b 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -17,8 +17,14 @@
 #include "dalvik_system_VMRuntime.h"
 
 #include <limits.h>
+#include <ScopedUtfChars.h>
 
-#include "ScopedUtfChars.h"
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow"
+#include "toStringArray.h"
+#pragma GCC diagnostic pop
+
+#include "arch/instruction_set.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "debugger.h"
@@ -28,7 +34,6 @@
 #include "gc/heap.h"
 #include "gc/space/dlmalloc_space.h"
 #include "gc/space/image_space.h"
-#include "instruction_set.h"
 #include "intern_table.h"
 #include "jni_internal.h"
 #include "mirror/art_method-inl.h"
@@ -41,11 +46,6 @@
 #include "thread.h"
 #include "thread_list.h"
 
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wshadow"
-#include "toStringArray.h"
-#pragma GCC diagnostic pop
-
 namespace art {
 
 static jfloat VMRuntime_getTargetHeapUtilization(JNIEnv*, jobject) {
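
The reshuffled includes keep the -Wshadow waiver scoped tightly around toStringArray.h, a header ART cannot fix in place. The push/ignored/pop pattern limits the suppression to the bracketed lines; a generic compilable illustration (the shadowing function is invented for the demo):

// The pragma pair scopes a diagnostic waiver: -Wshadow stays active for
// the rest of the translation unit.
static int value = 1;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
static int Shadowing() {
  int value = 2;  // Shadows the global; the warning is suppressed here only.
  return value;
}
#pragma GCC diagnostic pop

int main() {
  return (Shadowing() == 2 && value == 1) ? 0 : 1;
}
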
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 0966954..f1a04cb 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -18,8 +18,8 @@
 
 #include <stdlib.h>
 
+#include "arch/instruction_set.h"
 #include "debugger.h"
-#include "instruction_set.h"
 #include "java_vm_ext.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index c2c6b12..ffadfc6 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -72,7 +72,7 @@
       if (count < method_count) {
         methods[count].name = m->GetName();
         methods[count].signature = m->GetShorty();
-        methods[count].fnPtr = const_cast<void*>(m->GetNativeMethod());
+        methods[count].fnPtr = m->GetEntryPointFromJni();
         count++;
       } else {
         LOG(WARNING) << "Output native method array too small. Skipping " << PrettyMethod(m);
@@ -85,7 +85,7 @@
       if (count < method_count) {
         methods[count].name = m->GetName();
         methods[count].signature = m->GetShorty();
-        methods[count].fnPtr = const_cast<void*>(m->GetNativeMethod());
+        methods[count].fnPtr = m->GetEntryPointFromJni();
         count++;
       } else {
         LOG(WARNING) << "Output native method array too small. Skipping " << PrettyMethod(m);
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 0749c06..bfb27dd 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -15,15 +15,17 @@
  */
 
 #include "oat.h"
-#include "utils.h"
 
 #include <string.h>
 #include <zlib.h>
 
+#include "arch/instruction_set_features.h"
+#include "utils.h"
+
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '4', '6', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '4', '8', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
diff --git a/runtime/oat.h b/runtime/oat.h
index f577b07..8fb02b8 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -19,14 +19,16 @@
 
 #include <vector>
 
+#include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "dex_file.h"
-#include "instruction_set.h"
 #include "quick/quick_method_frame_info.h"
 #include "safe_map.h"
 
 namespace art {
 
+class InstructionSetFeatures;
+
 class PACKED(4) OatHeader {
  public:
   static const uint8_t kOatMagic[4];
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 1a97c35..3e6c86b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -105,11 +105,7 @@
     profile_clock_source_(kDefaultTraceClockSource),
     verify_(true),
     image_isa_(kRuntimeISA),
-    use_homogeneous_space_compaction_for_oom_(false),  // If we are using homogeneous space
-                                                       // compaction then default background
-                                                       // compaction to off since homogeneous
-                                                       // space compactions when we transition
-                                                       // to not jank perceptible.
+    use_homogeneous_space_compaction_for_oom_(true),  // Enable hspace compaction on OOM by default.
     min_interval_homogeneous_space_compaction_by_oom_(MsToNs(100 * 1000))  // 100s.
     {}
 
diff --git a/runtime/parsed_options.h b/runtime/parsed_options.h
index 26a2f31..9294868 100644
--- a/runtime/parsed_options.h
+++ b/runtime/parsed_options.h
@@ -25,7 +25,7 @@
 #include "globals.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
-#include "instruction_set.h"
+#include "arch/instruction_set.h"
 #include "profiler_options.h"
 
 namespace art {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index d338ad7..078e7d2 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -37,6 +37,7 @@
 #include "arch/arm/registers_arm.h"
 #include "arch/arm64/quick_method_frame_info_arm64.h"
 #include "arch/arm64/registers_arm64.h"
+#include "arch/instruction_set_features.h"
 #include "arch/mips/quick_method_frame_info_mips.h"
 #include "arch/mips/registers_mips.h"
 #include "arch/x86/quick_method_frame_info_x86.h"
@@ -437,10 +438,14 @@
 
   started_ = true;
 
-  if (IsZygote()) {
+  // Use !IsCompiler so that we get test coverage; tests are never the zygote.
+  if (!IsCompiler()) {
     ScopedObjectAccess soa(self);
-    Runtime::Current()->GetInternTable()->AddImageStringsToTable(heap_->GetImageSpace());
-    Runtime::Current()->GetClassLinker()->MoveImageClassesToClassTable();
+    gc::space::ImageSpace* image_space = heap_->GetImageSpace();
+    if (image_space != nullptr) {
+      Runtime::Current()->GetInternTable()->AddImageStringsToTable(image_space);
+      Runtime::Current()->GetClassLinker()->MoveImageClassesToClassTable();
+    }
   }
 
   if (!IsImageDex2OatEnabled() || !Runtime::Current()->GetHeap()->HasImageSpace()) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 3cbe1e5..39fd910 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -26,11 +26,11 @@
 #include <utility>
 #include <vector>
 
+#include "arch/instruction_set.h"
 #include "base/allocator.h"
 #include "compiler_callbacks.h"
 #include "gc_root.h"
 #include "instrumentation.h"
-#include "instruction_set.h"
 #include "jobject_comparator.h"
 #include "object_callbacks.h"
 #include "offsets.h"
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index d4ec803..e377542 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -27,10 +27,10 @@
 
 #include <sstream>
 
+#include "arch/instruction_set.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "gc/heap.h"
-#include "instruction_set.h"
 #include "os.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
@@ -110,11 +110,17 @@
     PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
     return;
   }
-  std::unique_ptr<File> file(new File(fd, stack_trace_file_));
-  if (!file->WriteFully(s.data(), s.size())) {
-    PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file_ << "'";
+  std::unique_ptr<File> file(new File(fd, stack_trace_file_, true));
+  bool success = file->WriteFully(s.data(), s.size());
+  if (success) {
+    success = file->FlushCloseOrErase() == 0;
   } else {
+    file->Erase();
+  }
+  if (success) {
     LOG(INFO) << "Wrote stack traces to '" << stack_trace_file_ << "'";
+  } else {
+    PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file_ << "'";
   }
 }
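
This hunk adopts the checked-usage File contract that appears throughout this change (see also zip_archive_test.cc below): on success the file must be flushed and closed explicitly, and on failure it must be erased, rather than leaving a truncated trace file behind. A rough stdio model of the pattern; the function name and path are hypothetical:

#include <cstdio>
#include <string>

// Write-then-commit: write fully, flush, close; erase the partial file if
// any step failed (loosely mirrors WriteFully + FlushCloseOrErase/Erase).
static bool WriteAllOrErase(const std::string& path, const std::string& data) {
  std::FILE* f = std::fopen(path.c_str(), "wb");
  if (f == nullptr) {
    return false;
  }
  bool ok = std::fwrite(data.data(), 1, data.size(), f) == data.size();
  ok = (std::fflush(f) == 0) && ok;
  ok = (std::fclose(f) == 0) && ok;
  if (!ok) {
    std::remove(path.c_str());  // Erase the truncated file.
  }
  return ok;
}

int main() {
  return WriteAllOrErase("/tmp/stack_traces.txt", "traces...\n") ? 0 : 1;
}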
 
diff --git a/runtime/stack.h b/runtime/stack.h
index 66c840d..1d772e6 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -20,8 +20,8 @@
 #include <stdint.h>
 #include <string>
 
+#include "arch/instruction_set.h"
 #include "dex_file.h"
-#include "instruction_set.h"
 #include "mirror/object_reference.h"
 #include "throw_location.h"
 #include "utils.h"
diff --git a/runtime/thread.h b/runtime/thread.h
index 7e567fb..b69d2f4 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -25,6 +25,7 @@
 #include <setjmp.h>
 #include <string>
 
+#include "arch/instruction_set.h"
 #include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -34,7 +35,6 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "globals.h"
 #include "handle_scope.h"
-#include "instruction_set.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 29c01e4..2cc50b3 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -431,6 +431,15 @@
                                                     instrumentation::Instrumentation::kMethodExited |
                                                     instrumentation::Instrumentation::kMethodUnwind);
     }
+    if (the_trace->trace_file_.get() != nullptr) {
+      // Do not try to erase the file; instead, flush and close it explicitly.
+      if (the_trace->trace_file_->Flush() != 0) {
+        PLOG(ERROR) << "Could not flush trace file.";
+      }
+      if (the_trace->trace_file_->Close() != 0) {
+        PLOG(ERROR) << "Could not close trace file.";
+      }
+    }
     delete the_trace;
   }
   runtime->GetThreadList()->ResumeAll();
diff --git a/runtime/utils.cc b/runtime/utils.cc
index f2d710d..9a4c875 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1120,7 +1120,8 @@
 
 void DumpNativeStack(std::ostream& os, pid_t tid, const char* prefix,
     mirror::ArtMethod* current_method) {
-#ifdef __linux__
+  // TODO: enable on __linux__; see b/15446488.
+#if 0
   // b/18119146
   if (RUNNING_ON_VALGRIND != 0) {
     return;
diff --git a/runtime/utils.h b/runtime/utils.h
index 669fe6c..d83013a 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -24,10 +24,10 @@
 #include <string>
 #include <vector>
 
+#include "arch/instruction_set.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "globals.h"
-#include "instruction_set.h"
 #include "primitive.h"
 
 namespace art {
@@ -115,6 +115,20 @@
   return (-limit <= value) && (value < limit);
 }
 
+static inline bool IsInt32(int N, int32_t value) {
+  CHECK_LT(0, N);
+  CHECK_LT(static_cast<size_t>(N), 8 * sizeof(int32_t));
+  int32_t limit = static_cast<int32_t>(1) << (N - 1);
+  return (-limit <= value) && (value < limit);
+}
+
+static inline bool IsInt64(int N, int64_t value) {
+  CHECK_LT(0, N);
+  CHECK_LT(static_cast<size_t>(N), 8 * sizeof(int64_t));
+  int64_t limit = static_cast<int64_t>(1) << (N - 1);
+  return (-limit <= value) && (value < limit);
+}
+
 static inline bool IsUint(int N, intptr_t value) {
   CHECK_LT(0, N);
   CHECK_LT(N, kBitsPerIntPtrT);
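
The new IsInt32()/IsInt64() follow the same recipe as the existing IsInt(): an N-bit two's-complement value lies in the half-open range [-2^(N-1), 2^(N-1)). A quick boundary check of that predicate:

#include <cassert>
#include <cstdint>

// N-bit signed range test, same shape as the IsInt64 added above.
static bool IsIntN(int N, int64_t value) {
  assert(0 < N && N < 64);
  int64_t limit = static_cast<int64_t>(1) << (N - 1);
  return -limit <= value && value < limit;
}

int main() {
  assert(IsIntN(8, 127) && !IsIntN(8, 128));    // Upper bound is exclusive.
  assert(IsIntN(8, -128) && !IsIntN(8, -129));  // Lower bound is inclusive.
  return 0;
}
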
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 357acf0..0c4bf3c 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -18,32 +18,26 @@
 #define ART_RUNTIME_VERIFIER_METHOD_VERIFIER_H_
 
 #include <memory>
-#include <set>
 #include <vector>
 
-#include "base/casts.h"
 #include "base/macros.h"
-#include "base/stl_util.h"
-#include "class_reference.h"
 #include "dex_file.h"
-#include "dex_instruction.h"
 #include "handle.h"
 #include "instruction_flags.h"
 #include "method_reference.h"
-#include "reg_type.h"
 #include "reg_type_cache.h"
-#include "register_line.h"
-#include "safe_map.h"
 
 namespace art {
 
+class Instruction;
 struct ReferenceMap2Visitor;
-template<class T> class Handle;
 
 namespace verifier {
 
-class MethodVerifier;
 class DexPcToReferenceMap;
+class MethodVerifier;
+class RegisterLine;
+class RegType;
 
 /*
  * "Direct" and "virtual" methods are stored independently. The type of call used to invoke the
@@ -128,6 +122,8 @@
  private:
   std::unique_ptr<RegisterLine*[]> register_lines_;
   size_t size_;
+
+  DISALLOW_COPY_AND_ASSIGN(PcToRegisterLineTable);
 };
 
 // The verifier
@@ -733,6 +729,8 @@
   // even though we might detect to be a compiler. Should only be set when running
   // VerifyMethodAndDump.
   const bool verify_to_dump_;
+
+  DISALLOW_COPY_AND_ASSIGN(MethodVerifier);
 };
 std::ostream& operator<<(std::ostream& os, const MethodVerifier::FailureKind& rhs);
 
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 34d6caa..05958b5 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -17,17 +17,14 @@
 #ifndef ART_RUNTIME_VERIFIER_REG_TYPE_H_
 #define ART_RUNTIME_VERIFIER_REG_TYPE_H_
 
-#include <limits>
 #include <stdint.h>
+#include <limits>
 #include <set>
 #include <string>
 
-#include "jni.h"
-
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc_root.h"
-#include "globals.h"
 #include "object_callbacks.h"
 #include "primitive.h"
 
@@ -35,6 +32,7 @@
 namespace mirror {
 class Class;
 }  // namespace mirror
+
 namespace verifier {
 
 class RegTypeCache;
@@ -578,17 +576,17 @@
 
   bool IsConstantChar() const OVERRIDE {
     return IsConstant() && ConstantValue() >= 0 &&
-           ConstantValue() <= std::numeric_limits<jchar>::max();
+           ConstantValue() <= std::numeric_limits<uint16_t>::max();
   }
   bool IsConstantByte() const OVERRIDE {
     return IsConstant() &&
-           ConstantValue() >= std::numeric_limits<jbyte>::min() &&
-           ConstantValue() <= std::numeric_limits<jbyte>::max();
+           ConstantValue() >= std::numeric_limits<int8_t>::min() &&
+           ConstantValue() <= std::numeric_limits<int8_t>::max();
   }
   bool IsConstantShort() const OVERRIDE {
     return IsConstant() &&
-           ConstantValue() >= std::numeric_limits<jshort>::min() &&
-           ConstantValue() <= std::numeric_limits<jshort>::max();
+           ConstantValue() >= std::numeric_limits<int16_t>::min() &&
+           ConstantValue() <= std::numeric_limits<int16_t>::max();
   }
   virtual bool IsConstantTypes() const OVERRIDE { return true; }
 
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 219e687..244deed 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -114,6 +114,17 @@
   }
 }
 
+inline size_t RegisterLine::GetMaxNonZeroReferenceReg(MethodVerifier* verifier,
+                                                      size_t max_ref_reg) const {
+  size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
+  for (; i < num_regs_; i++) {
+    if (GetRegisterType(verifier, i).IsNonZeroReferenceTypes()) {
+      max_ref_reg = i;
+    }
+  }
+  return max_ref_reg;
+}
+
 inline bool RegisterLine::VerifyRegisterType(MethodVerifier* verifier, uint32_t vsrc,
                                              const RegType& check_type) {
   // Verify the src register type against the check type refining the type of the register
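
The out-of-lined GetMaxNonZeroReferenceReg() scans upward from the caller's starting index (a negative sentinel that arrived as size_t is clamped to zero) and remembers the highest register still holding a non-zero reference, which bounds the reference bitmap written later. Modeled with a plain vector:

#include <cstdio>
#include <vector>

// Find the highest register index holding a reference (this bounds the
// size of the reference bitmap). A negative sentinel start clamps to zero.
static size_t GetMaxNonZeroReferenceReg(const std::vector<bool>& is_ref,
                                        size_t max_ref_reg) {
  size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
  for (; i < is_ref.size(); ++i) {
    if (is_ref[i]) {
      max_ref_reg = i;
    }
  }
  return max_ref_reg;
}

int main() {
  std::vector<bool> regs = {false, true, false, true, false};
  printf("%zu\n", GetMaxNonZeroReferenceReg(regs, static_cast<size_t>(-1)));  // 3
  return 0;
}
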
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 3139204..72d7938 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -310,8 +310,12 @@
     verifier->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "monitor-enter stack overflow: "
         << monitors_.size();
   } else {
-    SetRegToLockDepth(reg_idx, monitors_.size());
-    monitors_.push_back(insn_idx);
+    if (SetRegToLockDepth(reg_idx, monitors_.size())) {
+      monitors_.push_back(insn_idx);
+    } else {
+      verifier->Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unexpected monitor-enter on register v" <<
+          reg_idx;
+    }
   }
 }
 
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index 8f7823a..52b5c13 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -20,14 +20,16 @@
 #include <memory>
 #include <vector>
 
-#include "dex_instruction.h"
-#include "reg_type.h"
 #include "safe_map.h"
 
 namespace art {
+
+class Instruction;
+
 namespace verifier {
 
 class MethodVerifier;
+class RegType;
 
 /*
  * Register type categories, for type checking.
@@ -275,15 +277,7 @@
   bool MergeRegisters(MethodVerifier* verifier, const RegisterLine* incoming_line)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t GetMaxNonZeroReferenceReg(MethodVerifier* verifier, size_t max_ref_reg) {
-    size_t i = static_cast<int>(max_ref_reg) < 0 ? 0 : max_ref_reg;
-    for (; i < num_regs_; i++) {
-      if (GetRegisterType(verifier, i).IsNonZeroReferenceTypes()) {
-        max_ref_reg = i;
-      }
-    }
-    return max_ref_reg;
-  }
+  size_t GetMaxNonZeroReferenceReg(MethodVerifier* verifier, size_t max_ref_reg) const;
 
   // Write a bit at each register location that holds a reference.
   void WriteReferenceBitMap(MethodVerifier* verifier, std::vector<uint8_t>* data, size_t max_bytes);
@@ -313,15 +307,18 @@
     }
   }
 
-  void SetRegToLockDepth(size_t reg, size_t depth) {
+  bool SetRegToLockDepth(size_t reg, size_t depth) {
     CHECK_LT(depth, 32u);
-    DCHECK(!IsSetLockDepth(reg, depth));
+    if (IsSetLockDepth(reg, depth)) {
+      return false;  // Register already holds the lock, so locking twice is erroneous.
+    }
     auto it = reg_to_lock_depths_.find(reg);
     if (it == reg_to_lock_depths_.end()) {
       reg_to_lock_depths_.Put(reg, 1 << depth);
     } else {
       it->second |= (1 << depth);
     }
+    return true;
   }
 
   void ClearRegToLockDepth(size_t reg, size_t depth) {
@@ -347,21 +344,23 @@
     SetResultTypeToUnknown(verifier);
   }
 
-  // Storage for the result register's type, valid after an invocation
+  // Storage for the result register's type, valid after an invocation.
   uint16_t result_[2];
 
   // Length of reg_types_
   const uint32_t num_regs_;
 
-  // A stack of monitor enter locations
+  // A stack of monitor enter locations.
   std::vector<uint32_t, TrackingAllocator<uint32_t, kAllocatorTagVerifier>> monitors_;
   // A map from register to a bit vector of indices into the monitors_ stack. As we pop the monitor
   // stack we verify that monitor-enter/exit are correctly nested. That is, if there was a
-  // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5
+  // monitor-enter on v5 and then on v6, we expect the monitor-exit to be on v6 then on v5.
   AllocationTrackingSafeMap<uint32_t, uint32_t, kAllocatorTagVerifier> reg_to_lock_depths_;
 
   // An array of RegType Ids associated with each dex register.
   uint16_t line_[0];
+
+  DISALLOW_COPY_AND_ASSIGN(RegisterLine);
 };
 
 }  // namespace verifier
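
The reg_to_lock_depths_ map pairs each register with a 32-bit mask, one bit per monitor-stack depth. SetRegToLockDepth() now reports an already-set bit instead of DCHECK-failing, so bytecode that monitor-enters on the same register twice becomes a hard verifier error rather than a debug-build abort. The bookkeeping in isolation:

#include <cstdint>
#include <cstdio>
#include <map>

// Per-register bitmask of monitor-stack depths (bit d set means this
// register holds the lock taken at depth d). false flags double-locking.
static std::map<uint32_t, uint32_t> reg_to_lock_depths;

static bool SetRegToLockDepth(uint32_t reg, uint32_t depth) {
  uint32_t bit = 1u << depth;
  uint32_t& mask = reg_to_lock_depths[reg];  // Value-initialized to 0.
  if ((mask & bit) != 0) {
    return false;  // Already locked at this depth.
  }
  mask |= bit;
  return true;
}

int main() {
  printf("%d\n", SetRegToLockDepth(5, 0));  // 1: first lock on v5.
  printf("%d\n", SetRegToLockDepth(5, 0));  // 0: erroneous re-lock.
  return 0;
}
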
diff --git a/runtime/zip_archive_test.cc b/runtime/zip_archive_test.cc
index 96abee2..70a4dda 100644
--- a/runtime/zip_archive_test.cc
+++ b/runtime/zip_archive_test.cc
@@ -41,7 +41,7 @@
 
   ScratchFile tmp;
   ASSERT_NE(-1, tmp.GetFd());
-  std::unique_ptr<File> file(new File(tmp.GetFd(), tmp.GetFilename()));
+  std::unique_ptr<File> file(new File(tmp.GetFd(), tmp.GetFilename(), false));
   ASSERT_TRUE(file.get() != NULL);
   bool success = zip_entry->ExtractToFile(*file, &error_msg);
   ASSERT_TRUE(success) << error_msg;
diff --git a/test/083-compiler-regressions/expected.txt b/test/083-compiler-regressions/expected.txt
index 51bf847..78c92fc 100644
--- a/test/083-compiler-regressions/expected.txt
+++ b/test/083-compiler-regressions/expected.txt
@@ -1,3 +1,4 @@
+b17325447 passes
 b17630605 passes
 b17411468 passes
 b2296099 passes
diff --git a/test/083-compiler-regressions/src/Main.java b/test/083-compiler-regressions/src/Main.java
index 9ad8ea7..285c360 100644
--- a/test/083-compiler-regressions/src/Main.java
+++ b/test/083-compiler-regressions/src/Main.java
@@ -30,6 +30,7 @@
     }
 
     public static void main(String args[]) throws Exception {
+        b17325447();
         b17630605();
         b17411468();
         b2296099Test();
@@ -64,6 +65,31 @@
         minDoubleWith3ConstsTest();
     }
 
+    public static double b17325447_i1(int i1, double f) {
+      return f;
+    }
+
+    public static double b17325447_i2(int i1, int i2, double f) {
+      return f;
+    }
+
+    public static double b17325447_i3(int i1, int i2, int i3, double f) {
+      return f;
+    }
+
+    public static void b17325447() {
+      // b/17325447 - x86 handling of special identity method w/ double spanning reg/mem.
+      double d = 0.0;
+      d += b17325447_i1(123, 1.0);
+      d += b17325447_i2(123, 456, 2.0);
+      d += b17325447_i3(123, 456, 789, 3.0);
+      if (d == 6.0) {
+        System.out.println("b17325447 passes");
+      } else {
+        System.out.println("b17325447 fails: " + d);
+      }
+    }
+
     public static void b17630605() {
       // b/17630605 - failure to properly handle min long immediates.
       long a1 = 40455547223404749L;
diff --git a/test/100-reflect2/expected.txt b/test/100-reflect2/expected.txt
index 1af4121..8fdeccc 100644
--- a/test/100-reflect2/expected.txt
+++ b/test/100-reflect2/expected.txt
@@ -32,7 +32,7 @@
 62 (class java.lang.Long)
 14 (class java.lang.Short)
 [public java.lang.String(), java.lang.String(int,int,char[]), public java.lang.String(java.lang.String), public java.lang.String(java.lang.StringBuffer), public java.lang.String(java.lang.StringBuilder), public java.lang.String(byte[]), public java.lang.String(byte[],int), public java.lang.String(byte[],int,int), public java.lang.String(byte[],int,int,int), public java.lang.String(byte[],int,int,java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],int,int,java.nio.charset.Charset), public java.lang.String(byte[],java.lang.String) throws java.io.UnsupportedEncodingException, public java.lang.String(byte[],java.nio.charset.Charset), public java.lang.String(char[]), public java.lang.String(char[],int,int), public java.lang.String(int[],int,int)]
-[private final char[] java.lang.String.value, private final int java.lang.String.count, private int java.lang.String.hashCode, private final int java.lang.String.offset, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final long java.lang.String.serialVersionUID, private static final char java.lang.String.REPLACEMENT_CHAR]
+[private final int java.lang.String.count, private int java.lang.String.hashCode, private final int java.lang.String.offset, private final char[] java.lang.String.value, private static final char[] java.lang.String.ASCII, public static final java.util.Comparator java.lang.String.CASE_INSENSITIVE_ORDER, private static final char java.lang.String.REPLACEMENT_CHAR, private static final long java.lang.String.serialVersionUID]
 [void java.lang.String._getChars(int,int,char[],int), public char java.lang.String.charAt(int), public int java.lang.String.codePointAt(int), public int java.lang.String.codePointBefore(int), public int java.lang.String.codePointCount(int,int), public volatile int java.lang.String.compareTo(java.lang.Object), public native int java.lang.String.compareTo(java.lang.String), public int java.lang.String.compareToIgnoreCase(java.lang.String), public java.lang.String java.lang.String.concat(java.lang.String), public boolean java.lang.String.contains(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.CharSequence), public boolean java.lang.String.contentEquals(java.lang.StringBuffer), public boolean java.lang.String.endsWith(java.lang.String), public boolean java.lang.String.equals(java.lang.Object), public boolean java.lang.String.equalsIgnoreCase(java.lang.String), public void java.lang.String.getBytes(int,int,byte[],int), public [B java.lang.String.getBytes(), public [B java.lang.String.getBytes(java.lang.String) throws java.io.UnsupportedEncodingException, public [B java.lang.String.getBytes(java.nio.charset.Charset), public void java.lang.String.getChars(int,int,char[],int), public int java.lang.String.hashCode(), public int java.lang.String.indexOf(int), public int java.lang.String.indexOf(int,int), public int java.lang.String.indexOf(java.lang.String), public int java.lang.String.indexOf(java.lang.String,int), public native java.lang.String java.lang.String.intern(), public boolean java.lang.String.isEmpty(), public int java.lang.String.lastIndexOf(int), public int java.lang.String.lastIndexOf(int,int), public int java.lang.String.lastIndexOf(java.lang.String), public int java.lang.String.lastIndexOf(java.lang.String,int), public int java.lang.String.length(), public boolean java.lang.String.matches(java.lang.String), public int java.lang.String.offsetByCodePoints(int,int), public boolean java.lang.String.regionMatches(int,java.lang.String,int,int), public boolean java.lang.String.regionMatches(boolean,int,java.lang.String,int,int), public java.lang.String java.lang.String.replace(char,char), public java.lang.String java.lang.String.replace(java.lang.CharSequence,java.lang.CharSequence), public java.lang.String java.lang.String.replaceAll(java.lang.String,java.lang.String), public java.lang.String java.lang.String.replaceFirst(java.lang.String,java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String), public [Ljava.lang.String; java.lang.String.split(java.lang.String,int), public boolean java.lang.String.startsWith(java.lang.String), public boolean java.lang.String.startsWith(java.lang.String,int), public java.lang.CharSequence java.lang.String.subSequence(int,int), public java.lang.String java.lang.String.substring(int), public java.lang.String java.lang.String.substring(int,int), public [C java.lang.String.toCharArray(), public java.lang.String java.lang.String.toLowerCase(), public java.lang.String java.lang.String.toLowerCase(java.util.Locale), public java.lang.String java.lang.String.toString(), public java.lang.String java.lang.String.toUpperCase(), public java.lang.String java.lang.String.toUpperCase(java.util.Locale), public java.lang.String java.lang.String.trim(), public static java.lang.String java.lang.String.copyValueOf(char[]), public static java.lang.String java.lang.String.copyValueOf(char[],int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.failedBoundsCheck(int,int,int), private native int java.lang.String.fastIndexOf(int,int), private char java.lang.String.foldCase(char), public static transient java.lang.String java.lang.String.format(java.lang.String,java.lang.Object[]), public static transient java.lang.String java.lang.String.format(java.util.Locale,java.lang.String,java.lang.Object[]), private java.lang.StringIndexOutOfBoundsException java.lang.String.indexAndLength(int), private static int java.lang.String.indexOf(java.lang.String,java.lang.String,int,int,char), private int java.lang.String.indexOfSupplementary(int,int), private int java.lang.String.lastIndexOfSupplementary(int,int), private java.lang.StringIndexOutOfBoundsException java.lang.String.startEndAndLength(int,int), public static java.lang.String java.lang.String.valueOf(char), public static java.lang.String java.lang.String.valueOf(double), public static java.lang.String java.lang.String.valueOf(float), public static java.lang.String java.lang.String.valueOf(int), public static java.lang.String java.lang.String.valueOf(long), public static java.lang.String java.lang.String.valueOf(java.lang.Object), public static java.lang.String java.lang.String.valueOf(boolean), public static java.lang.String java.lang.String.valueOf(char[]), public static java.lang.String java.lang.String.valueOf(char[],int,int)]
 []
 [interface java.io.Serializable, interface java.lang.Comparable, interface java.lang.CharSequence]
diff --git a/test/128-reg-spilling-on-implicit-nullcheck/expected.txt b/test/128-reg-spilling-on-implicit-nullcheck/expected.txt
new file mode 100644
index 0000000..9bdf658
--- /dev/null
+++ b/test/128-reg-spilling-on-implicit-nullcheck/expected.txt
@@ -0,0 +1 @@
+t7q = 2
diff --git a/test/128-reg-spilling-on-implicit-nullcheck/info.txt b/test/128-reg-spilling-on-implicit-nullcheck/info.txt
new file mode 100644
index 0000000..18b2112
--- /dev/null
+++ b/test/128-reg-spilling-on-implicit-nullcheck/info.txt
@@ -0,0 +1 @@
+This is a compiler regression test for missing register spilling on an implicit null check.
diff --git a/test/128-reg-spilling-on-implicit-nullcheck/src/Main.java b/test/128-reg-spilling-on-implicit-nullcheck/src/Main.java
new file mode 100644
index 0000000..48276bf
--- /dev/null
+++ b/test/128-reg-spilling-on-implicit-nullcheck/src/Main.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2007 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+    public static void main(String[] args) {
+        int t7q = 0;
+        long q = 1L;
+
+        try {
+            for (int i = 1; i < 8; i++) {
+                t7q = (--t7q);
+                TestClass f = null;
+                t7q = f.field;
+            }
+        }
+        catch (NullPointerException wpw) {
+            q++;
+        }
+        finally {
+            t7q += (int)(1 - ((q - q) - 2));
+        }
+
+        System.out.println("t7q = " + t7q);
+    }
+}
+
+class TestClass {
+    public int field;
+    public void meth() {field = 1;}
+}
diff --git a/test/417-optimizing-arith-div/src/Main.java b/test/417-optimizing-arith-div/src/Main.java
index 5825d24..909ceb4 100644
--- a/test/417-optimizing-arith-div/src/Main.java
+++ b/test/417-optimizing-arith-div/src/Main.java
@@ -24,6 +24,12 @@
     }
   }
 
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void expectEquals(float expected, float result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -78,18 +84,33 @@
     } catch (java.lang.RuntimeException e) {
     }
   }
+
+  public static void expectDivisionByZero(long value) {
+    try {
+      $opt$Div(value, 0L);
+      throw new Error("Expected RuntimeException when dividing by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$DivZero(value);
+      throw new Error("Expected RuntimeException when dividing by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
   public static void main(String[] args) {
     div();
   }
 
   public static void div() {
     divInt();
+    divLong();
     divFloat();
     divDouble();
   }
 
   private static void divInt() {
-    expectEquals(2, $opt$DivLit(6));
+    expectEquals(2, $opt$DivConst(6));
     expectEquals(2, $opt$Div(6, 3));
     expectEquals(6, $opt$Div(6, 1));
     expectEquals(-2, $opt$Div(6, -3));
@@ -111,6 +132,35 @@
     expectDivisionByZero(Integer.MIN_VALUE);
   }
 
+  private static void divLong() {
+    expectEquals(2L, $opt$DivConst(6L));
+    expectEquals(2L, $opt$Div(6L, 3L));
+    expectEquals(6L, $opt$Div(6L, 1L));
+    expectEquals(-2L, $opt$Div(6L, -3L));
+    expectEquals(1L, $opt$Div(4L, 3L));
+    expectEquals(-1L, $opt$Div(4L, -3L));
+    expectEquals(5L, $opt$Div(23L, 4L));
+    expectEquals(-5L, $opt$Div(-23L, 4L));
+
+    expectEquals(-Integer.MAX_VALUE, $opt$Div(Integer.MAX_VALUE, -1L));
+    expectEquals(2147483648L, $opt$Div(Integer.MIN_VALUE, -1L));
+    expectEquals(-1073741824L, $opt$Div(Integer.MIN_VALUE, 2L));
+
+    expectEquals(-Long.MAX_VALUE, $opt$Div(Long.MAX_VALUE, -1L));
+    expectEquals(Long.MIN_VALUE, $opt$Div(Long.MIN_VALUE, -1L)); // overflow
+
+    expectEquals(11111111111111L, $opt$Div(33333333333333L, 3L));
+    expectEquals(3L, $opt$Div(33333333333333L, 11111111111111L));
+
+    expectEquals(0L, $opt$Div(0L, Long.MAX_VALUE));
+    expectEquals(0L, $opt$Div(0L, Long.MIN_VALUE));
+
+    expectDivisionByZero(0L);
+    expectDivisionByZero(1L);
+    expectDivisionByZero(Long.MAX_VALUE);
+    expectDivisionByZero(Long.MIN_VALUE);
+  }
+
   private static void divFloat() {
     expectApproxEquals(1.6666666F, $opt$Div(5F, 3F));
     expectApproxEquals(0F, $opt$Div(0F, 3F));
@@ -178,10 +228,22 @@
   }
 
   // Division by literals != 0 should not generate checks.
-  static int $opt$DivLit(int a) {
+  static int $opt$DivConst(int a) {
     return a / 3;
   }
 
+  static long $opt$DivConst(long a) {
+    return a / 3L;
+  }
+
+  static long $opt$Div(long a, long b) {
+    return a / b;
+  }
+
+  static long $opt$DivZero(long a) {
+    return a / 0L;
+  }
+
   static float $opt$Div(float a, float b) {
     return a / b;
   }
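
The `// overflow` comment above relies on a Java guarantee worth spelling out: MIN_VALUE / -1 wraps back to MIN_VALUE instead of trapping, for both int and long, while division by zero must throw. A minimal standalone sketch (illustrative only, not part of the commit; the class name is made up):

    public class DivOverflowDemo {
      public static void main(String[] args) {
        // Two's complement: -(MIN_VALUE) is unrepresentable, so the
        // quotient wraps back to MIN_VALUE rather than trapping.
        System.out.println(Long.MIN_VALUE / -1L == Long.MIN_VALUE);       // true
        System.out.println(Integer.MIN_VALUE / -1 == Integer.MIN_VALUE);  // true
        // Division by zero, by contrast, throws ArithmeticException, a
        // RuntimeException -- exactly what expectDivisionByZero catches.
        long zero = 0L;
        try {
          long unused = 1L / zero;
        } catch (ArithmeticException e) {
          System.out.println("caught: " + e.getMessage());
        }
      }
    }
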
diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java
index a4232ed..37bc777 100644
--- a/test/422-type-conversion/src/Main.java
+++ b/test/422-type-conversion/src/Main.java
@@ -18,6 +18,18 @@
 // it does compile the method.
 public class Main {
 
+  public static void assertByteEquals(byte expected, byte result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertShortEquals(short expected, short result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void assertIntEquals(int expected, int result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -30,13 +42,63 @@
     }
   }
 
+  public static void assertCharEquals(char expected, char result) {
+    if (expected != result) {
+      // Values are cast to int to display numeric values instead of
+      // (UTF-16 encoded) characters.
+      throw new Error("Expected: " + (int)expected + ", found: " + (int)result);
+    }
+  }
+
+  public static void assertFloatEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void assertDoubleEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+
   public static void main(String[] args) {
+    // Generate, compile and check int-to-long Dex instructions.
     byteToLong();
     shortToLong();
     intToLong();
     charToLong();
 
+    // Generate, compile and check int-to-float Dex instructions.
+    byteToFloat();
+    shortToFloat();
+    intToFloat();
+    charToFloat();
+
+    // Generate, compile and check int-to-double Dex instructions.
+    byteToDouble();
+    shortToDouble();
+    intToDouble();
+    charToDouble();
+
+    // Generate, compile and check long-to-int Dex instructions.
     longToInt();
+
+    // Generate, compile and check int-to-byte Dex instructions.
+    shortToByte();
+    intToByte();
+    charToByte();
+
+    // Generate, compile and check int-to-short Dex instructions.
+    byteToShort();
+    intToShort();
+    charToShort();
+
+    // Generate, compile and check int-to-char Dex instructions.
+    byteToChar();
+    shortToChar();
+    intToChar();
   }
 
   private static void byteToLong() {
@@ -78,16 +140,106 @@
     assertLongEquals(51L, $opt$CharToLong((char)51));
     assertLongEquals(32767L, $opt$CharToLong((char)32767));  // 2^15 - 1
     assertLongEquals(65535L, $opt$CharToLong((char)65535));  // 2^16 - 1
-
-    assertLongEquals(0L, $opt$CharToLong('\u0000'));
-    assertLongEquals(65535L, $opt$CharToLong('\uFFFF'));  // 2^16 - 1
-
     assertLongEquals(65535L, $opt$CharToLong((char)-1));
     assertLongEquals(65485L, $opt$CharToLong((char)-51));
     assertLongEquals(32769L, $opt$CharToLong((char)-32767));  // -(2^15 - 1)
     assertLongEquals(32768L, $opt$CharToLong((char)-32768));  // -(2^15)
   }
 
+  private static void byteToFloat() {
+    assertFloatEquals(1F, $opt$ByteToFloat((byte)1));
+    assertFloatEquals(0F, $opt$ByteToFloat((byte)0));
+    assertFloatEquals(-1F, $opt$ByteToFloat((byte)-1));
+    assertFloatEquals(51F, $opt$ByteToFloat((byte)51));
+    assertFloatEquals(-51F, $opt$ByteToFloat((byte)-51));
+    assertFloatEquals(127F, $opt$ByteToFloat((byte)127));  // 2^7 - 1
+    assertFloatEquals(-127F, $opt$ByteToFloat((byte)-127));  // -(2^7 - 1)
+    assertFloatEquals(-128F, $opt$ByteToFloat((byte)-128));  // -(2^7)
+  }
+
+  private static void shortToFloat() {
+    assertFloatEquals(1F, $opt$ShortToFloat((short)1));
+    assertFloatEquals(0F, $opt$ShortToFloat((short)0));
+    assertFloatEquals(-1F, $opt$ShortToFloat((short)-1));
+    assertFloatEquals(51F, $opt$ShortToFloat((short)51));
+    assertFloatEquals(-51F, $opt$ShortToFloat((short)-51));
+    assertFloatEquals(32767F, $opt$ShortToFloat((short)32767));  // 2^15 - 1
+    assertFloatEquals(-32767F, $opt$ShortToFloat((short)-32767));  // -(2^15 - 1)
+    assertFloatEquals(-32768F, $opt$ShortToFloat((short)-32768));  // -(2^15)
+  }
+
+  private static void intToFloat() {
+    assertFloatEquals(1F, $opt$IntToFloat(1));
+    assertFloatEquals(0F, $opt$IntToFloat(0));
+    assertFloatEquals(-1F, $opt$IntToFloat(-1));
+    assertFloatEquals(51F, $opt$IntToFloat(51));
+    assertFloatEquals(-51F, $opt$IntToFloat(-51));
+    assertFloatEquals(16777215F, $opt$IntToFloat(16777215));  // 2^24 - 1
+    assertFloatEquals(-16777215F, $opt$IntToFloat(-16777215));  // -(2^24 - 1)
+    assertFloatEquals(16777216F, $opt$IntToFloat(16777216));  // 2^24
+    assertFloatEquals(-16777216F, $opt$IntToFloat(-16777216));  // -(2^24)
+    assertFloatEquals(2147483647F, $opt$IntToFloat(2147483647));  // 2^31 - 1
+    assertFloatEquals(-2147483648F, $opt$IntToFloat(-2147483648));  // -(2^31)
+  }
+
+  private static void charToFloat() {
+    assertFloatEquals(1F, $opt$CharToFloat((char)1));
+    assertFloatEquals(0F, $opt$CharToFloat((char)0));
+    assertFloatEquals(51F, $opt$CharToFloat((char)51));
+    assertFloatEquals(32767F, $opt$CharToFloat((char)32767));  // 2^15 - 1
+    assertFloatEquals(65535F, $opt$CharToFloat((char)65535));  // 2^16 - 1
+    assertFloatEquals(65535F, $opt$CharToFloat((char)-1));
+    assertFloatEquals(65485F, $opt$CharToFloat((char)-51));
+    assertFloatEquals(32769F, $opt$CharToFloat((char)-32767));  // -(2^15 - 1)
+    assertFloatEquals(32768F, $opt$CharToFloat((char)-32768));  // -(2^15)
+  }
+
+  private static void byteToDouble() {
+    assertDoubleEquals(1D, $opt$ByteToDouble((byte)1));
+    assertDoubleEquals(0D, $opt$ByteToDouble((byte)0));
+    assertDoubleEquals(-1D, $opt$ByteToDouble((byte)-1));
+    assertDoubleEquals(51D, $opt$ByteToDouble((byte)51));
+    assertDoubleEquals(-51D, $opt$ByteToDouble((byte)-51));
+    assertDoubleEquals(127D, $opt$ByteToDouble((byte)127));  // 2^7 - 1
+    assertDoubleEquals(-127D, $opt$ByteToDouble((byte)-127));  // -(2^7 - 1)
+    assertDoubleEquals(-128D, $opt$ByteToDouble((byte)-128));  // -(2^7)
+  }
+
+  private static void shortToDouble() {
+    assertDoubleEquals(1D, $opt$ShortToDouble((short)1));
+    assertDoubleEquals(0D, $opt$ShortToDouble((short)0));
+    assertDoubleEquals(-1D, $opt$ShortToDouble((short)-1));
+    assertDoubleEquals(51D, $opt$ShortToDouble((short)51));
+    assertDoubleEquals(-51D, $opt$ShortToDouble((short)-51));
+    assertDoubleEquals(32767D, $opt$ShortToDouble((short)32767));  // 2^15 - 1
+    assertDoubleEquals(-32767D, $opt$ShortToDouble((short)-32767));  // -(2^15 - 1)
+    assertDoubleEquals(-32768D, $opt$ShortToDouble((short)-32768));  // -(2^15)
+  }
+
+  private static void intToDouble() {
+    assertDoubleEquals(1D, $opt$IntToDouble(1));
+    assertDoubleEquals(0D, $opt$IntToDouble(0));
+    assertDoubleEquals(-1D, $opt$IntToDouble(-1));
+    assertDoubleEquals(51D, $opt$IntToDouble(51));
+    assertDoubleEquals(-51D, $opt$IntToDouble(-51));
+    assertDoubleEquals(16777216D, $opt$IntToDouble(16777216));  // 2^24
+    assertDoubleEquals(-16777216D, $opt$IntToDouble(-16777216));  // -(2^24)
+    assertDoubleEquals(2147483647D, $opt$IntToDouble(2147483647));  // 2^31 - 1
+    assertDoubleEquals(-2147483648D, $opt$IntToDouble(-2147483648));  // -(2^31)
+  }
+
+  private static void charToDouble() {
+    assertDoubleEquals(1D, $opt$CharToDouble((char)1));
+    assertDoubleEquals(0D, $opt$CharToDouble((char)0));
+    assertDoubleEquals(51D, $opt$CharToDouble((char)51));
+    assertDoubleEquals(32767D, $opt$CharToDouble((char)32767));  // 2^15 - 1
+    assertDoubleEquals(65535D, $opt$CharToDouble((char)65535));  // 2^16 - 1
+    assertDoubleEquals(65535D, $opt$CharToDouble((char)-1));
+    assertDoubleEquals(65485D, $opt$CharToDouble((char)-51));
+    assertDoubleEquals(32769D, $opt$CharToDouble((char)-32767));  // -(2^15 - 1)
+    assertDoubleEquals(32768D, $opt$CharToDouble((char)-32768));  // -(2^15)
+  }
+
   private static void longToInt() {
     assertIntEquals(1, $opt$LongToInt(1L));
     assertIntEquals(0, $opt$LongToInt(0L));
@@ -115,13 +267,167 @@
     assertLongEquals(-1, $opt$IntToLong($opt$LongToInt(-4294967297L)));  // -(2^32 + 1)
   }
 
+  private static void shortToByte() {
+    assertByteEquals((byte)1, $opt$ShortToByte((short)1));
+    assertByteEquals((byte)0, $opt$ShortToByte((short)0));
+    assertByteEquals((byte)-1, $opt$ShortToByte((short)-1));
+    assertByteEquals((byte)51, $opt$ShortToByte((short)51));
+    assertByteEquals((byte)-51, $opt$ShortToByte((short)-51));
+    assertByteEquals((byte)127, $opt$ShortToByte((short)127));  // 2^7 - 1
+    assertByteEquals((byte)-127, $opt$ShortToByte((short)-127));  // -(2^7 - 1)
+    assertByteEquals((byte)-128, $opt$ShortToByte((short)-128));  // -(2^7)
+    assertByteEquals((byte)-128, $opt$ShortToByte((short)128));  // 2^7
+    assertByteEquals((byte)127, $opt$ShortToByte((short)-129));  // -(2^7 + 1)
+    assertByteEquals((byte)-1, $opt$ShortToByte((short)32767));  // 2^15 - 1
+    assertByteEquals((byte)0, $opt$ShortToByte((short)-32768));  // -(2^15)
+  }
+
+  private static void intToByte() {
+    assertByteEquals((byte)1, $opt$IntToByte(1));
+    assertByteEquals((byte)0, $opt$IntToByte(0));
+    assertByteEquals((byte)-1, $opt$IntToByte(-1));
+    assertByteEquals((byte)51, $opt$IntToByte(51));
+    assertByteEquals((byte)-51, $opt$IntToByte(-51));
+    assertByteEquals((byte)127, $opt$IntToByte(127));  // 2^7 - 1
+    assertByteEquals((byte)-127, $opt$IntToByte(-127));  // -(2^7 - 1)
+    assertByteEquals((byte)-128, $opt$IntToByte(-128));  // -(2^7)
+    assertByteEquals((byte)-128, $opt$IntToByte(128));  // 2^7
+    assertByteEquals((byte)127, $opt$IntToByte(-129));  // -(2^7 + 1)
+    assertByteEquals((byte)-1, $opt$IntToByte(2147483647));  // 2^31 - 1
+    assertByteEquals((byte)0, $opt$IntToByte(-2147483648));  // -(2^31)
+  }
+
+  private static void charToByte() {
+    assertByteEquals((byte)1, $opt$CharToByte((char)1));
+    assertByteEquals((byte)0, $opt$CharToByte((char)0));
+    assertByteEquals((byte)51, $opt$CharToByte((char)51));
+    assertByteEquals((byte)127, $opt$CharToByte((char)127));  // 2^7 - 1
+    assertByteEquals((byte)-128, $opt$CharToByte((char)128));  // 2^7
+    assertByteEquals((byte)-1, $opt$CharToByte((char)32767));  // 2^15 - 1
+    assertByteEquals((byte)-1, $opt$CharToByte((char)65535));  // 2^16 - 1
+    assertByteEquals((byte)-1, $opt$CharToByte((char)-1));
+    assertByteEquals((byte)-51, $opt$CharToByte((char)-51));
+    assertByteEquals((byte)-127, $opt$CharToByte((char)-127));  // -(2^7 - 1)
+    assertByteEquals((byte)-128, $opt$CharToByte((char)-128));  // -(2^7)
+    assertByteEquals((byte)127, $opt$CharToByte((char)-129));  // -(2^7 + 1)
+  }
+
+  private static void byteToShort() {
+    assertShortEquals((short)1, $opt$ByteToShort((byte)1));
+    assertShortEquals((short)0, $opt$ByteToShort((byte)0));
+    assertShortEquals((short)-1, $opt$ByteToShort((byte)-1));
+    assertShortEquals((short)51, $opt$ByteToShort((byte)51));
+    assertShortEquals((short)-51, $opt$ByteToShort((byte)-51));
+    assertShortEquals((short)127, $opt$ByteToShort((byte)127));  // 2^7 - 1
+    assertShortEquals((short)-127, $opt$ByteToShort((byte)-127));  // -(2^7 - 1)
+    assertShortEquals((short)-128, $opt$ByteToShort((byte)-128));  // -(2^7)
+  }
+
+  private static void intToShort() {
+    assertShortEquals((short)1, $opt$IntToShort(1));
+    assertShortEquals((short)0, $opt$IntToShort(0));
+    assertShortEquals((short)-1, $opt$IntToShort(-1));
+    assertShortEquals((short)51, $opt$IntToShort(51));
+    assertShortEquals((short)-51, $opt$IntToShort(-51));
+    assertShortEquals((short)32767, $opt$IntToShort(32767));  // 2^15 - 1
+    assertShortEquals((short)-32767, $opt$IntToShort(-32767));  // -(2^15 - 1)
+    assertShortEquals((short)-32768, $opt$IntToShort(-32768));  // -(2^15)
+    assertShortEquals((short)-32768, $opt$IntToShort(32768));  // 2^15
+    assertShortEquals((short)32767, $opt$IntToShort(-32769));  // -(2^15 + 1)
+    assertShortEquals((short)-1, $opt$IntToShort(2147483647));  // 2^31 - 1
+    assertShortEquals((short)0, $opt$IntToShort(-2147483648));  // -(2^31)
+  }
+
+  private static void charToShort() {
+    assertShortEquals((short)1, $opt$CharToShort((char)1));
+    assertShortEquals((short)0, $opt$CharToShort((char)0));
+    assertShortEquals((short)51, $opt$CharToShort((char)51));
+    assertShortEquals((short)32767, $opt$CharToShort((char)32767));  // 2^15 - 1
+    assertShortEquals((short)-32768, $opt$CharToShort((char)32768));  // 2^15
+    assertShortEquals((short)-32767, $opt$CharToShort((char)32769));  // 2^15 + 1
+    assertShortEquals((short)-1, $opt$CharToShort((char)65535));  // 2^16 - 1
+    assertShortEquals((short)-1, $opt$CharToShort((char)-1));
+    assertShortEquals((short)-51, $opt$CharToShort((char)-51));
+    assertShortEquals((short)-32767, $opt$CharToShort((char)-32767));  // -(2^15 - 1)
+    assertShortEquals((short)-32768, $opt$CharToShort((char)-32768));  // -(2^15)
+    assertShortEquals((short)32767, $opt$CharToShort((char)-32769));  // -(2^15 + 1)
+  }
+
+  private static void byteToChar() {
+    assertCharEquals((char)1, $opt$ByteToChar((byte)1));
+    assertCharEquals((char)0, $opt$ByteToChar((byte)0));
+    assertCharEquals((char)65535, $opt$ByteToChar((byte)-1));
+    assertCharEquals((char)51, $opt$ByteToChar((byte)51));
+    assertCharEquals((char)65485, $opt$ByteToChar((byte)-51));
+    assertCharEquals((char)127, $opt$ByteToChar((byte)127));  // 2^7 - 1
+    assertCharEquals((char)65409, $opt$ByteToChar((byte)-127));  // -(2^7 - 1)
+    assertCharEquals((char)65408, $opt$ByteToChar((byte)-128));  // -(2^7)
+  }
+
+  private static void shortToChar() {
+    assertCharEquals((char)1, $opt$ShortToChar((short)1));
+    assertCharEquals((char)0, $opt$ShortToChar((short)0));
+    assertCharEquals((char)65535, $opt$ShortToChar((short)-1));
+    assertCharEquals((char)51, $opt$ShortToChar((short)51));
+    assertCharEquals((char)65485, $opt$ShortToChar((short)-51));
+    assertCharEquals((char)32767, $opt$ShortToChar((short)32767));  // 2^15 - 1
+    assertCharEquals((char)32769, $opt$ShortToChar((short)-32767));  // -(2^15 - 1)
+    assertCharEquals((char)32768, $opt$ShortToChar((short)-32768));  // -(2^15)
+  }
+
+  private static void intToChar() {
+    assertCharEquals((char)1, $opt$IntToChar(1));
+    assertCharEquals((char)0, $opt$IntToChar(0));
+    assertCharEquals((char)65535, $opt$IntToChar(-1));
+    assertCharEquals((char)51, $opt$IntToChar(51));
+    assertCharEquals((char)65485, $opt$IntToChar(-51));
+    assertCharEquals((char)32767, $opt$IntToChar(32767));  // 2^15 - 1
+    assertCharEquals((char)32769, $opt$IntToChar(-32767));  // -(2^15 - 1)
+    assertCharEquals((char)32768, $opt$IntToChar(32768));  // 2^15
+    assertCharEquals((char)32768, $opt$IntToChar(-32768));  // -(2^15)
+    assertCharEquals((char)65535, $opt$IntToChar(65535));  // 2^16 - 1
+    assertCharEquals((char)1, $opt$IntToChar(-65535));  // -(2^16 - 1)
+    assertCharEquals((char)0, $opt$IntToChar(65536));  // 2^16
+    assertCharEquals((char)0, $opt$IntToChar(-65536));  // -(2^16)
+    assertCharEquals((char)65535, $opt$IntToChar(2147483647));  // 2^31 - 1
+    assertCharEquals((char)0, $opt$IntToChar(-2147483648));  // -(2^31)
+  }
+
+
   // These methods produce int-to-long Dex instructions.
   static long $opt$ByteToLong(byte a) { return a; }
   static long $opt$ShortToLong(short a) { return a; }
   static long $opt$IntToLong(int a) { return a; }
   static long $opt$CharToLong(int a) { return a; }
 
+  // These methods produce int-to-float Dex instructions.
+  static float $opt$ByteToFloat(byte a) { return a; }
+  static float $opt$ShortToFloat(short a) { return a; }
+  static float $opt$IntToFloat(int a) { return a; }
+  static float $opt$CharToFloat(char a) { return a; }
+
+  // These methods produce int-to-double Dex instructions.
+  static double $opt$ByteToDouble(byte a) { return a; }
+  static double $opt$ShortToDouble(short a) { return a; }
+  static double $opt$IntToDouble(int a) { return a; }
+  static double $opt$CharToDouble(int a) { return a; }
+
   // These methods produce long-to-int Dex instructions.
   static int $opt$LongToInt(long a){ return (int)a; }
   static int $opt$LongLiteralToInt(){ return (int)42L; }
+
+  // These methods produce int-to-byte Dex instructions.
+  static byte $opt$ShortToByte(short a){ return (byte)a; }
+  static byte $opt$IntToByte(int a){ return (byte)a; }
+  static byte $opt$CharToByte(char a){ return (byte)a; }
+
+  // These methods produce int-to-short Dex instructions.
+  static short $opt$ByteToShort(byte a){ return (short)a; }
+  static short $opt$IntToShort(int a){ return (short)a; }
+  static short $opt$CharToShort(char a){ return (short)a; }
+
+  // These methods produce int-to-char Dex instructions.
+  static char $opt$ByteToChar(byte a){ return (char)a; }
+  static char $opt$ShortToChar(short a){ return (char)a; }
+  static char $opt$IntToChar(int a){ return (char)a; }
 }
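
Every assertion above follows from two Java rules: a narrowing conversion keeps only the low-order bits of the two's-complement value, and char is an unsigned 16-bit type, so negative values reappear as large positives. A minimal standalone sketch (illustrative only, not part of the commit; the class name is made up):

    public class NarrowingDemo {
      public static void main(String[] args) {
        System.out.println((byte) 128);        // -128: low 8 bits of 0x80
        System.out.println((short) 65535);     // -1: low 16 bits all set
        System.out.println((int) (char) -1);   // 65535: char is unsigned
        System.out.println((int) (char) -51);  // 65485, as in $opt$CharToLong((char)-51)
      }
    }
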
diff --git a/test/425-invoke-super/expected.txt b/test/425-invoke-super/expected.txt
index e69de29..f7f6ae4 100644
--- a/test/425-invoke-super/expected.txt
+++ b/test/425-invoke-super/expected.txt
@@ -0,0 +1 @@
+Test started
diff --git a/test/425-invoke-super/src/Main.java b/test/425-invoke-super/src/Main.java
index 1fb62d0..f3166fd 100644
--- a/test/425-invoke-super/src/Main.java
+++ b/test/425-invoke-super/src/Main.java
@@ -39,6 +39,8 @@
   }
 
   public static void main(String[] args) throws Exception {
+    // Workaround for b/18051191.
+    System.out.println("Test started");
     assertEquals(1, new B().$opt$bar());
     assertEquals(1, new C().$opt$bar());
     assertEquals(1, new D().$opt$bar());
diff --git a/test/428-optimizing-arith-rem/expected.txt b/test/428-optimizing-arith-rem/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/428-optimizing-arith-rem/expected.txt
diff --git a/test/428-optimizing-arith-rem/info.txt b/test/428-optimizing-arith-rem/info.txt
new file mode 100644
index 0000000..3e37ffe
--- /dev/null
+++ b/test/428-optimizing-arith-rem/info.txt
@@ -0,0 +1 @@
+Tests for the modulo (rem) operation.
diff --git a/test/428-optimizing-arith-rem/src/Main.java b/test/428-optimizing-arith-rem/src/Main.java
new file mode 100644
index 0000000..46bd3c6
--- /dev/null
+++ b/test/428-optimizing-arith-rem/src/Main.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectDivisionByZero(int value) {
+    try {
+      $opt$Rem(value, 0);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$RemZero(value);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
+  public static void expectDivisionByZero(long value) {
+    try {
+      $opt$Rem(value, 0L);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+    try {
+      $opt$RemZero(value);
+      throw new Error("Expected RuntimeException when modulo by 0");
+    } catch (java.lang.RuntimeException e) {
+    }
+  }
+
+  public static void main(String[] args) {
+    rem();
+  }
+
+  public static void rem() {
+    remInt();
+    remLong();
+  }
+
+  private static void remInt() {
+    expectEquals(2, $opt$RemConst(6));
+    expectEquals(2, $opt$Rem(6, 4));
+    expectEquals(2, $opt$Rem(6, -4));
+    expectEquals(0, $opt$Rem(6, 3));
+    expectEquals(0, $opt$Rem(6, -3));
+    expectEquals(0, $opt$Rem(6, 1));
+    expectEquals(0, $opt$Rem(6, -1));
+    expectEquals(-1, $opt$Rem(-7, 3));
+    expectEquals(-1, $opt$Rem(-7, -3));
+    expectEquals(0, $opt$Rem(6, 6));
+    expectEquals(0, $opt$Rem(-6, -6));
+    expectEquals(7, $opt$Rem(7, 9));
+    expectEquals(7, $opt$Rem(7, -9));
+    expectEquals(-7, $opt$Rem(-7, 9));
+    expectEquals(-7, $opt$Rem(-7, -9));
+
+    expectEquals(0, $opt$Rem(Integer.MAX_VALUE, 1));
+    expectEquals(0, $opt$Rem(Integer.MAX_VALUE, -1));
+    expectEquals(0, $opt$Rem(Integer.MIN_VALUE, 1));
+    expectEquals(0, $opt$Rem(Integer.MIN_VALUE, -1)); // no overflow
+    expectEquals(-1, $opt$Rem(Integer.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(Integer.MAX_VALUE, $opt$Rem(Integer.MAX_VALUE, Integer.MIN_VALUE));
+
+    expectEquals(0, $opt$Rem(0, 7));
+    expectEquals(0, $opt$Rem(0, Integer.MAX_VALUE));
+    expectEquals(0, $opt$Rem(0, Integer.MIN_VALUE));
+
+    expectDivisionByZero(0);
+    expectDivisionByZero(1);
+    expectDivisionByZero(5);
+    expectDivisionByZero(Integer.MAX_VALUE);
+    expectDivisionByZero(Integer.MIN_VALUE);
+  }
+
+  private static void remLong() {
+    expectEquals(2L, $opt$RemConst(6L));
+    expectEquals(2L, $opt$Rem(6L, 4L));
+    expectEquals(2L, $opt$Rem(6L, -4L));
+    expectEquals(0L, $opt$Rem(6L, 3L));
+    expectEquals(0L, $opt$Rem(6L, -3L));
+    expectEquals(0L, $opt$Rem(6L, 1L));
+    expectEquals(0L, $opt$Rem(6L, -1L));
+    expectEquals(-1L, $opt$Rem(-7L, 3L));
+    expectEquals(-1L, $opt$Rem(-7L, -3L));
+    expectEquals(0L, $opt$Rem(6L, 6L));
+    expectEquals(0L, $opt$Rem(-6L, -6L));
+    expectEquals(7L, $opt$Rem(7L, 9L));
+    expectEquals(7L, $opt$Rem(7L, -9L));
+    expectEquals(-7L, $opt$Rem(-7L, 9L));
+    expectEquals(-7L, $opt$Rem(-7L, -9L));
+
+    expectEquals(0L, $opt$Rem(Integer.MAX_VALUE, 1L));
+    expectEquals(0L, $opt$Rem(Integer.MAX_VALUE, -1L));
+    expectEquals(0L, $opt$Rem(Integer.MIN_VALUE, 1L));
+    expectEquals(0L, $opt$Rem(Integer.MIN_VALUE, -1L)); // no overflow
+    expectEquals(-1L, $opt$Rem(Integer.MIN_VALUE, Integer.MAX_VALUE));
+    expectEquals(Integer.MAX_VALUE, $opt$Rem(Integer.MAX_VALUE, Integer.MIN_VALUE));
+
+    expectEquals(0L, $opt$Rem(0L, 7L));
+    expectEquals(0L, $opt$Rem(0L, Integer.MAX_VALUE));
+    expectEquals(0L, $opt$Rem(0L, Integer.MIN_VALUE));
+
+    expectDivisionByZero(0L);
+    expectDivisionByZero(1L);
+    expectDivisionByZero(5L);
+    expectDivisionByZero(Integer.MAX_VALUE);
+    expectDivisionByZero(Integer.MIN_VALUE);
+  }
+
+  static int $opt$Rem(int a, int b) {
+    return a % b;
+  }
+
+  static int $opt$RemZero(int a) {
+    return a % 0;
+  }
+
+  // Modulo by literals != 0 should not generate checks.
+  static int $opt$RemConst(int a) {
+    return a % 4;
+  }
+
+  static long $opt$RemConst(long a) {
+    return a % 4L;
+  }
+
+  static long $opt$Rem(long a, long b) {
+    return a % b;
+  }
+
+  static long $opt$RemZero(long a) {
+    return a % 0L;
+  }
+}
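
The expected values above encode Java's remainder semantics: the sign of a % b follows the dividend, and MIN_VALUE % -1 is 0, so rem cannot overflow the way division can. A minimal standalone sketch (illustrative only, not part of the commit; the class name is made up):

    public class RemDemo {
      public static void main(String[] args) {
        System.out.println(-7 % 3);                  // -1: sign of the dividend
        System.out.println(-7 % -3);                 // -1: divisor sign is irrelevant
        System.out.println(7 % -9);                  //  7: |a| < |b| leaves a unchanged
        System.out.println(Integer.MIN_VALUE % -1);  //  0: no overflow, unlike division
        // The JLS identity (a / b) * b + (a % b) == a always holds.
        int a = -23, b = 4;
        System.out.println((a / b) * b + (a % b) == a);  // true
      }
    }
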
diff --git a/test/429-ssa-builder/expected.txt b/test/429-ssa-builder/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/429-ssa-builder/expected.txt
diff --git a/test/429-ssa-builder/info.txt b/test/429-ssa-builder/info.txt
new file mode 100644
index 0000000..509d00f
--- /dev/null
+++ b/test/429-ssa-builder/info.txt
@@ -0,0 +1,3 @@
+Regression test for the type propagation phase of the optimizing
+compiler, which used to crash when dealing with phi floating-point
+equivalents.
diff --git a/test/429-ssa-builder/src/Main.java b/test/429-ssa-builder/src/Main.java
new file mode 100644
index 0000000..32fcef0
--- /dev/null
+++ b/test/429-ssa-builder/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    if (new Main().$opt$TestFloatPhi() != 33.0f) {
+      throw new Error("Unexpected result");
+    }
+  }
+
+  public float $opt$TestFloatPhi() {
+    float a = floatField;
+    float b = 42.0f;
+    if (test1) {
+      // The phi for `a` will be found to be of type float.
+      a = otherFloatField;
+      // The phi for `b` will be found to be of type int (constants in DEX).
+      b = 33.0f;
+    }
+    // Use a different condition to keep dx from being too clever.
+    if (test2) {
+      // Type propagation now realizes that `b` must be of type float. So
+      // it requests a float equivalent for `b`. Because the phi for `a` is
+      // next to the phi for `b` in the phi list, the compiler used to crash,
+      // assuming that a float phi following a phi *must* be for the same DEX
+      // register.
+      a = b;
+    }
+    return a;
+  }
+
+  float floatField = 4.2f;
+  float otherFloatField = 42.2f;
+  boolean test1 = true;
+  boolean test2 = true;
+}
diff --git a/test/430-live-register-slow-path/expected.txt b/test/430-live-register-slow-path/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/430-live-register-slow-path/expected.txt
diff --git a/test/430-live-register-slow-path/info.txt b/test/430-live-register-slow-path/info.txt
new file mode 100644
index 0000000..6f2af28
--- /dev/null
+++ b/test/430-live-register-slow-path/info.txt
@@ -0,0 +1,2 @@
+Regression test for the linear scan register allocator. It used
+to miscompute the number of live registers at a safepoint.
diff --git a/test/430-live-register-slow-path/src/Main.java b/test/430-live-register-slow-path/src/Main.java
new file mode 100644
index 0000000..b84e647
--- /dev/null
+++ b/test/430-live-register-slow-path/src/Main.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+   $opt$TestSlowPath();
+  }
+
+  public static void $opt$TestSlowPath() {
+    Object[] o = bar();
+    assertEquals(0, o.length);
+    // The slow path of the instanceof check requires the live register
+    // holding `o` to be saved before entering the runtime. The linear
+    // scan register allocator used to miscompute the number of
+    // live registers at a safepoint, so the register was saved at the
+    // wrong place.
+    doCall(o instanceof Interface[], o);
+  }
+
+  public static void assertEquals(int a, int b) {}
+  public static boolean doCall(boolean val, Object o) { return val; }
+
+  static Object[] bar() { return new Object[0]; }
+
+  static interface Interface {}
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 3e3955b..7674a8a 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -1,4 +1,9 @@
 b/17790197
+b/17978759
 FloatBadArgReg
 negLong
+sameFieldNames
+b/18380491
+invoke-super abstract
+BadCaseInOpRegRegReg
 Done!
diff --git a/test/800-smali/smali/BadCaseInOpRegRegReg.smali b/test/800-smali/smali/BadCaseInOpRegRegReg.smali
new file mode 100644
index 0000000..2683790
--- /dev/null
+++ b/test/800-smali/smali/BadCaseInOpRegRegReg.smali
@@ -0,0 +1,13 @@
+.class public LBadCaseInOpRegRegReg;
+
+.super Ljava/lang/Object;
+
+.method public static getInt()I
+    .registers 2
+    const/4 v0, 0x0
+    const/4 v1, 0x1
+    add-int/2addr v0, v1
+    add-int/lit8 v1, v0, 0x1
+    mul-int v0, v1, v0
+    return v0
+.end method
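
For reference, a direct Java transliteration of the method above (illustrative only, not part of the commit; the class name is made up) shows why the harness entry added below expects getInt() to return 2:

    public class BadCaseDemo {
      static int getInt() {
        int v0 = 0;      // const/4 v0, 0x0
        int v1 = 1;      // const/4 v1, 0x1
        v0 = v0 + v1;    // add-int/2addr v0, v1   -> v0 = 1
        v1 = v0 + 1;     // add-int/lit8 v1, v0, 1 -> v1 = 2
        v0 = v1 * v0;    // mul-int v0, v1, v0     -> v0 = 2
        return v0;
      }

      public static void main(String[] args) {
        System.out.println(getInt());  // 2
      }
    }
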
diff --git a/test/800-smali/smali/b_17978759.smali b/test/800-smali/smali/b_17978759.smali
new file mode 100644
index 0000000..07bcae5
--- /dev/null
+++ b/test/800-smali/smali/b_17978759.smali
@@ -0,0 +1,28 @@
+.class public LB17978759;
+.super Ljava/lang/Object;
+
+  .method public constructor <init>()V
+    .registers 1
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+  .end method
+
+  .method public test()V
+    .registers 2
+
+    move-object   v0, p0
+    # v0 and p0 alias
+    monitor-enter p0
+    # monitor-enter on p0
+    monitor-exit  v0
+    # monitor-exit on v0; the verifier doesn't track the aliasing, so this
+    # is only a warning. The verifier will still think p0 is locked.
+
+    move-object   v0, p0
+    # v0 will now appear locked.
+    monitor-enter v0
+    # Attempting to lock v0 twice is a verifier failure.
+    monitor-exit  v0
+
+    return-void
+  .end method
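
The harness entry added below registers VerifyError as the expected outcome for this class. A standalone sketch of how a verification failure surfaces to Java code (illustrative only, not part of the commit; the reflective lookup stands in for the smali-built class):

    public class VerifyErrorDemo {
      public static void main(String[] args) {
        try {
          // Linking (and thus verification) happens on first use.
          Class<?> c = Class.forName("B17978759");
          c.getDeclaredMethod("test").invoke(c.newInstance());
        } catch (VerifyError expected) {
          System.out.println("verification rejected the class: " + expected);
        } catch (ReflectiveOperationException e) {
          System.out.println("class unavailable when run standalone: " + e);
        }
      }
    }
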
diff --git a/test/800-smali/smali/b_18380491AbstractBase.smali b/test/800-smali/smali/b_18380491AbstractBase.smali
new file mode 100644
index 0000000..7aa1b1a
--- /dev/null
+++ b/test/800-smali/smali/b_18380491AbstractBase.smali
@@ -0,0 +1,12 @@
+.class public LB18380491ActractBase;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .locals 0
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method public abstract foo(I)I
+.end method
diff --git a/test/800-smali/smali/b_18380491ConcreteClass.smali b/test/800-smali/smali/b_18380491ConcreteClass.smali
new file mode 100644
index 0000000..db5ef3b
--- /dev/null
+++ b/test/800-smali/smali/b_18380491ConcreteClass.smali
@@ -0,0 +1,19 @@
+.class public LB18380491ConcreteClass;
+
+.super LB18380491ActractBase;
+
+.method public constructor <init>()V
+    .locals 0
+    invoke-direct {p0}, LB18380491ActractBase;-><init>()V
+    return-void
+.end method
+
+.method public foo(I)I
+  .locals 1
+  if-eqz p1, :invoke_super_abstract
+  return p1
+  :invoke_super_abstract
+  invoke-super {p0, p1}, LB18380491ActractBase;->foo(I)I
+  move-result v0
+  return v0
+.end method
diff --git a/test/800-smali/smali/sameFieldNames.smali b/test/800-smali/smali/sameFieldNames.smali
new file mode 100644
index 0000000..107161b
--- /dev/null
+++ b/test/800-smali/smali/sameFieldNames.smali
@@ -0,0 +1,64 @@
+.class public LsameFieldNames;
+.super Ljava/lang/Object;
+
+# Test multiple fields with the same name and different types.
+# (Invalid in Java language but valid in bytecode.)
+.field static public a:D
+.field static public a:S
+.field static public a:J
+.field static public a:F
+.field static public a:Z
+.field static public a:I
+.field static public a:B
+.field static public a:C
+.field static public a:Ljava/lang/Integer;
+.field static public a:Ljava/lang/Long;
+.field static public a:Ljava/lang/Float;
+.field static public a:Ljava/lang/Double;
+.field static public a:Ljava/lang/Boolean;
+.field static public a:Ljava/lang/Void;
+.field static public a:Ljava/lang/Short;
+.field static public a:Ljava/lang/Char;
+.field static public a:Ljava/lang/Byte;
+
+# Add some more fields to stress test the sorting for offset assignment.
+.field static public b:C
+.field static public c:J
+.field static public d:C
+.field static public e:B
+.field static public f:C
+.field static public g:J
+.field static public h:C
+.field static public i:J
+.field static public j:I
+.field static public k:J
+.field static public l:J
+.field static public m:I
+.field static public n:J
+.field static public o:I
+.field static public p:Ljava/lang/Integer;
+.field static public q:I
+.field static public r:J
+.field static public s:I
+.field static public t:Ljava/lang/Integer;
+.field static public u:I
+.field static public v:J
+.field static public w:I
+.field static public x:Ljava/lang/Integer;
+.field static public y:I
+.field static public z:Ljava/lang/Integer;
+
+.method public static getInt()I
+    .locals 2
+    const/4 v0, 2
+    sput v0, LsameFieldNames;->a:I
+    sget-object v1, LsameFieldNames;->a:Ljava/lang/Integer;
+    const/4 v1, 0
+    if-nez v1, :fail
+    const/4 v0, 7
+    :ret
+    return v0
+    :fail
+    const/4 v0, 0
+    goto :ret
+.end method
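
Because the duplicate `a` fields are legal at the bytecode level, reflection sees all of them, distinguished only by their types. A standalone sketch (illustrative only, not part of the commit; the Class.forName lookup stands in for the smali-built class):

    import java.lang.reflect.Field;

    public class SameNameFieldsDemo {
      public static void main(String[] args) throws Exception {
        Class<?> c = Class.forName("sameFieldNames");
        for (Field f : c.getDeclaredFields()) {
          if (f.getName().equals("a")) {
            System.out.println("a : " + f.getType().getName());
          }
        }
      }
    }
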
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 87549d9..8d318c3 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -14,7 +14,9 @@
  * limitations under the License.
  */
 
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -49,9 +51,16 @@
         testCases = new LinkedList<TestCase>();
 
         testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100));
+        testCases.add(new TestCase("b/17978759", "B17978759", "test", null, new VerifyError(), null));
         testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt",
             new Object[]{100}, null, 100));
         testCases.add(new TestCase("negLong", "negLong", "negLong", null, null, 122142L));
+        testCases.add(new TestCase("sameFieldNames", "sameFieldNames", "getInt", null, null, 7));
+        testCases.add(new TestCase("b/18380491", "B18380491ConcreteClass", "foo",
+            new Object[]{42}, null, 42));
+        testCases.add(new TestCase("invoke-super abstract", "B18380491ConcreteClass", "foo",
+            new Object[]{0}, new AbstractMethodError(), null));
+        testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2));
     }
 
     public void runTests() {
@@ -66,47 +75,62 @@
     }
 
     private void runTest(TestCase tc) throws Exception {
-        Class<?> c = Class.forName(tc.testClass);
-
-        Method[] methods = c.getDeclaredMethods();
-
-        // For simplicity we assume that test methods are not overloaded. So searching by name
-        // will give us the method we need to run.
-        Method method = null;
-        for (Method m : methods) {
-            if (m.getName().equals(tc.testMethodName)) {
-                method = m;
-                break;
-            }
-        }
-
-        if (method == null) {
-            throw new IllegalArgumentException("Could not find test method " + tc.testMethodName +
-                    " in class " + tc.testClass + " for test " + tc.testName);
-        }
-
         Exception errorReturn = null;
         try {
-            Object retValue = method.invoke(null, tc.values);
-            if (tc.expectedException != null) {
-                errorReturn = new IllegalStateException("Expected an exception in test " +
-                                                        tc.testName);
+            Class<?> c = Class.forName(tc.testClass);
+
+            Method[] methods = c.getDeclaredMethods();
+
+            // For simplicity we assume that test methods are not overloaded. So searching by name
+            // will give us the method we need to run.
+            Method method = null;
+            for (Method m : methods) {
+                if (m.getName().equals(tc.testMethodName)) {
+                    method = m;
+                    break;
+                }
             }
-            if (tc.expectedReturn == null && retValue != null) {
-                errorReturn = new IllegalStateException("Expected a null result in test " +
-                                                        tc.testName);
-            } else if (tc.expectedReturn != null &&
-                       (retValue == null || !tc.expectedReturn.equals(retValue))) {
-                errorReturn = new IllegalStateException("Expected return " + tc.expectedReturn +
-                                                        ", but got " + retValue);
+
+            if (method == null) {
+                errorReturn = new IllegalArgumentException("Could not find test method " +
+                                                           tc.testMethodName + " in class " +
+                                                           tc.testClass + " for test " +
+                                                           tc.testName);
+            } else {
+                Object retValue;
+                if (Modifier.isStatic(method.getModifiers())) {
+                    retValue = method.invoke(null, tc.values);
+                } else {
+                    retValue = method.invoke(method.getDeclaringClass().newInstance(), tc.values);
+                }
+                if (tc.expectedException != null) {
+                    errorReturn = new IllegalStateException("Expected an exception in test " +
+                                                            tc.testName);
+                }
+                if (tc.expectedReturn == null && retValue != null) {
+                    errorReturn = new IllegalStateException("Expected a null result in test " +
+                                                            tc.testName);
+                } else if (tc.expectedReturn != null &&
+                           (retValue == null || !tc.expectedReturn.equals(retValue))) {
+                    errorReturn = new IllegalStateException("Expected return " +
+                                                            tc.expectedReturn +
+                                                            ", but got " + retValue);
+                } else {
+                    // Expected result, do nothing.
+                }
             }
-        } catch (Exception exc) {
+        } catch (Throwable exc) {
             if (tc.expectedException == null) {
                 errorReturn = new IllegalStateException("Did not expect exception", exc);
+            } else if (exc instanceof InvocationTargetException && exc.getCause() != null &&
+                       exc.getCause().getClass().equals(tc.expectedException.getClass())) {
+                // Expected exception is wrapped in InvocationTargetException.
             } else if (!tc.expectedException.getClass().equals(exc.getClass())) {
                 errorReturn = new IllegalStateException("Expected " +
-                                                tc.expectedException.getClass().getName() +
-                                                ", but got " + exc.getClass(), exc);
+                                                        tc.expectedException.getClass().getName() +
+                                                        ", but got " + exc.getClass(), exc);
+            } else {
+              // Expected exception, do nothing.
             }
         } finally {
             if (errorReturn != null) {
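
The rewritten harness compares the expected exception against the cause of an InvocationTargetException, because reflective invocation wraps whatever the callee throws. A standalone sketch of that matching rule (illustrative only, not part of the commit; class and method names are made up):

    import java.lang.reflect.InvocationTargetException;
    import java.lang.reflect.Method;

    public class ExpectedExceptionDemo {
      static boolean matches(Throwable thrown, Throwable expected) {
        if (thrown instanceof InvocationTargetException && thrown.getCause() != null) {
          return thrown.getCause().getClass().equals(expected.getClass());
        }
        return thrown.getClass().equals(expected.getClass());
      }

      static void boom() {
        int zero = 0;
        int unused = 1 / zero;  // throws ArithmeticException, which invoke() wraps
      }

      public static void main(String[] args) throws Exception {
        Method m = ExpectedExceptionDemo.class.getDeclaredMethod("boom");
        try {
          m.invoke(null);
        } catch (Throwable t) {
          System.out.println(matches(t, new ArithmeticException()));  // true
        }
      }
    }
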
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index a6f31b4..29da2f6 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -286,8 +286,7 @@
 TEST_ART_BROKEN_NDEBUG_TESTS :=
 
 # Known broken tests for the default compiler (Quick).
-TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \
-  412-new-array
+TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
 
 ifneq (,$(filter default,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -299,161 +298,39 @@
 
 # Known broken tests for the arm64 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := \
-  001-HelloWorld \
-  002-sleep \
   003-omnibus-opcodes \
-  004-InterfaceTest \
-  004-JniTest \
   004-NativeAllocations \
   004-ReferenceMap \
-  004-SignalTest \
-  004-StackWalk \
-  004-UnsafeTest \
   005-annotations \
-  006-args \
-  007-count10 \
-  008-exceptions \
   009-instanceof \
   010-instance \
-  011-array-copy \
-  013-math2 \
-  016-intern \
-  017-float \
-  018-stack-overflow \
-  019-wrong-array-type \
-  020-string \
-  021-string2 \
-  022-interface \
+  012-math \
   023-many-interfaces \
-  024-illegal-access \
-  025-access-controller \
-  026-access \
-  028-array-write \
-  029-assert \
-  030-bad-finalizer \
-  031-class-attributes \
-  032-concrete-sub \
-  033-class-init-deadlock \
-  034-call-null \
-  035-enum \
-  036-finalizer \
   037-inherit \
-  038-inner-null \
-  039-join-main \
-  040-miranda \
-  042-new-instance \
-  043-privates \
   044-proxy \
   045-reflect-array \
   046-reflect \
   047-returns \
-  049-show-object \
-  050-sync-test \
-  051-thread \
-  052-verifier-fun \
-  054-uncaught \
-  055-enum-performance \
-  056-const-string-jumbo \
-  058-enum-order \
-  061-out-of-memory \
   062-character-encodings \
   063-process-manager \
-  064-field-access \
-  065-mismatched-implements \
-  066-mismatched-super \
-  067-preemptive-unpark \
   068-classloader \
   069-field-type \
-  070-nio-buffer \
   071-dexfile \
-  072-precise-gc \
-  074-gc-thrash \
-  075-verification-error \
-  076-boolean-put \
-  077-method-override \
-  078-polymorphic-virtual \
-  079-phantom \
-  080-oom-throw \
-  081-hot-exceptions \
-  082-inline-execute \
   083-compiler-regressions \
-  084-class-init \
-  085-old-style-inner-class \
-  086-null-super \
-  087-gc-after-link \
-  088-monitor-verification \
-  090-loop-formation \
-  092-locale \
-  093-serialization \
-  094-pattern \
-  096-array-copy-concurrent-gc \
-  097-duplicate-method \
-  098-ddmc \
-  100-reflect2 \
-  101-fibonacci \
-  102-concurrent-gc \
-  103-string-append \
-  104-growth-limit \
-  105-invoke \
   106-exceptions2 \
   107-int-math2 \
-  108-check-cast \
-  109-suspend-check \
-  110-field-access \
-  111-unresolvable-exception \
-  112-double-math \
-  113-multidex \
   114-ParallelGC \
-  117-nopatchoat \
-  118-noimage-dex2oat \
-  119-noimage-patchoat \
-  120-hashcode \
-  121-modifiers \
-  121-simple-suspend-check \
-  122-npe \
-  123-compiler-regressions-mt \
-  124-missing-classes \
-  125-gc-and-classloading \
-  126-miranda-multidex \
   201-built-in-exception-detail-messages \
-  202-thread-oome \
-  300-package-override \
-  301-abstract-protected \
-  303-verification-stress \
-  304-method-tracing \
-  401-optimizing-compiler \
-  402-optimizing-control-flow \
-  403-optimizing-long \
-  404-optimizing-allocator \
-  405-optimizing-long-allocator \
-  406-fields \
   407-arrays \
-  409-materialized-condition \
-  410-floats \
-  411-optimizing-arith \
   412-new-array \
-  413-regalloc-regression \
-  414-optimizing-arith-sub \
-  414-static-fields \
-  415-optimizing-arith-neg \
-  416-optimizing-arith-not \
-  417-optimizing-arith-div \
-  418-const-string \
-  419-long-parameter \
-  420-const-class \
-  421-exceptions \
-  421-large-frame \
   422-instanceof \
   422-type-conversion \
-  423-invoke-interface \
   424-checkcast \
-  426-monitor \
-  427-bitwise \
-  700-LoadArgRegs \
+  427-bounds \
+  428-optimizing-arith-rem \
+  430-live-register-slow-path \
   701-easy-div-rem \
-  702-LargeBranchOffset \
-  703-floating-point-div \
-  800-smali
+  800-smali \
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
diff --git a/test/run-test b/test/run-test
index b43668d..843714b 100755
--- a/test/run-test
+++ b/test/run-test
@@ -501,6 +501,8 @@
 fi
 
 good="no"
+good_build="yes"
+good_run="yes"
 if [ "$dev_mode" = "yes" ]; then
     "./${build}" 2>&1
     build_exit="$?"
@@ -548,7 +550,15 @@
     if [ "$build_exit" = '0' ]; then
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
+        run_exit="$?"
+        if [ "$run_exit" != "0" ]; then
+            echo "run exit status: $run_exit" 1>&2
+            good_run="no"
+        else
+            good_run="yes"
+        fi
     else
+        good_build="no"
         cp "$build_output" "$output"
         echo "Failed to build in tmpdir=${tmp_dir} from oldwd=${oldwd} and cwd=`pwd`" >> "$output"
         echo "Non-canonical tmpdir was ${noncanonical_tmp_dir}" >> "$output"
@@ -561,9 +571,11 @@
     fi
     ./$check_cmd "$expected" "$output"
     if [ "$?" = "0" ]; then
-        # output == expected
-        good="yes"
-        echo "${test_dir}: succeeded!" 1>&2
+        if [ "$good_build" = "no" -o "$good_run" = "yes" ]; then
+          # output == expected
+          good="yes"
+          echo "${test_dir}: succeeded!" 1>&2
+        fi
     fi
 fi