Merge "JDWP: fix breakpoint for method in the image"
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 7b38e5e..6346970 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -144,11 +144,8 @@
 ART_TARGET_CLANG_CFLAGS_arm64 += \
   -fno-vectorize
 
-# Colorize clang compiler warnings.
-art_clang_cflags := -fcolor-diagnostics
-
 # Warn about thread safety violations with clang.
-art_clang_cflags += -Wthread-safety
+art_clang_cflags := -Wthread-safety
 
 # Warn if switch fallthroughs aren't annotated.
 art_clang_cflags += -Wimplicit-fallthrough
@@ -220,6 +217,14 @@
 #  -Wmissing-declarations \
 
 
+
+ifdef ART_IMT_SIZE
+  art_cflags += -DIMT_SIZE=$(ART_IMT_SIZE)
+else
+  # Default is 64
+  art_cflags += -DIMT_SIZE=64
+endif
+
 ifeq ($(ART_SMALL_MODE),true)
   art_cflags += -DART_SMALL_MODE=1
 endif
@@ -228,6 +233,10 @@
   art_cflags += -DART_SEA_IR_MODE=1
 endif
 
+ifeq ($(ART_USE_OPTIMIZING_COMPILER),true)
+  art_cflags += -DART_USE_OPTIMIZING_COMPILER=1
+endif
+
 # Cflags for non-debug ART and ART tools.
 art_non_debug_cflags := \
   -O3
@@ -236,6 +245,7 @@
 art_debug_cflags := \
   -O2 \
   -DDYNAMIC_ANNOTATIONS_ENABLED=1 \
+  -DVIXL_DEBUG \
   -UNDEBUG
 
 art_host_non_debug_cflags := $(art_non_debug_cflags)
@@ -249,16 +259,21 @@
   art_target_non_debug_cflags += -Wframe-larger-than=1728
 endif
 
-ifndef LIBART_IMG_HOST_BASE_ADDRESS
-  $(error LIBART_IMG_HOST_BASE_ADDRESS unset)
-endif
-ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
-ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
+# DALVIK_VM_LIB will be empty for VM-less builds. Avoid $(error) calls here because
+# LIBART_IMG_XXX variables won't be defined.
+ifneq ($(DALVIK_VM_LIB),)
+  ifndef LIBART_IMG_HOST_BASE_ADDRESS
+    $(error LIBART_IMG_HOST_BASE_ADDRESS unset)
+  endif
+  ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS)
+  ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default
 
-ifndef LIBART_IMG_TARGET_BASE_ADDRESS
-  $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
+  ifndef LIBART_IMG_TARGET_BASE_ADDRESS
+    $(error LIBART_IMG_TARGET_BASE_ADDRESS unset)
+  endif
+  ART_TARGET_CFLAGS += $(art_cflags) \
+                       -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
 endif
-ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS)
 
 ART_HOST_NON_DEBUG_CFLAGS := $(art_host_non_debug_cflags)
 ART_TARGET_NON_DEBUG_CFLAGS := $(art_target_non_debug_cflags)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 7e28b37..10b0400 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -368,7 +368,7 @@
   ifeq ($$(art_target_or_host),target)
     $$(eval $$(call set-target-local-clang-vars))
     $$(eval $$(call set-target-local-cflags-vars,debug))
-    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixl
+    LOCAL_SHARED_LIBRARIES += libdl libicuuc libicui18n libnativehelper libz libcutils libvixld
     LOCAL_MODULE_PATH_32 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $$(ART_TARGET_NATIVETEST_OUT)/$$(ART_TARGET_ARCH_64)
     LOCAL_MULTILIB := both
@@ -404,7 +404,7 @@
   else # host
     LOCAL_CLANG := $$(ART_HOST_CLANG)
     LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) $$(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixl
+    LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index e8b363b..523d143 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -44,6 +44,9 @@
   core_pic_infix :=
   core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY)
 
+  ifeq ($(1),default)
+    core_compile_options += --compiler-backend=Quick
+  endif
   ifeq ($(1),optimizing)
     core_compile_options += --compiler-backend=Optimizing
     # With the optimizing compiler, we want to rerun dex2oat whenever there is
@@ -137,6 +140,9 @@
   core_pic_infix :=
   core_dex2oat_dependency := $(DEX2OAT_DEPENDENCY)
 
+  ifeq ($(1),default)
+    core_compile_options += --compiler-backend=Quick
+  endif
   ifeq ($(1),optimizing)
     ifeq ($($(3)TARGET_ARCH),arm64)
       # TODO: Enable image generation on arm64 once the backend
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 84176a1..70c7e52 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -279,7 +279,11 @@
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
   # Vixl assembly support for ARM64 targets.
-  LOCAL_SHARED_LIBRARIES += libvixl
+  ifeq ($$(art_ndebug_or_debug),debug)
+    LOCAL_SHARED_LIBRARIES += libvixld
+  else
+    LOCAL_SHARED_LIBRARIES += libvixl
+  endif
   ifeq ($$(art_target_or_host),target)
     # For atrace.
     LOCAL_SHARED_LIBRARIES += libcutils
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 4d377df..3b3170e 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -614,7 +614,6 @@
   kFixupVLoad,             // FP load which *may* be pc-relative.
   kFixupCBxZ,              // Cbz, Cbnz.
   kFixupTBxZ,              // Tbz, Tbnz.
-  kFixupPushPop,           // Not really pc relative, but changes size based on args.
   kFixupCondBranch,        // Conditional branch
   kFixupT1Branch,          // Thumb1 Unconditional branch
   kFixupT2Branch,          // Thumb2 Unconditional branch
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 1f630f7..55f2abc 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -399,6 +399,28 @@
   return compiler_temp;
 }
 
+static bool EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
+  bool is_taken;
+  switch (opcode) {
+    case Instruction::IF_EQ: is_taken = (src1 == src2); break;
+    case Instruction::IF_NE: is_taken = (src1 != src2); break;
+    case Instruction::IF_LT: is_taken = (src1 < src2); break;
+    case Instruction::IF_GE: is_taken = (src1 >= src2); break;
+    case Instruction::IF_GT: is_taken = (src1 > src2); break;
+    case Instruction::IF_LE: is_taken = (src1 <= src2); break;
+    case Instruction::IF_EQZ: is_taken = (src1 == 0); break;
+    case Instruction::IF_NEZ: is_taken = (src1 != 0); break;
+    case Instruction::IF_LTZ: is_taken = (src1 < 0); break;
+    case Instruction::IF_GEZ: is_taken = (src1 >= 0); break;
+    case Instruction::IF_GTZ: is_taken = (src1 > 0); break;
+    case Instruction::IF_LEZ: is_taken = (src1 <= 0); break;
+    default:
+      LOG(FATAL) << "Unexpected opcode " << opcode;
+      UNREACHABLE();
+  }
+  return is_taken;
+}
+
 /* Do some MIR-level extended basic block optimizations */
 bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
   if (bb->block_type == kDead) {
@@ -424,6 +446,46 @@
       // Look for interesting opcodes, skip otherwise
       Instruction::Code opcode = mir->dalvikInsn.opcode;
       switch (opcode) {
+        case Instruction::IF_EQ:
+        case Instruction::IF_NE:
+        case Instruction::IF_LT:
+        case Instruction::IF_GE:
+        case Instruction::IF_GT:
+        case Instruction::IF_LE:
+          if (!IsConst(mir->ssa_rep->uses[1])) {
+            break;
+          }
+          FALLTHROUGH_INTENDED;
+        case Instruction::IF_EQZ:
+        case Instruction::IF_NEZ:
+        case Instruction::IF_LTZ:
+        case Instruction::IF_GEZ:
+        case Instruction::IF_GTZ:
+        case Instruction::IF_LEZ:
+          // Result known at compile time?
+          if (IsConst(mir->ssa_rep->uses[0])) {
+            int32_t rhs = (mir->ssa_rep->num_uses == 2) ? ConstantValue(mir->ssa_rep->uses[1]) : 0;
+            bool is_taken = EvaluateBranch(opcode, ConstantValue(mir->ssa_rep->uses[0]), rhs);
+            BasicBlockId edge_to_kill = is_taken ? bb->fall_through : bb->taken;
+            if (is_taken) {
+              // Replace with GOTO.
+              bb->fall_through = NullBasicBlockId;
+              mir->dalvikInsn.opcode = Instruction::GOTO;
+              mir->dalvikInsn.vA =
+                  IsInstructionIfCc(opcode) ? mir->dalvikInsn.vC : mir->dalvikInsn.vB;
+            } else {
+              // Make NOP.
+              bb->taken = NullBasicBlockId;
+              mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+            }
+            mir->ssa_rep->num_uses = 0;
+            BasicBlock* successor_to_unlink = GetBasicBlock(edge_to_kill);
+            successor_to_unlink->ErasePredecessor(bb->id);
+            if (successor_to_unlink->predecessors.empty()) {
+              successor_to_unlink->KillUnreachable(this);
+            }
+          }
+          break;
         case Instruction::CMPL_FLOAT:
         case Instruction::CMPL_DOUBLE:
         case Instruction::CMPG_FLOAT:
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 4c7f874..b9d9a11 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -488,6 +488,7 @@
   kThumb2BicRRI8M,   // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2AndRRI8M,   // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2OrrRRI8M,   // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+  kThumb2OrnRRI8M,   // orn rd, rn, #<const> [11110] i [000110] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2EorRRI8M,   // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2AddRRI8M,   // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
   kThumb2AdcRRI8M,   // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 4e20d76..de93e26 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -671,12 +671,12 @@
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 4, kFixupPushPop),
+                 | IS_LOAD, "pop", "<!0R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2Push,          0xe92d0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4, kFixupPushPop),
+                 | IS_STORE, "push", "<!0R>", 4, kFixupNone),
     ENCODING_MAP(kThumb2CmpRI8M, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -788,6 +788,10 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
                  "orr", "!0C, !1C, #!2m", 4, kFixupNone),
+    ENCODING_MAP(kThumb2OrnRRI8M,  0xf0600000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "orn", "!0C, !1C, #!2m", 4, kFixupNone),
     ENCODING_MAP(kThumb2EorRRI8M,  0xf0800000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
@@ -1396,31 +1400,6 @@
           }
           break;
         }
-        case kFixupPushPop: {
-          if (__builtin_popcount(lir->operands[0]) == 1) {
-            /*
-             * The standard push/pop multiple instruction
-             * requires at least two registers in the list.
-             * If we've got just one, switch to the single-reg
-             * encoding.
-             */
-            lir->opcode = (lir->opcode == kThumb2Push) ? kThumb2Push1 :
-                kThumb2Pop1;
-            int reg = 0;
-            while (lir->operands[0]) {
-              if (lir->operands[0] & 0x1) {
-                break;
-              } else {
-                reg++;
-                lir->operands[0] >>= 1;
-              }
-            }
-            lir->operands[0] = reg;
-            // This won't change again, don't bother unlinking, just reset fixup kind
-            lir->flags.fixup = kFixupNone;
-          }
-          break;
-        }
         case kFixupCondBranch: {
           LIR *target_lir = lir->target;
           int32_t delta = 0;
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index f15d707..99b2166 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -347,7 +347,20 @@
     }
   }
   /* Spill core callee saves */
-  NewLIR1(kThumb2Push, core_spill_mask_);
+  if (core_spill_mask_ == 0u) {
+    // Nothing to spill.
+  } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_LR.GetRegNum()))) == 0u) {
+    // Spilling only low regs and/or LR, use 16-bit PUSH.
+    constexpr int lr_bit_shift = rs_rARM_LR.GetRegNum() - 8;
+    NewLIR1(kThumbPush,
+            (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) |
+            ((core_spill_mask_ & (1u << rs_rARM_LR.GetRegNum())) >> lr_bit_shift));
+  } else if (IsPowerOfTwo(core_spill_mask_)) {
+    // kThumb2Push cannot be used to spill a single register.
+    NewLIR1(kThumb2Push1, CTZ(core_spill_mask_));
+  } else {
+    NewLIR1(kThumb2Push, core_spill_mask_);
+  }
   /* Need to spill any FP regs? */
   if (num_fp_spills_) {
     /*
@@ -444,13 +457,26 @@
   if (num_fp_spills_) {
     NewLIR1(kThumb2VPopCS, num_fp_spills_);
   }
-  if (core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) {
+  if ((core_spill_mask_ & (1 << rs_rARM_LR.GetRegNum())) != 0) {
     /* Unspill rARM_LR to rARM_PC */
     core_spill_mask_ &= ~(1 << rs_rARM_LR.GetRegNum());
     core_spill_mask_ |= (1 << rs_rARM_PC.GetRegNum());
   }
-  NewLIR1(kThumb2Pop, core_spill_mask_);
-  if (!(core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum()))) {
+  if (core_spill_mask_ == 0u) {
+    // Nothing to unspill.
+  } else if ((core_spill_mask_ & ~(0xffu | (1u << rs_rARM_PC.GetRegNum()))) == 0u) {
+    // Unspilling only low regs and/or PC, use 16-bit POP.
+    constexpr int pc_bit_shift = rs_rARM_PC.GetRegNum() - 8;
+    NewLIR1(kThumbPop,
+            (core_spill_mask_ & ~(1u << rs_rARM_PC.GetRegNum())) |
+            ((core_spill_mask_ & (1u << rs_rARM_PC.GetRegNum())) >> pc_bit_shift));
+  } else if (IsPowerOfTwo(core_spill_mask_)) {
+    // kThumb2Pop cannot be used to unspill a single register.
+    NewLIR1(kThumb2Pop1, CTZ(core_spill_mask_));
+  } else {
+    NewLIR1(kThumb2Pop, core_spill_mask_);
+  }
+  if ((core_spill_mask_ & (1 << rs_rARM_PC.GetRegNum())) == 0) {
     /* We didn't pop to rARM_PC, so must do a bv rARM_LR */
     NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
   }
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index e8d0c32..0bc4c3b 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -250,10 +250,11 @@
     int EncodeShift(int code, int amount);
     int ModifiedImmediate(uint32_t value);
     ArmConditionCode ArmConditionEncoding(ConditionCode code);
-    bool InexpensiveConstantInt(int32_t value);
-    bool InexpensiveConstantFloat(int32_t value);
-    bool InexpensiveConstantLong(int64_t value);
-    bool InexpensiveConstantDouble(int64_t value);
+    bool InexpensiveConstantInt(int32_t value) OVERRIDE;
+    bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE;
+    bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
+    bool InexpensiveConstantLong(int64_t value) OVERRIDE;
+    bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
     RegStorage AllocPreservedDouble(int s_reg);
     RegStorage AllocPreservedSingle(int s_reg);
 
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index e38dbf5..1a7b439 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -592,7 +592,6 @@
 
 // Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
 bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
-  GetEasyMultiplyOp(lit, &ops[0]);
   if (GetEasyMultiplyOp(lit, &ops[0])) {
     ops[1].op = kOpInvalid;
     ops[1].shift = 0;
@@ -1162,6 +1161,7 @@
   // Check for destructive overlap
   if (rl_result.reg.GetLowReg() == rl_src.reg.GetHighReg()) {
     RegStorage t_reg = AllocTemp();
+    OpRegCopy(t_reg, rl_result.reg.GetLow());
     OpRegRegReg(kOpSub, rl_result.reg.GetLow(), z_reg, rl_src.reg.GetLow());
     OpRegRegReg(kOpSbc, rl_result.reg.GetHigh(), z_reg, t_reg);
     FreeTemp(t_reg);
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 0c7812b..36d065f 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -97,31 +97,11 @@
   return load_pc_rel;
 }
 
-static int LeadingZeros(uint32_t val) {
-  uint32_t alt;
-  int32_t n;
-  int32_t count;
-
-  count = 16;
-  n = 32;
-  do {
-    alt = val >> count;
-    if (alt != 0) {
-      n = n - count;
-      val = alt;
-    }
-    count >>= 1;
-  } while (count);
-  return n - val;
-}
-
 /*
  * Determine whether value can be encoded as a Thumb2 modified
  * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
  */
 int ArmMir2Lir::ModifiedImmediate(uint32_t value) {
-  int32_t z_leading;
-  int32_t z_trailing;
   uint32_t b0 = value & 0xff;
 
   /* Note: case of value==0 must use 0:000:0:0000000 encoding */
@@ -135,8 +115,8 @@
   if (value == ((b0 << 24) | (b0 << 8)))
     return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
   /* Can we do it with rotation? */
-  z_leading = LeadingZeros(value);
-  z_trailing = 32 - LeadingZeros(~value & (value - 1));
+  int z_leading = CLZ(value);
+  int z_trailing = CTZ(value);
   /* A run of eight or fewer active bits? */
   if ((z_leading + z_trailing) < 24)
     return -1;  /* No - bail */
@@ -152,6 +132,71 @@
   return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
 }
 
+bool ArmMir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
+  switch (opcode) {
+    case Instruction::ADD_INT:
+    case Instruction::ADD_INT_2ADDR:
+    case Instruction::SUB_INT:
+    case Instruction::SUB_INT_2ADDR:
+      if ((value >> 12) == (value >> 31)) {  // Signed 12-bit, RRI12 versions of ADD/SUB.
+        return true;
+      }
+      FALLTHROUGH_INTENDED;
+    case Instruction::IF_EQ:
+    case Instruction::IF_NE:
+    case Instruction::IF_LT:
+    case Instruction::IF_GE:
+    case Instruction::IF_GT:
+    case Instruction::IF_LE:
+      return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(-value) >= 0);
+    case Instruction::SHL_INT:
+    case Instruction::SHL_INT_2ADDR:
+    case Instruction::SHR_INT:
+    case Instruction::SHR_INT_2ADDR:
+    case Instruction::USHR_INT:
+    case Instruction::USHR_INT_2ADDR:
+      return true;
+    case Instruction::CONST:
+    case Instruction::CONST_4:
+    case Instruction::CONST_16:
+      if ((value >> 16) == 0) {
+        return true;  // movw, 16-bit unsigned.
+      }
+      FALLTHROUGH_INTENDED;
+    case Instruction::AND_INT:
+    case Instruction::AND_INT_2ADDR:
+    case Instruction::AND_INT_LIT16:
+    case Instruction::AND_INT_LIT8:
+    case Instruction::OR_INT:
+    case Instruction::OR_INT_2ADDR:
+    case Instruction::OR_INT_LIT16:
+    case Instruction::OR_INT_LIT8:
+      return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+    case Instruction::XOR_INT:
+    case Instruction::XOR_INT_2ADDR:
+    case Instruction::XOR_INT_LIT16:
+    case Instruction::XOR_INT_LIT8:
+      return (ModifiedImmediate(value) >= 0);
+    case Instruction::MUL_INT:
+    case Instruction::MUL_INT_2ADDR:
+    case Instruction::MUL_INT_LIT8:
+    case Instruction::MUL_INT_LIT16:
+    case Instruction::DIV_INT:
+    case Instruction::DIV_INT_2ADDR:
+    case Instruction::DIV_INT_LIT8:
+    case Instruction::DIV_INT_LIT16:
+    case Instruction::REM_INT:
+    case Instruction::REM_INT_2ADDR:
+    case Instruction::REM_INT_LIT8:
+    case Instruction::REM_INT_LIT16: {
+      EasyMultiplyOp ops[2];
+      return GetEasyMultiplyTwoOps(value, ops);
+    }
+    default:
+      return false;
+  }
+}
+
 bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) {
   return EncodeImmSingle(value) >= 0;
 }
@@ -213,10 +258,7 @@
 }
 
 LIR* ArmMir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
-  // This is kThumb2BCond instead of kThumbBCond for performance reasons. The assembly
-  // time required for a new pass after kThumbBCond is fixed up to kThumb2BCond is
-  // substantial.
-  LIR* branch = NewLIR2(kThumb2BCond, 0 /* offset to be patched */,
+  LIR* branch = NewLIR2(kThumbBCond, 0 /* offset to be patched */,
                         ArmConditionEncoding(cc));
   branch->target = target;
   return branch;
@@ -513,7 +555,7 @@
           op = (op == kOpAdd) ? kOpSub : kOpAdd;
         }
       }
-      if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) {
+      if (mod_imm < 0 && (abs_value >> 12) == 0) {
         // This is deliberately used only if modified immediate encoding is inadequate since
         // we sometimes actually use the flags for small values but not necessarily low regs.
         if (op == kOpAdd)
@@ -545,6 +587,12 @@
     case kOpOr:
       opcode = kThumb2OrrRRI8M;
       alt_opcode = kThumb2OrrRRR;
+      if (mod_imm < 0) {
+        mod_imm = ModifiedImmediate(~value);
+        if (mod_imm >= 0) {
+          opcode = kThumb2OrnRRI8M;
+        }
+      }
       break;
     case kOpAnd:
       if (mod_imm < 0) {
@@ -858,12 +906,12 @@
  */
 LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
                                   OpSize size) {
-  LIR* load = NULL;
-  ArmOpcode opcode = kThumbBkpt;
+  LIR* load = nullptr;
+  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
+  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
   bool short_form = false;
-  bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
-  int encoded_disp = displacement;
+  int scale = 0;  // Used for opcode16 and some indexed loads.
   bool already_generated = false;
   switch (size) {
     case kDouble:
@@ -891,57 +939,45 @@
         already_generated = true;
         break;
       }
+      DCHECK_EQ((displacement & 0x3), 0);
+      scale = 2;
       if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
           (displacement >= 0)) {
         short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrPcRel;
+        opcode16 = kThumbLdrPcRel;
       } else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
                  (displacement >= 0)) {
         short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrSpRel;
-      } else if (all_low && displacement < 128 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x3), 0);
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbLdrRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrRRI12;
+        opcode16 = kThumbLdrSpRel;
+      } else {
+        short_form = all_low && (displacement >> (5 + scale)) == 0;
+        opcode16 = kThumbLdrRRI5;
+        opcode32 = kThumb2LdrRRI12;
       }
       break;
     case kUnsignedHalf:
-      if (all_low && displacement < 64 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x1), 0);
-        short_form = true;
-        encoded_disp >>= 1;
-        opcode = kThumbLdrhRRI5;
-      } else if (displacement < 4092 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumb2LdrhRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      scale = 1;
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbLdrhRRI5;
+      opcode32 = kThumb2LdrhRRI12;
       break;
     case kSignedHalf:
-      if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrshRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      scale = 1;
+      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
+      opcode32 = kThumb2LdrshRRI12;
       break;
     case kUnsignedByte:
-      if (all_low && displacement < 32 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumbLdrbRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrbRRI12;
-      }
+      DCHECK_EQ(scale, 0);  // Keep scale = 0.
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbLdrbRRI5;
+      opcode32 = kThumb2LdrbRRI12;
       break;
     case kSignedByte:
-      if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2LdrsbRRI12;
-      }
+      DCHECK_EQ(scale, 0);  // Keep scale = 0.
+      DCHECK_EQ(opcode16, kThumbBkpt);  // Not available.
+      opcode32 = kThumb2LdrsbRRI12;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
@@ -949,12 +985,33 @@
 
   if (!already_generated) {
     if (short_form) {
-      load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+      load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale);
+    } else if ((displacement >> 12) == 0) {  // Thumb2 form.
+      load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement);
+    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
+        InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
+      // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can
+      // actually do it in two because we know that the kOpAdd is a single insn. On the
+      // other hand, we introduce an extra dependency, so this is not necessarily faster.
+      if (opcode16 != kThumbBkpt && r_dest.Low8() &&
+          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
+        // We can use the 16-bit Thumb opcode for the load.
+        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale));
+        load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f);
+      } else {
+        DCHECK_NE(opcode32, kThumbBkpt);
+        OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff);
+        load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff);
+      }
     } else {
+      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
+          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
+        scale = 0;  // Prefer unscaled indexing if the same number of insns.
+      }
       RegStorage reg_offset = AllocTemp();
-      LoadConstant(reg_offset, encoded_disp);
+      LoadConstant(reg_offset, displacement >> scale);
       DCHECK(!r_dest.IsFloat());
-      load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
+      load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size);
       FreeTemp(reg_offset);
     }
   }
@@ -1000,12 +1057,12 @@
 
 LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
                                    OpSize size) {
-  LIR* store = NULL;
-  ArmOpcode opcode = kThumbBkpt;
+  LIR* store = nullptr;
+  ArmOpcode opcode16 = kThumbBkpt;  // 16-bit Thumb opcode.
+  ArmOpcode opcode32 = kThumbBkpt;  // 32-bit Thumb2 opcode.
   bool short_form = false;
-  bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
-  int encoded_disp = displacement;
+  int scale = 0;  // Used for opcode16 and some indexed loads.
   bool already_generated = false;
   switch (size) {
     case kDouble:
@@ -1037,53 +1094,67 @@
         already_generated = true;
         break;
       }
+      DCHECK_EQ((displacement & 0x3), 0);
+      scale = 2;
       if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
         short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbStrSpRel;
-      } else if (all_low && displacement < 128 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x3), 0);
-        short_form = true;
-        encoded_disp >>= 2;
-        opcode = kThumbStrRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrRRI12;
+        opcode16 = kThumbStrSpRel;
+      } else {
+        short_form = all_low && (displacement >> (5 + scale)) == 0;
+        opcode16 = kThumbStrRRI5;
+        opcode32 = kThumb2StrRRI12;
       }
       break;
     case kUnsignedHalf:
     case kSignedHalf:
-      if (all_low && displacement < 64 && displacement >= 0) {
-        DCHECK_EQ((displacement & 0x1), 0);
-        short_form = true;
-        encoded_disp >>= 1;
-        opcode = kThumbStrhRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrhRRI12;
-      }
+      DCHECK_EQ((displacement & 0x1), 0);
+      scale = 1;
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbStrhRRI5;
+      opcode32 = kThumb2StrhRRI12;
       break;
     case kUnsignedByte:
     case kSignedByte:
-      if (all_low && displacement < 32 && displacement >= 0) {
-        short_form = true;
-        opcode = kThumbStrbRRI5;
-      } else if (thumb2Form) {
-        short_form = true;
-        opcode = kThumb2StrbRRI12;
-      }
+      DCHECK_EQ(scale, 0);  // Keep scale = 0.
+      short_form = all_low && (displacement >> (5 + scale)) == 0;
+      opcode16 = kThumbStrbRRI5;
+      opcode32 = kThumb2StrbRRI12;
       break;
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
   if (!already_generated) {
     if (short_form) {
-      store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
-    } else {
+      store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale);
+    } else if ((displacement >> 12) == 0) {
+      store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement);
+    } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
+        InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
+      // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can
+      // actually do it in two because we know that the kOpAdd is a single insn. On the
+      // other hand, we introduce an extra dependency, so this is not necessarily faster.
       RegStorage r_scratch = AllocTemp();
-      LoadConstant(r_scratch, encoded_disp);
+      if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() &&
+          InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
+        // We can use the 16-bit Thumb opcode for the load.
+        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale));
+        store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(),
+                        (displacement >> scale) & 0x1f);
+      } else {
+        DCHECK_NE(opcode32, kThumbBkpt);
+        OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff);
+        store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff);
+      }
+      FreeTemp(r_scratch);
+    } else {
+      if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
+          (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
+        scale = 0;  // Prefer unscaled indexing if the same number of insns.
+      }
+      RegStorage r_scratch = AllocTemp();
+      LoadConstant(r_scratch, displacement >> scale);
       DCHECK(!r_src.IsFloat());
-      store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+      store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size);
       FreeTemp(r_scratch);
     }
   }
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 1cde01e..0021754 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -775,6 +775,10 @@
         ": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
     native_gc_map_builder.AddEntry(native_offset, references);
   }
+
+  // Maybe not necessary, but this could help prevent errors where we access the verified method
+  // after it has been deleted.
+  mir_graph_->GetCurrentDexCompilationUnit()->ClearVerifiedMethod();
 }
 
 /* Determine the offset of each literal field */
@@ -922,28 +926,6 @@
   NewLIR1(kPseudoDalvikByteCodeBoundary, WrapPointer(ArenaStrdup(inst_str)));
 }
 
-bool Mir2Lir::EvaluateBranch(Instruction::Code opcode, int32_t src1, int32_t src2) {
-  bool is_taken;
-  switch (opcode) {
-    case Instruction::IF_EQ: is_taken = (src1 == src2); break;
-    case Instruction::IF_NE: is_taken = (src1 != src2); break;
-    case Instruction::IF_LT: is_taken = (src1 < src2); break;
-    case Instruction::IF_GE: is_taken = (src1 >= src2); break;
-    case Instruction::IF_GT: is_taken = (src1 > src2); break;
-    case Instruction::IF_LE: is_taken = (src1 <= src2); break;
-    case Instruction::IF_EQZ: is_taken = (src1 == 0); break;
-    case Instruction::IF_NEZ: is_taken = (src1 != 0); break;
-    case Instruction::IF_LTZ: is_taken = (src1 < 0); break;
-    case Instruction::IF_GEZ: is_taken = (src1 >= 0); break;
-    case Instruction::IF_GTZ: is_taken = (src1 > 0); break;
-    case Instruction::IF_LEZ: is_taken = (src1 <= 0); break;
-    default:
-      LOG(FATAL) << "Unexpected opcode " << opcode;
-      UNREACHABLE();
-  }
-  return is_taken;
-}
-
 // Convert relation of src1/src2 to src2/src1
 ConditionCode Mir2Lir::FlipComparisonOrder(ConditionCode before) {
   ConditionCode res;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 9be8719..774176e 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -322,6 +322,12 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
+  rl_src = UpdateLocWide(rl_src);
+  rl_src = NarrowRegLoc(rl_src);
+  StoreValue(rl_dest, rl_src);
+}
+
 void Mir2Lir::GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_src) {
   rl_src = LoadValue(rl_src, kCoreReg);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 0d30927..320c0f4 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -657,24 +657,12 @@
     case Instruction::IF_GT:
     case Instruction::IF_LE: {
       LIR* taken = &label_list[bb->taken];
-      // Result known at compile time?
-      if (rl_src[0].is_const && rl_src[1].is_const) {
-        bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg),
-                                       mir_graph_->ConstantValue(rl_src[1].orig_sreg));
-        BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
-        if (mir_graph_->IsBackedge(bb, target_id) &&
-            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
-          GenSuspendTest(opt_flags);
-        }
-        OpUnconditionalBranch(&label_list[target_id]);
-      } else {
-        if (mir_graph_->IsBackwardsBranch(bb) &&
-            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
-             !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
-          GenSuspendTest(opt_flags);
-        }
-        GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken);
+      if (mir_graph_->IsBackwardsBranch(bb) &&
+          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+           !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
+        GenSuspendTest(opt_flags);
       }
+      GenCompareAndBranch(opcode, rl_src[0], rl_src[1], taken);
       break;
     }
     case Instruction::IF_EQZ:
@@ -684,23 +672,12 @@
     case Instruction::IF_GTZ:
     case Instruction::IF_LEZ: {
       LIR* taken = &label_list[bb->taken];
-      // Result known at compile time?
-      if (rl_src[0].is_const) {
-        bool is_taken = EvaluateBranch(opcode, mir_graph_->ConstantValue(rl_src[0].orig_sreg), 0);
-        BasicBlockId target_id = is_taken ? bb->taken : bb->fall_through;
-        if (mir_graph_->IsBackedge(bb, target_id) &&
-            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, target_id))) {
-          GenSuspendTest(opt_flags);
-        }
-        OpUnconditionalBranch(&label_list[target_id]);
-      } else {
-        if (mir_graph_->IsBackwardsBranch(bb) &&
-            (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
-             !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
-          GenSuspendTest(opt_flags);
-        }
-        GenCompareZeroAndBranch(opcode, rl_src[0], taken);
+      if (mir_graph_->IsBackwardsBranch(bb) &&
+          (kLeafOptimization || !mir_graph_->HasSuspendTestBetween(bb, bb->taken) ||
+           !mir_graph_->HasSuspendTestBetween(bb, bb->fall_through))) {
+        GenSuspendTest(opt_flags);
       }
+      GenCompareZeroAndBranch(opcode, rl_src[0], taken);
       break;
     }
 
@@ -956,9 +933,7 @@
       break;
 
     case Instruction::LONG_TO_INT:
-      rl_src[0] = UpdateLocWide(rl_src[0]);
-      rl_src[0] = NarrowRegLoc(rl_src[0]);
-      StoreValue(rl_dest, rl_src[0]);
+      GenLongToInt(rl_dest, rl_src[0]);
       break;
 
     case Instruction::INT_TO_BYTE:
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 6847717..5d78a6e 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -666,7 +666,6 @@
     void MarkBoundary(DexOffset offset, const char* inst_str);
     void NopLIR(LIR* lir);
     void UnlinkLIR(LIR* lir);
-    bool EvaluateBranch(Instruction::Code opcode, int src1, int src2);
     bool IsInexpensiveConstant(RegLocation rl_src);
     ConditionCode FlipComparisonOrder(ConditionCode before);
     ConditionCode NegateComparison(ConditionCode before);
@@ -811,6 +810,7 @@
                              LIR* taken);
     void GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken);
     virtual void GenIntToLong(RegLocation rl_dest, RegLocation rl_src);
+    virtual void GenLongToInt(RegLocation rl_dest, RegLocation rl_src);
     void GenIntNarrowing(Instruction::Code opcode, RegLocation rl_dest,
                          RegLocation rl_src);
     void GenNewArray(uint32_t type_idx, RegLocation rl_dest,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 09e1482..9cb0bf5 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -90,6 +90,7 @@
                        OpSize size) OVERRIDE;
   LIR* LoadConstantNoClobber(RegStorage r_dest, int value);
   LIR* LoadConstantWide(RegStorage r_dest, int64_t value);
+  void GenLongToInt(RegLocation rl_dest, RegLocation rl_src);
   LIR* StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
                      OpSize size, VolatileKind is_volatile) OVERRIDE;
   LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale,
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index a063ce1..80cdc83 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -3213,6 +3213,26 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenLongToInt(RegLocation rl_dest, RegLocation rl_src) {
+  rl_src = UpdateLocWide(rl_src);
+  rl_src = NarrowRegLoc(rl_src);
+  StoreValue(rl_dest, rl_src);
+
+  if (cu_->target64) {
+    // if src and dest are in the same phys reg then StoreValue generates
+    // no operation but we need explicit 32-bit mov R, R to clear
+    // the higher 32-bits
+    rl_dest = UpdateLoc(rl_dest);
+    if (rl_src.location == kLocPhysReg && rl_dest.location == kLocPhysReg
+           && IsSameReg(rl_src.reg, rl_dest.reg)) {
+        LIR* copy_lir = OpRegCopyNoInsert(rl_dest.reg, rl_dest.reg);
+        // remove nop flag set by OpRegCopyNoInsert if src == dest
+        copy_lir->flags.is_nop = false;
+        AppendLIR(copy_lir);
+    }
+  }
+}
+
 void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                         RegLocation rl_src1, RegLocation rl_shift) {
   if (!cu_->target64) {
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 4929b5b..932a532 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -84,6 +84,15 @@
   return (it != verified_methods_.end()) ? it->second : nullptr;
 }
 
+void VerificationResults::RemoveVerifiedMethod(MethodReference ref) {
+  WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+  auto it = verified_methods_.find(ref);
+  if (it != verified_methods_.end()) {
+    delete it->second;
+    verified_methods_.erase(it);
+  }
+}
+
 void VerificationResults::AddRejectedClass(ClassReference ref) {
   {
     WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 0e7923f..7fc2a23 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -48,6 +48,7 @@
 
     const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
         LOCKS_EXCLUDED(verified_methods_lock_);
+    void RemoveVerifiedMethod(MethodReference ref) LOCKS_EXCLUDED(verified_methods_lock_);
 
     void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
     bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index ebf7874..3a91b08 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -21,7 +21,6 @@
 
 #include "dex/compiler_ir.h"
 #include "dex_compilation_unit.h"
-#include "field_helper.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class_loader.h"
@@ -134,10 +133,9 @@
       } else {
         // Search dex file for localized ssb index, may fail if field's class is a parent
         // of the class mentioned in the dex file and there is no dex cache entry.
-        StackHandleScope<1> hs(Thread::Current());
+        std::string temp;
         const DexFile::StringId* string_id =
-            dex_file->FindStringId(
-                FieldHelper(hs.NewHandle(resolved_field)).GetDeclaringClassDescriptor());
+            dex_file->FindStringId(resolved_field->GetDeclaringClass()->GetDescriptor(&temp));
         if (string_id != nullptr) {
           const DexFile::TypeId* type_id =
              dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 2e9f835..e427471 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -19,9 +19,14 @@
 #define ATRACE_TAG ATRACE_TAG_DALVIK
 #include <utils/Trace.h>
 
+#include <unordered_set>
 #include <vector>
 #include <unistd.h>
 
+#ifndef __APPLE__
+#include <malloc.h>  // For mallinfo
+#endif
+
 #include "base/stl_util.h"
 #include "base/timing_logger.h"
 #include "class_linker.h"
@@ -53,6 +58,7 @@
 #include "ScopedLocalRef.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
+#include "thread_list.h"
 #include "thread_pool.h"
 #include "trampolines/trampoline_compiler.h"
 #include "transaction.h"
@@ -495,6 +501,7 @@
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
   std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+  VLOG(compiler) << "Before precompile " << GetMemoryUsageString();
   PreCompile(class_loader, dex_files, thread_pool.get(), timings);
   Compile(class_loader, dex_files, thread_pool.get(), timings);
   if (dump_stats_) {
@@ -591,20 +598,25 @@
 void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                                 ThreadPool* thread_pool, TimingLogger* timings) {
   LoadImageClasses(timings);
+  VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString();
 
   Resolve(class_loader, dex_files, thread_pool, timings);
+  VLOG(compiler) << "Resolve: " << GetMemoryUsageString();
 
   if (!compiler_options_->IsVerificationEnabled()) {
-    LOG(INFO) << "Verify none mode specified, skipping verification.";
+    VLOG(compiler) << "Verify none mode specified, skipping verification.";
     SetVerified(class_loader, dex_files, thread_pool, timings);
     return;
   }
 
   Verify(class_loader, dex_files, thread_pool, timings);
+  VLOG(compiler) << "Verify: " << GetMemoryUsageString();
 
   InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString();
 
   UpdateImageClasses(timings);
+  VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString();
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
@@ -626,10 +638,10 @@
   }
 }
 
-static void ResolveExceptionsForMethod(MutableMethodHelper* mh,
+static void ResolveExceptionsForMethod(MutableHandle<mirror::ArtMethod> method_handle,
     std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  const DexFile::CodeItem* code_item = mh->GetMethod()->GetCodeItem();
+  const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
   if (code_item == nullptr) {
     return;  // native or abstract method
   }
@@ -649,10 +661,10 @@
       uint16_t encoded_catch_handler_handlers_type_idx =
           DecodeUnsignedLeb128(&encoded_catch_handler_list);
       // Add to set of types to resolve if not already in the dex cache resolved types
-      if (!mh->GetMethod()->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
+      if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
         exceptions_to_resolve.insert(
             std::pair<uint16_t, const DexFile*>(encoded_catch_handler_handlers_type_idx,
-                                                mh->GetMethod()->GetDexFile()));
+                                                method_handle->GetDexFile()));
       }
       // ignore address associated with catch handler
       DecodeUnsignedLeb128(&encoded_catch_handler_list);
@@ -669,14 +681,14 @@
   std::set<std::pair<uint16_t, const DexFile*>>* exceptions_to_resolve =
       reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg);
   StackHandleScope<1> hs(Thread::Current());
-  MutableMethodHelper mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
+  MutableHandle<mirror::ArtMethod> method_handle(hs.NewHandle<mirror::ArtMethod>(nullptr));
   for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
-    mh.ChangeMethod(c->GetVirtualMethod(i));
-    ResolveExceptionsForMethod(&mh, *exceptions_to_resolve);
+    method_handle.Assign(c->GetVirtualMethod(i));
+    ResolveExceptionsForMethod(method_handle, *exceptions_to_resolve);
   }
   for (size_t i = 0; i < c->NumDirectMethods(); ++i) {
-    mh.ChangeMethod(c->GetDirectMethod(i));
-    ResolveExceptionsForMethod(&mh, *exceptions_to_resolve);
+    method_handle.Assign(c->GetDirectMethod(i));
+    ResolveExceptionsForMethod(method_handle, *exceptions_to_resolve);
   }
   return true;
 }
@@ -782,23 +794,143 @@
   }
 }
 
-void CompilerDriver::FindClinitImageClassesCallback(mirror::Object* object, void* arg) {
-  DCHECK(object != nullptr);
-  DCHECK(arg != nullptr);
-  CompilerDriver* compiler_driver = reinterpret_cast<CompilerDriver*>(arg);
-  StackHandleScope<1> hs(Thread::Current());
-  MaybeAddToImageClasses(hs.NewHandle(object->GetClass()), compiler_driver->image_classes_.get());
-}
+// Keeps all the data for the update together. Also doubles as the reference visitor.
+// Note: we can use object pointers because we suspend all threads.
+class ClinitImageUpdate {
+ public:
+  static ClinitImageUpdate* Create(std::set<std::string>* image_class_descriptors, Thread* self,
+                                   ClassLinker* linker, std::string* error_msg) {
+    std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(image_class_descriptors, self,
+                                                                 linker));
+    if (res->art_method_class_ == nullptr) {
+      *error_msg = "Could not find ArtMethod class.";
+      return nullptr;
+    } else if (res->dex_cache_class_ == nullptr) {
+      *error_msg = "Could not find DexCache class.";
+      return nullptr;
+    }
+
+    return res.release();
+  }
+
+  ~ClinitImageUpdate() {
+    // Allow others to suspend again.
+    self_->EndAssertNoThreadSuspension(old_cause_);
+  }
+
+  // Visitor for VisitReferences.
+  void operator()(mirror::Object* object, MemberOffset field_offset, bool /* is_static */) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    mirror::Object* ref = object->GetFieldObject<mirror::Object>(field_offset);
+    if (ref != nullptr) {
+      VisitClinitClassesObject(ref);
+    }
+  }
+
+  // java.lang.Reference visitor for VisitReferences.
+  void operator()(mirror::Class* /* klass */, mirror::Reference* /* ref */) const {
+  }
+
+  void Walk() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    // Use the initial classes as roots for a search.
+    for (mirror::Class* klass_root : image_classes_) {
+      VisitClinitClassesObject(klass_root);
+    }
+  }
+
+ private:
+  ClinitImageUpdate(std::set<std::string>* image_class_descriptors, Thread* self,
+                    ClassLinker* linker)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
+      image_class_descriptors_(image_class_descriptors), self_(self) {
+    CHECK(linker != nullptr);
+    CHECK(image_class_descriptors != nullptr);
+
+    // Make sure nobody interferes with us.
+    old_cause_ = self->StartAssertNoThreadSuspension("Boot image closure");
+
+    // Find the interesting classes.
+    art_method_class_ = linker->LookupClass(self, "Ljava/lang/reflect/ArtMethod;",
+        ComputeModifiedUtf8Hash("Ljava/lang/reflect/ArtMethod;"), nullptr);
+    dex_cache_class_ = linker->LookupClass(self, "Ljava/lang/DexCache;",
+        ComputeModifiedUtf8Hash("Ljava/lang/DexCache;"), nullptr);
+
+    // Find all the already-marked classes.
+    WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+    linker->VisitClasses(FindImageClasses, this);
+  }
+
+  static bool FindImageClasses(mirror::Class* klass, void* arg)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    ClinitImageUpdate* data = reinterpret_cast<ClinitImageUpdate*>(arg);
+    std::string temp;
+    const char* name = klass->GetDescriptor(&temp);
+    if (data->image_class_descriptors_->find(name) != data->image_class_descriptors_->end()) {
+      data->image_classes_.push_back(klass);
+    }
+
+    return true;
+  }
+
+  void VisitClinitClassesObject(mirror::Object* object) const
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    DCHECK(object != nullptr);
+    if (marked_objects_.find(object) != marked_objects_.end()) {
+      // Already processed.
+      return;
+    }
+
+    // Mark it.
+    marked_objects_.insert(object);
+
+    if (object->IsClass()) {
+      // If it is a class, add it.
+      StackHandleScope<1> hs(self_);
+      MaybeAddToImageClasses(hs.NewHandle(object->AsClass()), image_class_descriptors_);
+    } else {
+      // Else visit the object's class.
+      VisitClinitClassesObject(object->GetClass());
+    }
+
+    // If it is not a dex cache or an ArtMethod, visit all references.
+    mirror::Class* klass = object->GetClass();
+    if (klass != art_method_class_ && klass != dex_cache_class_) {
+      object->VisitReferences<false /* visit class */>(*this, *this);
+    }
+  }
+
+  mutable std::unordered_set<mirror::Object*> marked_objects_;
+  std::set<std::string>* const image_class_descriptors_;
+  std::vector<mirror::Class*> image_classes_;
+  const mirror::Class* art_method_class_;
+  const mirror::Class* dex_cache_class_;
+  Thread* const self_;
+  const char* old_cause_;
+
+  DISALLOW_COPY_AND_ASSIGN(ClinitImageUpdate);
+};
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
   if (IsImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
-    // Update image_classes_ with classes for objects created by <clinit> methods.
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    // TODO: Image spaces only?
-    ScopedObjectAccess soa(Thread::Current());
-    WriterMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
-    heap->VisitObjects(FindClinitImageClassesCallback, this);
+
+    Runtime* current = Runtime::Current();
+
+    // Suspend all threads.
+    current->GetThreadList()->SuspendAll();
+
+    std::string error_msg;
+    std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(image_classes_.get(),
+                                                                        Thread::Current(),
+                                                                        current->GetClassLinker(),
+                                                                        &error_msg));
+    CHECK(update.get() != nullptr) << error_msg;  // TODO: Soft failure?
+
+    // Do the marking.
+    update->Walk();
+
+    // Resume threads.
+    current->GetThreadList()->ResumeAll();
   }
 }
 
@@ -1194,11 +1326,10 @@
     target_method->dex_method_index = method->GetDexMethodIndex();
   } else {
     if (no_guarantee_of_dex_cache_entry) {
-      StackHandleScope<1> hs(Thread::Current());
-      MethodHelper mh(hs.NewHandle(method));
       // See if the method is also declared in this dex cache.
-      uint32_t dex_method_idx = mh.FindDexMethodIndexInOtherDexFile(
-          *target_method->dex_file, target_method->dex_method_index);
+      uint32_t dex_method_idx =
+          method->FindDexMethodIndexInOtherDexFile(*target_method->dex_file,
+                                                   target_method->dex_method_index);
       if (dex_method_idx != DexFile::kDexNoIndex) {
         target_method->dex_method_index = dex_method_idx;
       } else {
@@ -1820,6 +1951,12 @@
               mirror::Throwable* exception = soa.Self()->GetException(&throw_location);
               VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
                   << exception->Dump();
+              std::ostream* file_log = manager->GetCompiler()->
+                  GetCompilerOptions().GetInitFailureOutput();
+              if (file_log != nullptr) {
+                *file_log << descriptor << "\n";
+                *file_log << exception->Dump() << "\n";
+              }
               soa.Self()->ClearException();
               transaction.Abort();
               CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
@@ -1875,6 +2012,7 @@
     CHECK(dex_file != nullptr);
     CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
+  VLOG(compiler) << "Compile: " << GetMemoryUsageString();
 }
 
 void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, size_t class_def_index) {
@@ -2001,6 +2139,7 @@
                                    bool compilation_enabled) {
   CompiledMethod* compiled_method = nullptr;
   uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
+  MethodReference method_ref(&dex_file, method_idx);
 
   if ((access_flags & kAccNative) != 0) {
     // Are we interpreting only and have support for generic JNI down calls?
@@ -2014,7 +2153,6 @@
   } else if ((access_flags & kAccAbstract) != 0) {
     // Abstract methods don't have code.
   } else {
-    MethodReference method_ref(&dex_file, method_idx);
     bool compile = compilation_enabled &&
                    verification_results_->IsCandidateForCompilation(method_ref, access_flags);
     if (compile) {
@@ -2051,16 +2189,18 @@
     // When compiling with PIC, there should be zero non-relative linker patches
     CHECK(!compile_pic || non_relative_linker_patch_count == 0u);
 
-    MethodReference ref(&dex_file, method_idx);
-    DCHECK(GetCompiledMethod(ref) == nullptr) << PrettyMethod(method_idx, dex_file);
+    DCHECK(GetCompiledMethod(method_ref) == nullptr) << PrettyMethod(method_idx, dex_file);
     {
       MutexLock mu(self, compiled_methods_lock_);
-      compiled_methods_.Put(ref, compiled_method);
+      compiled_methods_.Put(method_ref, compiled_method);
       non_relative_linker_patch_count_ += non_relative_linker_patch_count;
     }
-    DCHECK(GetCompiledMethod(ref) != nullptr) << PrettyMethod(method_idx, dex_file);
+    DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file);
   }
 
+  // Done compiling, delete the verified method to reduce native memory usage.
+  verification_results_->RemoveVerifiedMethod(method_ref);
+
   if (self->IsExceptionPending()) {
     ScopedObjectAccess soa(self);
     LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
@@ -2210,4 +2350,21 @@
   }
   return !compile;
 }
+
+std::string CompilerDriver::GetMemoryUsageString() const {
+  std::ostringstream oss;
+  const ArenaPool* arena_pool = GetArenaPool();
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
+  oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+#ifdef HAVE_MALLOC_H
+  struct mallinfo info = mallinfo();
+  const size_t allocated_space = static_cast<size_t>(info.uordblks);
+  const size_t free_space = static_cast<size_t>(info.fordblks);
+  oss << " native alloc=" << PrettySize(allocated_space) << " free="
+      << PrettySize(free_space);
+#endif
+  return oss.str();
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 437a1a9..615e0d0 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,6 +39,7 @@
 #include "thread_pool.h"
 #include "utils/arena_allocator.h"
 #include "utils/dedupe_set.h"
+#include "dex/verified_method.h"
 
 namespace art {
 
@@ -340,6 +341,9 @@
   ArenaPool* GetArenaPool() {
     return &arena_pool_;
   }
+  const ArenaPool* GetArenaPool() const {
+    return &arena_pool_;
+  }
 
   bool WriteElf(const std::string& android_root,
                 bool is_host,
@@ -395,6 +399,9 @@
   // Should the compiler run on this method given profile information?
   bool SkipCompilation(const std::string& method_name);
 
+  // Get memory usage during compilation.
+  std::string GetMemoryUsageString() const;
+
  private:
   // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
   // The only external contract is that unresolved method has flags 0 and resolved non-0.
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 0592f0c..aec7d24 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_
 #define ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_
 
+#include <ostream>
 #include <string>
 #include <vector>
 
@@ -70,7 +71,8 @@
 #ifdef ART_SEA_IR_MODE
     sea_ir_mode_(false),
 #endif
-    verbose_methods_(nullptr) {
+    verbose_methods_(nullptr),
+    init_failure_output_(nullptr) {
   }
 
   CompilerOptions(CompilerFilter compiler_filter,
@@ -90,7 +92,8 @@
 #ifdef ART_SEA_IR_MODE
                   bool sea_ir_mode,
 #endif
-                  const std::vector<std::string>* verbose_methods
+                  const std::vector<std::string>* verbose_methods,
+                  std::ostream* init_failure_output
                   ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -109,7 +112,8 @@
 #ifdef ART_SEA_IR_MODE
     sea_ir_mode_(sea_ir_mode),
 #endif
-    verbose_methods_(verbose_methods) {
+    verbose_methods_(verbose_methods),
+    init_failure_output_(init_failure_output) {
   }
 
   CompilerFilter GetCompilerFilter() const {
@@ -217,6 +221,10 @@
     return false;
   }
 
+  std::ostream* GetInitFailureOutput() const {
+    return init_failure_output_;
+  }
+
  private:
   CompilerFilter compiler_filter_;
   const size_t huge_method_threshold_;
@@ -241,6 +249,9 @@
   // Vector of methods to have verbose output enabled for.
   const std::vector<std::string>* const verbose_methods_;
 
+  // Log initialization of initialization failures to this stream if not null.
+  std::ostream* const init_failure_output_;
+
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
 };
 std::ostream& operator<<(std::ostream& os, const CompilerOptions::CompilerFilter& rhs);
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 84f5799..03ae489 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -102,6 +102,10 @@
     return verified_method_;
   }
 
+  void ClearVerifiedMethod() {
+    verified_method_ = nullptr;
+  }
+
   const std::string& GetSymbol();
 
  private:
@@ -117,7 +121,7 @@
   const uint16_t class_def_idx_;
   const uint32_t dex_method_idx_;
   const uint32_t access_flags_;
-  const VerifiedMethod* const verified_method_;
+  const VerifiedMethod* verified_method_;
 
   std::string symbol_;
 };
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 3b1d914..ab5c6c7 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -77,6 +77,7 @@
     Thread::Current()->TransitionFromSuspendedToRunnable();
     PruneNonImageClasses();  // Remove junk
     ComputeLazyFieldsForImageClasses();  // Add useful information
+    ProcessStrings();
     Thread::Current()->TransitionFromRunnableToSuspended(kNative);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -561,9 +562,9 @@
     bool is_prefix = false;
     if (it != existing_strings.end()) {
       CHECK_LE(length, it->second);
-      is_prefix = std::equal(combined_chars.begin() + it->first,
-                             combined_chars.begin() + it->first + it->second,
-                             combined_chars.begin() + new_string.first);
+      is_prefix = std::equal(combined_chars.begin() + new_string.first,
+                             combined_chars.begin() + new_string.first + new_string.second,
+                             combined_chars.begin() + it->first);
     }
     if (is_prefix) {
       // Shares a prefix, set the offset to where the new offset will be.
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index a57f892..8a7abb4 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -366,6 +366,8 @@
     Offset offset(mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
         kArm64PointerSize).Int32Value());
     assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+    // Ensure we emit the literal pool.
+    assembler.EmitSlowPaths();
     std::vector<uint8_t> thunk_code(assembler.CodeSize());
     MemoryRegion code(thunk_code.data(), thunk_code.size());
     assembler.FinalizeInstructions(code);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index b261460..eb6181c 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -155,6 +155,37 @@
   current_block_ = nullptr;
 }
 
+static bool ShouldSkipCompilation(const CompilerDriver& compiler_driver,
+                                  const DexCompilationUnit& dex_compilation_unit,
+                                  size_t number_of_dex_instructions,
+                                  size_t number_of_blocks ATTRIBUTE_UNUSED,
+                                  size_t number_of_branches) {
+  const CompilerOptions& compiler_options = compiler_driver.GetCompilerOptions();
+  CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
+  if (compiler_filter == CompilerOptions::kEverything) {
+    return false;
+  }
+
+  if (compiler_options.IsHugeMethod(number_of_dex_instructions)) {
+    LOG(INFO) << "Skip compilation of huge method "
+              << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(),
+                              *dex_compilation_unit.GetDexFile())
+              << ": " << number_of_dex_instructions << " dex instructions";
+    return true;
+  }
+
+  // If it's large and contains no branches, it's likely to be machine generated initialization.
+  if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) {
+    LOG(INFO) << "Skip compilation of large method with no branch "
+              << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(),
+                              *dex_compilation_unit.GetDexFile())
+              << ": " << number_of_dex_instructions << " dex instructions";
+    return true;
+  }
+
+  return false;
+}
+
 HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
   const uint16_t* code_ptr = code_item.insns_;
   const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
@@ -171,9 +202,27 @@
   InitializeLocals(code_item.registers_size_);
   graph_->UpdateMaximumNumberOfOutVRegs(code_item.outs_size_);
 
+  // Compute the number of dex instructions, blocks, and branches. We will
+  // check these values against limits given to the compiler.
+  size_t number_of_dex_instructions = 0;
+  size_t number_of_blocks = 0;
+  size_t number_of_branches = 0;
+
   // To avoid splitting blocks, we compute ahead of time the instructions that
   // start a new block, and create these blocks.
-  ComputeBranchTargets(code_ptr, code_end);
+  ComputeBranchTargets(
+      code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches);
+
+  // Note that the compiler driver is null when unit testing.
+  if (compiler_driver_ != nullptr) {
+    if (ShouldSkipCompilation(*compiler_driver_,
+                              *dex_compilation_unit_,
+                              number_of_dex_instructions,
+                              number_of_blocks,
+                              number_of_branches)) {
+      return nullptr;
+    }
+  }
 
   // Also create blocks for catch handlers.
   if (code_item.tries_size_ != 0) {
@@ -232,7 +281,11 @@
   current_block_ = block;
 }
 
-void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end) {
+void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
+                                         const uint16_t* code_end,
+                                         size_t* number_of_dex_instructions,
+                                         size_t* number_of_blocks,
+                                         size_t* number_of_branches) {
   // TODO: Support switch instructions.
   branch_targets_.SetSize(code_end - code_ptr);
 
@@ -245,19 +298,23 @@
   // the locations these instructions branch to.
   size_t dex_pc = 0;
   while (code_ptr < code_end) {
+    (*number_of_dex_instructions)++;
     const Instruction& instruction = *Instruction::At(code_ptr);
     if (instruction.IsBranch()) {
+      (*number_of_branches)++;
       int32_t target = instruction.GetTargetOffset() + dex_pc;
       // Create a block for the target instruction.
       if (FindBlockStartingAt(target) == nullptr) {
         block = new (arena_) HBasicBlock(graph_, target);
         branch_targets_.Put(target, block);
+        (*number_of_blocks)++;
       }
       dex_pc += instruction.SizeInCodeUnits();
       code_ptr += instruction.SizeInCodeUnits();
       if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
         block = new (arena_) HBasicBlock(graph_, dex_pc);
         branch_targets_.Put(dex_pc, block);
+        (*number_of_blocks)++;
       }
     } else {
       code_ptr += instruction.SizeInCodeUnits();
@@ -313,6 +370,15 @@
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
+void HGraphBuilder::Binop_23x_cmp(const Instruction& instruction,
+                                  Primitive::Type type,
+                                  HCompare::Bias bias) {
+  HInstruction* first = LoadLocal(instruction.VRegB(), type);
+  HInstruction* second = LoadLocal(instruction.VRegC(), type);
+  current_block_->AddInstruction(new (arena_) HCompare(type, first, second, bias));
+  UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+}
+
 template<typename T>
 void HGraphBuilder::Binop_12x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegA(), type);
@@ -414,38 +480,36 @@
   bool is_instance_call = invoke_type != kStatic;
   const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
 
-  HInvoke* invoke = nullptr;
-  if (invoke_type == kVirtual || invoke_type == kInterface || invoke_type == kSuper) {
-    MethodReference target_method(dex_file_, method_idx);
-    uintptr_t direct_code;
-    uintptr_t direct_method;
-    int table_index;
-    InvokeType optimized_invoke_type = invoke_type;
-    compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
-                                        &optimized_invoke_type, &target_method, &table_index,
-                                        &direct_code, &direct_method);
-    if (table_index == -1) {
-      return false;
-    }
+  MethodReference target_method(dex_file_, method_idx);
+  uintptr_t direct_code;
+  uintptr_t direct_method;
+  int table_index;
+  InvokeType optimized_invoke_type = invoke_type;
 
-    if (optimized_invoke_type == kVirtual) {
-      invoke = new (arena_) HInvokeVirtual(
-          arena_, number_of_arguments, return_type, dex_pc, table_index);
-    } else if (optimized_invoke_type == kInterface) {
-      invoke = new (arena_) HInvokeInterface(
-          arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
-    } else if (optimized_invoke_type == kDirect) {
-      // For this compiler, sharpening only works if we compile PIC.
-      DCHECK(compiler_driver_->GetCompilerOptions().GetCompilePic());
-      // Treat invoke-direct like static calls for now.
-      invoke = new (arena_) HInvokeStatic(
-          arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index);
-    }
+  if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
+                                           &optimized_invoke_type, &target_method, &table_index,
+                                           &direct_code, &direct_method)) {
+    LOG(INFO) << "Did not compile " << PrettyMethod(method_idx, *dex_file_)
+              << " because a method call could not be resolved";
+    return false;
+  }
+  DCHECK(optimized_invoke_type != kSuper);
+
+  HInvoke* invoke = nullptr;
+  if (optimized_invoke_type == kVirtual) {
+    invoke = new (arena_) HInvokeVirtual(
+        arena_, number_of_arguments, return_type, dex_pc, table_index);
+  } else if (optimized_invoke_type == kInterface) {
+    invoke = new (arena_) HInvokeInterface(
+        arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
   } else {
-    DCHECK(invoke_type == kDirect || invoke_type == kStatic);
+    DCHECK(optimized_invoke_type == kDirect || optimized_invoke_type == kStatic);
+    // Sharpening to kDirect only works if we compile PIC.
+    DCHECK((optimized_invoke_type == invoke_type) || (optimized_invoke_type != kDirect)
+           || compiler_driver_->GetCompilerOptions().GetCompilePic());
     // Treat invoke-direct like static calls for now.
     invoke = new (arena_) HInvokeStatic(
-        arena_, number_of_arguments, return_type, dex_pc, method_idx);
+        arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index);
   }
 
   size_t start_index = 0;
@@ -1034,6 +1098,21 @@
       break;
     }
 
+    case Instruction::LONG_TO_FLOAT: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat);
+      break;
+    }
+
+    case Instruction::LONG_TO_DOUBLE: {
+      Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble);
+      break;
+    }
+
+    case Instruction::FLOAT_TO_INT: {
+      Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt);
+      break;
+    }
+
     case Instruction::INT_TO_BYTE: {
       Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte);
       break;
@@ -1492,7 +1571,27 @@
       break;
 
     case Instruction::CMP_LONG: {
-      Binop_23x<HCompare>(instruction, Primitive::kPrimLong);
+      Binop_23x_cmp(instruction, Primitive::kPrimLong, HCompare::kNoBias);
+      break;
+    }
+
+    case Instruction::CMPG_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kGtBias);
+      break;
+    }
+
+    case Instruction::CMPG_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kGtBias);
+      break;
+    }
+
+    case Instruction::CMPL_FLOAT: {
+      Binop_23x_cmp(instruction, Primitive::kPrimFloat, HCompare::kLtBias);
+      break;
+    }
+
+    case Instruction::CMPL_DOUBLE: {
+      Binop_23x_cmp(instruction, Primitive::kPrimDouble, HCompare::kLtBias);
       break;
     }
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 204005d..8519bcb 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -80,7 +80,13 @@
 
   // Finds all instructions that start a new block, and populates branch_targets_ with
   // the newly created blocks.
-  void ComputeBranchTargets(const uint16_t* start, const uint16_t* end);
+  // As a side effect, also compute the number of dex instructions, blocks, and
+  // branches.
+  void ComputeBranchTargets(const uint16_t* start,
+                            const uint16_t* end,
+                            size_t* number_of_dex_instructions,
+                            size_t* number_of_block,
+                            size_t* number_of_branches);
   void MaybeUpdateCurrentBlock(size_t index);
   HBasicBlock* FindBlockStartingAt(int32_t index) const;
 
@@ -107,6 +113,8 @@
   template<typename T>
   void Binop_23x_shift(const Instruction& instruction, Primitive::Type type);
 
+  void Binop_23x_cmp(const Instruction& instruction, Primitive::Type type, HCompare::Bias bias);
+
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0b59327..e581af2 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -71,11 +71,7 @@
     }
   }
   GenerateSlowPaths();
-
-  size_t code_size = GetAssembler()->CodeSize();
-  uint8_t* buffer = allocator->Allocate(code_size);
-  MemoryRegion code(buffer, code_size);
-  GetAssembler()->FinalizeInstructions(code);
+  Finalize(allocator);
 }
 
 void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
@@ -97,9 +93,13 @@
     }
   }
   GenerateSlowPaths();
+  Finalize(allocator);
+}
 
+void CodeGenerator::Finalize(CodeAllocator* allocator) {
   size_t code_size = GetAssembler()->CodeSize();
   uint8_t* buffer = allocator->Allocate(code_size);
+
   MemoryRegion code(buffer, code_size);
   GetAssembler()->FinalizeInstructions(code);
 }
@@ -228,7 +228,8 @@
       DCHECK(!blocked_fpu_registers_[loc.reg()]);
       blocked_fpu_registers_[loc.reg()] = true;
     } else {
-      DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister);
+      DCHECK(loc.GetPolicy() == Location::kRequiresRegister
+             || loc.GetPolicy() == Location::kRequiresFpuRegister);
     }
   }
 
@@ -259,10 +260,21 @@
   for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
     Location loc = locations->GetTemp(i);
     if (loc.IsUnallocated()) {
-      DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister);
-      // TODO: Adjust handling of temps. We currently consider temps to use
-      // core registers. They may also use floating point registers at some point.
-      loc = AllocateFreeRegister(Primitive::kPrimInt);
+      switch (loc.GetPolicy()) {
+        case Location::kRequiresRegister:
+          // Allocate a core register (large enough to fit a 32-bit integer).
+          loc = AllocateFreeRegister(Primitive::kPrimInt);
+          break;
+
+        case Location::kRequiresFpuRegister:
+          // Allocate a core register (large enough to fit a 64-bit double).
+          loc = AllocateFreeRegister(Primitive::kPrimDouble);
+          break;
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << loc.GetPolicy();
+      }
       locations->SetTempAt(i, loc);
     }
   }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index f906eb8..7c8f6a2 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,6 +30,14 @@
 static size_t constexpr kVRegSize = 4;
 static size_t constexpr kUninitializedFrameSize = 0;
 
+// Binary encoding of 2^32 for type double.
+static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
+// Binary encoding of 2^31 for type double.
+static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
+
+// Maximum value for a primitive integer.
+static int32_t constexpr kPrimIntMax = 0x7fffffff;
+
 class Assembler;
 class CodeGenerator;
 class DexCompilationUnit;
@@ -85,6 +93,7 @@
   }
 
   virtual void Initialize() = 0;
+  virtual void Finalize(CodeAllocator* allocator);
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(HBasicBlock* block) = 0;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 890cfdd..448a5a0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -495,7 +495,8 @@
         codegen_(codegen) {}
 
 void CodeGeneratorARM::GenerateFrameEntry() {
-  bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+  bool skip_overflow_check =
+      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
   if (!skip_overflow_check) {
     if (kExplicitStackOverflowCheck) {
       SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
@@ -655,26 +656,26 @@
   }
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
-      __ Mov(destination.As<Register>(), source.As<Register>());
+      __ Mov(destination.AsRegister<Register>(), source.AsRegister<Register>());
     } else if (source.IsFpuRegister()) {
-      __ vmovrs(destination.As<Register>(), source.As<SRegister>());
+      __ vmovrs(destination.AsRegister<Register>(), source.AsFpuRegister<SRegister>());
     } else {
-      __ LoadFromOffset(kLoadWord, destination.As<Register>(), SP, source.GetStackIndex());
+      __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(), SP, source.GetStackIndex());
     }
   } else if (destination.IsFpuRegister()) {
     if (source.IsRegister()) {
-      __ vmovsr(destination.As<SRegister>(), source.As<Register>());
+      __ vmovsr(destination.AsFpuRegister<SRegister>(), source.AsRegister<Register>());
     } else if (source.IsFpuRegister()) {
-      __ vmovs(destination.As<SRegister>(), source.As<SRegister>());
+      __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
     } else {
-      __ LoadSFromOffset(destination.As<SRegister>(), SP, source.GetStackIndex());
+      __ LoadSFromOffset(destination.AsFpuRegister<SRegister>(), SP, source.GetStackIndex());
     }
   } else {
     DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
-      __ StoreToOffset(kStoreWord, source.As<Register>(), SP, destination.GetStackIndex());
+      __ StoreToOffset(kStoreWord, source.AsRegister<Register>(), SP, destination.GetStackIndex());
     } else if (source.IsFpuRegister()) {
-      __ StoreSToOffset(source.As<SRegister>(), SP, destination.GetStackIndex());
+      __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
     } else {
       DCHECK(source.IsStackSlot()) << source;
       __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
@@ -689,19 +690,25 @@
   }
   if (destination.IsRegisterPair()) {
     if (source.IsRegisterPair()) {
-      __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
-      __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+      EmitParallelMoves(
+          Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
     } else if (source.IsQuickParameter()) {
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
       InvokeDexCallingConvention calling_convention;
-      __ Mov(destination.AsRegisterPairLow<Register>(),
-             calling_convention.GetRegisterAt(register_index));
-      __ LoadFromOffset(kLoadWord, destination.AsRegisterPairHigh<Register>(),
-             SP, calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize());
+      EmitParallelMoves(
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+          Location::StackSlot(
+              calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
     } else {
+      // No conflict possible, so just do the moves.
       DCHECK(source.IsDoubleStackSlot());
       if (destination.AsRegisterPairLow<Register>() == R1) {
         DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2);
@@ -725,22 +732,21 @@
     uint16_t register_index = destination.GetQuickParameterRegisterIndex();
     uint16_t stack_index = destination.GetQuickParameterStackIndex();
     if (source.IsRegisterPair()) {
-      __ Mov(calling_convention.GetRegisterAt(register_index),
-             source.AsRegisterPairLow<Register>());
-      __ StoreToOffset(kStoreWord, source.AsRegisterPairHigh<Register>(),
-             SP, calling_convention.GetStackOffsetOf(stack_index + 1));
+      UNIMPLEMENTED(FATAL);
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ LoadFromOffset(
-          kLoadWord, calling_convention.GetRegisterAt(register_index), SP, source.GetStackIndex());
-      __ LoadFromOffset(kLoadWord, R0, SP, source.GetHighStackIndex(kArmWordSize));
-      __ StoreToOffset(kStoreWord, R0, SP, calling_convention.GetStackOffsetOf(stack_index + 1));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
+          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
     }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegisterPair()) {
+      // No conflict possible, so just do the moves.
       if (source.AsRegisterPairLow<Register>() == R1) {
         DCHECK_EQ(source.AsRegisterPairHigh<Register>(), R2);
         __ StoreToOffset(kStoreWord, R1, SP, destination.GetStackIndex());
@@ -753,21 +759,24 @@
       InvokeDexCallingConvention calling_convention;
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
+      // Just move the low part. The only time a source is a quick parameter is
+      // when moving the parameter to its stack locations. And the (Java) caller
+      // of this method has already done that.
       __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index),
-             SP, destination.GetStackIndex());
-      __ LoadFromOffset(kLoadWord, R0,
-             SP, calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize());
-      __ StoreToOffset(kStoreWord, R0, SP, destination.GetHighStackIndex(kArmWordSize));
+                       SP, destination.GetStackIndex());
+      DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
+                static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize)));
     } else if (source.IsFpuRegisterPair()) {
       __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
                         SP,
                         destination.GetStackIndex());
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
-      __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
-      __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
-      __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::StackSlot(destination.GetStackIndex()),
+          Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
+          Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)));
     }
   }
 }
@@ -783,7 +792,7 @@
     if (const_to_move->IsIntConstant()) {
       int32_t value = const_to_move->AsIntConstant()->GetValue();
       if (location.IsRegister()) {
-        __ LoadImmediate(location.As<Register>(), value);
+        __ LoadImmediate(location.AsRegister<Register>(), value);
       } else {
         DCHECK(location.IsStackSlot());
         __ LoadImmediate(IP, value);
@@ -933,27 +942,27 @@
     if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
       // Condition has been materialized, compare the output to 0
       DCHECK(if_instr->GetLocations()->InAt(0).IsRegister());
-      __ cmp(if_instr->GetLocations()->InAt(0).As<Register>(),
+      __ cmp(if_instr->GetLocations()->InAt(0).AsRegister<Register>(),
              ShifterOperand(0));
       __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()), NE);
     } else {
       // Condition has not been materialized, use its inputs as the
       // comparison and its condition as the branch condition.
       LocationSummary* locations = cond->GetLocations();
+      Register left = locations->InAt(0).AsRegister<Register>();
       if (locations->InAt(1).IsRegister()) {
-        __ cmp(locations->InAt(0).As<Register>(),
-               ShifterOperand(locations->InAt(1).As<Register>()));
+        __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
       } else {
         DCHECK(locations->InAt(1).IsConstant());
         int32_t value =
             locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
         ShifterOperand operand;
-        if (ShifterOperand::CanHoldArm(value, &operand)) {
-          __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(value));
+        if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) {
+          __ cmp(left, operand);
         } else {
           Register temp = IP;
           __ LoadImmediate(temp, value);
-          __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(temp));
+          __ cmp(left, ShifterOperand(temp));
         }
       }
       __ b(codegen_->GetLabelOf(if_instr->IfTrueSuccessor()),
@@ -979,27 +988,27 @@
 
 void InstructionCodeGeneratorARM::VisitCondition(HCondition* comp) {
   if (!comp->NeedsMaterialization()) return;
-
   LocationSummary* locations = comp->GetLocations();
+  Register left = locations->InAt(0).AsRegister<Register>();
+
   if (locations->InAt(1).IsRegister()) {
-    __ cmp(locations->InAt(0).As<Register>(),
-           ShifterOperand(locations->InAt(1).As<Register>()));
+    __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
   } else {
     DCHECK(locations->InAt(1).IsConstant());
     int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
     ShifterOperand operand;
-    if (ShifterOperand::CanHoldArm(value, &operand)) {
-      __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(value));
+    if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) {
+      __ cmp(left, operand);
     } else {
       Register temp = IP;
       __ LoadImmediate(temp, value);
-      __ cmp(locations->InAt(0).As<Register>(), ShifterOperand(temp));
+      __ cmp(left, ShifterOperand(temp));
     }
   }
   __ it(ARMCondition(comp->GetCondition()), kItElse);
-  __ mov(locations->Out().As<Register>(), ShifterOperand(1),
+  __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
          ARMCondition(comp->GetCondition()));
-  __ mov(locations->Out().As<Register>(), ShifterOperand(0),
+  __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0),
          ARMOppositeCondition(comp->GetCondition()));
 }
 
@@ -1169,7 +1178,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
-  Register temp = invoke->GetLocations()->GetTemp(0).As<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
 
   // TODO: Implement all kinds of calls:
   // 1) boot -> boot
@@ -1216,7 +1225,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  Register temp = invoke->GetLocations()->GetTemp(0).As<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
           invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
   LocationSummary* locations = invoke->GetLocations();
@@ -1227,7 +1236,7 @@
     __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
     __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
   } else {
-    __ LoadFromOffset(kLoadWord, temp, receiver.As<Register>(), class_offset);
+    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   // temp = temp->GetMethodAt(method_offset);
   uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -1249,7 +1258,7 @@
 
 void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).As<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
           (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
   LocationSummary* locations = invoke->GetLocations();
@@ -1257,14 +1266,15 @@
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
   // Set the hidden argument.
-  __ LoadImmediate(invoke->GetLocations()->GetTemp(1).As<Register>(), invoke->GetDexMethodIndex());
+  __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
+                   invoke->GetDexMethodIndex());
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
     __ LoadFromOffset(kLoadWord, temp, temp, class_offset);
   } else {
-    __ LoadFromOffset(kLoadWord, temp, receiver.As<Register>(), class_offset);
+    __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   // temp = temp->GetImtEntryAt(method_offset);
   uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -1308,7 +1318,7 @@
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
       DCHECK(in.IsRegister());
-      __ rsb(out.As<Register>(), in.As<Register>(), ShifterOperand(0));
+      __ rsb(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(0));
       break;
 
     case Primitive::kPrimLong:
@@ -1334,7 +1344,7 @@
 
     case Primitive::kPrimFloat:
       DCHECK(in.IsFpuRegister());
-      __ vnegs(out.As<SRegister>(), in.As<SRegister>());
+      __ vnegs(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>());
       break;
 
     case Primitive::kPrimDouble:
@@ -1353,6 +1363,7 @@
       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
+  DCHECK_NE(result_type, input_type);
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1395,6 +1406,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-int' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1434,7 +1451,6 @@
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
           // Processing a Dex `int-to-char' instruction.
           locations->SetInAt(0, Location::RequiresRegister());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -1458,6 +1474,15 @@
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1481,6 +1506,14 @@
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimFloat:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1504,6 +1537,7 @@
   Location in = locations->InAt(0);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
+  DCHECK_NE(result_type, input_type);
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1511,7 +1545,7 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-byte' instruction.
-          __ sbfx(out.As<Register>(), in.As<Register>(), 0, 8);
+          __ sbfx(out.AsRegister<Register>(), in.AsRegister<Register>(), 0, 8);
           break;
 
         default:
@@ -1526,7 +1560,7 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-short' instruction.
-          __ sbfx(out.As<Register>(), in.As<Register>(), 0, 16);
+          __ sbfx(out.AsRegister<Register>(), in.AsRegister<Register>(), 0, 16);
           break;
 
         default:
@@ -1541,18 +1575,26 @@
           // Processing a Dex `long-to-int' instruction.
           DCHECK(out.IsRegister());
           if (in.IsRegisterPair()) {
-            __ Mov(out.As<Register>(), in.AsRegisterPairLow<Register>());
+            __ Mov(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
           } else if (in.IsDoubleStackSlot()) {
-            __ LoadFromOffset(kLoadWord, out.As<Register>(), SP, in.GetStackIndex());
+            __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), SP, in.GetStackIndex());
           } else {
             DCHECK(in.IsConstant());
             DCHECK(in.GetConstant()->IsLongConstant());
             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
-            __ LoadImmediate(out.As<Register>(), static_cast<int32_t>(value));
+            __ LoadImmediate(out.AsRegister<Register>(), static_cast<int32_t>(value));
           }
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-int' instruction.
+          SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+          __ vmovs(temp, in.AsFpuRegister<SRegister>());
+          __ vcvtis(temp, temp);
+          __ vmovrs(out.AsRegister<Register>(), temp);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1573,7 +1615,7 @@
           // Processing a Dex `int-to-long' instruction.
           DCHECK(out.IsRegisterPair());
           DCHECK(in.IsRegister());
-          __ Mov(out.AsRegisterPairLow<Register>(), in.As<Register>());
+          __ Mov(out.AsRegisterPairLow<Register>(), in.AsRegister<Register>());
           // Sign extension.
           __ Asr(out.AsRegisterPairHigh<Register>(),
                  out.AsRegisterPairLow<Register>(),
@@ -1597,9 +1639,8 @@
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
           // Processing a Dex `int-to-char' instruction.
-          __ ubfx(out.As<Register>(), in.As<Register>(), 0, 16);
+          __ ubfx(out.AsRegister<Register>(), in.AsRegister<Register>(), 0, 16);
           break;
 
         default:
@@ -1615,12 +1656,53 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar: {
           // Processing a Dex `int-to-float' instruction.
-          __ vmovsr(out.As<SRegister>(), in.As<Register>());
-          __ vcvtsi(out.As<SRegister>(), out.As<SRegister>());
+          __ vmovsr(out.AsFpuRegister<SRegister>(), in.AsRegister<Register>());
+          __ vcvtsi(out.AsFpuRegister<SRegister>(), out.AsFpuRegister<SRegister>());
           break;
         }
 
-        case Primitive::kPrimLong:
+        case Primitive::kPrimLong: {
+          // Processing a Dex `long-to-float' instruction.
+          Register low = in.AsRegisterPairLow<Register>();
+          Register high = in.AsRegisterPairHigh<Register>();
+          SRegister output = out.AsFpuRegister<SRegister>();
+          Register constant_low = locations->GetTemp(0).AsRegister<Register>();
+          Register constant_high = locations->GetTemp(1).AsRegister<Register>();
+          SRegister temp1_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>();
+          DRegister temp1_d = FromLowSToD(temp1_s);
+          SRegister temp2_s = locations->GetTemp(3).AsFpuRegisterPairLow<SRegister>();
+          DRegister temp2_d = FromLowSToD(temp2_s);
+
+          // Operations use doubles for precision reasons (each 32-bit
+          // half of a long fits in the 53-bit mantissa of a double,
+          // but not in the 24-bit mantissa of a float).  This is
+          // especially important for the low bits.  The result is
+          // eventually converted to float.
+
+          // temp1_d = int-to-double(high)
+          __ vmovsr(temp1_s, high);
+          __ vcvtdi(temp1_d, temp1_s);
+          // Using vmovd to load the `k2Pow32EncodingForDouble` constant
+          // as an immediate value into `temp2_d` does not work, as
+          // this instruction only transfers 8 significant bits of its
+          // immediate operand.  Instead, use two 32-bit core
+          // registers to load `k2Pow32EncodingForDouble` into
+          // `temp2_d`.
+          __ LoadImmediate(constant_low, Low32Bits(k2Pow32EncodingForDouble));
+          __ LoadImmediate(constant_high, High32Bits(k2Pow32EncodingForDouble));
+          __ vmovdrr(temp2_d, constant_low, constant_high);
+          // temp1_d = temp1_d * 2^32
+          __ vmuld(temp1_d, temp1_d, temp2_d);
+          // temp2_d = unsigned-to-double(low)
+          __ vmovsr(temp2_s, low);
+          __ vcvtdu(temp2_d, temp2_s);
+          // temp1_d = temp1_d + temp2_d
+          __ vaddd(temp1_d, temp1_d, temp2_d);
+          // output = double-to-float(temp1_d);
+          __ vcvtsd(output, temp1_d);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1639,13 +1721,44 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar: {
           // Processing a Dex `int-to-double' instruction.
-          __ vmovsr(out.AsFpuRegisterPairLow<SRegister>(), in.As<Register>());
+          __ vmovsr(out.AsFpuRegisterPairLow<SRegister>(), in.AsRegister<Register>());
           __ vcvtdi(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
                     out.AsFpuRegisterPairLow<SRegister>());
           break;
         }
 
-        case Primitive::kPrimLong:
+        case Primitive::kPrimLong: {
+          // Processing a Dex `long-to-double' instruction.
+          Register low = in.AsRegisterPairLow<Register>();
+          Register high = in.AsRegisterPairHigh<Register>();
+          SRegister out_s = out.AsFpuRegisterPairLow<SRegister>();
+          DRegister out_d = FromLowSToD(out_s);
+          Register constant_low = locations->GetTemp(0).AsRegister<Register>();
+          Register constant_high = locations->GetTemp(1).AsRegister<Register>();
+          SRegister temp_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>();
+          DRegister temp_d = FromLowSToD(temp_s);
+
+          // out_d = int-to-double(high)
+          __ vmovsr(out_s, high);
+          __ vcvtdi(out_d, out_s);
+          // Using vmovd to load the `k2Pow32EncodingForDouble` constant
+          // as an immediate value into `temp_d` does not work, as
+          // this instruction only transfers 8 significant bits of its
+          // immediate operand.  Instead, use two 32-bit core
+          // registers to load `k2Pow32EncodingForDouble` into `temp_d`.
+          __ LoadImmediate(constant_low, Low32Bits(k2Pow32EncodingForDouble));
+          __ LoadImmediate(constant_high, High32Bits(k2Pow32EncodingForDouble));
+          __ vmovdrr(temp_d, constant_low, constant_high);
+          // out_d = out_d * 2^32
+          __ vmuld(out_d, out_d, temp_d);
+          // temp_d = unsigned-to-double(low)
+          __ vmovsr(temp_s, low);
+          __ vcvtdu(temp_d, temp_s);
+          // out_d = out_d + temp_d
+          __ vaddd(out_d, out_d, temp_d);
+          break;
+        }
+
         case Primitive::kPrimFloat:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1697,10 +1810,12 @@
   switch (add->GetResultType()) {
     case Primitive::kPrimInt:
       if (second.IsRegister()) {
-        __ add(out.As<Register>(), first.As<Register>(), ShifterOperand(second.As<Register>()));
+        __ add(out.AsRegister<Register>(),
+               first.AsRegister<Register>(),
+               ShifterOperand(second.AsRegister<Register>()));
       } else {
-        __ AddConstant(out.As<Register>(),
-                       first.As<Register>(),
+        __ AddConstant(out.AsRegister<Register>(),
+                       first.AsRegister<Register>(),
                        second.GetConstant()->AsIntConstant()->GetValue());
       }
       break;
@@ -1715,7 +1830,9 @@
       break;
 
     case Primitive::kPrimFloat:
-      __ vadds(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>());
+      __ vadds(out.AsFpuRegister<SRegister>(),
+               first.AsFpuRegister<SRegister>(),
+               second.AsFpuRegister<SRegister>());
       break;
 
     case Primitive::kPrimDouble:
@@ -1761,10 +1878,12 @@
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ sub(out.As<Register>(), first.As<Register>(), ShifterOperand(second.As<Register>()));
+        __ sub(out.AsRegister<Register>(),
+               first.AsRegister<Register>(),
+               ShifterOperand(second.AsRegister<Register>()));
       } else {
-        __ AddConstant(out.As<Register>(),
-                       first.As<Register>(),
+        __ AddConstant(out.AsRegister<Register>(),
+                       first.AsRegister<Register>(),
                        -second.GetConstant()->AsIntConstant()->GetValue());
       }
       break;
@@ -1781,7 +1900,9 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ vsubs(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>());
+      __ vsubs(out.AsFpuRegister<SRegister>(),
+               first.AsFpuRegister<SRegister>(),
+               second.AsFpuRegister<SRegister>());
       break;
     }
 
@@ -1830,7 +1951,9 @@
   Location second = locations->InAt(1);
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
-      __ mul(out.As<Register>(), first.As<Register>(), second.As<Register>());
+      __ mul(out.AsRegister<Register>(),
+             first.AsRegister<Register>(),
+             second.AsRegister<Register>());
       break;
     }
     case Primitive::kPrimLong: {
@@ -1865,7 +1988,9 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ vmuls(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>());
+      __ vmuls(out.AsFpuRegister<SRegister>(),
+               first.AsFpuRegister<SRegister>(),
+               second.AsFpuRegister<SRegister>());
       break;
     }
 
@@ -1925,7 +2050,9 @@
 
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
-      __ sdiv(out.As<Register>(), first.As<Register>(), second.As<Register>());
+      __ sdiv(out.AsRegister<Register>(),
+              first.AsRegister<Register>(),
+              second.AsRegister<Register>());
       break;
     }
 
@@ -1943,7 +2070,9 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ vdivs(out.As<SRegister>(), first.As<SRegister>(), second.As<SRegister>());
+      __ vdivs(out.AsFpuRegister<SRegister>(),
+               first.AsFpuRegister<SRegister>(),
+               second.AsFpuRegister<SRegister>());
       break;
     }
 
@@ -2002,16 +2131,16 @@
 
   switch (rem->GetResultType()) {
     case Primitive::kPrimInt: {
-      Register reg1 = first.As<Register>();
-      Register reg2 = second.As<Register>();
-      Register temp = locations->GetTemp(0).As<Register>();
+      Register reg1 = first.AsRegister<Register>();
+      Register reg2 = second.AsRegister<Register>();
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
 
       // temp = reg1 / reg2  (integer division)
       // temp = temp * reg2
       // dest = reg1 - temp
       __ sdiv(temp, reg1, reg2);
       __ mul(temp, temp, reg2);
-      __ sub(out.As<Register>(), reg1, ShifterOperand(temp));
+      __ sub(out.AsRegister<Register>(), reg1, ShifterOperand(temp));
       break;
     }
 
@@ -2058,7 +2187,7 @@
   switch (instruction->GetType()) {
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
-        __ cmp(value.As<Register>(), ShifterOperand(0));
+        __ cmp(value.AsRegister<Register>(), ShifterOperand(0));
         __ b(slow_path->GetEntryLabel(), EQ);
       } else {
         DCHECK(value.IsConstant()) << value;
@@ -2127,11 +2256,11 @@
   Primitive::Type type = op->GetResultType();
   switch (type) {
     case Primitive::kPrimInt: {
-      Register out_reg = out.As<Register>();
-      Register first_reg = first.As<Register>();
+      Register out_reg = out.AsRegister<Register>();
+      Register first_reg = first.AsRegister<Register>();
       // Arm doesn't mask the shift count so we need to do it ourselves.
       if (second.IsRegister()) {
-        Register second_reg = second.As<Register>();
+        Register second_reg = second.AsRegister<Register>();
         __ and_(second_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
         if (op->IsShl()) {
           __ Lsl(out_reg, first_reg, second_reg);
@@ -2160,7 +2289,7 @@
       InvokeRuntimeCallingConvention calling_convention;
       DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
       DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
-      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.As<Register>());
+      DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegister<Register>());
       DCHECK_EQ(R0, out.AsRegisterPairLow<Register>());
       DCHECK_EQ(R2, out.AsRegisterPairHigh<Register>());
 
@@ -2270,11 +2399,11 @@
   Location in = locations->InAt(0);
   switch (not_->InputAt(0)->GetType()) {
     case Primitive::kPrimBoolean:
-      __ eor(out.As<Register>(), in.As<Register>(), ShifterOperand(1));
+      __ eor(out.AsRegister<Register>(), in.AsRegister<Register>(), ShifterOperand(1));
       break;
 
     case Primitive::kPrimInt:
-      __ mvn(out.As<Register>(), ShifterOperand(in.As<Register>()));
+      __ mvn(out.AsRegister<Register>(), ShifterOperand(in.AsRegister<Register>()));
       break;
 
     case Primitive::kPrimLong:
@@ -2292,44 +2421,72 @@
 void LocationsBuilderARM::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+  }
 }
 
 void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
   LocationSummary* locations = compare->GetLocations();
-  switch (compare->InputAt(0)->GetType()) {
+  Register out = locations->Out().AsRegister<Register>();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  Label less, greater, done;
+  Primitive::Type type = compare->InputAt(0)->GetType();
+  switch (type) {
     case Primitive::kPrimLong: {
-      Register output = locations->Out().As<Register>();
-      Location left = locations->InAt(0);
-      Location right = locations->InAt(1);
-      Label less, greater, done;
       __ cmp(left.AsRegisterPairHigh<Register>(),
              ShifterOperand(right.AsRegisterPairHigh<Register>()));  // Signed compare.
       __ b(&less, LT);
       __ b(&greater, GT);
-      // Do LoadImmediate before any `cmp`, as LoadImmediate might affect
-      // the status flags.
-      __ LoadImmediate(output, 0);
+      // Do LoadImmediate before any `cmp`, as LoadImmediate might affect the status flags.
+      __ LoadImmediate(out, 0);
       __ cmp(left.AsRegisterPairLow<Register>(),
              ShifterOperand(right.AsRegisterPairLow<Register>()));  // Unsigned compare.
-      __ b(&done, EQ);
-      __ b(&less, CC);
-
-      __ Bind(&greater);
-      __ LoadImmediate(output, 1);
-      __ b(&done);
-
-      __ Bind(&less);
-      __ LoadImmediate(output, -1);
-
-      __ Bind(&done);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      __ LoadImmediate(out, 0);
+      if (type == Primitive::kPrimFloat) {
+        __ vcmps(left.AsFpuRegister<SRegister>(), right.AsFpuRegister<SRegister>());
+      } else {
+        __ vcmpd(FromLowSToD(left.AsFpuRegisterPairLow<SRegister>()),
+                 FromLowSToD(right.AsFpuRegisterPairLow<SRegister>()));
+      }
+      __ vmstat();  // transfer FP status register to ARM APSR.
+      __ b(compare->IsGtBias() ? &greater : &less, VS);  // VS for unordered.
       break;
     }
     default:
-      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+      LOG(FATAL) << "Unexpected compare type " << type;
   }
+  __ b(&done, EQ);
+  __ b(&less, CC);  // CC is for both: unsigned compare for longs and 'less than' for floats.
+
+  __ Bind(&greater);
+  __ LoadImmediate(out, 1);
+  __ b(&done);
+
+  __ Bind(&less);
+  __ LoadImmediate(out, -1);
+
+  __ Bind(&done);
 }
 
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
@@ -2362,32 +2519,32 @@
 
 void InstructionCodeGeneratorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
   Primitive::Type field_type = instruction->GetFieldType();
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ StoreToOffset(kStoreByte, value, obj, offset);
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ StoreToOffset(kStoreHalfword, value, obj, offset);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ StoreToOffset(kStoreWord, value, obj, offset);
       if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
-        Register temp = locations->GetTemp(0).As<Register>();
-        Register card = locations->GetTemp(1).As<Register>();
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        Register card = locations->GetTemp(1).AsRegister<Register>();
         codegen_->MarkGCCard(temp, card, obj, value);
       }
       break;
@@ -2400,7 +2557,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      SRegister value = locations->InAt(1).As<SRegister>();
+      SRegister value = locations->InAt(1).AsFpuRegister<SRegister>();
       __ StoreSToOffset(value, obj, offset);
       break;
     }
@@ -2426,37 +2583,37 @@
 
 void InstructionCodeGeneratorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
       break;
     }
 
     case Primitive::kPrimByte: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
       break;
     }
 
     case Primitive::kPrimShort: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
       break;
     }
 
     case Primitive::kPrimChar: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadWord, out, obj, offset);
       break;
     }
@@ -2469,7 +2626,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      SRegister out = locations->Out().As<SRegister>();
+      SRegister out = locations->Out().AsFpuRegister<SRegister>();
       __ LoadSFromOffset(out, obj, offset);
       break;
     }
@@ -2503,7 +2660,7 @@
   Location obj = locations->InAt(0);
 
   if (obj.IsRegister()) {
-    __ cmp(obj.As<Register>(), ShifterOperand(0));
+    __ cmp(obj.AsRegister<Register>(), ShifterOperand(0));
     __ b(slow_path->GetEntryLabel(), EQ);
   } else {
     DCHECK(obj.IsConstant()) << obj;
@@ -2522,18 +2679,19 @@
 
 void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
         __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>()));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
         __ LoadFromOffset(kLoadUnsignedByte, out, IP, data_offset);
       }
       break;
@@ -2541,12 +2699,13 @@
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
         __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>()));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
         __ LoadFromOffset(kLoadSignedByte, out, IP, data_offset);
       }
       break;
@@ -2554,12 +2713,13 @@
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
         __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_2));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
         __ LoadFromOffset(kLoadSignedHalfword, out, IP, data_offset);
       }
       break;
@@ -2567,12 +2727,13 @@
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
         __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_2));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
         __ LoadFromOffset(kLoadUnsignedHalfword, out, IP, data_offset);
       }
       break;
@@ -2582,12 +2743,13 @@
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
         __ LoadFromOffset(kLoadWord, out, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_4));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
         __ LoadFromOffset(kLoadWord, out, IP, data_offset);
       }
       break;
@@ -2597,10 +2759,11 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Location out = locations->Out();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_8));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
         __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset);
       }
       break;
@@ -2645,7 +2808,7 @@
 
 void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Primitive::Type value_type = instruction->GetComponentType();
   bool needs_runtime_call = locations->WillCall();
@@ -2656,12 +2819,13 @@
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register value = locations->InAt(2).As<Register>();
+      Register value = locations->InAt(2).AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
         __ StoreToOffset(kStoreByte, value, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>()));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>()));
         __ StoreToOffset(kStoreByte, value, IP, data_offset);
       }
       break;
@@ -2670,12 +2834,13 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register value = locations->InAt(2).As<Register>();
+      Register value = locations->InAt(2).AsRegister<Register>();
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
         __ StoreToOffset(kStoreHalfword, value, obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_2));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_2));
         __ StoreToOffset(kStoreHalfword, value, IP, data_offset);
       }
       break;
@@ -2685,24 +2850,27 @@
     case Primitive::kPrimNot: {
       if (!needs_runtime_call) {
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        Register value = locations->InAt(2).As<Register>();
+        Register value = locations->InAt(2).AsRegister<Register>();
         if (index.IsConstant()) {
-          size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
           __ StoreToOffset(kStoreWord, value, obj, offset);
         } else {
           DCHECK(index.IsRegister()) << index;
-          __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_4));
+          __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
           __ StoreToOffset(kStoreWord, value, IP, data_offset);
         }
         if (needs_write_barrier) {
           DCHECK_EQ(value_type, Primitive::kPrimNot);
-          Register temp = locations->GetTemp(0).As<Register>();
-          Register card = locations->GetTemp(1).As<Register>();
+          Register temp = locations->GetTemp(0).AsRegister<Register>();
+          Register card = locations->GetTemp(1).AsRegister<Register>();
           codegen_->MarkGCCard(temp, card, obj, value);
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
-        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject), instruction, instruction->GetDexPc());
+        codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+                                instruction,
+                                instruction->GetDexPc());
       }
       break;
     }
@@ -2711,10 +2879,11 @@
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
       Location value = locations->InAt(2);
       if (index.IsConstant()) {
-        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), obj, offset);
       } else {
-        __ add(IP, obj, ShifterOperand(index.As<Register>(), LSL, TIMES_8));
+        __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8));
         __ StoreToOffset(kStoreWordPair, value.AsRegisterPairLow<Register>(), IP, data_offset);
       }
       break;
@@ -2740,8 +2909,8 @@
 void InstructionCodeGeneratorARM::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
-  Register obj = locations->InAt(0).As<Register>();
-  Register out = locations->Out().As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
   __ LoadFromOffset(kLoadWord, out, obj, offset);
 }
 
@@ -2761,8 +2930,8 @@
       instruction, locations->InAt(0), locations->InAt(1));
   codegen_->AddSlowPath(slow_path);
 
-  Register index = locations->InAt(0).As<Register>();
-  Register length = locations->InAt(1).As<Register>();
+  Register index = locations->InAt(0).AsRegister<Register>();
+  Register length = locations->InAt(1).AsRegister<Register>();
 
   __ cmp(index, ShifterOperand(length));
   __ b(slow_path->GetEntryLabel(), CS);
@@ -2843,15 +3012,15 @@
 
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
-      __ Mov(destination.As<Register>(), source.As<Register>());
+      __ Mov(destination.AsRegister<Register>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
-      __ StoreToOffset(kStoreWord, source.As<Register>(),
+      __ StoreToOffset(kStoreWord, source.AsRegister<Register>(),
                        SP, destination.GetStackIndex());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
-      __ LoadFromOffset(kLoadWord, destination.As<Register>(),
+      __ LoadFromOffset(kLoadWord, destination.AsRegister<Register>(),
                         SP, source.GetStackIndex());
     } else {
       DCHECK(destination.IsStackSlot());
@@ -2863,7 +3032,7 @@
     DCHECK(source.GetConstant()->IsIntConstant());
     int32_t value = source.GetConstant()->AsIntConstant()->GetValue();
     if (destination.IsRegister()) {
-      __ LoadImmediate(destination.As<Register>(), value);
+      __ LoadImmediate(destination.AsRegister<Register>(), value);
     } else {
       DCHECK(destination.IsStackSlot());
       __ LoadImmediate(IP, value);
@@ -2895,15 +3064,15 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    DCHECK_NE(source.As<Register>(), IP);
-    DCHECK_NE(destination.As<Register>(), IP);
-    __ Mov(IP, source.As<Register>());
-    __ Mov(source.As<Register>(), destination.As<Register>());
-    __ Mov(destination.As<Register>(), IP);
+    DCHECK_NE(source.AsRegister<Register>(), IP);
+    DCHECK_NE(destination.AsRegister<Register>(), IP);
+    __ Mov(IP, source.AsRegister<Register>());
+    __ Mov(source.AsRegister<Register>(), destination.AsRegister<Register>());
+    __ Mov(destination.AsRegister<Register>(), IP);
   } else if (source.IsRegister() && destination.IsStackSlot()) {
-    Exchange(source.As<Register>(), destination.GetStackIndex());
+    Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
-    Exchange(destination.As<Register>(), source.GetStackIndex());
+    Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(source.GetStackIndex(), destination.GetStackIndex());
   } else {
@@ -2929,7 +3098,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
-  Register out = cls->GetLocations()->Out().As<Register>();
+  Register out = cls->GetLocations()->Out().AsRegister<Register>();
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
@@ -2969,7 +3138,8 @@
   SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
       check->GetLoadClass(), check, check->GetDexPc(), true);
   codegen_->AddSlowPath(slow_path);
-  GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).As<Register>());
+  GenerateClassInitializationCheck(slow_path,
+                                   check->GetLocations()->InAt(0).AsRegister<Register>());
 }
 
 void InstructionCodeGeneratorARM::GenerateClassInitializationCheck(
@@ -2992,37 +3162,37 @@
 
 void InstructionCodeGeneratorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register cls = locations->InAt(0).As<Register>();
+  Register cls = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadUnsignedByte, out, cls, offset);
       break;
     }
 
     case Primitive::kPrimByte: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadSignedByte, out, cls, offset);
       break;
     }
 
     case Primitive::kPrimShort: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadSignedHalfword, out, cls, offset);
       break;
     }
 
     case Primitive::kPrimChar: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadUnsignedHalfword, out, cls, offset);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ LoadFromOffset(kLoadWord, out, cls, offset);
       break;
     }
@@ -3035,7 +3205,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      SRegister out = locations->Out().As<SRegister>();
+      SRegister out = locations->Out().AsFpuRegister<SRegister>();
       __ LoadSFromOffset(out, cls, offset);
       break;
     }
@@ -3068,32 +3238,32 @@
 
 void InstructionCodeGeneratorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register cls = locations->InAt(0).As<Register>();
+  Register cls = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
   Primitive::Type field_type = instruction->GetFieldType();
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ StoreToOffset(kStoreByte, value, cls, offset);
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ StoreToOffset(kStoreHalfword, value, cls, offset);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ StoreToOffset(kStoreWord, value, cls, offset);
       if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
-        Register temp = locations->GetTemp(0).As<Register>();
-        Register card = locations->GetTemp(1).As<Register>();
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        Register card = locations->GetTemp(1).AsRegister<Register>();
         codegen_->MarkGCCard(temp, card, cls, value);
       }
       break;
@@ -3106,7 +3276,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      SRegister value = locations->InAt(1).As<SRegister>();
+      SRegister value = locations->InAt(1).AsFpuRegister<SRegister>();
       __ StoreSToOffset(value, cls, offset);
       break;
     }
@@ -3133,7 +3303,7 @@
   SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
   codegen_->AddSlowPath(slow_path);
 
-  Register out = load->GetLocations()->Out().As<Register>();
+  Register out = load->GetLocations()->Out().AsRegister<Register>();
   codegen_->LoadCurrentMethod(out);
   __ LoadFromOffset(kLoadWord, out, out, mirror::ArtMethod::DeclaringClassOffset().Int32Value());
   __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
@@ -3150,7 +3320,7 @@
 }
 
 void InstructionCodeGeneratorARM::VisitLoadException(HLoadException* load) {
-  Register out = load->GetLocations()->Out().As<Register>();
+  Register out = load->GetLocations()->Out().AsRegister<Register>();
   int32_t offset = Thread::ExceptionOffset<kArmWordSize>().Int32Value();
   __ LoadFromOffset(kLoadWord, out, TR, offset);
   __ LoadImmediate(IP, 0);
@@ -3181,9 +3351,9 @@
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
-  Register cls = locations->InAt(1).As<Register>();
-  Register out = locations->Out().As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register cls = locations->InAt(1).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Label done, zero;
   SlowPathCodeARM* slow_path = nullptr;
@@ -3228,9 +3398,9 @@
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
-  Register cls = locations->InAt(1).As<Register>();
-  Register temp = locations->GetTemp(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register cls = locations->InAt(1).AsRegister<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
 
   SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
@@ -3292,9 +3462,9 @@
   LocationSummary* locations = instruction->GetLocations();
 
   if (instruction->GetResultType() == Primitive::kPrimInt) {
-    Register first = locations->InAt(0).As<Register>();
-    Register second = locations->InAt(1).As<Register>();
-    Register out = locations->Out().As<Register>();
+    Register first = locations->InAt(0).AsRegister<Register>();
+    Register second = locations->InAt(1).AsRegister<Register>();
+    Register out = locations->Out().AsRegister<Register>();
     if (instruction->IsAnd()) {
       __ and_(out, first, ShifterOperand(second));
     } else if (instruction->IsOr()) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 0fc4307..a61ef2d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -38,7 +38,7 @@
 
 namespace arm64 {
 
-// TODO: clean-up some of the constant definitions.
+static constexpr bool kExplicitStackOverflowCheck = false;
 static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
 static constexpr int kCurrentMethodStackOffset = 0;
 
@@ -167,7 +167,7 @@
   return MemOperand(sp, location.GetStackIndex());
 }
 
-MemOperand HeapOperand(const Register& base, size_t offset) {
+MemOperand HeapOperand(const Register& base, size_t offset = 0) {
   // A heap reference must be 32bit, so fit in a W register.
   DCHECK(base.IsW());
   return MemOperand(base.X(), offset);
@@ -393,6 +393,20 @@
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
 };
 
+class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  StackOverflowCheckSlowPathARM64() {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    __ Bind(GetEntryLabel());
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM64);
+};
+
 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction,
@@ -418,7 +432,6 @@
     return &return_label_;
   }
 
-
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -437,7 +450,7 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     __ Bind(GetEntryLabel());
     __ Brk(__LINE__);  // TODO: Unimplemented TypeCheckSlowPathARM64.
-    __ b(GetExitLabel());
+    __ B(GetExitLabel());
   }
 
  private:
@@ -479,13 +492,30 @@
 #undef __
 #define __ GetVIXLAssembler()->
 
+void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+  // Ensure we emit the literal pool.
+  __ FinalizeCode();
+  CodeGenerator::Finalize(allocator);
+}
+
 void CodeGeneratorARM64::GenerateFrameEntry() {
-  // TODO: Add proper support for the stack overflow check.
-  UseScratchRegisterScope temps(GetVIXLAssembler());
-  Register temp = temps.AcquireX();
-  __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
-  __ Ldr(temp, MemOperand(temp, 0));
-  RecordPcInfo(nullptr, 0);
+  bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
+  if (do_overflow_check) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    Register temp = temps.AcquireX();
+    if (kExplicitStackOverflowCheck) {
+      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64();
+      AddSlowPath(slow_path);
+
+      __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value()));
+      __ Cmp(sp, temp);
+      __ B(lo, slow_path->GetEntryLabel());
+    } else {
+      __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+      __ Ldr(wzr, MemOperand(temp, 0));
+      RecordPcInfo(nullptr, 0);
+    }
+  }
 
   CPURegList preserved_regs = GetFramePreservedRegisters();
   int frame_size = GetFrameSize();
@@ -588,12 +618,12 @@
 void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register card = temps.AcquireX();
-  Register temp = temps.AcquireX();
+  Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
   vixl::Label done;
   __ Cbz(value, &done);
   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
-  __ Strb(card, MemOperand(card, temp));
+  __ Strb(card, MemOperand(card, temp.X()));
   __ Bind(&done);
 }
 
@@ -601,7 +631,7 @@
   // Block reserved registers:
   //   ip0 (VIXL temporary)
   //   ip1 (VIXL temporary)
-  //   xSuspend (Suspend counter)
+  //   tr
   //   lr
   // sp is not part of the allocatable registers, so we don't need to block it.
   // TODO: Avoid blocking callee-saved registers, and instead preserve them
@@ -772,12 +802,14 @@
                                        uint32_t dex_pc) {
   __ Ldr(lr, MemOperand(tr, entry_point_offset));
   __ Blr(lr);
-  RecordPcInfo(instruction, dex_pc);
-  DCHECK(instruction->IsSuspendCheck()
-      || instruction->IsBoundsCheck()
-      || instruction->IsNullCheck()
-      || instruction->IsDivZeroCheck()
-      || !IsLeafMethod());
+  if (instruction != nullptr) {
+    RecordPcInfo(instruction, dex_pc);
+    DCHECK(instruction->IsSuspendCheck()
+        || instruction->IsBoundsCheck()
+        || instruction->IsNullCheck()
+        || instruction->IsDivZeroCheck()
+        || !IsLeafMethod());
+    }
 }
 
 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -787,12 +819,30 @@
   __ Ldr(temp, HeapOperand(class_reg, mirror::Class::StatusOffset()));
   __ Cmp(temp, mirror::Class::kStatusInitialized);
   __ B(lt, slow_path->GetEntryLabel());
-  // Even if the initialized flag is set, we may be in a situation where caches are not synced
-  // properly. Therefore, we do a memory fence.
-  __ Dmb(InnerShareable, BarrierAll);
+  // Even if the initialized flag is set, we need to ensure consistent memory ordering.
+  __ Dmb(InnerShareable, BarrierReads);
   __ Bind(slow_path->GetExitLabel());
 }
 
+void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                         HBasicBlock* successor) {
+  SuspendCheckSlowPathARM64* slow_path =
+    new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+  codegen_->AddSlowPath(slow_path);
+  UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+  Register temp = temps.AcquireW();
+
+  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64WordSize>().SizeValue()));
+  if (successor == nullptr) {
+    __ Cbnz(temp, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ Cbz(temp, codegen_->GetLabelOf(successor));
+    __ B(slow_path->GetEntryLabel());
+    // slow_path will return to GetLabelOf(successor).
+  }
+}
+
 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
                                                              CodeGeneratorARM64* codegen)
       : HGraphVisitor(graph),
@@ -801,10 +851,6 @@
 
 #define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
   M(ParallelMove)                                          \
-  M(Rem)                                                   \
-  M(Shl)                                                   \
-  M(Shr)                                                   \
-  M(UShr)                                                  \
 
 #define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
 
@@ -897,6 +943,63 @@
   }
 }
 
+void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  Primitive::Type type = instr->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift type " << type;
+  }
+}
+
+void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+
+  Primitive::Type type = instr->GetType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      Register dst = OutputRegister(instr);
+      Register lhs = InputRegisterAt(instr, 0);
+      Operand rhs = InputOperandAt(instr, 1);
+      if (rhs.IsImmediate()) {
+        uint32_t shift_value = (type == Primitive::kPrimInt)
+          ? static_cast<uint32_t>(rhs.immediate() & kMaxIntShiftValue)
+          : static_cast<uint32_t>(rhs.immediate() & kMaxLongShiftValue);
+        if (instr->IsShl()) {
+          __ Lsl(dst, lhs, shift_value);
+        } else if (instr->IsShr()) {
+          __ Asr(dst, lhs, shift_value);
+        } else {
+          __ Lsr(dst, lhs, shift_value);
+        }
+      } else {
+        Register rhs_reg = dst.IsX() ? rhs.reg().X() : rhs.reg().W();
+
+        if (instr->IsShl()) {
+          __ Lsl(dst, lhs, rhs_reg);
+        } else if (instr->IsShr()) {
+          __ Asr(dst, lhs, rhs_reg);
+        } else {
+          __ Lsr(dst, lhs, rhs_reg);
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected shift operation type " << type;
+  }
+}
+
 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
   HandleBinaryOp(instruction);
 }
@@ -927,17 +1030,17 @@
   Register obj = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
   size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
-  MemOperand source(obj);
+  MemOperand source = HeapOperand(obj);
   UseScratchRegisterScope temps(GetVIXLAssembler());
 
   if (index.IsConstant()) {
     offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
-    source = MemOperand(obj, offset);
+    source = HeapOperand(obj, offset);
   } else {
     Register temp = temps.AcquireSameSizeAs(obj);
     Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
     __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(type)));
-    source = MemOperand(temp, offset);
+    source = HeapOperand(temp, offset);
   }
 
   codegen_->Load(type, OutputCPURegister(instruction), source);
@@ -982,17 +1085,17 @@
     CPURegister value = InputCPURegisterAt(instruction, 2);
     Location index = locations->InAt(1);
     size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
-    MemOperand destination(obj);
+    MemOperand destination = HeapOperand(obj);
     UseScratchRegisterScope temps(GetVIXLAssembler());
 
     if (index.IsConstant()) {
       offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
-      destination = MemOperand(obj, offset);
+      destination = HeapOperand(obj, offset);
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       Register index_reg = InputRegisterAt(instruction, 1);
       __ Add(temp, obj, Operand(index_reg, LSL, Primitive::ComponentSizeShift(value_type)));
-      destination = MemOperand(temp, offset);
+      destination = HeapOperand(temp, offset);
     }
 
     codegen_->Store(value_type, value, destination);
@@ -1059,29 +1162,59 @@
   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
 }
 
-void LocationsBuilderARM64::VisitCompare(HCompare* instruction) {
+void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorARM64::VisitCompare(HCompare* instruction) {
-  Primitive::Type in_type = instruction->InputAt(0)->GetType();
-
-  DCHECK_EQ(in_type, Primitive::kPrimLong);
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
+  Primitive::Type in_type = compare->InputAt(0)->GetType();
   switch (in_type) {
     case Primitive::kPrimLong: {
-      vixl::Label done;
-      Register result = OutputRegister(instruction);
-      Register left = InputRegisterAt(instruction, 0);
-      Operand right = InputOperandAt(instruction, 1);
-      __ Subs(result.X(), left, right);
-      __ B(eq, &done);
-      __ Mov(result, 1);
-      __ Cneg(result, result, le);
-      __ Bind(&done);
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << in_type;
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
+  Primitive::Type in_type = compare->InputAt(0)->GetType();
+
+  //  0 if: left == right
+  //  1 if: left  > right
+  // -1 if: left  < right
+  switch (in_type) {
+    case Primitive::kPrimLong: {
+      Register result = OutputRegister(compare);
+      Register left = InputRegisterAt(compare, 0);
+      Operand right = InputOperandAt(compare, 1);
+
+      __ Cmp(left, right);
+      __ Cset(result, ne);
+      __ Cneg(result, result, lt);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      Register result = OutputRegister(compare);
+      FPRegister left = InputFPRegisterAt(compare, 0);
+      FPRegister right = InputFPRegisterAt(compare, 1);
+
+      __ Fcmp(left, right);
+      if (compare->IsGtBias()) {
+        __ Cset(result, ne);
+      } else {
+        __ Csetm(result, ne);
+      }
+      __ Cneg(result, result, compare->IsGtBias() ? mi : gt);
       break;
     }
     default:
@@ -1110,7 +1243,7 @@
   Condition cond = ARM64Condition(instruction->GetCondition());
 
   __ Cmp(lhs, rhs);
-  __ Csel(res, vixl::Assembler::AppropriateZeroRegFor(res), Operand(1), InvertCondition(cond));
+  __ Cset(res, cond);
 }
 
 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
@@ -1235,8 +1368,20 @@
 
 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
   HBasicBlock* successor = got->GetSuccessor();
-  // TODO: Support for suspend checks emission.
-  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+  DCHECK(!successor->IsExitBlock());
+  HBasicBlock* block = got->GetBlock();
+  HInstruction* previous = got->GetPrevious();
+  HLoopInformation* info = block->GetLoopInformation();
+
+  if (info != nullptr && info->IsBackEdge(block) && info->HasSuspendCheck()) {
+    codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
+    GenerateSuspendCheck(info->GetSuspendCheck(), successor);
+    return;
+  }
+  if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
+    GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+  }
+  if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
   }
 }
@@ -1244,27 +1389,32 @@
 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
-  if (cond->AsCondition()->NeedsMaterialization()) {
+  if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
 }
 
 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
   HInstruction* cond = if_instr->InputAt(0);
-  DCHECK(cond->IsCondition());
   HCondition* condition = cond->AsCondition();
   vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
   vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
 
-  // TODO: Support constant condition input in VisitIf.
-
-  if (condition->NeedsMaterialization()) {
+  if (cond->IsIntConstant()) {
+    int32_t cond_value = cond->AsIntConstant()->GetValue();
+    if (cond_value == 1) {
+      if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfTrueSuccessor())) {
+        __ B(true_target);
+      }
+      return;
+    } else {
+      DCHECK_EQ(cond_value, 0);
+    }
+  } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
     // The condition instruction has been materialized, compare the output to 0.
     Location cond_val = if_instr->GetLocations()->InAt(0);
     DCHECK(cond_val.IsRegister());
     __ Cbnz(InputRegisterAt(if_instr, 0), true_target);
-
   } else {
     // The condition instruction has not been materialized, use its inputs as
     // the comparison and its condition as the branch condition.
@@ -1282,7 +1432,6 @@
       __ B(arm64_cond, true_target);
     }
   }
-
   if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
     __ B(false_target);
   }
@@ -1295,8 +1444,7 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
-  MemOperand field = MemOperand(InputRegisterAt(instruction, 0),
-                                instruction->GetFieldOffset().Uint32Value());
+  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
   codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
 }
 
@@ -1310,7 +1458,7 @@
   Primitive::Type field_type = instruction->GetFieldType();
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Register obj = InputRegisterAt(instruction, 0);
-  codegen_->Store(field_type, value, MemOperand(obj, instruction->GetFieldOffset().Uint32Value()));
+  codegen_->Store(field_type, value, HeapOperand(obj, instruction->GetFieldOffset()));
   if (field_type == Primitive::kPrimNot) {
     codegen_->MarkGCCard(obj, Register(value));
   }
@@ -1339,7 +1487,7 @@
   __ Cbz(obj, &done);
 
   // Compare the class of `obj` with `cls`.
-  __ Ldr(out, MemOperand(obj, mirror::Object::ClassOffset().Int32Value()));
+  __ Ldr(out, HeapOperand(obj, mirror::Object::ClassOffset()));
   __ Cmp(out, cls);
   if (instruction->IsClassFinal()) {
     // Classes must be equal for the instanceof to succeed.
@@ -1446,14 +1594,12 @@
   // temp = method;
   codegen_->LoadCurrentMethod(temp);
   // temp = temp->dex_cache_resolved_methods_;
-  __ Ldr(temp, MemOperand(temp.X(),
-                          mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
   // temp = temp[index_in_cache];
-  __ Ldr(temp, MemOperand(temp.X(), index_in_cache));
+  __ Ldr(temp, HeapOperand(temp, index_in_cache));
   // lr = temp->entry_point_from_quick_compiled_code_;
-  __ Ldr(lr, MemOperand(temp.X(),
-                        mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-                            kArm64WordSize).SizeValue()));
+  __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+                          kArm64WordSize)));
   // lr();
   __ Blr(lr);
 
@@ -1464,7 +1610,7 @@
 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   LocationSummary* locations = invoke->GetLocations();
   Location receiver = locations->InAt(0);
-  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
     invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
   Offset class_offset = mirror::Object::ClassOffset();
@@ -1472,16 +1618,16 @@
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
-    __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex()));
-    __ Ldr(temp.W(), MemOperand(temp, class_offset.SizeValue()));
+    __ Ldr(temp, MemOperand(sp, receiver.GetStackIndex()));
+    __ Ldr(temp, HeapOperand(temp, class_offset));
   } else {
     DCHECK(receiver.IsRegister());
-    __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+    __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
   }
   // temp = temp->GetMethodAt(method_offset);
-  __ Ldr(temp.W(), MemOperand(temp, method_offset));
+  __ Ldr(temp, HeapOperand(temp, method_offset));
   // lr = temp->GetEntryPoint();
-  __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
+  __ Ldr(lr, HeapOperand(temp, entry_point.SizeValue()));
   // lr();
   __ Blr(lr);
   DCHECK(!codegen_->IsLeafMethod());
@@ -1506,7 +1652,7 @@
     DCHECK(cls->CanCallRuntime());
     codegen_->LoadCurrentMethod(out);
     __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheResolvedTypesOffset()));
-    __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
+    __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
 
     SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
         cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -1555,7 +1701,7 @@
   codegen_->LoadCurrentMethod(out);
   __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset()));
   __ Ldr(out, HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
-  __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
+  __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
   __ Cbz(out, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
@@ -1797,6 +1943,43 @@
   LOG(FATAL) << "Unreachable";
 }
 
+void LocationsBuilderARM64::VisitRem(HRem* rem) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
+  switch (rem->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      Register dividend = InputRegisterAt(rem, 0);
+      Register divisor = InputRegisterAt(rem, 1);
+      Register output = OutputRegister(rem);
+      Register temp = temps.AcquireSameSizeAs(output);
+
+      __ Sdiv(temp, dividend, divisor);
+      __ Msub(output, temp, divisor, dividend);
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   Primitive::Type return_type = instruction->InputAt(0)->GetType();
@@ -1819,6 +2002,22 @@
   __ Br(lr);
 }
 
+void LocationsBuilderARM64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderARM64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
 void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
   Primitive::Type field_type = store->InputAt(1)->GetType();
@@ -1863,9 +2062,8 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
-  Register cls = InputRegisterAt(instruction, 0);
-  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
-  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), MemOperand(cls, offset));
+  MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), instruction->GetFieldOffset());
+  codegen_->Load(instruction->GetType(), OutputCPURegister(instruction), field);
 }
 
 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
@@ -1878,10 +2076,10 @@
 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   CPURegister value = InputCPURegisterAt(instruction, 1);
   Register cls = InputRegisterAt(instruction, 0);
-  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+  Offset offset = instruction->GetFieldOffset();
   Primitive::Type field_type = instruction->GetFieldType();
 
-  codegen_->Store(field_type, value, MemOperand(cls, offset));
+  codegen_->Store(field_type, value, HeapOperand(cls, offset));
   if (field_type == Primitive::kPrimNot) {
     codegen_->MarkGCCard(cls, Register(value));
   }
@@ -1892,14 +2090,17 @@
 }
 
 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
-  // TODO: Improve support for suspend checks.
-  SuspendCheckSlowPathARM64* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, nullptr);
-  codegen_->AddSlowPath(slow_path);
-
-  __ Subs(wSuspend, wSuspend, 1);
-  __ B(slow_path->GetEntryLabel(), le);
-  __ Bind(slow_path->GetReturnLabel());
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
 }
 
 void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) {
@@ -1928,6 +2129,7 @@
       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type input_type = conversion->GetInputType();
   Primitive::Type result_type = conversion->GetResultType();
+  DCHECK_NE(input_type, result_type);
   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
@@ -1956,17 +2158,34 @@
     int result_size = Primitive::ComponentSize(result_type);
     int input_size = Primitive::ComponentSize(input_type);
     int min_size = kBitsPerByte * std::min(result_size, input_size);
+    Register output = OutputRegister(conversion);
+    Register source = InputRegisterAt(conversion, 0);
     if ((result_type == Primitive::kPrimChar) ||
         ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
-      __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size);
+      __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
     } else {
-      __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, min_size);
+      __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size);
     }
-    return;
+  } else if (IsFPType(result_type) && IsIntegralType(input_type)) {
+    CHECK(input_type == Primitive::kPrimInt || input_type == Primitive::kPrimLong);
+    __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
+  } else if (IsIntegralType(result_type) && IsFPType(input_type)) {
+    CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
+    __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
+  } else if (IsFPType(result_type) && IsFPType(input_type)) {
+    __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
+  } else {
+    LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
+                << " to " << result_type;
   }
+}
 
-  LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
-             << " to " << result_type;
+void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
 }
 
 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index a40f27f..0e3d25f 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -44,12 +44,10 @@
 static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
 
 const vixl::Register tr = vixl::x18;        // Thread Register
-const vixl::Register wSuspend = vixl::w19;  // Suspend Register
-const vixl::Register xSuspend = vixl::x19;
 
 const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
 const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-const vixl::CPURegList runtime_reserved_core_registers(tr, xSuspend, vixl::lr);
+const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
 const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
                                                     vixl::kXRegSize,
                                                     kArm64CalleeSaveRefSpills);
@@ -110,7 +108,9 @@
 
  private:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::Register class_reg);
+  void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* instr);
+  void HandleShift(HBinaryOperation* instr);
 
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
@@ -130,6 +130,7 @@
 
  private:
   void HandleBinaryOp(HBinaryOperation* instr);
+  void HandleShift(HBinaryOperation* instr);
   void HandleInvoke(HInvoke* instr);
 
   CodeGeneratorARM64* const codegen_;
@@ -232,6 +233,8 @@
     }
   }
 
+  void Finalize(CodeAllocator* allocator) OVERRIDE;
+
   // Code generation helpers.
   void MoveConstant(vixl::CPURegister destination, HConstant* constant);
   void MoveHelper(Location destination, Location source, Primitive::Type type);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3689452..6f83d9f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -143,7 +143,9 @@
   BoundsCheckSlowPathX86(HBoundsCheck* instruction,
                          Location index_location,
                          Location length_location)
-      : instruction_(instruction), index_location_(index_location), length_location_(length_location) {}
+      : instruction_(instruction),
+        index_location_(index_location),
+        length_location_(length_location) {}
 
   virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
@@ -311,7 +313,8 @@
         Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
 
     if (instruction_->IsInstanceOf()) {
-      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInstanceofNonTrivial)));
+      __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize,
+                                                              pInstanceofNonTrivial)));
     } else {
       DCHECK(instruction_->IsCheckCast());
       __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pCheckCast)));
@@ -464,7 +467,8 @@
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
-  bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
+  bool skip_overflow_check =
+      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
   if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
     RecordPcInfo(nullptr, 0);
@@ -567,28 +571,28 @@
   }
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
-      __ movl(destination.As<Register>(), source.As<Register>());
+      __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
     } else if (source.IsFpuRegister()) {
-      __ movd(destination.As<Register>(), source.As<XmmRegister>());
+      __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(source.IsStackSlot());
-      __ movl(destination.As<Register>(), Address(ESP, source.GetStackIndex()));
+      __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
     }
   } else if (destination.IsFpuRegister()) {
     if (source.IsRegister()) {
-      __ movd(destination.As<XmmRegister>(), source.As<Register>());
+      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>());
     } else if (source.IsFpuRegister()) {
-      __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>());
+      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(source.IsStackSlot());
-      __ movss(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+      __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     }
   } else {
     DCHECK(destination.IsStackSlot()) << destination;
     if (source.IsRegister()) {
-      __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>());
+      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
     } else if (source.IsFpuRegister()) {
-      __ movss(Address(ESP, destination.GetStackIndex()), source.As<XmmRegister>());
+      __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(source.IsStackSlot());
       __ pushl(Address(ESP, source.GetStackIndex()));
@@ -603,19 +607,25 @@
   }
   if (destination.IsRegisterPair()) {
     if (source.IsRegisterPair()) {
-      __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
-      __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+      EmitParallelMoves(
+          Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
     } else if (source.IsFpuRegister()) {
       LOG(FATAL) << "Unimplemented";
     } else if (source.IsQuickParameter()) {
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
       InvokeDexCallingConvention calling_convention;
-      __ movl(destination.AsRegisterPairLow<Register>(),
-              calling_convention.GetRegisterAt(register_index));
-      __ movl(destination.AsRegisterPairHigh<Register>(), Address(ESP,
-          calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()));
+      EmitParallelMoves(
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+          Location::StackSlot(
+              calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
     } else {
+      // No conflict possible, so just do the moves.
       DCHECK(source.IsDoubleStackSlot());
       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
       __ movl(destination.AsRegisterPairHigh<Register>(),
@@ -625,47 +635,52 @@
     InvokeDexCallingConvention calling_convention;
     uint16_t register_index = destination.GetQuickParameterRegisterIndex();
     uint16_t stack_index = destination.GetQuickParameterStackIndex();
-    if (source.IsRegister()) {
-      __ movl(calling_convention.GetRegisterAt(register_index), source.AsRegisterPairLow<Register>());
-      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(stack_index + 1)),
-              source.AsRegisterPairHigh<Register>());
+    if (source.IsRegisterPair()) {
+      LOG(FATAL) << "Unimplemented";
     } else if (source.IsFpuRegister()) {
       LOG(FATAL) << "Unimplemented";
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movl(calling_convention.GetRegisterAt(register_index),
-              Address(ESP, source.GetStackIndex()));
-      __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ popl(Address(ESP, calling_convention.GetStackOffsetOf(stack_index + 1)));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
+          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
     }
   } else if (destination.IsFpuRegister()) {
     if (source.IsDoubleStackSlot()) {
-      __ movsd(destination.As<XmmRegister>(), Address(ESP, source.GetStackIndex()));
+      __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
     } else {
       LOG(FATAL) << "Unimplemented";
     }
   } else {
     DCHECK(destination.IsDoubleStackSlot()) << destination;
     if (source.IsRegisterPair()) {
+      // No conflict possible, so just do the moves.
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
               source.AsRegisterPairHigh<Register>());
     } else if (source.IsQuickParameter()) {
+      // No conflict possible, so just do the move.
       InvokeDexCallingConvention calling_convention;
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
+      // Just move the low part. The only time a source is a quick parameter is
+      // when moving the parameter to its stack locations. And the (Java) caller
+      // of this method has already done that.
       __ movl(Address(ESP, destination.GetStackIndex()),
               calling_convention.GetRegisterAt(register_index));
       DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
                 static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize)));
     } else if (source.IsFpuRegister()) {
-      __ movsd(Address(ESP, destination.GetStackIndex()), source.As<XmmRegister>());
+      __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ pushl(Address(ESP, source.GetStackIndex()));
-      __ popl(Address(ESP, destination.GetStackIndex()));
-      __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::StackSlot(destination.GetStackIndex()),
+          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
+          Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)));
     }
   }
 }
@@ -681,7 +696,7 @@
     if (const_to_move->IsIntConstant()) {
       Immediate imm(const_to_move->AsIntConstant()->GetValue());
       if (location.IsRegister()) {
-        __ movl(location.As<Register>(), imm);
+        __ movl(location.AsRegister<Register>(), imm);
       } else if (location.IsStackSlot()) {
         __ movl(Address(ESP, location.GetStackIndex()), imm);
       } else {
@@ -695,7 +710,8 @@
         __ movl(location.AsRegisterPairHigh<Register>(), Immediate(High32Bits(value)));
       } else if (location.IsDoubleStackSlot()) {
         __ movl(Address(ESP, location.GetStackIndex()), Immediate(Low32Bits(value)));
-        __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)), Immediate(High32Bits(value)));
+        __ movl(Address(ESP, location.GetHighStackIndex(kX86WordSize)),
+                Immediate(High32Bits(value)));
       } else {
         DCHECK(location.IsConstant());
         DCHECK_EQ(location.GetConstant(), instruction);
@@ -828,7 +844,7 @@
         // Materialized condition, compare against 0.
         Location lhs = if_instr->GetLocations()->InAt(0);
         if (lhs.IsRegister()) {
-          __ cmpl(lhs.As<Register>(), Immediate(0));
+          __ cmpl(lhs.AsRegister<Register>(), Immediate(0));
         } else {
           __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
         }
@@ -843,13 +859,13 @@
       // LHS is guaranteed to be in a register (see
       // LocationsBuilderX86::VisitCondition).
       if (rhs.IsRegister()) {
-        __ cmpl(lhs.As<Register>(), rhs.As<Register>());
+        __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
       } else if (rhs.IsConstant()) {
         HIntConstant* instruction = rhs.GetConstant()->AsIntConstant();
         Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ cmpl(lhs.As<Register>(), imm);
+        __ cmpl(lhs.AsRegister<Register>(), imm);
       } else {
-        __ cmpl(lhs.As<Register>(), Address(ESP, rhs.GetStackIndex()));
+        __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
       }
       __ j(X86Condition(cond->AsCondition()->GetCondition()),
            codegen_->GetLabelOf(if_instr->IfTrueSuccessor()));
@@ -920,18 +936,18 @@
 void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
     LocationSummary* locations = comp->GetLocations();
-    Register reg = locations->Out().As<Register>();
+    Register reg = locations->Out().AsRegister<Register>();
     // Clear register: setcc only sets the low byte.
     __ xorl(reg, reg);
     if (locations->InAt(1).IsRegister()) {
-      __ cmpl(locations->InAt(0).As<Register>(),
-              locations->InAt(1).As<Register>());
+      __ cmpl(locations->InAt(0).AsRegister<Register>(),
+              locations->InAt(1).AsRegister<Register>());
     } else if (locations->InAt(1).IsConstant()) {
       HConstant* instruction = locations->InAt(1).GetConstant();
       Immediate imm(instruction->AsIntConstant()->GetValue());
-      __ cmpl(locations->InAt(0).As<Register>(), imm);
+      __ cmpl(locations->InAt(0).AsRegister<Register>(), imm);
     } else {
-      __ cmpl(locations->InAt(0).As<Register>(),
+      __ cmpl(locations->InAt(0).AsRegister<Register>(),
               Address(ESP, locations->InAt(1).GetStackIndex()));
     }
     __ setb(X86Condition(comp->GetCondition()), reg);
@@ -1078,7 +1094,7 @@
       case Primitive::kPrimShort:
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
-        DCHECK_EQ(ret->GetLocations()->InAt(0).As<Register>(), EAX);
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX);
         break;
 
       case Primitive::kPrimLong:
@@ -1088,7 +1104,7 @@
 
       case Primitive::kPrimFloat:
       case Primitive::kPrimDouble:
-        DCHECK_EQ(ret->GetLocations()->InAt(0).As<XmmRegister>(), XMM0);
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0);
         break;
 
       default:
@@ -1104,7 +1120,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeStatic(HInvokeStatic* invoke) {
-  Register temp = invoke->GetLocations()->GetTemp(0).As<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
 
   // TODO: Implement all kinds of calls:
   // 1) boot -> boot
@@ -1169,7 +1185,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  Register temp = invoke->GetLocations()->GetTemp(0).As<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
           invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
   LocationSummary* locations = invoke->GetLocations();
@@ -1180,7 +1196,7 @@
     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
     __ movl(temp, Address(temp, class_offset));
   } else {
-    __ movl(temp, Address(receiver.As<Register>(), class_offset));
+    __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
   }
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
@@ -1200,7 +1216,7 @@
 
 void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  Register temp = invoke->GetLocations()->GetTemp(0).As<Register>();
+  Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
           (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
   LocationSummary* locations = invoke->GetLocations();
@@ -1209,14 +1225,14 @@
 
   // Set the hidden argument.
   __ movl(temp, Immediate(invoke->GetDexMethodIndex()));
-  __ movd(invoke->GetLocations()->GetTemp(1).As<XmmRegister>(), temp);
+  __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp);
 
   // temp = object->GetClass();
   if (receiver.IsStackSlot()) {
     __ movl(temp, Address(ESP, receiver.GetStackIndex()));
     __ movl(temp, Address(temp, class_offset));
   } else {
-    __ movl(temp, Address(receiver.As<Register>(), class_offset));
+    __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
   }
   // temp = temp->GetImtEntryAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
@@ -1239,11 +1255,16 @@
       break;
 
     case Primitive::kPrimFloat:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresFpuRegister());
+      break;
+
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      // Output overlaps as we need a fresh (zero-initialized)
-      // register to perform subtraction from zero.
-      locations->SetOut(Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      locations->AddTemp(Location::RequiresFpuRegister());
       break;
 
     default:
@@ -1259,7 +1280,7 @@
     case Primitive::kPrimInt:
       DCHECK(in.IsRegister());
       DCHECK(in.Equals(out));
-      __ negl(out.As<Register>());
+      __ negl(out.AsRegister<Register>());
       break;
 
     case Primitive::kPrimLong:
@@ -1275,21 +1296,29 @@
       __ negl(out.AsRegisterPairHigh<Register>());
       break;
 
-    case Primitive::kPrimFloat:
-      DCHECK(!in.Equals(out));
-      // out = 0
-      __ xorps(out.As<XmmRegister>(), out.As<XmmRegister>());
-      // out = out - in
-      __ subss(out.As<XmmRegister>(), in.As<XmmRegister>());
+    case Primitive::kPrimFloat: {
+      DCHECK(in.Equals(out));
+      Register constant = locations->GetTemp(0).AsRegister<Register>();
+      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      // Implement float negation with an exclusive or with value
+      // 0x80000000 (mask for bit 31, representing the sign of a
+      // single-precision floating-point number).
+      __ movl(constant, Immediate(INT32_C(0x80000000)));
+      __ movd(mask, constant);
+      __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
       break;
+    }
 
-    case Primitive::kPrimDouble:
-      DCHECK(!in.Equals(out));
-      // out = 0
-      __ xorpd(out.As<XmmRegister>(), out.As<XmmRegister>());
-      // out = out - in
-      __ subsd(out.As<XmmRegister>(), in.As<XmmRegister>());
+    case Primitive::kPrimDouble: {
+      DCHECK(in.Equals(out));
+      XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      // Implement double negation with an exclusive or with value
+      // 0x8000000000000000 (mask for bit 63, representing the sign of
+      // a double-precision floating-point number).
+      __ LoadLongConstant(mask, INT64_C(0x8000000000000000));
+      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
       break;
+    }
 
     default:
       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
@@ -1301,6 +1330,7 @@
       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
+  DCHECK_NE(result_type, input_type);
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1343,6 +1373,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-int' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1382,7 +1418,6 @@
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
           // Processing a Dex `int-to-char' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -1406,6 +1441,13 @@
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1429,6 +1471,13 @@
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimFloat:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1452,6 +1501,7 @@
   Location in = locations->InAt(0);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
+  DCHECK_NE(result_type, input_type);
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1460,13 +1510,13 @@
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-byte' instruction.
           if (in.IsRegister()) {
-            __ movsxb(out.As<Register>(), in.As<ByteRegister>());
+            __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
           } else if (in.IsStackSlot()) {
-            __ movsxb(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+            __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
-            __ movl(out.As<Register>(), Immediate(static_cast<int8_t>(value)));
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
           }
           break;
 
@@ -1483,13 +1533,13 @@
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-short' instruction.
           if (in.IsRegister()) {
-            __ movsxw(out.As<Register>(), in.As<Register>());
+            __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>());
           } else if (in.IsStackSlot()) {
-            __ movsxw(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+            __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
-            __ movl(out.As<Register>(), Immediate(static_cast<int16_t>(value)));
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value)));
           }
           break;
 
@@ -1504,18 +1554,42 @@
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-int' instruction.
           if (in.IsRegisterPair()) {
-            __ movl(out.As<Register>(), in.AsRegisterPairLow<Register>());
+            __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>());
           } else if (in.IsDoubleStackSlot()) {
-            __ movl(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+            __ movl(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
           } else {
             DCHECK(in.IsConstant());
             DCHECK(in.GetConstant()->IsLongConstant());
             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
-            __ movl(out.As<Register>(), Immediate(static_cast<int32_t>(value)));
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<int32_t>(value)));
           }
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-int' instruction.
+          XmmRegister input = in.AsFpuRegister<XmmRegister>();
+          Register output = out.AsRegister<Register>();
+          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+          Label done, nan;
+
+          __ movl(output, Immediate(kPrimIntMax));
+          // temp = int-to-float(output)
+          __ cvtsi2ss(temp, output);
+          // if input >= temp goto done
+          __ comiss(input, temp);
+          __ j(kAboveEqual, &done);
+          // if input == NaN goto nan
+          __ j(kUnordered, &nan);
+          // output = float-to-int-truncate(input)
+          __ cvttss2si(output, input);
+          __ jmp(&done);
+          __ Bind(&nan);
+          //  output = 0
+          __ xorl(output, output);
+          __ Bind(&done);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1536,7 +1610,7 @@
           // Processing a Dex `int-to-long' instruction.
           DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX);
           DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX);
-          DCHECK_EQ(in.As<Register>(), EAX);
+          DCHECK_EQ(in.AsRegister<Register>(), EAX);
           __ cdq();
           break;
 
@@ -1557,16 +1631,15 @@
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
           // Processing a Dex `Process a Dex `int-to-char'' instruction.
           if (in.IsRegister()) {
-            __ movzxw(out.As<Register>(), in.As<Register>());
+            __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>());
           } else if (in.IsStackSlot()) {
-            __ movzxw(out.As<Register>(), Address(ESP, in.GetStackIndex()));
+            __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
             int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
-            __ movl(out.As<Register>(), Immediate(static_cast<uint16_t>(value)));
+            __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value)));
           }
           break;
 
@@ -1578,15 +1651,48 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
-          // Processing a Dex `int-to-float' instruction.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          __ cvtsi2ss(out.As<XmmRegister>(), in.As<Register>());
+          // Processing a Dex `int-to-float' instruction.
+          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
           break;
 
-        case Primitive::kPrimLong:
+        case Primitive::kPrimLong: {
+          // Processing a Dex `long-to-float' instruction.
+          Register low = in.AsRegisterPairLow<Register>();
+          Register high = in.AsRegisterPairHigh<Register>();
+          XmmRegister result = out.AsFpuRegister<XmmRegister>();
+          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+          XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+          // Operations use doubles for precision reasons (each 32-bit
+          // half of a long fits in the 53-bit mantissa of a double,
+          // but not in the 24-bit mantissa of a float).  This is
+          // especially important for the low bits.  The result is
+          // eventually converted to float.
+
+          // low = low - 2^31 (to prevent bit 31 of `low` to be
+          // interpreted as a sign bit)
+          __ subl(low, Immediate(0x80000000));
+          // temp = int-to-double(high)
+          __ cvtsi2sd(temp, high);
+          // temp = temp * 2^32
+          __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
+          __ mulsd(temp, constant);
+          // result = int-to-double(low)
+          __ cvtsi2sd(result, low);
+          // result = result + 2^31 (restore the original value of `low`)
+          __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
+          __ addsd(result, constant);
+          // result = result + temp
+          __ addsd(result, temp);
+          // result = double-to-float(result)
+          __ cvtsd2ss(result, result);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1600,15 +1706,40 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
-          // Processing a Dex `int-to-double' instruction.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          __ cvtsi2sd(out.As<XmmRegister>(), in.As<Register>());
+          // Processing a Dex `int-to-double' instruction.
+          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>());
           break;
 
-        case Primitive::kPrimLong:
+        case Primitive::kPrimLong: {
+          // Processing a Dex `long-to-double' instruction.
+          Register low = in.AsRegisterPairLow<Register>();
+          Register high = in.AsRegisterPairHigh<Register>();
+          XmmRegister result = out.AsFpuRegister<XmmRegister>();
+          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+          XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+
+          // low = low - 2^31 (to prevent bit 31 of `low` to be
+          // interpreted as a sign bit)
+          __ subl(low, Immediate(0x80000000));
+          // temp = int-to-double(high)
+          __ cvtsi2sd(temp, high);
+          // temp = temp * 2^32
+          __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
+          __ mulsd(temp, constant);
+          // result = int-to-double(low)
+          __ cvtsi2sd(result, low);
+          // result = result + 2^31 (restore the original value of `low`)
+          __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
+          __ addsd(result, constant);
+          // result = result + temp
+          __ addsd(result, temp);
+          break;
+        }
+
         case Primitive::kPrimFloat:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1660,11 +1791,12 @@
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ addl(first.As<Register>(), second.As<Register>());
+        __ addl(first.AsRegister<Register>(), second.AsRegister<Register>());
       } else if (second.IsConstant()) {
-        __ addl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+        __ addl(first.AsRegister<Register>(),
+                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       } else {
-        __ addl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
+        __ addl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
@@ -1683,18 +1815,18 @@
 
     case Primitive::kPrimFloat: {
       if (second.IsFpuRegister()) {
-        __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
+        __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else {
-        __ addss(first.As<XmmRegister>(), Address(ESP, second.GetStackIndex()));
+        __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
       if (second.IsFpuRegister()) {
-        __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+        __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       } else {
-        __ addsd(first.As<XmmRegister>(), Address(ESP, second.GetStackIndex()));
+        __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
@@ -1736,11 +1868,12 @@
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ subl(first.As<Register>(), second.As<Register>());
+        __ subl(first.AsRegister<Register>(), second.AsRegister<Register>());
       } else if (second.IsConstant()) {
-        __ subl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+        __ subl(first.AsRegister<Register>(),
+                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       } else {
-        __ subl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
+        __ subl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
@@ -1758,12 +1891,12 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ subss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ subsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -1818,13 +1951,13 @@
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ imull(first.As<Register>(), second.As<Register>());
+        __ imull(first.AsRegister<Register>(), second.AsRegister<Register>());
       } else if (second.IsConstant()) {
         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.As<Register>(), imm);
+        __ imull(first.AsRegister<Register>(), imm);
       } else {
         DCHECK(second.IsStackSlot());
-        __ imull(first.As<Register>(), Address(ESP, second.GetStackIndex()));
+        __ imull(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
@@ -1836,8 +1969,8 @@
       Register in1_lo = first.AsRegisterPairLow<Register>();
       Address in2_hi(ESP, second.GetHighStackIndex(kX86WordSize));
       Address in2_lo(ESP, second.GetStackIndex());
-      Register eax = locations->GetTemp(0).As<Register>();
-      Register edx = locations->GetTemp(1).As<Register>();
+      Register eax = locations->GetTemp(0).AsRegister<Register>();
+      Register edx = locations->GetTemp(1).AsRegister<Register>();
 
       DCHECK_EQ(EAX, eax);
       DCHECK_EQ(EDX, edx);
@@ -1868,12 +2001,12 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ mulss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ mulsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -1893,12 +2026,13 @@
 
   switch (instruction->GetResultType()) {
     case Primitive::kPrimInt: {
-      Register second_reg = second.As<Register>();
-      DCHECK_EQ(EAX, first.As<Register>());
-      DCHECK_EQ(is_div ? EAX : EDX, out.As<Register>());
+      Register second_reg = second.AsRegister<Register>();
+      DCHECK_EQ(EAX, first.AsRegister<Register>());
+      DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
 
       SlowPathCodeX86* slow_path =
-          new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.As<Register>(), is_div);
+          new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
+                                                                 is_div);
       codegen_->AddSlowPath(slow_path);
 
       // 0x80000000/-1 triggers an arithmetic exception!
@@ -1997,13 +2131,13 @@
 
     case Primitive::kPrimFloat: {
       DCHECK(first.Equals(out));
-      __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
       DCHECK(first.Equals(out));
-      __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -2097,7 +2231,7 @@
   switch (instruction->GetType()) {
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
-        __ testl(value.As<Register>(), value.As<Register>());
+        __ testl(value.AsRegister<Register>(), value.AsRegister<Register>());
         __ j(kEqual, slow_path->GetEntryLabel());
       } else if (value.IsStackSlot()) {
         __ cmpl(Address(ESP, value.GetStackIndex()), Immediate(0));
@@ -2112,7 +2246,7 @@
     }
     case Primitive::kPrimLong: {
       if (value.IsRegisterPair()) {
-        Register temp = locations->GetTemp(0).As<Register>();
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
         __ movl(temp, value.AsRegisterPairLow<Register>());
         __ orl(temp, value.AsRegisterPairHigh<Register>());
         __ j(kEqual, slow_path->GetEntryLabel());
@@ -2165,9 +2299,9 @@
 
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
-      Register first_reg = first.As<Register>();
+      Register first_reg = first.AsRegister<Register>();
       if (second.IsRegister()) {
-        Register second_reg = second.As<Register>();
+        Register second_reg = second.AsRegister<Register>();
         DCHECK_EQ(ECX, second_reg);
         if (op->IsShl()) {
           __ shll(first_reg, second_reg);
@@ -2189,7 +2323,7 @@
       break;
     }
     case Primitive::kPrimLong: {
-      Register second_reg = second.As<Register>();
+      Register second_reg = second.AsRegister<Register>();
       DCHECK_EQ(ECX, second_reg);
       if (op->IsShl()) {
         GenerateShlLong(first, second_reg);
@@ -2335,11 +2469,11 @@
   DCHECK(in.Equals(out));
   switch (not_->InputAt(0)->GetType()) {
     case Primitive::kPrimBoolean:
-      __ xorl(out.As<Register>(), Immediate(1));
+      __ xorl(out.AsRegister<Register>(), Immediate(1));
       break;
 
     case Primitive::kPrimInt:
-      __ notl(out.As<Register>());
+      __ notl(out.AsRegister<Register>());
       break;
 
     case Primitive::kPrimLong:
@@ -2355,20 +2489,36 @@
 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // TODO: we set any here but we don't handle constants
+      locations->SetInAt(1, Location::Any());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
   LocationSummary* locations = compare->GetLocations();
+  Register out = locations->Out().AsRegister<Register>();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  Label less, greater, done;
   switch (compare->InputAt(0)->GetType()) {
     case Primitive::kPrimLong: {
-      Label less, greater, done;
-      Register output = locations->Out().As<Register>();
-      Location left = locations->InAt(0);
-      Location right = locations->InAt(1);
-      if (right.IsRegister()) {
+      if (right.IsRegisterPair()) {
         __ cmpl(left.AsRegisterPairHigh<Register>(), right.AsRegisterPairHigh<Register>());
       } else {
         DCHECK(right.IsDoubleStackSlot());
@@ -2383,23 +2533,33 @@
         DCHECK(right.IsDoubleStackSlot());
         __ cmpl(left.AsRegisterPairLow<Register>(), Address(ESP, right.GetStackIndex()));
       }
-      __ movl(output, Immediate(0));
-      __ j(kEqual, &done);
-      __ j(kBelow, &less);  // Unsigned compare.
-
-      __ Bind(&greater);
-      __ movl(output, Immediate(1));
-      __ jmp(&done);
-
-      __ Bind(&less);
-      __ movl(output, Immediate(-1));
-
-      __ Bind(&done);
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      break;
+    }
+    case Primitive::kPrimDouble: {
+      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
       break;
     }
     default:
-      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
   }
+  __ movl(out, Immediate(0));
+  __ j(kEqual, &done);
+  __ j(kBelow, &less);  // kBelow is for CF (unsigned & floats).
+
+  __ Bind(&greater);
+  __ movl(out, Immediate(1));
+  __ jmp(&done);
+
+  __ Bind(&less);
+  __ movl(out, Immediate(-1));
+
+  __ Bind(&done);
 }
 
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
@@ -2444,33 +2604,33 @@
 
 void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
   Primitive::Type field_type = instruction->GetFieldType();
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      ByteRegister value = locations->InAt(1).As<ByteRegister>();
+      ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
       __ movb(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ movw(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ movl(Address(obj, offset), value);
 
       if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-        Register temp = locations->GetTemp(0).As<Register>();
-        Register card = locations->GetTemp(1).As<Register>();
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        Register card = locations->GetTemp(1).AsRegister<Register>();
         codegen_->MarkGCCard(temp, card, obj, value);
       }
       break;
@@ -2484,13 +2644,13 @@
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movss(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movsd(Address(obj, offset), value);
       break;
     }
@@ -2522,37 +2682,37 @@
 
 void InstructionCodeGeneratorX86::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movzxb(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimByte: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movsxb(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimShort: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movsxw(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimChar: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movzxw(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movl(out, Address(obj, offset));
       break;
     }
@@ -2565,13 +2725,13 @@
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movss(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movsd(out, Address(obj, offset));
       break;
     }
@@ -2599,7 +2759,7 @@
   Location obj = locations->InAt(0);
 
   if (obj.IsRegister()) {
-    __ cmpl(obj.As<Register>(), Immediate(0));
+    __ cmpl(obj.AsRegister<Register>(), Immediate(0));
   } else if (obj.IsStackSlot()) {
     __ cmpl(Address(ESP, obj.GetStackIndex()), Immediate(0));
   } else {
@@ -2621,54 +2781,54 @@
 
 void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
       } else {
-        __ movzxb(out, Address(obj, index.As<Register>(), TIMES_1, data_offset));
+        __ movzxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
       } else {
-        __ movsxb(out, Address(obj, index.As<Register>(), TIMES_1, data_offset));
+        __ movsxb(out, Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
       } else {
-        __ movsxw(out, Address(obj, index.As<Register>(), TIMES_2, data_offset));
+        __ movsxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
       } else {
-        __ movzxw(out, Address(obj, index.As<Register>(), TIMES_2, data_offset));
+        __ movzxw(out, Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset));
       }
       break;
     }
@@ -2676,12 +2836,12 @@
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
       } else {
-        __ movl(out, Address(obj, index.As<Register>(), TIMES_4, data_offset));
+        __ movl(out, Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset));
       }
       break;
     }
@@ -2695,9 +2855,9 @@
         __ movl(out.AsRegisterPairHigh<Register>(), Address(obj, offset + kX86WordSize));
       } else {
         __ movl(out.AsRegisterPairLow<Register>(),
-                Address(obj, index.As<Register>(), TIMES_8, data_offset));
+                Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset));
         __ movl(out.AsRegisterPairHigh<Register>(),
-                Address(obj, index.As<Register>(), TIMES_8, data_offset + kX86WordSize));
+                Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize));
       }
       break;
     }
@@ -2757,7 +2917,7 @@
 
 void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
@@ -2772,17 +2932,17 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
         if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.As<ByteRegister>());
+          __ movb(Address(obj, offset), value.AsRegister<ByteRegister>());
         } else {
           __ movb(Address(obj, offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       } else {
         if (value.IsRegister()) {
-          __ movb(Address(obj, index.As<Register>(), TIMES_1, data_offset),
-                  value.As<ByteRegister>());
+          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
+                  value.AsRegister<ByteRegister>());
         } else {
-          __ movb(Address(obj, index.As<Register>(), TIMES_1, data_offset),
+          __ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       }
@@ -2795,17 +2955,17 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
         if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.As<Register>());
+          __ movw(Address(obj, offset), value.AsRegister<Register>());
         } else {
           __ movw(Address(obj, offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       } else {
         if (value.IsRegister()) {
-          __ movw(Address(obj, index.As<Register>(), TIMES_2, data_offset),
-                  value.As<Register>());
+          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
+                  value.AsRegister<Register>());
         } else {
-          __ movw(Address(obj, index.As<Register>(), TIMES_2, data_offset),
+          __ movw(Address(obj, index.AsRegister<Register>(), TIMES_2, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       }
@@ -2817,9 +2977,10 @@
       if (!needs_runtime_call) {
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
         if (index.IsConstant()) {
-          size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
           if (value.IsRegister()) {
-            __ movl(Address(obj, offset), value.As<Register>());
+            __ movl(Address(obj, offset), value.AsRegister<Register>());
           } else {
             DCHECK(value.IsConstant()) << value;
             __ movl(Address(obj, offset),
@@ -2828,19 +2989,19 @@
         } else {
           DCHECK(index.IsRegister()) << index;
           if (value.IsRegister()) {
-            __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset),
-                    value.As<Register>());
+            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
+                    value.AsRegister<Register>());
           } else {
             DCHECK(value.IsConstant()) << value;
-            __ movl(Address(obj, index.As<Register>(), TIMES_4, data_offset),
+            __ movl(Address(obj, index.AsRegister<Register>(), TIMES_4, data_offset),
                     Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
           }
         }
 
         if (needs_write_barrier) {
-          Register temp = locations->GetTemp(0).As<Register>();
-          Register card = locations->GetTemp(1).As<Register>();
-          codegen_->MarkGCCard(temp, card, obj, value.As<Register>());
+          Register temp = locations->GetTemp(0).AsRegister<Register>();
+          Register card = locations->GetTemp(1).AsRegister<Register>();
+          codegen_->MarkGCCard(temp, card, obj, value.AsRegister<Register>());
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
@@ -2866,16 +3027,16 @@
         }
       } else {
         if (value.IsRegisterPair()) {
-          __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset),
+          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
                   value.AsRegisterPairLow<Register>());
-          __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   value.AsRegisterPairHigh<Register>());
         } else {
           DCHECK(value.IsConstant());
           int64_t val = value.GetConstant()->AsLongConstant()->GetValue();
-          __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset),
+          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset),
                   Immediate(Low32Bits(val)));
-          __ movl(Address(obj, index.As<Register>(), TIMES_8, data_offset + kX86WordSize),
+          __ movl(Address(obj, index.AsRegister<Register>(), TIMES_8, data_offset + kX86WordSize),
                   Immediate(High32Bits(val)));
         }
       }
@@ -2902,8 +3063,8 @@
 void InstructionCodeGeneratorX86::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
-  Register obj = locations->InAt(0).As<Register>();
-  Register out = locations->Out().As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
   __ movl(out, Address(obj, offset));
 }
 
@@ -2923,8 +3084,8 @@
       instruction, locations->InAt(0), locations->InAt(1));
   codegen_->AddSlowPath(slow_path);
 
-  Register index = locations->InAt(0).As<Register>();
-  Register length = locations->InAt(1).As<Register>();
+  Register index = locations->InAt(0).AsRegister<Register>();
+  Register length = locations->InAt(1).AsRegister<Register>();
 
   __ cmpl(index, length);
   __ j(kAboveEqual, slow_path->GetEntryLabel());
@@ -3001,14 +3162,14 @@
 
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
-      __ movl(destination.As<Register>(), source.As<Register>());
+      __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
-      __ movl(Address(ESP, destination.GetStackIndex()), source.As<Register>());
+      __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
-      __ movl(destination.As<Register>(), Address(ESP, source.GetStackIndex()));
+      __ movl(destination.AsRegister<Register>(), Address(ESP, source.GetStackIndex()));
     } else {
       DCHECK(destination.IsStackSlot());
       MoveMemoryToMemory(destination.GetStackIndex(),
@@ -3018,7 +3179,7 @@
     HIntConstant* instruction = source.GetConstant()->AsIntConstant();
     Immediate imm(instruction->AsIntConstant()->GetValue());
     if (destination.IsRegister()) {
-      __ movl(destination.As<Register>(), imm);
+      __ movl(destination.AsRegister<Register>(), imm);
     } else {
       __ movl(Address(ESP, destination.GetStackIndex()), imm);
     }
@@ -3060,11 +3221,11 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgl(destination.As<Register>(), source.As<Register>());
+    __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
-    Exchange(source.As<Register>(), destination.GetStackIndex());
+    Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
-    Exchange(destination.As<Register>(), source.GetStackIndex());
+    Exchange(destination.AsRegister<Register>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(destination.GetStackIndex(), source.GetStackIndex());
   } else {
@@ -3090,7 +3251,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
-  Register out = cls->GetLocations()->Out().As<Register>();
+  Register out = cls->GetLocations()->Out().AsRegister<Register>();
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
@@ -3129,7 +3290,8 @@
   SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
       check->GetLoadClass(), check, check->GetDexPc(), true);
   codegen_->AddSlowPath(slow_path);
-  GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).As<Register>());
+  GenerateClassInitializationCheck(slow_path,
+                                   check->GetLocations()->InAt(0).AsRegister<Register>());
 }
 
 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
@@ -3150,37 +3312,37 @@
 
 void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register cls = locations->InAt(0).As<Register>();
+  Register cls = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movzxb(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimByte: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movsxb(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimShort: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movsxw(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimChar: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movzxw(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register out = locations->Out().As<Register>();
+      Register out = locations->Out().AsRegister<Register>();
       __ movl(out, Address(cls, offset));
       break;
     }
@@ -3193,13 +3355,13 @@
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movss(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movsd(out, Address(cls, offset));
       break;
     }
@@ -3237,33 +3399,33 @@
 
 void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register cls = locations->InAt(0).As<Register>();
+  Register cls = locations->InAt(0).AsRegister<Register>();
   uint32_t offset = instruction->GetFieldOffset().Uint32Value();
   Primitive::Type field_type = instruction->GetFieldType();
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      ByteRegister value = locations->InAt(1).As<ByteRegister>();
+      ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
       __ movb(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ movw(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      Register value = locations->InAt(1).As<Register>();
+      Register value = locations->InAt(1).AsRegister<Register>();
       __ movl(Address(cls, offset), value);
 
       if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
-        Register temp = locations->GetTemp(0).As<Register>();
-        Register card = locations->GetTemp(1).As<Register>();
+        Register temp = locations->GetTemp(0).AsRegister<Register>();
+        Register card = locations->GetTemp(1).AsRegister<Register>();
         codegen_->MarkGCCard(temp, card, cls, value);
       }
       break;
@@ -3277,13 +3439,13 @@
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movss(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movsd(Address(cls, offset), value);
       break;
     }
@@ -3304,7 +3466,7 @@
   SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
   codegen_->AddSlowPath(slow_path);
 
-  Register out = load->GetLocations()->Out().As<Register>();
+  Register out = load->GetLocations()->Out().AsRegister<Register>();
   codegen_->LoadCurrentMethod(out);
   __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()));
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
@@ -3322,7 +3484,7 @@
 
 void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) {
   Address address = Address::Absolute(Thread::ExceptionOffset<kX86WordSize>().Int32Value());
-  __ fs()->movl(load->GetLocations()->Out().As<Register>(), address);
+  __ fs()->movl(load->GetLocations()->Out().AsRegister<Register>(), address);
   __ fs()->movl(address, Immediate(0));
 }
 
@@ -3350,9 +3512,9 @@
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   Location cls = locations->InAt(1);
-  Register out = locations->Out().As<Register>();
+  Register out = locations->Out().AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Label done, zero;
   SlowPathCodeX86* slow_path = nullptr;
@@ -3364,7 +3526,7 @@
   __ movl(out, Address(obj, class_offset));
   // Compare the class of `obj` with `cls`.
   if (cls.IsRegister()) {
-    __ cmpl(out, cls.As<Register>());
+    __ cmpl(out, cls.AsRegister<Register>());
   } else {
     DCHECK(cls.IsStackSlot()) << cls;
     __ cmpl(out, Address(ESP, cls.GetStackIndex()));
@@ -3403,9 +3565,9 @@
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).As<Register>();
+  Register obj = locations->InAt(0).AsRegister<Register>();
   Location cls = locations->InAt(1);
-  Register temp = locations->GetTemp(0).As<Register>();
+  Register temp = locations->GetTemp(0).AsRegister<Register>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
       instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
@@ -3418,7 +3580,7 @@
 
   // Compare the class of `obj` with `cls`.
   if (cls.IsRegister()) {
-    __ cmpl(temp, cls.As<Register>());
+    __ cmpl(temp, cls.AsRegister<Register>());
   } else {
     DCHECK(cls.IsStackSlot()) << cls;
     __ cmpl(temp, Address(ESP, cls.GetStackIndex()));
@@ -3477,30 +3639,33 @@
   if (instruction->GetResultType() == Primitive::kPrimInt) {
     if (second.IsRegister()) {
       if (instruction->IsAnd()) {
-        __ andl(first.As<Register>(), second.As<Register>());
+        __ andl(first.AsRegister<Register>(), second.AsRegister<Register>());
       } else if (instruction->IsOr()) {
-        __ orl(first.As<Register>(), second.As<Register>());
+        __ orl(first.AsRegister<Register>(), second.AsRegister<Register>());
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.As<Register>(), second.As<Register>());
+        __ xorl(first.AsRegister<Register>(), second.AsRegister<Register>());
       }
     } else if (second.IsConstant()) {
       if (instruction->IsAnd()) {
-        __ andl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+        __ andl(first.AsRegister<Register>(),
+                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       } else if (instruction->IsOr()) {
-        __ orl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+        __ orl(first.AsRegister<Register>(),
+               Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+        __ xorl(first.AsRegister<Register>(),
+                Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       }
     } else {
       if (instruction->IsAnd()) {
-        __ andl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
+        __ andl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       } else if (instruction->IsOr()) {
-        __ orl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
+        __ orl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
+        __ xorl(first.AsRegister<Register>(), Address(ESP, second.GetStackIndex()));
       }
     }
   } else {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 34fa1e7..47fd304 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -539,37 +539,37 @@
   }
   if (destination.IsRegister()) {
     if (source.IsRegister()) {
-      __ movq(destination.As<CpuRegister>(), source.As<CpuRegister>());
+      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movd(destination.As<CpuRegister>(), source.As<XmmRegister>());
+      __ movd(destination.AsRegister<CpuRegister>(), source.AsFpuRegister<XmmRegister>());
     } else if (source.IsStackSlot()) {
-      __ movl(destination.As<CpuRegister>(),
+      __ movl(destination.AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movq(destination.As<CpuRegister>(),
+      __ movq(destination.AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsFpuRegister()) {
     if (source.IsRegister()) {
-      __ movd(destination.As<XmmRegister>(), source.As<CpuRegister>());
+      __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
-      __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>());
+      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
     } else if (source.IsStackSlot()) {
-      __ movss(destination.As<XmmRegister>(),
+      __ movss(destination.AsFpuRegister<XmmRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movsd(destination.As<XmmRegister>(),
+      __ movsd(destination.AsFpuRegister<XmmRegister>(),
                Address(CpuRegister(RSP), source.GetStackIndex()));
     }
   } else if (destination.IsStackSlot()) {
     if (source.IsRegister()) {
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
-              source.As<CpuRegister>());
+              source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
-               source.As<XmmRegister>());
+               source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(source.IsStackSlot());
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -579,10 +579,10 @@
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegister()) {
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
-              source.As<CpuRegister>());
+              source.AsRegister<CpuRegister>());
     } else if (source.IsFpuRegister()) {
       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
-               source.As<XmmRegister>());
+               source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(source.IsDoubleStackSlot());
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -604,7 +604,7 @@
     if (const_to_move->IsIntConstant()) {
       Immediate imm(const_to_move->AsIntConstant()->GetValue());
       if (location.IsRegister()) {
-        __ movl(location.As<CpuRegister>(), imm);
+        __ movl(location.AsRegister<CpuRegister>(), imm);
       } else if (location.IsStackSlot()) {
         __ movl(Address(CpuRegister(RSP), location.GetStackIndex()), imm);
       } else {
@@ -614,7 +614,7 @@
     } else if (const_to_move->IsLongConstant()) {
       int64_t value = const_to_move->AsLongConstant()->GetValue();
       if (location.IsRegister()) {
-        __ movq(location.As<CpuRegister>(), Immediate(value));
+        __ movq(location.AsRegister<CpuRegister>(), Immediate(value));
       } else if (location.IsDoubleStackSlot()) {
         __ movq(CpuRegister(TMP), Immediate(value));
         __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
@@ -637,7 +637,8 @@
 
       case Primitive::kPrimLong:
       case Primitive::kPrimDouble:
-        Move(location, Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
+        Move(location,
+             Location::DoubleStackSlot(GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
         break;
 
       default:
@@ -741,7 +742,7 @@
         // Materialized condition, compare against 0.
         Location lhs = if_instr->GetLocations()->InAt(0);
         if (lhs.IsRegister()) {
-          __ cmpl(lhs.As<CpuRegister>(), Immediate(0));
+          __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0));
         } else {
           __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
                   Immediate(0));
@@ -755,12 +756,12 @@
       Location lhs = cond->GetLocations()->InAt(0);
       Location rhs = cond->GetLocations()->InAt(1);
       if (rhs.IsRegister()) {
-        __ cmpl(lhs.As<CpuRegister>(), rhs.As<CpuRegister>());
+        __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
       } else if (rhs.IsConstant()) {
-        __ cmpl(lhs.As<CpuRegister>(),
+        __ cmpl(lhs.AsRegister<CpuRegister>(),
                 Immediate(rhs.GetConstant()->AsIntConstant()->GetValue()));
       } else {
-        __ cmpl(lhs.As<CpuRegister>(),
+        __ cmpl(lhs.AsRegister<CpuRegister>(),
                 Address(CpuRegister(RSP), rhs.GetStackIndex()));
       }
       __ j(X86_64Condition(cond->AsCondition()->GetCondition()),
@@ -831,17 +832,17 @@
 void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
   if (comp->NeedsMaterialization()) {
     LocationSummary* locations = comp->GetLocations();
-    CpuRegister reg = locations->Out().As<CpuRegister>();
+    CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
     // Clear register: setcc only sets the low byte.
     __ xorq(reg, reg);
     if (locations->InAt(1).IsRegister()) {
-      __ cmpl(locations->InAt(0).As<CpuRegister>(),
-              locations->InAt(1).As<CpuRegister>());
+      __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
+              locations->InAt(1).AsRegister<CpuRegister>());
     } else if (locations->InAt(1).IsConstant()) {
-      __ cmpl(locations->InAt(0).As<CpuRegister>(),
+      __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
               Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue()));
     } else {
-      __ cmpl(locations->InAt(0).As<CpuRegister>(),
+      __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
     }
     __ setcc(X86_64Condition(comp->GetCondition()), reg);
@@ -899,33 +900,61 @@
 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+  }
 }
 
 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
-  Label greater, done;
   LocationSummary* locations = compare->GetLocations();
-  switch (compare->InputAt(0)->GetType()) {
-    case Primitive::kPrimLong:
-      __ cmpq(locations->InAt(0).As<CpuRegister>(),
-              locations->InAt(1).As<CpuRegister>());
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  Label less, greater, done;
+  Primitive::Type type = compare->InputAt(0)->GetType();
+  switch (type) {
+    case Primitive::kPrimLong: {
+      __ cmpq(left.AsRegister<CpuRegister>(), right.AsRegister<CpuRegister>());
       break;
+    }
+    case Primitive::kPrimFloat: {
+      __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      break;
+    }
+    case Primitive::kPrimDouble: {
+      __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
+      __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
+      break;
+    }
     default:
-      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+      LOG(FATAL) << "Unexpected compare type " << type;
   }
-
-  CpuRegister output = locations->Out().As<CpuRegister>();
-  __ movl(output, Immediate(0));
+  __ movl(out, Immediate(0));
   __ j(kEqual, &done);
-  __ j(kGreater, &greater);
-
-  __ movl(output, Immediate(-1));
-  __ jmp(&done);
+  __ j(type == Primitive::kPrimLong ? kLess : kBelow, &less);  //  ucomis{s,d} sets CF (kBelow)
 
   __ Bind(&greater);
-  __ movl(output, Immediate(1));
+  __ movl(out, Immediate(1));
+  __ jmp(&done);
+
+  __ Bind(&less);
+  __ movl(out, Immediate(-1));
 
   __ Bind(&done);
 }
@@ -1019,12 +1048,12 @@
       case Primitive::kPrimInt:
       case Primitive::kPrimNot:
       case Primitive::kPrimLong:
-        DCHECK_EQ(ret->GetLocations()->InAt(0).As<CpuRegister>().AsRegister(), RAX);
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
         break;
 
       case Primitive::kPrimFloat:
       case Primitive::kPrimDouble:
-        DCHECK_EQ(ret->GetLocations()->InAt(0).As<XmmRegister>().AsFloatRegister(),
+        DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
                   XMM0);
         break;
 
@@ -1097,7 +1126,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeStatic(HInvokeStatic* invoke) {
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).As<CpuRegister>();
+  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   // TODO: Implement all kinds of calls:
   // 1) boot -> boot
   // 2) app -> boot
@@ -1156,7 +1185,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).As<CpuRegister>();
+  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
           invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
   LocationSummary* locations = invoke->GetLocations();
@@ -1167,7 +1196,7 @@
     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
     __ movl(temp, Address(temp, class_offset));
   } else {
-    __ movl(temp, Address(receiver.As<CpuRegister>(), class_offset));
+    __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   }
   // temp = temp->GetMethodAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
@@ -1187,7 +1216,7 @@
 
 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).As<CpuRegister>();
+  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
           (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
   LocationSummary* locations = invoke->GetLocations();
@@ -1195,7 +1224,7 @@
   size_t class_offset = mirror::Object::ClassOffset().SizeValue();
 
   // Set the hidden argument.
-  __ movq(invoke->GetLocations()->GetTemp(1).As<CpuRegister>(),
+  __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(),
           Immediate(invoke->GetDexMethodIndex()));
 
   // temp = object->GetClass();
@@ -1203,7 +1232,7 @@
     __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
     __ movl(temp, Address(temp, class_offset));
   } else {
-    __ movl(temp, Address(receiver.As<CpuRegister>(), class_offset));
+    __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
   }
   // temp = temp->GetImtEntryAt(method_offset);
   __ movl(temp, Address(temp, method_offset));
@@ -1228,9 +1257,9 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
-      // Output overlaps as we need a fresh (zero-initialized)
-      // register to perform subtraction from zero.
-      locations->SetOut(Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresFpuRegister());
       break;
 
     default:
@@ -1246,49 +1275,40 @@
     case Primitive::kPrimInt:
       DCHECK(in.IsRegister());
       DCHECK(in.Equals(out));
-      __ negl(out.As<CpuRegister>());
+      __ negl(out.AsRegister<CpuRegister>());
       break;
 
     case Primitive::kPrimLong:
       DCHECK(in.IsRegister());
       DCHECK(in.Equals(out));
-      __ negq(out.As<CpuRegister>());
+      __ negq(out.AsRegister<CpuRegister>());
       break;
 
-    case Primitive::kPrimFloat:
-      DCHECK(in.IsFpuRegister());
-      DCHECK(out.IsFpuRegister());
-      DCHECK(!in.Equals(out));
-      // TODO: Instead of computing negation as a subtraction from
-      // zero, implement it with an exclusive or with value 0x80000000
-      // (mask for bit 31, representing the sign of a single-precision
-      // floating-point number), fetched from a constant pool:
-      //
-      //   xorps out, [RIP:...] // value at RIP is 0x80 00 00 00
-
-      // out = 0
-      __ xorps(out.As<XmmRegister>(), out.As<XmmRegister>());
-      // out = out - in
-      __ subss(out.As<XmmRegister>(), in.As<XmmRegister>());
+    case Primitive::kPrimFloat: {
+      DCHECK(in.Equals(out));
+      CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
+      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      // Implement float negation with an exclusive or with value
+      // 0x80000000 (mask for bit 31, representing the sign of a
+      // single-precision floating-point number).
+      __ movq(constant, Immediate(INT64_C(0x80000000)));
+      __ movd(mask, constant);
+      __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
       break;
+    }
 
-    case Primitive::kPrimDouble:
-      DCHECK(in.IsFpuRegister());
-      DCHECK(out.IsFpuRegister());
-      DCHECK(!in.Equals(out));
-      // TODO: Instead of computing negation as a subtraction from
-      // zero, implement it with an exclusive or with value
+    case Primitive::kPrimDouble: {
+      DCHECK(in.Equals(out));
+      CpuRegister constant = locations->GetTemp(0).AsRegister<CpuRegister>();
+      XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+      // Implement double negation with an exclusive or with value
       // 0x8000000000000000 (mask for bit 63, representing the sign of
-      // a double-precision floating-point number), fetched from a
-      // constant pool:
-      //
-      //   xorpd out, [RIP:...] // value at RIP is 0x80 00 00 00 00 00 00 00
-
-      // out = 0
-      __ xorpd(out.As<XmmRegister>(), out.As<XmmRegister>());
-      // out = out - in
-      __ subsd(out.As<XmmRegister>(), in.As<XmmRegister>());
+      // a double-precision floating-point number).
+      __ movq(constant, Immediate(INT64_C(0x8000000000000000)));
+      __ movd(mask, constant);
+      __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
       break;
+    }
 
     default:
       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
@@ -1300,6 +1320,7 @@
       new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
+  DCHECK_NE(result_type, input_type);
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1342,6 +1363,12 @@
           break;
 
         case Primitive::kPrimFloat:
+          // Processing a Dex `float-to-int' instruction.
+          locations->SetInAt(0, Location::RequiresFpuRegister());
+          locations->SetOut(Location::RequiresRegister());
+          locations->AddTemp(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1383,7 +1410,6 @@
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
           // Processing a Dex `int-to-char' instruction.
           locations->SetInAt(0, Location::Any());
           locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
@@ -1407,6 +1433,11 @@
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-float' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1430,6 +1461,11 @@
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-double' instruction.
+          locations->SetInAt(0, Location::RequiresRegister());
+          locations->SetOut(Location::RequiresFpuRegister());
+          break;
+
         case Primitive::kPrimFloat:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1453,6 +1489,7 @@
   Location in = locations->InAt(0);
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
+  DCHECK_NE(result_type, input_type);
   switch (result_type) {
     case Primitive::kPrimByte:
       switch (input_type) {
@@ -1461,13 +1498,13 @@
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-byte' instruction.
           if (in.IsRegister()) {
-            __ movsxb(out.As<CpuRegister>(), in.As<CpuRegister>());
+            __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsStackSlot()) {
-            __ movsxb(out.As<CpuRegister>(),
+            __ movsxb(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
-            __ movl(out.As<CpuRegister>(),
+            __ movl(out.AsRegister<CpuRegister>(),
                     Immediate(static_cast<int8_t>(in.GetConstant()->AsIntConstant()->GetValue())));
           }
           break;
@@ -1485,13 +1522,13 @@
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-short' instruction.
           if (in.IsRegister()) {
-            __ movsxw(out.As<CpuRegister>(), in.As<CpuRegister>());
+            __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsStackSlot()) {
-            __ movsxw(out.As<CpuRegister>(),
+            __ movsxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
-            __ movl(out.As<CpuRegister>(),
+            __ movl(out.AsRegister<CpuRegister>(),
                     Immediate(static_cast<int16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
           }
           break;
@@ -1507,19 +1544,43 @@
         case Primitive::kPrimLong:
           // Processing a Dex `long-to-int' instruction.
           if (in.IsRegister()) {
-            __ movl(out.As<CpuRegister>(), in.As<CpuRegister>());
+            __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsDoubleStackSlot()) {
-            __ movl(out.As<CpuRegister>(),
+            __ movl(out.AsRegister<CpuRegister>(),
                     Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
             DCHECK(in.IsConstant());
             DCHECK(in.GetConstant()->IsLongConstant());
             int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
-            __ movl(out.As<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
+            __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
           }
           break;
 
-        case Primitive::kPrimFloat:
+        case Primitive::kPrimFloat: {
+          // Processing a Dex `float-to-int' instruction.
+          XmmRegister input = in.AsFpuRegister<XmmRegister>();
+          CpuRegister output = out.AsRegister<CpuRegister>();
+          XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+          Label done, nan;
+
+          __ movl(output, Immediate(kPrimIntMax));
+          // temp = int-to-float(output)
+          __ cvtsi2ss(temp, output);
+          // if input >= temp goto done
+          __ comiss(input, temp);
+          __ j(kAboveEqual, &done);
+          // if input == NaN goto nan
+          __ j(kUnordered, &nan);
+          // output = float-to-int-truncate(input)
+          __ cvttss2si(output, input);
+          __ jmp(&done);
+          __ Bind(&nan);
+          //  output = 0
+          __ xorl(output, output);
+          __ Bind(&done);
+          break;
+        }
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1540,7 +1601,7 @@
         case Primitive::kPrimChar:
           // Processing a Dex `int-to-long' instruction.
           DCHECK(in.IsRegister());
-          __ movsxd(out.As<CpuRegister>(), in.As<CpuRegister>());
+          __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           break;
 
         case Primitive::kPrimFloat:
@@ -1560,16 +1621,15 @@
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
-        case Primitive::kPrimChar:
           // Processing a Dex `int-to-char' instruction.
           if (in.IsRegister()) {
-            __ movzxw(out.As<CpuRegister>(), in.As<CpuRegister>());
+            __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
           } else if (in.IsStackSlot()) {
-            __ movzxw(out.As<CpuRegister>(),
+            __ movzxw(out.AsRegister<CpuRegister>(),
                       Address(CpuRegister(RSP), in.GetStackIndex()));
           } else {
             DCHECK(in.GetConstant()->IsIntConstant());
-            __ movl(out.As<CpuRegister>(),
+            __ movl(out.AsRegister<CpuRegister>(),
                     Immediate(static_cast<uint16_t>(in.GetConstant()->AsIntConstant()->GetValue())));
           }
           break;
@@ -1582,15 +1642,19 @@
 
     case Primitive::kPrimFloat:
       switch (input_type) {
-          // Processing a Dex `int-to-float' instruction.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          __ cvtsi2ss(out.As<XmmRegister>(), in.As<CpuRegister>());
+          // Processing a Dex `int-to-float' instruction.
+          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-float' instruction.
+          __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          break;
+
         case Primitive::kPrimDouble:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1604,15 +1668,19 @@
 
     case Primitive::kPrimDouble:
       switch (input_type) {
-          // Processing a Dex `int-to-double' instruction.
         case Primitive::kPrimByte:
         case Primitive::kPrimShort:
         case Primitive::kPrimInt:
         case Primitive::kPrimChar:
-          __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>());
+          // Processing a Dex `int-to-double' instruction.
+          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
           break;
 
         case Primitive::kPrimLong:
+          // Processing a Dex `long-to-double' instruction.
+          __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
+          break;
+
         case Primitive::kPrimFloat:
           LOG(FATAL) << "Type conversion from " << input_type
                      << " to " << result_type << " not yet implemented";
@@ -1670,28 +1738,28 @@
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ addl(first.As<CpuRegister>(), second.As<CpuRegister>());
+        __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ addl(first.As<CpuRegister>(), imm);
+        __ addl(first.AsRegister<CpuRegister>(), imm);
       } else {
-        __ addl(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
+        __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      __ addq(first.As<CpuRegister>(), second.As<CpuRegister>());
+      __ addq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ addss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ addsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -1736,27 +1804,27 @@
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ subl(first.As<CpuRegister>(), second.As<CpuRegister>());
+        __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ subl(first.As<CpuRegister>(), imm);
+        __ subl(first.AsRegister<CpuRegister>(), imm);
       } else {
-        __ subl(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
+        __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(first.As<CpuRegister>(), second.As<CpuRegister>());
+      __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ subss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ subsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -1802,28 +1870,29 @@
   switch (mul->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        __ imull(first.As<CpuRegister>(), second.As<CpuRegister>());
+        __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (second.IsConstant()) {
         Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
-        __ imull(first.As<CpuRegister>(), imm);
+        __ imull(first.AsRegister<CpuRegister>(), imm);
       } else {
         DCHECK(second.IsStackSlot());
-        __ imull(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
+        __ imull(first.AsRegister<CpuRegister>(),
+                 Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
     case Primitive::kPrimLong: {
-      __ imulq(first.As<CpuRegister>(), second.As<CpuRegister>());
+      __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       break;
     }
 
     case Primitive::kPrimFloat: {
-      __ mulss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ mulsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -1840,10 +1909,10 @@
   bool is_div = instruction->IsDiv();
   LocationSummary* locations = instruction->GetLocations();
 
-  CpuRegister out_reg = locations->Out().As<CpuRegister>();
-  CpuRegister second_reg = locations->InAt(1).As<CpuRegister>();
+  CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>();
 
-  DCHECK_EQ(RAX, locations->InAt(0).As<CpuRegister>().AsRegister());
+  DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
   DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister());
 
   SlowPathCodeX86_64* slow_path =
@@ -1915,12 +1984,12 @@
     }
 
     case Primitive::kPrimFloat: {
-      __ divss(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
     case Primitive::kPrimDouble: {
-      __ divsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
       break;
     }
 
@@ -1993,7 +2062,7 @@
   switch (instruction->GetType()) {
     case Primitive::kPrimInt: {
       if (value.IsRegister()) {
-        __ testl(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
         __ j(kEqual, slow_path->GetEntryLabel());
       } else if (value.IsStackSlot()) {
         __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
@@ -2008,7 +2077,7 @@
     }
     case Primitive::kPrimLong: {
       if (value.IsRegister()) {
-        __ testq(value.As<CpuRegister>(), value.As<CpuRegister>());
+        __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
         __ j(kEqual, slow_path->GetEntryLabel());
       } else if (value.IsDoubleStackSlot()) {
         __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
@@ -2050,13 +2119,13 @@
   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
 
   LocationSummary* locations = op->GetLocations();
-  CpuRegister first_reg = locations->InAt(0).As<CpuRegister>();
+  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
   Location second = locations->InAt(1);
 
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
-        CpuRegister second_reg = second.As<CpuRegister>();
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
         if (op->IsShl()) {
           __ shll(first_reg, second_reg);
         } else if (op->IsShr()) {
@@ -2078,7 +2147,7 @@
     }
     case Primitive::kPrimLong: {
       if (second.IsRegister()) {
-        CpuRegister second_reg = second.As<CpuRegister>();
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
         if (op->IsShl()) {
           __ shlq(first_reg, second_reg);
         } else if (op->IsShr()) {
@@ -2196,20 +2265,20 @@
 
 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
   LocationSummary* locations = not_->GetLocations();
-  DCHECK_EQ(locations->InAt(0).As<CpuRegister>().AsRegister(),
-            locations->Out().As<CpuRegister>().AsRegister());
+  DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
+            locations->Out().AsRegister<CpuRegister>().AsRegister());
   Location out = locations->Out();
   switch (not_->InputAt(0)->GetType()) {
     case Primitive::kPrimBoolean:
-      __ xorq(out.As<CpuRegister>(), Immediate(1));
+      __ xorq(out.AsRegister<CpuRegister>(), Immediate(1));
       break;
 
     case Primitive::kPrimInt:
-      __ notl(out.As<CpuRegister>());
+      __ notl(out.AsRegister<CpuRegister>());
       break;
 
     case Primitive::kPrimLong:
-      __ notq(out.As<CpuRegister>());
+      __ notq(out.AsRegister<CpuRegister>());
       break;
 
     default:
@@ -2248,51 +2317,51 @@
 
 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   size_t offset = instruction->GetFieldOffset().SizeValue();
   Primitive::Type field_type = instruction->GetFieldType();
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movb(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movw(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movl(Address(obj, offset), value);
       if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
-        CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
-        CpuRegister card = locations->GetTemp(1).As<CpuRegister>();
+        CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+        CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
         codegen_->MarkGCCard(temp, card, obj, value);
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movq(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movss(Address(obj, offset), value);
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movsd(Address(obj, offset), value);
       break;
     }
@@ -2312,55 +2381,55 @@
 
 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   size_t offset = instruction->GetFieldOffset().SizeValue();
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movzxb(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimByte: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movsxb(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimShort: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movsxw(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimChar: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movzxw(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movl(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimLong: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movq(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movss(out, Address(obj, offset));
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movsd(out, Address(obj, offset));
       break;
     }
@@ -2388,7 +2457,7 @@
   Location obj = locations->InAt(0);
 
   if (obj.IsRegister()) {
-    __ cmpl(obj.As<CpuRegister>(), Immediate(0));
+    __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0));
   } else if (obj.IsStackSlot()) {
     __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
   } else {
@@ -2411,54 +2480,54 @@
 
 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
       } else {
-        __ movzxb(out, Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset));
+        __ movzxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value();
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxb(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset));
       } else {
-        __ movsxb(out, Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset));
+        __ movsxb(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimShort: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value();
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movsxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
       } else {
-        __ movsxw(out, Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset));
+        __ movsxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movzxw(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset));
       } else {
-        __ movzxw(out, Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset));
+        __ movzxw(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset));
       }
       break;
     }
@@ -2467,48 +2536,48 @@
     case Primitive::kPrimNot: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movl(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
       } else {
-        __ movl(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset));
+        __ movl(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       if (index.IsConstant()) {
         __ movq(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
       } else {
-        __ movq(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset));
+        __ movq(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movss(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset));
       } else {
-        __ movss(out, Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset));
+        __ movss(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset));
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       if (index.IsConstant()) {
         __ movsd(out, Address(obj,
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset));
       } else {
-        __ movsd(out, Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset));
+        __ movsd(out, Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset));
       }
       break;
     }
@@ -2556,7 +2625,7 @@
 
 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   Location index = locations->InAt(1);
   Location value = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
@@ -2571,16 +2640,17 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
         if (value.IsRegister()) {
-          __ movb(Address(obj, offset), value.As<CpuRegister>());
+          __ movb(Address(obj, offset), value.AsRegister<CpuRegister>());
         } else {
-          __ movb(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          __ movb(Address(obj, offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       } else {
         if (value.IsRegister()) {
-          __ movb(Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset),
-                  value.As<CpuRegister>());
+          __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset),
+                  value.AsRegister<CpuRegister>());
         } else {
-          __ movb(Address(obj, index.As<CpuRegister>(), TIMES_1, data_offset),
+          __ movb(Address(obj, index.AsRegister<CpuRegister>(), TIMES_1, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       }
@@ -2593,19 +2663,20 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
         if (value.IsRegister()) {
-          __ movw(Address(obj, offset), value.As<CpuRegister>());
+          __ movw(Address(obj, offset), value.AsRegister<CpuRegister>());
         } else {
           DCHECK(value.IsConstant()) << value;
-          __ movw(Address(obj, offset), Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
+          __ movw(Address(obj, offset),
+                  Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       } else {
         DCHECK(index.IsRegister()) << index;
         if (value.IsRegister()) {
-          __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset),
-                  value.As<CpuRegister>());
+          __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset),
+                  value.AsRegister<CpuRegister>());
         } else {
           DCHECK(value.IsConstant()) << value;
-          __ movw(Address(obj, index.As<CpuRegister>(), TIMES_2, data_offset),
+          __ movw(Address(obj, index.AsRegister<CpuRegister>(), TIMES_2, data_offset),
                   Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
         }
       }
@@ -2620,7 +2691,7 @@
           size_t offset =
               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
           if (value.IsRegister()) {
-            __ movl(Address(obj, offset), value.As<CpuRegister>());
+            __ movl(Address(obj, offset), value.AsRegister<CpuRegister>());
           } else {
             DCHECK(value.IsConstant()) << value;
             __ movl(Address(obj, offset),
@@ -2629,24 +2700,25 @@
         } else {
           DCHECK(index.IsRegister()) << index;
           if (value.IsRegister()) {
-            __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
-                    value.As<CpuRegister>());
+            __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
+                    value.AsRegister<CpuRegister>());
           } else {
             DCHECK(value.IsConstant()) << value;
-            __ movl(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
+            __ movl(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
                     Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
           }
         }
 
         if (needs_write_barrier) {
           DCHECK_EQ(value_type, Primitive::kPrimNot);
-          CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
-          CpuRegister card = locations->GetTemp(1).As<CpuRegister>();
-          codegen_->MarkGCCard(temp, card, obj, value.As<CpuRegister>());
+          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+          CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+          codegen_->MarkGCCard(temp, card, obj, value.AsRegister<CpuRegister>());
         }
       } else {
         DCHECK_EQ(value_type, Primitive::kPrimNot);
-        __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject), true));
+        __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAputObject),
+                                        true));
         DCHECK(!codegen_->IsLeafMethod());
         codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
       }
@@ -2658,11 +2730,11 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         DCHECK(value.IsRegister());
-        __ movq(Address(obj, offset), value.As<CpuRegister>());
+        __ movq(Address(obj, offset), value.AsRegister<CpuRegister>());
       } else {
         DCHECK(value.IsRegister());
-        __ movq(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset),
-                value.As<CpuRegister>());
+        __ movq(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+                value.AsRegister<CpuRegister>());
       }
       break;
     }
@@ -2672,11 +2744,11 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
         DCHECK(value.IsFpuRegister());
-        __ movss(Address(obj, offset), value.As<XmmRegister>());
+        __ movss(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
       } else {
         DCHECK(value.IsFpuRegister());
-        __ movss(Address(obj, index.As<CpuRegister>(), TIMES_4, data_offset),
-                value.As<XmmRegister>());
+        __ movss(Address(obj, index.AsRegister<CpuRegister>(), TIMES_4, data_offset),
+                value.AsFpuRegister<XmmRegister>());
       }
       break;
     }
@@ -2686,11 +2758,11 @@
       if (index.IsConstant()) {
         size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
         DCHECK(value.IsFpuRegister());
-        __ movsd(Address(obj, offset), value.As<XmmRegister>());
+        __ movsd(Address(obj, offset), value.AsFpuRegister<XmmRegister>());
       } else {
         DCHECK(value.IsFpuRegister());
-        __ movsd(Address(obj, index.As<CpuRegister>(), TIMES_8, data_offset),
-                value.As<XmmRegister>());
+        __ movsd(Address(obj, index.AsRegister<CpuRegister>(), TIMES_8, data_offset),
+                value.AsFpuRegister<XmmRegister>());
       }
       break;
     }
@@ -2711,8 +2783,8 @@
 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   uint32_t offset = mirror::Array::LengthOffset().Uint32Value();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
-  CpuRegister out = locations->Out().As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   __ movl(out, Address(obj, offset));
 }
 
@@ -2732,8 +2804,8 @@
       instruction, locations->InAt(0), locations->InAt(1));
   codegen_->AddSlowPath(slow_path);
 
-  CpuRegister index = locations->InAt(0).As<CpuRegister>();
-  CpuRegister length = locations->InAt(1).As<CpuRegister>();
+  CpuRegister index = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister length = locations->InAt(1).AsRegister<CpuRegister>();
 
   __ cmpl(index, length);
   __ j(kAboveEqual, slow_path->GetEntryLabel());
@@ -2817,21 +2889,21 @@
 
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
-      __ movq(destination.As<CpuRegister>(), source.As<CpuRegister>());
+      __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
     } else if (destination.IsStackSlot()) {
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
-              source.As<CpuRegister>());
+              source.AsRegister<CpuRegister>());
     } else {
       DCHECK(destination.IsDoubleStackSlot());
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
-              source.As<CpuRegister>());
+              source.AsRegister<CpuRegister>());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
-      __ movl(destination.As<CpuRegister>(),
+      __ movl(destination.AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
     } else if (destination.IsFpuRegister()) {
-      __ movss(destination.As<XmmRegister>(),
+      __ movss(destination.AsFpuRegister<XmmRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(destination.IsStackSlot());
@@ -2840,10 +2912,11 @@
     }
   } else if (source.IsDoubleStackSlot()) {
     if (destination.IsRegister()) {
-      __ movq(destination.As<CpuRegister>(),
+      __ movq(destination.AsRegister<CpuRegister>(),
               Address(CpuRegister(RSP), source.GetStackIndex()));
     } else if (destination.IsFpuRegister()) {
-      __ movsd(destination.As<XmmRegister>(), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movsd(destination.AsFpuRegister<XmmRegister>(),
+               Address(CpuRegister(RSP), source.GetStackIndex()));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
@@ -2854,7 +2927,7 @@
     if (constant->IsIntConstant()) {
       Immediate imm(constant->AsIntConstant()->GetValue());
       if (destination.IsRegister()) {
-        __ movl(destination.As<CpuRegister>(), imm);
+        __ movl(destination.AsRegister<CpuRegister>(), imm);
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
@@ -2862,7 +2935,7 @@
     } else if (constant->IsLongConstant()) {
       int64_t value = constant->AsLongConstant()->GetValue();
       if (destination.IsRegister()) {
-        __ movq(destination.As<CpuRegister>(), Immediate(value));
+        __ movq(destination.AsRegister<CpuRegister>(), Immediate(value));
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), Immediate(value));
@@ -2872,7 +2945,7 @@
       Immediate imm(bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()));
       if (destination.IsFpuRegister()) {
         __ movl(CpuRegister(TMP), imm);
-        __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
       } else {
         DCHECK(destination.IsStackSlot()) << destination;
         __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
@@ -2882,7 +2955,7 @@
       Immediate imm(bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()));
       if (destination.IsFpuRegister()) {
         __ movq(CpuRegister(TMP), imm);
-        __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+        __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         __ movq(CpuRegister(TMP), imm);
@@ -2891,14 +2964,14 @@
     }
   } else if (source.IsFpuRegister()) {
     if (destination.IsFpuRegister()) {
-      __ movaps(destination.As<XmmRegister>(), source.As<XmmRegister>());
+      __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
     } else if (destination.IsStackSlot()) {
       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
-               source.As<XmmRegister>());
+               source.AsFpuRegister<XmmRegister>());
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
-               source.As<XmmRegister>());
+               source.AsFpuRegister<XmmRegister>());
     }
   }
 }
@@ -2959,31 +3032,31 @@
   Location destination = move->GetDestination();
 
   if (source.IsRegister() && destination.IsRegister()) {
-    __ xchgq(destination.As<CpuRegister>(), source.As<CpuRegister>());
+    __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
-    Exchange32(source.As<CpuRegister>(), destination.GetStackIndex());
+    Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
-    Exchange32(destination.As<CpuRegister>(), source.GetStackIndex());
+    Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange32(destination.GetStackIndex(), source.GetStackIndex());
   } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
-    Exchange64(source.As<CpuRegister>(), destination.GetStackIndex());
+    Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
-    Exchange64(destination.As<CpuRegister>(), source.GetStackIndex());
+    Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
     Exchange64(destination.GetStackIndex(), source.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
-    __ movd(CpuRegister(TMP), source.As<XmmRegister>());
-    __ movaps(source.As<XmmRegister>(), destination.As<XmmRegister>());
-    __ movd(destination.As<XmmRegister>(), CpuRegister(TMP));
+    __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
+    __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
+    __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
   } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
-    Exchange32(source.As<XmmRegister>(), destination.GetStackIndex());
+    Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
-    Exchange32(destination.As<XmmRegister>(), source.GetStackIndex());
+    Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
-    Exchange64(source.As<XmmRegister>(), destination.GetStackIndex());
+    Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
   } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
-    Exchange64(destination.As<XmmRegister>(), source.GetStackIndex());
+    Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
   }
@@ -3018,7 +3091,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
-  CpuRegister out = cls->GetLocations()->Out().As<CpuRegister>();
+  CpuRegister out = cls->GetLocations()->Out().AsRegister<CpuRegister>();
   if (cls->IsReferrersClass()) {
     DCHECK(!cls->CanCallRuntime());
     DCHECK(!cls->MustGenerateClinitCheck());
@@ -3056,7 +3129,8 @@
   SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
       check->GetLoadClass(), check, check->GetDexPc(), true);
   codegen_->AddSlowPath(slow_path);
-  GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).As<CpuRegister>());
+  GenerateClassInitializationCheck(slow_path,
+                                   check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
 }
 
 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
@@ -3068,55 +3142,55 @@
 
 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister cls = locations->InAt(0).As<CpuRegister>();
+  CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>();
   size_t offset = instruction->GetFieldOffset().SizeValue();
 
   switch (instruction->GetType()) {
     case Primitive::kPrimBoolean: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movzxb(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimByte: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movsxb(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimShort: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movsxw(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimChar: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movzxw(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movl(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimLong: {
-      CpuRegister out = locations->Out().As<CpuRegister>();
+      CpuRegister out = locations->Out().AsRegister<CpuRegister>();
       __ movq(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movss(out, Address(cls, offset));
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister out = locations->Out().As<XmmRegister>();
+      XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
       __ movsd(out, Address(cls, offset));
       break;
     }
@@ -3144,51 +3218,51 @@
 
 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister cls = locations->InAt(0).As<CpuRegister>();
+  CpuRegister cls = locations->InAt(0).AsRegister<CpuRegister>();
   size_t offset = instruction->GetFieldOffset().SizeValue();
   Primitive::Type field_type = instruction->GetFieldType();
 
   switch (field_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movb(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movw(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimInt:
     case Primitive::kPrimNot: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movl(Address(cls, offset), value);
       if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->GetValue())) {
-        CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
-        CpuRegister card = locations->GetTemp(1).As<CpuRegister>();
+        CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+        CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
         codegen_->MarkGCCard(temp, card, cls, value);
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      CpuRegister value = locations->InAt(1).As<CpuRegister>();
+      CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
       __ movq(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimFloat: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movss(Address(cls, offset), value);
       break;
     }
 
     case Primitive::kPrimDouble: {
-      XmmRegister value = locations->InAt(1).As<XmmRegister>();
+      XmmRegister value = locations->InAt(1).AsFpuRegister<XmmRegister>();
       __ movsd(Address(cls, offset), value);
       break;
     }
@@ -3209,7 +3283,7 @@
   SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
   codegen_->AddSlowPath(slow_path);
 
-  CpuRegister out = load->GetLocations()->Out().As<CpuRegister>();
+  CpuRegister out = load->GetLocations()->Out().AsRegister<CpuRegister>();
   codegen_->LoadCurrentMethod(CpuRegister(out));
   __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()));
   __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
@@ -3228,7 +3302,7 @@
 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
   Address address = Address::Absolute(
       Thread::ExceptionOffset<kX86_64WordSize>().Int32Value(), true);
-  __ gs()->movl(load->GetLocations()->Out().As<CpuRegister>(), address);
+  __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), address);
   __ gs()->movl(address, Immediate(0));
 }
 
@@ -3257,9 +3331,9 @@
 
 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
-  CpuRegister out = locations->Out().As<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   Label done, zero;
   SlowPathCodeX86_64* slow_path = nullptr;
@@ -3271,7 +3345,7 @@
   // Compare the class of `obj` with `cls`.
   __ movl(out, Address(obj, class_offset));
   if (cls.IsRegister()) {
-    __ cmpl(out, cls.As<CpuRegister>());
+    __ cmpl(out, cls.AsRegister<CpuRegister>());
   } else {
     DCHECK(cls.IsStackSlot()) << cls;
     __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
@@ -3309,9 +3383,9 @@
 
 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister obj = locations->InAt(0).As<CpuRegister>();
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
   Location cls = locations->InAt(1);
-  CpuRegister temp = locations->GetTemp(0).As<CpuRegister>();
+  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
       instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc());
@@ -3323,7 +3397,7 @@
   // Compare the class of `obj` with `cls`.
   __ movl(temp, Address(obj, class_offset));
   if (cls.IsRegister()) {
-    __ cmpl(temp, cls.As<CpuRegister>());
+    __ cmpl(temp, cls.AsRegister<CpuRegister>());
   } else {
     DCHECK(cls.IsStackSlot()) << cls;
     __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
@@ -3388,43 +3462,43 @@
   if (instruction->GetResultType() == Primitive::kPrimInt) {
     if (second.IsRegister()) {
       if (instruction->IsAnd()) {
-        __ andl(first.As<CpuRegister>(), second.As<CpuRegister>());
+        __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else if (instruction->IsOr()) {
-        __ orl(first.As<CpuRegister>(), second.As<CpuRegister>());
+        __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.As<CpuRegister>(), second.As<CpuRegister>());
+        __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
       }
     } else if (second.IsConstant()) {
       Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
       if (instruction->IsAnd()) {
-        __ andl(first.As<CpuRegister>(), imm);
+        __ andl(first.AsRegister<CpuRegister>(), imm);
       } else if (instruction->IsOr()) {
-        __ orl(first.As<CpuRegister>(), imm);
+        __ orl(first.AsRegister<CpuRegister>(), imm);
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.As<CpuRegister>(), imm);
+        __ xorl(first.AsRegister<CpuRegister>(), imm);
       }
     } else {
       Address address(CpuRegister(RSP), second.GetStackIndex());
       if (instruction->IsAnd()) {
-        __ andl(first.As<CpuRegister>(), address);
+        __ andl(first.AsRegister<CpuRegister>(), address);
       } else if (instruction->IsOr()) {
-        __ orl(first.As<CpuRegister>(), address);
+        __ orl(first.AsRegister<CpuRegister>(), address);
       } else {
         DCHECK(instruction->IsXor());
-        __ xorl(first.As<CpuRegister>(), address);
+        __ xorl(first.AsRegister<CpuRegister>(), address);
       }
     }
   } else {
     DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
     if (instruction->IsAnd()) {
-      __ andq(first.As<CpuRegister>(), second.As<CpuRegister>());
+      __ andq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
     } else if (instruction->IsOr()) {
-      __ orq(first.As<CpuRegister>(), second.As<CpuRegister>());
+      __ orq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
     } else {
       DCHECK(instruction->IsXor());
-      __ xorq(first.As<CpuRegister>(), second.As<CpuRegister>());
+      __ xorq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
     }
   }
 }
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index c36b143..82fe03c 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -32,7 +32,7 @@
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   HGraph* graph = builder.BuildGraph(*item);
   graph->BuildDominatorTree();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   return graph;
 }
 
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 25168b5..6e5f1bd 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -91,29 +91,38 @@
   return block_effects_.Get(block->GetBlockId());
 }
 
-static bool IsLoopExit(HBasicBlock* block, HBasicBlock* successor) {
-  HLoopInformation* block_info = block->GetLoopInformation();
-  HLoopInformation* other_info = successor->GetLoopInformation();
-  return block_info != other_info && (other_info == nullptr || block_info->IsIn(*other_info));
-}
-
 void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
-  if (kIsDebugBuild) {
-    // Check that all non back-edge processors have been visited.
-    for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) {
-      HBasicBlock* predecessor = block->GetPredecessors().Get(i);
-      DCHECK(visited_.Get(predecessor->GetBlockId())
-             || (block->GetLoopInformation() != nullptr
-                 && (block->GetLoopInformation()->GetBackEdges().Get(0) == predecessor)));
+  ValueSet* set = nullptr;
+  const GrowableArray<HBasicBlock*>& predecessors = block->GetPredecessors();
+  if (predecessors.Size() == 0 || predecessors.Get(0)->IsEntryBlock()) {
+    // The entry block should only accumulate constant instructions, and
+    // the builder puts constants only in the entry block.
+    // Therefore, there is no need to propagate the value set to the next block.
+    set = new (allocator_) ValueSet(allocator_);
+  } else {
+    HBasicBlock* dominator = block->GetDominator();
+    set = sets_.Get(dominator->GetBlockId())->Copy();
+    if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) {
+      // We have to copy if the dominator has other successors, or `block` is not a successor
+      // of the dominator.
+      set = set->Copy();
     }
-    visited_.Put(block->GetBlockId(), true);
+    if (!set->IsEmpty()) {
+      if (block->IsLoopHeader()) {
+        DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
+        set->Kill(GetLoopEffects(block));
+      } else if (predecessors.Size() > 1) {
+        for (size_t i = 0, e = predecessors.Size(); i < e; ++i) {
+          set->IntersectionWith(sets_.Get(predecessors.Get(i)->GetBlockId()));
+          if (set->IsEmpty()) {
+            break;
+          }
+        }
+      }
+    }
   }
 
-  ValueSet* set = sets_.Get(block->GetBlockId());
-
-  if (block->IsLoopHeader()) {
-    set->Kill(GetLoopEffects(block));
-  }
+  sets_.Put(block->GetBlockId(), set);
 
   HInstruction* current = block->GetFirstInstruction();
   while (current != nullptr) {
@@ -131,57 +140,6 @@
     }
     current = next;
   }
-
-  if (block == graph_->GetEntryBlock()) {
-    // The entry block should only accumulate constant instructions, and
-    // the builder puts constants only in the entry block.
-    // Therefore, there is no need to propagate the value set to the next block.
-    DCHECK_EQ(block->GetDominatedBlocks().Size(), 1u);
-    HBasicBlock* dominated = block->GetDominatedBlocks().Get(0);
-    sets_.Put(dominated->GetBlockId(), new (allocator_) ValueSet(allocator_));
-    return;
-  }
-
-  // Copy the value set to dominated blocks. We can re-use
-  // the current set for the last dominated block because we are done visiting
-  // this block.
-  for (size_t i = 0, e = block->GetDominatedBlocks().Size(); i < e; ++i) {
-    HBasicBlock* dominated = block->GetDominatedBlocks().Get(i);
-    sets_.Put(dominated->GetBlockId(), i == e - 1 ? set : set->Copy());
-  }
-
-  // Kill instructions in the value set of each successor. If the successor
-  // is a loop exit, then we use the side effects of the loop. If not, we use
-  // the side effects of this block.
-  for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) {
-    HBasicBlock* successor = block->GetSuccessors().Get(i);
-    if (successor->IsLoopHeader()
-        && successor->GetLoopInformation()->GetBackEdges().Get(0) == block) {
-      // In case of a back edge, we already have visited the loop header.
-      // We should not update its value set, because the last dominated block
-      // of the loop header uses the same value set.
-      DCHECK(visited_.Get(successor->GetBlockId()));
-      continue;
-    }
-    DCHECK(!visited_.Get(successor->GetBlockId()));
-    ValueSet* successor_set = sets_.Get(successor->GetBlockId());
-    // The dominator sets the set, and we are guaranteed to have visited it already.
-    DCHECK(successor_set != nullptr);
-
-    // If this block dominates this successor there is nothing to do.
-    // Also if the set is empty, there is nothing to kill.
-    if (successor->GetDominator() != block && !successor_set->IsEmpty()) {
-      if (block->IsInLoop() && IsLoopExit(block, successor)) {
-        // All instructions killed in the loop must be killed for a loop exit.
-        SideEffects effects = GetLoopEffects(block->GetLoopInformation()->GetHeader());
-        sets_.Get(successor->GetBlockId())->Kill(effects);
-      } else {
-        // Following block (that might be in the same loop).
-        // Just kill instructions based on this block's side effects.
-        sets_.Get(successor->GetBlockId())->Kill(GetBlockEffects(block));
-      }
-    }
-  }
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index 8e739cb..81f2c3f 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -96,6 +96,26 @@
     return nullptr;
   }
 
+  // Returns whether `instruction` is in the set.
+  HInstruction* IdentityLookup(HInstruction* instruction) const {
+    size_t hash_code = instruction->ComputeHashCode();
+    size_t index = hash_code % kDefaultNumberOfEntries;
+    HInstruction* existing = table_[index];
+    if (existing != nullptr && existing == instruction) {
+      return existing;
+    }
+
+    for (ValueSetNode* node = collisions_; node != nullptr; node = node->GetNext()) {
+      if (node->GetHashCode() == hash_code) {
+        existing = node->GetInstruction();
+        if (existing == instruction) {
+          return existing;
+        }
+      }
+    }
+    return nullptr;
+  }
+
   // Removes all instructions in the set that are affected by the given side effects.
   void Kill(SideEffects side_effects) {
     for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
@@ -106,9 +126,9 @@
       }
     }
 
-    ValueSetNode* current = collisions_;
-    ValueSetNode* previous = nullptr;
-    while (current != nullptr) {
+    for (ValueSetNode* current = collisions_, *previous = nullptr;
+         current != nullptr;
+         current = current->GetNext()) {
       HInstruction* instruction = current->GetInstruction();
       if (instruction->GetSideEffects().DependsOn(side_effects)) {
         if (previous == nullptr) {
@@ -120,7 +140,6 @@
       } else {
         previous = current;
       }
-      current = current->GetNext();
     }
   }
 
@@ -143,6 +162,44 @@
     return copy;
   }
 
+  void Clear() {
+    number_of_entries_ = 0;
+    collisions_ = nullptr;
+    for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+      table_[i] = nullptr;
+    }
+  }
+
+  // Update this `ValueSet` by intersecting with instructions in `other`.
+  void IntersectionWith(ValueSet* other) {
+    if (IsEmpty()) {
+      return;
+    } else if (other->IsEmpty()) {
+      Clear();
+    } else {
+      for (size_t i = 0; i < kDefaultNumberOfEntries; ++i) {
+        if (table_[i] != nullptr && other->IdentityLookup(table_[i]) == nullptr) {
+          --number_of_entries_;
+          table_[i] = nullptr;
+        }
+      }
+      for (ValueSetNode* current = collisions_, *previous = nullptr;
+           current != nullptr;
+           current = current->GetNext()) {
+        if (other->IdentityLookup(current->GetInstruction()) == nullptr) {
+          if (previous == nullptr) {
+            collisions_ = current->GetNext();
+          } else {
+            previous->SetNext(current->GetNext());
+          }
+          --number_of_entries_;
+        } else {
+          previous = current;
+        }
+      }
+    }
+  }
+
   bool IsEmpty() const { return number_of_entries_ == 0; }
   size_t GetNumberOfEntries() const { return number_of_entries_; }
 
@@ -173,13 +230,11 @@
         allocator_(allocator),
         block_effects_(allocator, graph->GetBlocks().Size()),
         loop_effects_(allocator, graph->GetBlocks().Size()),
-        sets_(allocator, graph->GetBlocks().Size()),
-        visited_(allocator, graph->GetBlocks().Size()) {
+        sets_(allocator, graph->GetBlocks().Size()) {
     size_t number_of_blocks = graph->GetBlocks().Size();
     block_effects_.SetSize(number_of_blocks);
     loop_effects_.SetSize(number_of_blocks);
     sets_.SetSize(number_of_blocks);
-    visited_.SetSize(number_of_blocks);
 
     for (size_t i = 0; i < number_of_blocks; ++i) {
       block_effects_.Put(i, SideEffects::None());
@@ -219,9 +274,6 @@
   // in the path from the dominator to the block.
   GrowableArray<ValueSet*> sets_;
 
-  // Mark visisted blocks. Only used for debugging.
-  GrowableArray<bool> visited_;
-
   ART_FRIEND_TEST(GVNTest, LoopSideEffects);
   DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
 };
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index ad6e338..a6a68ca 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -175,7 +175,7 @@
 
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   GlobalValueNumberer(&allocator, graph).Run();
 
   // Check that all field get instructions are still there.
@@ -239,7 +239,7 @@
 
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
 
   ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
       *outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 3d65e9a..49ca443 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -26,6 +26,7 @@
   void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
   void VisitEqual(HEqual* equal) OVERRIDE;
   void VisitArraySet(HArraySet* equal) OVERRIDE;
+  void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
 };
 
 void InstructionSimplifier::Run() {
@@ -78,4 +79,12 @@
   }
 }
 
+void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) {
+  if (instruction->GetResultType() == instruction->GetInputType()) {
+    // Remove the instruction if it's converting to the same type.
+    instruction->ReplaceWith(instruction->GetInput());
+    instruction->GetBlock()->RemoveInstruction(instruction);
+  }
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index c49cf7e..28ca5e8 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -44,7 +44,7 @@
 
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
 
   x86::CodeGeneratorX86 codegen(graph);
   SsaLivenessAnalysis liveness(*graph, &codegen);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index e3c6fec..5c7e6f0 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -38,7 +38,7 @@
   RemoveSuspendChecks(graph);
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   return graph;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 246e7ef..4b69e57 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -50,7 +50,7 @@
   ASSERT_NE(graph, nullptr);
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   // `Inline` conditions into ifs.
   PrepareForRegisterAllocation(graph).Run();
   x86::CodeGeneratorX86 codegen(graph);
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index e1c8e8e..1ff26d9 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -161,7 +161,14 @@
   }
 
   template <typename T>
-  T As() const {
+  T AsRegister() const {
+    DCHECK(IsRegister());
+    return static_cast<T>(reg());
+  }
+
+  template <typename T>
+  T AsFpuRegister() const {
+    DCHECK(IsFpuRegister());
     return static_cast<T>(reg());
   }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 8cb2ef6..ba4dccf 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -30,6 +30,36 @@
   VisitBlockForBackEdges(entry_block_, visited, &visiting);
 }
 
+static void RemoveAsUser(HInstruction* instruction) {
+  for (size_t i = 0; i < instruction->InputCount(); i++) {
+    instruction->InputAt(i)->RemoveUser(instruction, i);
+  }
+
+  HEnvironment* environment = instruction->GetEnvironment();
+  if (environment != nullptr) {
+    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+      HInstruction* vreg = environment->GetInstructionAt(i);
+      if (vreg != nullptr) {
+        vreg->RemoveEnvironmentUser(environment, i);
+      }
+    }
+  }
+}
+
+void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
+  for (size_t i = 0; i < blocks_.Size(); ++i) {
+    if (!visited.IsBitSet(i)) {
+      HBasicBlock* block = blocks_.Get(i);
+      for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+        RemoveAsUser(it.Current());
+      }
+      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+        RemoveAsUser(it.Current());
+      }
+    }
+  }
+}
+
 void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
   for (size_t i = 0; i < blocks_.Size(); ++i) {
     if (!visited.IsBitSet(i)) {
@@ -72,16 +102,21 @@
   // (1) Find the back edges in the graph doing a DFS traversal.
   FindBackEdges(&visited);
 
-  // (2) Remove blocks not visited during the initial DFS.
-  //     Step (3) requires dead blocks to be removed from the
+  // (2) Remove instructions and phis from blocks not visited during
+  //     the initial DFS as users from other instructions, so that
+  //     users can be safely removed before uses later.
+  RemoveInstructionsAsUsersFromDeadBlocks(visited);
+
+  // (3) Remove blocks not visited during the initial DFS.
+  //     Step (4) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);
 
-  // (3) Simplify the CFG now, so that we don't need to recompute
+  // (4) Simplify the CFG now, so that we don't need to recompute
   //     dominators and the reverse post order.
   SimplifyCFG();
 
-  // (4) Compute the immediate dominator of each block. We visit
+  // (5) Compute the immediate dominator of each block. We visit
   //     the successors of a block only when all its forward branches
   //     have been processed.
   GrowableArray<size_t> visits(arena_, blocks_.Size());
@@ -237,7 +272,7 @@
   }
 }
 
-bool HGraph::FindNaturalLoops() const {
+bool HGraph::AnalyzeNaturalLoops() const {
   for (size_t i = 0; i < blocks_.Size(); ++i) {
     HBasicBlock* block = blocks_.Get(i);
     if (block->IsLoopHeader()) {
@@ -391,19 +426,7 @@
   instruction->SetBlock(nullptr);
   instruction_list->RemoveInstruction(instruction);
 
-  for (size_t i = 0; i < instruction->InputCount(); i++) {
-    instruction->InputAt(i)->RemoveUser(instruction, i);
-  }
-
-  HEnvironment* environment = instruction->GetEnvironment();
-  if (environment != nullptr) {
-    for (size_t i = 0, e = environment->Size(); i < e; ++i) {
-      HInstruction* vreg = environment->GetInstructionAt(i);
-      if (vreg != nullptr) {
-        vreg->RemoveEnvironmentUser(environment, i);
-      }
-    }
-  }
+  RemoveAsUser(instruction);
 }
 
 void HBasicBlock::RemoveInstruction(HInstruction* instruction) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f562113..3908a61 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -112,10 +112,10 @@
   void TransformToSSA();
   void SimplifyCFG();
 
-  // Find all natural loops in this graph. Aborts computation and returns false
-  // if one loop is not natural, that is the header does not dominate the back
-  // edge.
-  bool FindNaturalLoops() const;
+  // Analyze all natural loops in this graph. Returns false if one
+  // loop is not natural, that is the header does not dominate the
+  // back edge.
+  bool AnalyzeNaturalLoops() const;
 
   void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
   void SimplifyLoop(HBasicBlock* header);
@@ -173,6 +173,7 @@
   void VisitBlockForBackEdges(HBasicBlock* block,
                               ArenaBitVector* visited,
                               ArenaBitVector* visiting);
+  void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited) const;
 
   ArenaAllocator* const arena_;
@@ -777,7 +778,7 @@
   }
 
   // Returns whether two instructions are equal, that is:
-  // 1) They have the same type and contain the same data,
+  // 1) They have the same type and contain the same data (InstructionDataEquals).
   // 2) Their inputs are identical.
   bool Equals(HInstruction* other) const;
 
@@ -1363,28 +1364,45 @@
 // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
 class HCompare : public HBinaryOperation {
  public:
-  HCompare(Primitive::Type type, HInstruction* first, HInstruction* second)
-      : HBinaryOperation(Primitive::kPrimInt, first, second) {
+  // The bias applies for floating point operations and indicates how NaN
+  // comparisons are treated:
+  enum Bias {
+    kNoBias,  // bias is not applicable (i.e. for long operation)
+    kGtBias,  // return 1 for NaN comparisons
+    kLtBias,  // return -1 for NaN comparisons
+  };
+
+  HCompare(Primitive::Type type, HInstruction* first, HInstruction* second, Bias bias)
+      : HBinaryOperation(Primitive::kPrimInt, first, second), bias_(bias) {
     DCHECK_EQ(type, first->GetType());
     DCHECK_EQ(type, second->GetType());
   }
 
-  virtual int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
+  int32_t Evaluate(int32_t x, int32_t y) const OVERRIDE {
     return
       x == y ? 0 :
       x > y ? 1 :
       -1;
   }
-  virtual int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
+
+  int64_t Evaluate(int64_t x, int64_t y) const OVERRIDE {
     return
       x == y ? 0 :
       x > y ? 1 :
       -1;
   }
 
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return bias_ == other->AsCompare()->bias_;
+  }
+
+  bool IsGtBias() { return bias_ == kGtBias; }
+
   DECLARE_INSTRUCTION(Compare);
 
  private:
+  const Bias bias_;
+
   DISALLOW_COPY_AND_ASSIGN(HCompare);
 };
 
@@ -1652,7 +1670,12 @@
   uint16_t GetTypeIndex() const { return type_index_; }
 
   // Calls runtime so needs an environment.
-  virtual bool NeedsEnvironment() const { return true; }
+  bool NeedsEnvironment() const OVERRIDE { return true; }
+  // It may throw when called on:
+  //   - interfaces
+  //   - abstract/innaccessible/unknown classes
+  // TODO: optimize when possible.
+  bool CanThrow() const OVERRIDE { return true; }
 
   DECLARE_INSTRUCTION(NewInstance);
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d8533eb..100a6bc 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -192,7 +192,6 @@
 }
 
 static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) {
-  TransformToSsa ssa(graph);
   HDeadCodeElimination opt1(graph);
   HConstantFolding opt2(graph);
   SsaRedundantPhiElimination opt3(graph);
@@ -202,7 +201,6 @@
   InstructionSimplifier opt7(graph);
 
   HOptimization* optimizations[] = {
-    &ssa,
     &opt1,
     &opt2,
     &opt3,
@@ -220,6 +218,23 @@
   }
 }
 
+static bool TryBuildingSsa(HGraph* graph,
+                           const DexCompilationUnit& dex_compilation_unit,
+                           const HGraphVisualizer& visualizer) {
+  graph->BuildDominatorTree();
+  graph->TransformToSSA();
+
+  if (!graph->AnalyzeNaturalLoops()) {
+    LOG(INFO) << "Skipping compilation of "
+              << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(),
+                              *dex_compilation_unit.GetDexFile())
+              << ": it contains a non natural loop";
+    return false;
+  }
+  visualizer.DumpGraph("ssa transform");
+  return true;
+}
+
 CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                                             uint32_t access_flags,
                                             InvokeType invoke_type,
@@ -281,7 +296,12 @@
   if (run_optimizations_
       && CanOptimize(*code_item)
       && RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) {
+    VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file);
     optimized_compiled_methods_++;
+    if (!TryBuildingSsa(graph, dex_compilation_unit, visualizer)) {
+      // We could not transform the graph to SSA, bailout.
+      return nullptr;
+    }
     RunOptimizations(graph, visualizer);
 
     PrepareForRegisterAllocation(graph).Run();
@@ -316,23 +336,10 @@
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
     UNREACHABLE();
   } else {
+    VLOG(compiler) << "Compile baseline " << PrettyMethod(method_idx, dex_file);
     unoptimized_compiled_methods_++;
     codegen->CompileBaseline(&allocator);
 
-    if (CanOptimize(*code_item)) {
-      // Run these phases to get some test coverage.
-      graph->BuildDominatorTree();
-      graph->TransformToSSA();
-      visualizer.DumpGraph("ssa");
-      graph->FindNaturalLoops();
-      SsaRedundantPhiElimination(graph).Run();
-      SsaDeadPhiElimination(graph).Run();
-      GVNOptimization(graph).Run();
-      SsaLivenessAnalysis liveness(*graph, codegen);
-      liveness.Analyze();
-      visualizer.DumpGraph(kLivenessPassName);
-    }
-
     std::vector<uint8_t> mapping_table;
     SrcMap src_mapping_table;
     codegen->BuildMappingTable(&mapping_table,
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2948496..a6c0635 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -70,7 +70,8 @@
          it.Advance()) {
       HInstruction* current = it.Current();
       if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
-      if ((current->GetType() == Primitive::kPrimFloat || current->GetType() == Primitive::kPrimDouble)
+      if ((current->GetType() == Primitive::kPrimFloat
+           || current->GetType() == Primitive::kPrimDouble)
           && instruction_set != kX86_64) {
         return false;
       }
@@ -95,6 +96,25 @@
     ValidateInternal(true);
     processing_core_registers_ = false;
     ValidateInternal(true);
+    // Check that the linear order is still correct with regards to lifetime positions.
+    // Since only parallel moves have been inserted during the register allocation,
+    // these checks are mostly for making sure these moves have been added correctly.
+    size_t current_liveness = 0;
+    for (HLinearOrderIterator it(liveness_); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition());
+        current_liveness = instruction->GetLifetimePosition();
+      }
+      for (HInstructionIterator inst_it(block->GetInstructions());
+           !inst_it.Done();
+           inst_it.Advance()) {
+        HInstruction* instruction = inst_it.Current();
+        DCHECK_LE(current_liveness, instruction->GetLifetimePosition()) << instruction->DebugName();
+        current_liveness = instruction->GetLifetimePosition();
+      }
+    }
   }
 }
 
@@ -189,11 +209,29 @@
       BlockRegister(temp, position, position + 1);
     } else {
       DCHECK(temp.IsUnallocated());
-      DCHECK(temp.GetPolicy() == Location::kRequiresRegister);
-      LiveInterval* interval = LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
-      temp_intervals_.Add(interval);
-      interval->AddRange(position, position + 1);
-      unhandled_core_intervals_.Add(interval);
+      switch (temp.GetPolicy()) {
+        case Location::kRequiresRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt);
+          temp_intervals_.Add(interval);
+          interval->AddRange(position, position + 1);
+          unhandled_core_intervals_.Add(interval);
+          break;
+        }
+
+        case Location::kRequiresFpuRegister: {
+          LiveInterval* interval =
+              LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble);
+          temp_intervals_.Add(interval);
+          interval->AddRange(position, position + 1);
+          unhandled_fp_intervals_.Add(interval);
+          break;
+        }
+
+        default:
+          LOG(FATAL) << "Unexpected policy for temporary location "
+                     << temp.GetPolicy();
+      }
     }
   }
 
@@ -215,14 +253,7 @@
       // By adding the following interval in the algorithm, we can compute this
       // maximum before updating locations.
       LiveInterval* interval = LiveInterval::MakeSlowPathInterval(allocator_, instruction);
-      // The start of the interval must be after the position of the safepoint, so that
-      // we can just check the number of active registers at that position. Note that this
-      // will include the current interval in the computation of
-      // `maximum_number_of_live_registers`, so we need a better strategy if this becomes
-      // a problem.
-      // TODO: We could put the logic in AddSorted, to ensure the safepoint range is
-      // after all other intervals starting at that same position.
-      interval->AddRange(position + 1, position + 2);
+      interval->AddRange(position, position + 1);
       AddSorted(&unhandled_core_intervals_, interval);
       AddSorted(&unhandled_fp_intervals_, interval);
     }
@@ -484,16 +515,6 @@
     DCHECK(!current->IsFixed() && !current->HasSpillSlot());
     DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() >= current->GetStart());
 
-    if (current->IsSlowPathSafepoint()) {
-      // Synthesized interval to record the maximum number of live registers
-      // at safepoints. No need to allocate a register for it.
-      // We know that current actives are all live at the safepoint (modulo
-      // the one created by the safepoint).
-      maximum_number_of_live_registers_ =
-          std::max(maximum_number_of_live_registers_, active_.Size());
-      continue;
-    }
-
     size_t position = current->GetStart();
 
     // Remember the inactive_ size here since the ones moved to inactive_ from
@@ -534,6 +555,15 @@
       }
     }
 
+    if (current->IsSlowPathSafepoint()) {
+      // Synthesized interval to record the maximum number of live registers
+      // at safepoints. No need to allocate a register for it.
+      maximum_number_of_live_registers_ =
+          std::max(maximum_number_of_live_registers_, active_.Size());
+      DCHECK(unhandled_->IsEmpty() || unhandled_->Peek()->GetStart() > current->GetStart());
+      continue;
+    }
+
     // (4) Try to find an available register.
     bool success = TryAllocateFreeReg(current);
 
@@ -775,6 +805,12 @@
     if (current->StartsAfter(interval)) {
       insert_at = i;
       break;
+    } else if ((current->GetStart() == interval->GetStart()) && current->IsSlowPathSafepoint()) {
+      // Ensure the slow path interval is the last to be processed at its location: we want the
+      // interval to know all live registers at this location.
+      DCHECK(i == 1 || array->Get(i - 2)->StartsAfter(current));
+      insert_at = i;
+      break;
     }
   }
   array->InsertAt(insert_at, interval);
@@ -887,6 +923,14 @@
   move->AddMove(new (allocator_) MoveOperands(source, destination, nullptr));
 }
 
+static bool IsInstructionStart(size_t position) {
+  return (position & 1) == 0;
+}
+
+static bool IsInstructionEnd(size_t position) {
+  return (position & 1) == 1;
+}
+
 void RegisterAllocator::InsertParallelMoveAt(size_t position,
                                              HInstruction* instruction,
                                              Location source,
@@ -895,12 +939,29 @@
   if (source.Equals(destination)) return;
 
   HInstruction* at = liveness_.GetInstructionFromPosition(position / 2);
-  if (at == nullptr) {
-    // Block boundary, don't do anything the connection of split siblings will handle it.
-    return;
-  }
   HParallelMove* move;
-  if ((position & 1) == 1) {
+  if (at == nullptr) {
+    if (IsInstructionStart(position)) {
+      // Block boundary, don't do anything the connection of split siblings will handle it.
+      return;
+    } else {
+      // Move must happen before the first instruction of the block.
+      at = liveness_.GetInstructionFromPosition((position + 1) / 2);
+      // Note that parallel moves may have already been inserted, so we explicitly
+      // ask for the first instruction of the block: `GetInstructionFromPosition` does
+      // not contain the moves.
+      at = at->GetBlock()->GetFirstInstruction();
+      if (at->GetLifetimePosition() != position) {
+        DCHECK_GT(at->GetLifetimePosition(), position);
+        move = new (allocator_) HParallelMove(allocator_);
+        move->SetLifetimePosition(position);
+        at->GetBlock()->InsertInstructionBefore(move, at);
+      } else {
+        DCHECK(at->IsParallelMove());
+        move = at->AsParallelMove();
+      }
+    }
+  } else if (IsInstructionEnd(position)) {
     // Move must happen after the instruction.
     DCHECK(!at->IsControlFlow());
     move = at->GetNext()->AsParallelMove();
@@ -952,10 +1013,11 @@
   HParallelMove* move;
   // This is a parallel move for connecting blocks. We need to differentiate
   // it with moves for connecting siblings in a same block, and output moves.
+  size_t position = last->GetLifetimePosition();
   if (previous == nullptr || !previous->IsParallelMove()
-      || previous->AsParallelMove()->GetLifetimePosition() != block->GetLifetimeEnd()) {
+      || previous->AsParallelMove()->GetLifetimePosition() != position) {
     move = new (allocator_) HParallelMove(allocator_);
-    move->SetLifetimePosition(block->GetLifetimeEnd());
+    move->SetLifetimePosition(position);
     block->InsertInstructionBefore(move, last);
   } else {
     move = previous->AsParallelMove();
@@ -1074,6 +1136,7 @@
         case Location::kRegister: {
           locations->AddLiveRegister(source);
           DCHECK_LE(locations->GetNumberOfLiveRegisters(), maximum_number_of_live_registers_);
+
           if (current->GetType() == Primitive::kPrimNot) {
             locations->SetRegisterBit(source.reg());
           }
@@ -1107,12 +1170,10 @@
     return;
   }
 
+  // Intervals end at the lifetime end of a block. The decrement by one
+  // ensures the `Cover` call will return true.
   size_t from_position = from->GetLifetimeEnd() - 1;
-  // When an instruction dies at entry of another, and the latter is the beginning
-  // of a block, the register allocator ensures the former has a register
-  // at block->GetLifetimeStart() + 1. Since this is at a block boundary, it must
-  // must be handled in this method.
-  size_t to_position = to->GetLifetimeStart() + 1;
+  size_t to_position = to->GetLifetimeStart();
 
   LiveInterval* destination = nullptr;
   LiveInterval* source = nullptr;
@@ -1250,9 +1311,27 @@
       current = at;
     }
     LocationSummary* locations = at->GetLocations();
-    DCHECK(temp->GetType() == Primitive::kPrimInt);
-    locations->SetTempAt(
-        temp_index++, Location::RegisterLocation(temp->GetRegister()));
+    switch (temp->GetType()) {
+      case Primitive::kPrimInt:
+        locations->SetTempAt(
+            temp_index++, Location::RegisterLocation(temp->GetRegister()));
+        break;
+
+      case Primitive::kPrimDouble:
+        // TODO: Support the case of ARM, where a double value
+        // requires an FPU register pair (note that the ARM back end
+        // does not yet use this register allocator when a method uses
+        // floats or doubles).
+        DCHECK(codegen_->GetInstructionSet() != kArm
+               && codegen_->GetInstructionSet() != kThumb2);
+        locations->SetTempAt(
+            temp_index++, Location::FpuRegisterLocation(temp->GetRegister()));
+        break;
+
+      default:
+        LOG(FATAL) << "Unexpected type for temporary location "
+                   << temp->GetType();
+    }
   }
 }
 
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index ba4be34..8d75db9 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -41,7 +41,7 @@
   HGraph* graph = builder.BuildGraph(*item);
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   x86::CodeGeneratorX86 codegen(graph);
   SsaLivenessAnalysis liveness(*graph, &codegen);
   liveness.Analyze();
@@ -255,7 +255,7 @@
   HGraph* graph = builder.BuildGraph(*item);
   graph->BuildDominatorTree();
   graph->TransformToSSA();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   return graph;
 }
 
@@ -494,7 +494,7 @@
   (*phi)->AddInput(*input2);
 
   graph->BuildDominatorTree();
-  graph->FindNaturalLoops();
+  graph->AnalyzeNaturalLoops();
   return graph;
 }
 
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 5ab328f..2cbd51a 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -22,20 +22,6 @@
 
 namespace art {
 
-class TransformToSsa : public HOptimization {
- public:
-  explicit TransformToSsa(HGraph* graph) : HOptimization(graph, true, "ssa transform") {}
-
-  void Run() OVERRIDE {
-    graph_->BuildDominatorTree();
-    graph_->TransformToSSA();
-    graph_->FindNaturalLoops();
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(TransformToSsa);
-};
-
 static constexpr int kDefaultNumberOfLoops = 2;
 
 class SsaBuilder : public HGraphVisitor {
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 733b58f..cb07ffa 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -83,6 +83,7 @@
       break;
   }
 
+  assembler->EmitSlowPaths();
   size_t cs = assembler->CodeSize();
   std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs));
   MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc
index 004af98..a80ad93 100644
--- a/compiler/utils/arena_allocator.cc
+++ b/compiler/utils/arena_allocator.cc
@@ -189,6 +189,15 @@
   return ret;
 }
 
+size_t ArenaPool::GetBytesAllocated() const {
+  size_t total = 0;
+  MutexLock lock(Thread::Current(), lock_);
+  for (Arena* arena = free_arenas_; arena != nullptr; arena = arena->next_) {
+    total += arena->GetBytesAllocated();
+  }
+  return total;
+}
+
 void ArenaPool::FreeArenaChain(Arena* first) {
   if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) {
     for (Arena* arena = first; arena != nullptr; arena = arena->next_) {
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index 6d21399..7f5bc9a 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -135,6 +135,10 @@
     return Size() - bytes_allocated_;
   }
 
+  size_t GetBytesAllocated() const {
+    return bytes_allocated_;
+  }
+
  private:
   size_t bytes_allocated_;
   uint8_t* memory_;
@@ -153,11 +157,12 @@
  public:
   ArenaPool();
   ~ArenaPool();
-  Arena* AllocArena(size_t size);
-  void FreeArenaChain(Arena* first);
+  Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_);
+  void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_);
+  size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_);
 
  private:
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   Arena* free_arenas_ GUARDED_BY(lock_);
   DISALLOW_COPY_AND_ASSIGN(ArenaPool);
 };
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 0f28591..0528773 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -165,36 +165,6 @@
   return 0;
 }
 
-bool ShifterOperand::CanHoldThumb(Register rd, Register rn, Opcode opcode,
-                                  uint32_t immediate, ShifterOperand* shifter_op) {
-  shifter_op->type_ = kImmediate;
-  shifter_op->immed_ = immediate;
-  shifter_op->is_shift_ = false;
-  shifter_op->is_rotate_ = false;
-  switch (opcode) {
-    case ADD:
-    case SUB:
-      if (rn == SP) {
-        if (rd == SP) {
-          return immediate < (1 << 9);    // 9 bits allowed.
-        } else {
-          return immediate < (1 << 12);   // 12 bits.
-        }
-      }
-      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
-        return true;
-      }
-      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
-
-    case MOV:
-      // TODO: Support less than or equal to 12bits.
-      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
-    case MVN:
-    default:
-      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
-  }
-}
-
 uint32_t Address::encodingArm() const {
   CHECK(IsAbsoluteUint(12, offset_));
   uint32_t encoding;
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index d288b70..c86ec4b 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -30,6 +30,9 @@
 namespace art {
 namespace arm {
 
+class Arm32Assembler;
+class Thumb2Assembler;
+
 class ShifterOperand {
  public:
   ShifterOperand() : type_(kUnknown), rm_(kNoRegister), rs_(kNoRegister),
@@ -103,33 +106,6 @@
     kImmediate
   };
 
-  static bool CanHoldArm(uint32_t immediate, ShifterOperand* shifter_op) {
-    // Avoid the more expensive test for frequent small immediate values.
-    if (immediate < (1 << kImmed8Bits)) {
-      shifter_op->type_ = kImmediate;
-      shifter_op->is_rotate_ = true;
-      shifter_op->rotate_ = 0;
-      shifter_op->immed_ = immediate;
-      return true;
-    }
-    // Note that immediate must be unsigned for the test to work correctly.
-    for (int rot = 0; rot < 16; rot++) {
-      uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot));
-      if (imm8 < (1 << kImmed8Bits)) {
-        shifter_op->type_ = kImmediate;
-        shifter_op->is_rotate_ = true;
-        shifter_op->rotate_ = rot;
-        shifter_op->immed_ = imm8;
-        return true;
-      }
-    }
-    return false;
-  }
-
-  static bool CanHoldThumb(Register rd, Register rn, Opcode opcode,
-                           uint32_t immediate, ShifterOperand* shifter_op);
-
-
  private:
   Type type_;
   Register rm_;
@@ -140,6 +116,9 @@
   uint32_t rotate_;
   uint32_t immed_;
 
+  friend class Arm32Assembler;
+  friend class Thumb2Assembler;
+
 #ifdef SOURCE_ASSEMBLER_SUPPORT
   friend class BinaryAssembler;
 #endif
@@ -611,6 +590,14 @@
   virtual void Ror(Register rd, Register rm, Register rn, bool setcc = false,
                    Condition cond = AL) = 0;
 
+  // Returns whether the `immediate` can fit in a `ShifterOperand`. If yes,
+  // `shifter_op` contains the operand.
+  virtual bool ShifterOperandCanHold(Register rd,
+                                     Register rn,
+                                     Opcode opcode,
+                                     uint32_t immediate,
+                                     ShifterOperand* shifter_op) = 0;
+
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
   virtual void Bind(Label* label) = 0;
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index a541763..8f6d45a 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -25,6 +25,37 @@
 namespace art {
 namespace arm {
 
+bool Arm32Assembler::ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op) {
+  // Avoid the more expensive test for frequent small immediate values.
+  if (immediate < (1 << kImmed8Bits)) {
+    shifter_op->type_ = ShifterOperand::kImmediate;
+    shifter_op->is_rotate_ = true;
+    shifter_op->rotate_ = 0;
+    shifter_op->immed_ = immediate;
+    return true;
+  }
+  // Note that immediate must be unsigned for the test to work correctly.
+  for (int rot = 0; rot < 16; rot++) {
+    uint32_t imm8 = (immediate << 2*rot) | (immediate >> (32 - 2*rot));
+    if (imm8 < (1 << kImmed8Bits)) {
+      shifter_op->type_ = ShifterOperand::kImmediate;
+      shifter_op->is_rotate_ = true;
+      shifter_op->rotate_ = rot;
+      shifter_op->immed_ = imm8;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Arm32Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED,
+                                           Register rn ATTRIBUTE_UNUSED,
+                                           Opcode opcode ATTRIBUTE_UNUSED,
+                                           uint32_t immediate,
+                                           ShifterOperand* shifter_op) {
+  return ShifterOperandCanHoldArm32(immediate, shifter_op);
+}
+
 void Arm32Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
                         Condition cond) {
   EmitType01(cond, so.type(), AND, 0, rn, rd, so);
@@ -1291,16 +1322,16 @@
   // positive values and sub for negatives ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
     add(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHoldArm(-value, &shifter_op)) {
+  } else if (ShifterOperandCanHoldArm32(-value, &shifter_op)) {
     sub(rd, rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+    if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
       add(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHoldArm(~(-value), &shifter_op)) {
+    } else if (ShifterOperandCanHoldArm32(~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
       sub(rd, rn, ShifterOperand(IP), cond);
     } else {
@@ -1318,16 +1349,16 @@
 void Arm32Assembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
                                          Condition cond) {
   ShifterOperand shifter_op;
-  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
     adds(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHoldArm(-value, &shifter_op)) {
+  } else if (ShifterOperandCanHoldArm32(-value, &shifter_op)) {
     subs(rd, rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+    if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
       adds(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHoldArm(~(-value), &shifter_op)) {
+    } else if (ShifterOperandCanHoldArm32(~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
       subs(rd, rn, ShifterOperand(IP), cond);
     } else {
@@ -1343,9 +1374,9 @@
 
 void Arm32Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
-  if (ShifterOperand::CanHoldArm(value, &shifter_op)) {
+  if (ShifterOperandCanHoldArm32(value, &shifter_op)) {
     mov(rd, shifter_op, cond);
-  } else if (ShifterOperand::CanHoldArm(~value, &shifter_op)) {
+  } else if (ShifterOperandCanHoldArm32(~value, &shifter_op)) {
     mvn(rd, shifter_op, cond);
   } else {
     movw(rd, Low16Bits(value), cond);
diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h
index 0b009e1..6c8d415 100644
--- a/compiler/utils/arm/assembler_arm32.h
+++ b/compiler/utils/arm/assembler_arm32.h
@@ -273,6 +273,12 @@
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
 
+  bool ShifterOperandCanHold(Register rd,
+                             Register rn,
+                             Opcode opcode,
+                             uint32_t immediate,
+                             ShifterOperand* shifter_op) OVERRIDE;
+
 
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
@@ -359,6 +365,7 @@
   static int DecodeBranchOffset(int32_t inst);
   int32_t EncodeTstOffset(int offset, int32_t inst);
   int DecodeTstOffset(int32_t inst);
+  bool ShifterOperandCanHoldArm32(uint32_t immediate, ShifterOperand* shifter_op);
 };
 
 }  // namespace arm
diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc
index 837fe1e..951792d 100644
--- a/compiler/utils/arm/assembler_arm32_test.cc
+++ b/compiler/utils/arm/assembler_arm32_test.cc
@@ -49,7 +49,8 @@
   }
 
   std::string GetAssemblerParameters() OVERRIDE {
-    return " -march=armv7-a -mcpu=cortex-a15";  // Arm-v7a, cortex-a15 (means we have sdiv).
+    // Arm-v7a, cortex-a15 (means we have sdiv).
+    return " -march=armv7-a -mcpu=cortex-a15 -mfpu=neon";
   }
 
   const char* GetAssemblyHeader() OVERRIDE {
@@ -688,4 +689,12 @@
   T2Helper(&arm::Arm32Assembler::bx, true, "bx{cond} {reg1}", "bx");
 }
 
+TEST_F(AssemblerArm32Test, Vmstat) {
+  GetAssembler()->vmstat();
+
+  const char* expected = "vmrs APSR_nzcv, FPSCR\n";
+
+  DriverStr(expected, "vmrs");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index a377cb2..479186c 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -25,6 +25,39 @@
 namespace art {
 namespace arm {
 
+bool Thumb2Assembler::ShifterOperandCanHold(Register rd,
+                                            Register rn,
+                                            Opcode opcode,
+                                            uint32_t immediate,
+                                            ShifterOperand* shifter_op) {
+  shifter_op->type_ = ShifterOperand::kImmediate;
+  shifter_op->immed_ = immediate;
+  shifter_op->is_shift_ = false;
+  shifter_op->is_rotate_ = false;
+  switch (opcode) {
+    case ADD:
+    case SUB:
+      if (rn == SP) {
+        if (rd == SP) {
+          return immediate < (1 << 9);    // 9 bits allowed.
+        } else {
+          return immediate < (1 << 12);   // 12 bits.
+        }
+      }
+      if (immediate < (1 << 12)) {    // Less than (or equal to) 12 bits can always be done.
+        return true;
+      }
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+
+    case MOV:
+      // TODO: Support less than or equal to 12bits.
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+    case MVN:
+    default:
+      return ArmAssembler::ModifiedImmediate(immediate) != kInvalidModifiedImmediate;
+  }
+}
+
 void Thumb2Assembler::and_(Register rd, Register rn, const ShifterOperand& so,
                            Condition cond) {
   EmitDataProcessing(cond, AND, 0, rn, rd, so);
@@ -374,16 +407,11 @@
                           Register base,
                           RegList regs,
                           Condition cond) {
-  if (__builtin_popcount(regs) == 1) {
+  CHECK_NE(regs, 0u);  // Do not use ldm if there's nothing to load.
+  if (IsPowerOfTwo(regs)) {
     // Thumb doesn't support one reg in the list.
     // Find the register number.
-    int reg = 0;
-    while (reg < 16) {
-      if ((regs & (1 << reg)) != 0) {
-         break;
-      }
-      ++reg;
-    }
+    int reg = CTZ(static_cast<uint32_t>(regs));
     CHECK_LT(reg, 16);
     CHECK(am == DB_W);      // Only writeback is supported.
     ldr(static_cast<Register>(reg), Address(base, kRegisterSize, Address::PostIndex), cond);
@@ -397,16 +425,11 @@
                           Register base,
                           RegList regs,
                           Condition cond) {
-  if (__builtin_popcount(regs) == 1) {
+  CHECK_NE(regs, 0u);  // Do not use stm if there's nothing to store.
+  if (IsPowerOfTwo(regs)) {
     // Thumb doesn't support one reg in the list.
     // Find the register number.
-    int reg = 0;
-    while (reg < 16) {
-      if ((regs & (1 << reg)) != 0) {
-         break;
-      }
-      ++reg;
-    }
+    int reg = CTZ(static_cast<uint32_t>(regs));
     CHECK_LT(reg, 16);
     CHECK(am == IA || am == IA_W);
     Address::Mode strmode = am == IA ? Address::PreIndex : Address::Offset;
@@ -813,6 +836,7 @@
 
   if (thumb_opcode == 255U /* 0b11111111 */) {
     LOG(FATAL) << "Invalid thumb2 opcode " << opcode;
+    UNREACHABLE();
   }
 
   int32_t encoding = 0;
@@ -842,6 +866,7 @@
       uint32_t imm = ModifiedImmediate(so.encodingThumb());
       if (imm == kInvalidModifiedImmediate) {
         LOG(FATAL) << "Immediate value cannot fit in thumb2 modified immediate";
+        UNREACHABLE();
       }
       encoding = B31 | B30 | B29 | B28 |
           thumb_opcode << 21 |
@@ -979,6 +1004,7 @@
 
   if (thumb_opcode == 255U /* 0b11111111 */) {
     LOG(FATAL) << "Invalid thumb1 opcode " << opcode;
+    UNREACHABLE();
   }
 
   int16_t encoding = dp_opcode << 14 |
@@ -1116,7 +1142,7 @@
       break;
     default:
       LOG(FATAL) << "This opcode is not an ADD or SUB: " << opcode;
-      return;
+      UNREACHABLE();
   }
 
   int16_t encoding = dp_opcode << 14 |
@@ -1157,6 +1183,7 @@
       case RRX: opcode = 3U /* 0b11 */; amount = 0; break;
       default:
         LOG(FATAL) << "Unsupported thumb2 shift opcode";
+        UNREACHABLE();
     }
     // 32 bit.
     int32_t encoding = B31 | B30 | B29 | B27 | B25 | B22 |
@@ -1174,7 +1201,8 @@
       case LSR: opcode = 1U /* 0b01 */; break;
       case ASR: opcode = 2U /* 0b10 */; break;
       default:
-         LOG(FATAL) << "Unsupported thumb2 shift opcode";
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+        UNREACHABLE();
     }
     int16_t encoding = opcode << 11 | amount << 6 | static_cast<int16_t>(rm) << 3 |
         static_cast<int16_t>(rd);
@@ -1198,6 +1226,7 @@
        case ROR: opcode = 3U /* 0b11 */; break;
        default:
          LOG(FATAL) << "Unsupported thumb2 shift opcode";
+         UNREACHABLE();
      }
      // 32 bit.
      int32_t encoding = B31 | B30 | B29 | B28 | B27 | B25 |
@@ -1212,7 +1241,8 @@
       case LSR: opcode = 3U /* 0b0011 */; break;
       case ASR: opcode = 4U /* 0b0100 */; break;
       default:
-         LOG(FATAL) << "Unsupported thumb2 shift opcode";
+        LOG(FATAL) << "Unsupported thumb2 shift opcode";
+        UNREACHABLE();
     }
     int16_t encoding = B14 | opcode << 6 | static_cast<int16_t>(rm) << 3 |
         static_cast<int16_t>(rd);
@@ -1241,6 +1271,7 @@
     } else {
       if (x) {
         LOG(FATAL) << "Invalid use of BX";
+        UNREACHABLE();
       } else {
         if (cond_ == AL) {
           // Can use the T4 encoding allowing a 24 bit offset.
@@ -1469,6 +1500,15 @@
   CheckCondition(cond);
   bool must_be_32bit = force_32bit_;
 
+  if (!must_be_32bit && base == SP && bam == (load ? IA_W : DB_W) &&
+      (regs & 0xff00 & ~(1 << (load ? PC : LR))) == 0) {
+    // Use 16-bit PUSH/POP.
+    int16_t encoding = B15 | B13 | B12 | (load ? B11 : 0) | B10 |
+        ((regs & (1 << (load ? PC : LR))) != 0 ? B8 : 0) | (regs & 0x00ff);
+    Emit16(encoding);
+    return;
+  }
+
   if ((regs & 0xff00) != 0) {
     must_be_32bit = true;
   }
@@ -1495,6 +1535,7 @@
       case DA_W:
       case IB_W:
         LOG(FATAL) << "LDM/STM mode not supported on thumb: " << bam;
+        UNREACHABLE();
     }
     if (load) {
       // Cannot have SP in the list.
@@ -1981,8 +2022,13 @@
 
 
 void Thumb2Assembler::vmstat(Condition cond) {  // VMRS APSR_nzcv, FPSCR.
+  CHECK_NE(cond, kNoCondition);
   CheckCondition(cond);
-  UNIMPLEMENTED(FATAL) << "Unimplemented thumb instruction";
+  int32_t encoding = (static_cast<int32_t>(cond) << kConditionShift) |
+      B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 |
+      (static_cast<int32_t>(PC)*B12) |
+      B11 | B9 | B4;
+  Emit32(encoding);
 }
 
 
@@ -2068,6 +2114,7 @@
   CheckCondition(AL);
   if (label->IsBound()) {
     LOG(FATAL) << "cbz can only be used to branch forwards";
+    UNREACHABLE();
   } else {
     uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), false);
     label->LinkTo(branchid);
@@ -2079,6 +2126,7 @@
   CheckCondition(AL);
   if (label->IsBound()) {
     LOG(FATAL) << "cbnz can only be used to branch forwards";
+    UNREACHABLE();
   } else {
     uint16_t branchid = EmitCompareAndBranch(rn, static_cast<uint16_t>(label->position_), true);
     label->LinkTo(branchid);
@@ -2360,16 +2408,16 @@
   // positive values and sub for negatives ones, which would slightly improve
   // the readability of generated code for some constants.
   ShifterOperand shifter_op;
-  if (ShifterOperand::CanHoldThumb(rd, rn, ADD, value, &shifter_op)) {
+  if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
     add(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHoldThumb(rd, rn, SUB, -value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(rd, rn, SUB, -value, &shifter_op)) {
     sub(rd, rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~value, &shifter_op)) {
+    if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
       add(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~(-value), &shifter_op)) {
+    } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
       sub(rd, rn, ShifterOperand(IP), cond);
     } else {
@@ -2387,16 +2435,16 @@
 void Thumb2Assembler::AddConstantSetFlags(Register rd, Register rn, int32_t value,
                                           Condition cond) {
   ShifterOperand shifter_op;
-  if (ShifterOperand::CanHoldThumb(rd, rn, ADD, value, &shifter_op)) {
+  if (ShifterOperandCanHold(rd, rn, ADD, value, &shifter_op)) {
     adds(rd, rn, shifter_op, cond);
-  } else if (ShifterOperand::CanHoldThumb(rd, rn, ADD, -value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(rd, rn, ADD, -value, &shifter_op)) {
     subs(rd, rn, shifter_op, cond);
   } else {
     CHECK(rn != IP);
-    if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~value, &shifter_op)) {
+    if (ShifterOperandCanHold(rd, rn, MVN, ~value, &shifter_op)) {
       mvn(IP, shifter_op, cond);
       adds(rd, rn, ShifterOperand(IP), cond);
-    } else if (ShifterOperand::CanHoldThumb(rd, rn, MVN, ~(-value), &shifter_op)) {
+    } else if (ShifterOperandCanHold(rd, rn, MVN, ~(-value), &shifter_op)) {
       mvn(IP, shifter_op, cond);
       subs(rd, rn, ShifterOperand(IP), cond);
     } else {
@@ -2410,11 +2458,12 @@
   }
 }
 
+
 void Thumb2Assembler::LoadImmediate(Register rd, int32_t value, Condition cond) {
   ShifterOperand shifter_op;
-  if (ShifterOperand::CanHoldThumb(rd, R0, MOV, value, &shifter_op)) {
+  if (ShifterOperandCanHold(rd, R0, MOV, value, &shifter_op)) {
     mov(rd, shifter_op, cond);
-  } else if (ShifterOperand::CanHoldThumb(rd, R0, MVN, ~value, &shifter_op)) {
+  } else if (ShifterOperandCanHold(rd, R0, MVN, ~value, &shifter_op)) {
     mvn(rd, shifter_op, cond);
   } else {
     movw(rd, Low16Bits(value), cond);
@@ -2425,6 +2474,7 @@
   }
 }
 
+
 // Implementation note: this method must emit at most one instruction when
 // Address::CanHoldLoadOffsetThumb.
 void Thumb2Assembler::LoadFromOffset(LoadOperandType type,
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index cfa251a..48a3a7e 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -304,6 +304,12 @@
                       int32_t offset,
                       Condition cond = AL) OVERRIDE;
 
+  bool ShifterOperandCanHold(Register rd,
+                             Register rn,
+                             Opcode opcode,
+                             uint32_t immediate,
+                             ShifterOperand* shifter_op) OVERRIDE;
+
 
   static bool IsInstructionForExceptionHandling(uintptr_t pc);
 
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index 65d6d45..6ae95a4 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -30,7 +30,7 @@
   }
 
   std::string GetAssemblerParameters() OVERRIDE {
-    return " -mthumb";
+    return " -mthumb -mfpu=neon";
   }
 
   std::string GetDisassembleParameters() OVERRIDE {
@@ -156,4 +156,12 @@
   DriverStr(expected, "ubfx");
 }
 
+TEST_F(AssemblerThumb2Test, Vmstat) {
+  GetAssembler()->vmstat();
+
+  const char* expected = "vmrs APSR_nzcv, FPSCR\n";
+
+  DriverStr(expected, "vmrs");
+}
+
 }  // namespace art
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 390f2ea..21014c8 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -329,12 +329,12 @@
     if (dst.IsXRegister()) {
       if (size == 4) {
         CHECK(src.IsWRegister());
-        ___ Mov(reg_x(dst.AsXRegister()), reg_w(src.AsWRegister()));
+        ___ Mov(reg_w(dst.AsOverlappingWRegister()), reg_w(src.AsWRegister()));
       } else {
         if (src.IsXRegister()) {
           ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsXRegister()));
         } else {
-          ___ Mov(reg_x(dst.AsXRegister()), reg_w(src.AsWRegister()));
+          ___ Mov(reg_x(dst.AsXRegister()), reg_x(src.AsOverlappingXRegister()));
         }
       }
     } else if (dst.IsWRegister()) {
@@ -484,9 +484,9 @@
   CHECK(size == 1 || size == 2) << size;
   CHECK(reg.IsWRegister()) << reg;
   if (size == 1) {
-    ___ sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+    ___ Sxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
   } else {
-    ___ sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+    ___ Sxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
   }
 }
 
@@ -495,9 +495,9 @@
   CHECK(size == 1 || size == 2) << size;
   CHECK(reg.IsWRegister()) << reg;
   if (size == 1) {
-    ___ uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+    ___ Uxtb(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
   } else {
-    ___ uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
+    ___ Uxth(reg_w(reg.AsWRegister()), reg_w(reg.AsWRegister()));
   }
 }
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index a297ea3..f0353f6 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -613,6 +613,23 @@
 }
 
 
+void X86Assembler::ucomiss(XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitXmmRegisterOperand(a, b);
+}
+
+
+void X86Assembler::ucomisd(XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitXmmRegisterOperand(a, b);
+}
+
+
 void X86Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF2);
@@ -1318,13 +1335,19 @@
 }
 
 
+void X86Assembler::LoadLongConstant(XmmRegister dst, int64_t value) {
+  // TODO: Need to have a code constants table.
+  pushl(Immediate(High32Bits(value)));
+  pushl(Immediate(Low32Bits(value)));
+  movsd(dst, Address(ESP, 0));
+  addl(ESP, Immediate(2 * sizeof(int32_t)));
+}
+
+
 void X86Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
   // TODO: Need to have a code constants table.
   int64_t constant = bit_cast<int64_t, double>(value);
-  pushl(Immediate(High32Bits(constant)));
-  pushl(Immediate(Low32Bits(constant)));
-  movsd(dst, Address(ESP, 0));
-  addl(ESP, Immediate(2 * sizeof(intptr_t)));
+  LoadLongConstant(dst, constant);
 }
 
 
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 6ea66a5..9fecf1e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -42,8 +42,6 @@
 
  private:
   const int32_t value_;
-
-  DISALLOW_COPY_AND_ASSIGN(Immediate);
 };
 
 
@@ -301,6 +299,8 @@
 
   void comiss(XmmRegister a, XmmRegister b);
   void comisd(XmmRegister a, XmmRegister b);
+  void ucomiss(XmmRegister a, XmmRegister b);
+  void ucomisd(XmmRegister a, XmmRegister b);
 
   void sqrtsd(XmmRegister dst, XmmRegister src);
   void sqrtss(XmmRegister dst, XmmRegister src);
@@ -441,6 +441,7 @@
 
   void AddImmediate(Register reg, const Immediate& imm);
 
+  void LoadLongConstant(XmmRegister dst, int64_t value);
   void LoadDoubleConstant(XmmRegister dst, double value);
 
   void DoubleNegate(XmmRegister d);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 5d8a3b1..d901673 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -16,7 +16,8 @@
 
 #include "assembler_x86.h"
 
-#include "gtest/gtest.h"
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
 
 namespace art {
 
@@ -29,4 +30,89 @@
   ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
 }
 
+class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::Register,
+                                              x86::XmmRegister, x86::Immediate> {
+ protected:
+  std::string GetArchitectureString() OVERRIDE {
+    return "x86";
+  }
+
+  std::string GetAssemblerParameters() OVERRIDE {
+    return " --32";
+  }
+
+  std::string GetDisassembleParameters() OVERRIDE {
+    return " -D -bbinary -mi386 --no-show-raw-insn";
+  }
+
+  void SetUpHelpers() OVERRIDE {
+    if (registers_.size() == 0) {
+      registers_.insert(end(registers_),
+                        {  // NOLINT(whitespace/braces)
+                          new x86::Register(x86::EAX),
+                          new x86::Register(x86::EBX),
+                          new x86::Register(x86::ECX),
+                          new x86::Register(x86::EDX),
+                          new x86::Register(x86::EBP),
+                          new x86::Register(x86::ESP),
+                          new x86::Register(x86::ESI),
+                          new x86::Register(x86::EDI)
+                        });
+    }
+
+    if (fp_registers_.size() == 0) {
+      fp_registers_.insert(end(fp_registers_),
+                           {  // NOLINT(whitespace/braces)
+                             new x86::XmmRegister(x86::XMM0),
+                             new x86::XmmRegister(x86::XMM1),
+                             new x86::XmmRegister(x86::XMM2),
+                             new x86::XmmRegister(x86::XMM3),
+                             new x86::XmmRegister(x86::XMM4),
+                             new x86::XmmRegister(x86::XMM5),
+                             new x86::XmmRegister(x86::XMM6),
+                             new x86::XmmRegister(x86::XMM7)
+                           });
+    }
+  }
+
+  void TearDown() OVERRIDE {
+    AssemblerTest::TearDown();
+    STLDeleteElements(&registers_);
+    STLDeleteElements(&fp_registers_);
+  }
+
+  std::vector<x86::Register*> GetRegisters() OVERRIDE {
+    return registers_;
+  }
+
+  std::vector<x86::XmmRegister*> GetFPRegisters() OVERRIDE {
+    return fp_registers_;
+  }
+
+  x86::Immediate CreateImmediate(int64_t imm_value) OVERRIDE {
+    return x86::Immediate(imm_value);
+  }
+
+ private:
+  std::vector<x86::Register*> registers_;
+  std::vector<x86::XmmRegister*> fp_registers_;
+};
+
+
+TEST_F(AssemblerX86Test, Movl) {
+  GetAssembler()->movl(x86::EAX, x86::EBX);
+  const char* expected = "mov %ebx, %eax\n";
+  DriverStr(expected, "movl");
+}
+
+TEST_F(AssemblerX86Test, LoadLongConstant) {
+  GetAssembler()->LoadLongConstant(x86::XMM0, 51);
+  const char* expected =
+      "push $0x0\n"
+      "push $0x33\n"
+      "movsd 0(%esp), %xmm0\n"
+      "add $8, %esp\n";
+  DriverStr(expected, "LoadLongConstant");
+}
+
 }  // namespace art
diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h
index 45c3834..2dfb65c 100644
--- a/compiler/utils/x86/constants_x86.h
+++ b/compiler/utils/x86/constants_x86.h
@@ -96,7 +96,8 @@
   kZero         = kEqual,
   kNotZero      = kNotEqual,
   kNegative     = kSign,
-  kPositive     = kNotSign
+  kPositive     = kNotSign,
+  kUnordered    = kParityEven
 };
 
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index dff3849..474d8a9 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -593,9 +593,19 @@
 
 
 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
+  cvtsi2ss(dst, src, false);
+}
+
+
+void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
-  EmitOptionalRex32(dst, src);
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
   EmitUint8(0x0F);
   EmitUint8(0x2A);
   EmitOperand(dst.LowBits(), Operand(src));
@@ -603,9 +613,19 @@
 
 
 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
+  cvtsi2sd(dst, src, false);
+}
+
+
+void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF2);
-  EmitOptionalRex32(dst, src);
+  if (is64bit) {
+    // Emit a REX.W prefix if the operand size is 64 bits.
+    EmitRex64(dst, src);
+  } else {
+    EmitOptionalRex32(dst, src);
+  }
   EmitUint8(0x0F);
   EmitUint8(0x2A);
   EmitOperand(dst.LowBits(), Operand(src));
@@ -700,6 +720,24 @@
   EmitXmmRegisterOperand(a.LowBits(), b);
 }
 
+void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitXmmRegisterOperand(a.LowBits(), b);
+}
+
+
+void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(a, b);
+  EmitUint8(0x0F);
+  EmitUint8(0x2E);
+  EmitXmmRegisterOperand(a.LowBits(), b);
+}
+
 
 void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index ab1bc9e..6e71e4a 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -329,7 +329,9 @@
   void divsd(XmmRegister dst, const Address& src);
 
   void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
+  void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
   void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
+  void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
 
   void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvtss2sd(XmmRegister dst, XmmRegister src);
@@ -344,6 +346,8 @@
 
   void comiss(XmmRegister a, XmmRegister b);
   void comisd(XmmRegister a, XmmRegister b);
+  void ucomiss(XmmRegister a, XmmRegister b);
+  void ucomisd(XmmRegister a, XmmRegister b);
 
   void sqrtsd(XmmRegister dst, XmmRegister src);
   void sqrtss(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 14a98b9..c8e923c 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -660,6 +660,14 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::comisd, "comisd %{reg2}, %{reg1}"), "comisd");
 }
 
+TEST_F(AssemblerX86_64Test, Ucomiss) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::ucomiss, "ucomiss %{reg2}, %{reg1}"), "ucomiss");
+}
+
+TEST_F(AssemblerX86_64Test, Ucomisd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::ucomisd, "ucomisd %{reg2}, %{reg1}"), "ucomisd");
+}
+
 TEST_F(AssemblerX86_64Test, Sqrtss) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::sqrtss, "sqrtss %{reg2}, %{reg1}"), "sqrtss");
 }
diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h
index 2a5b43d..0c782d4 100644
--- a/compiler/utils/x86_64/constants_x86_64.h
+++ b/compiler/utils/x86_64/constants_x86_64.h
@@ -105,7 +105,8 @@
   kZero         = kEqual,
   kNotZero      = kNotEqual,
   kNegative     = kSign,
-  kPositive     = kNotSign
+  kPositive     = kNotSign,
+  kUnordered    = kParityEven
 };
 
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 7d4b726..d7669e1 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -165,7 +165,13 @@
   UsageError("  --compiler-backend=(Quick|Optimizing|Portable): select compiler backend");
   UsageError("      set.");
   UsageError("      Example: --compiler-backend=Portable");
-  UsageError("      Default: Quick");
+  if (kUsePortableCompiler) {
+    UsageError("      Default: Portable");
+  } else if (kUseOptimizingCompiler) {
+    UsageError("      Default: Optimizing");
+  } else {
+    UsageError("      Default: Quick");
+  }
   UsageError("");
   UsageError("  --compiler-filter="
                 "(verify-none"
@@ -419,7 +425,9 @@
 class Dex2Oat FINAL {
  public:
   explicit Dex2Oat(TimingLogger* timings) :
-      compiler_kind_(kUsePortableCompiler ? Compiler::kPortable : Compiler::kQuick),
+      compiler_kind_(kUsePortableCompiler
+          ? Compiler::kPortable
+          : (kUseOptimizingCompiler ? Compiler::kOptimizing : Compiler::kQuick)),
       instruction_set_(kRuntimeISA),
       // Take the default set of instruction features from the build.
       method_inliner_map_(),
@@ -442,10 +450,10 @@
       timings_(timings) {}
 
   ~Dex2Oat() {
+    LogCompletionTime();  // Needs to be before since it accesses the runtime.
     if (kIsDebugBuild || (RUNNING_ON_VALGRIND != 0)) {
       delete runtime_;  // See field declaration for why this is manual.
     }
-    LogCompletionTime();
   }
 
   // Parse the arguments from the command line. In case of an unrecognized option or impossible
@@ -597,7 +605,6 @@
           compiler_kind_ = Compiler::kQuick;
         } else if (backend_str == "Optimizing") {
           compiler_kind_ = Compiler::kOptimizing;
-          compile_pic = true;
         } else if (backend_str == "Portable") {
           compiler_kind_ = Compiler::kPortable;
         } else {
@@ -702,11 +709,26 @@
         //       on having verbost methods.
         gLogVerbosity.compiler = false;
         Split(option.substr(strlen("--verbose-methods=")).ToString(), ',', &verbose_methods_);
+      } else if (option.starts_with("--dump-init-failures=")) {
+        std::string file_name = option.substr(strlen("--dump-init-failures=")).data();
+        init_failure_output_.reset(new std::ofstream(file_name));
+        if (init_failure_output_.get() == nullptr) {
+          LOG(ERROR) << "Failed to allocate ofstream";
+        } else if (init_failure_output_->fail()) {
+          LOG(ERROR) << "Failed to open " << file_name << " for writing the initialization "
+                     << "failures.";
+          init_failure_output_.reset();
+        }
       } else {
         Usage("Unknown argument %s", option.data());
       }
     }
 
+    if (compiler_kind_ == Compiler::kOptimizing) {
+      // Optimizing only supports PIC mode.
+      compile_pic = true;
+    }
+
     if (oat_filename_.empty() && oat_fd_ == -1) {
       Usage("Output must be supplied with either --oat-file or --oat-fd");
     }
@@ -906,7 +928,8 @@
   #endif
                                                 verbose_methods_.empty() ?
                                                     nullptr :
-                                                    &verbose_methods_));
+                                                    &verbose_methods_,
+                                                init_failure_output_.get()));
 
     // Done with usage checks, enable watchdog if requested
     if (watch_dog_enabled) {
@@ -960,6 +983,12 @@
     return true;
   }
 
+  void EraseOatFile() {
+    DCHECK(oat_file_.get() != nullptr);
+    oat_file_->Erase();
+    oat_file_.reset();
+  }
+
   // Set up the environment for compilation. Includes starting the runtime and loading/opening the
   // boot class path.
   bool Setup() {
@@ -1274,7 +1303,6 @@
       if (!driver_->WriteElf(android_root_, is_host_, dex_files_, oat_writer.get(),
                              oat_file_.get())) {
         LOG(ERROR) << "Failed to write ELF file " << oat_file_->GetPath();
-        oat_file_->Erase();
         return false;
       }
     }
@@ -1581,9 +1609,10 @@
     return ReadImageClasses(image_classes_stream);
   }
 
-  void LogCompletionTime() const {
+  void LogCompletionTime() {
     LOG(INFO) << "dex2oat took " << PrettyDuration(NanoTime() - start_ns_)
-              << " (threads: " << thread_count_ << ")";
+              << " (threads: " << thread_count_ << ") "
+              << driver_->GetMemoryUsageString();
   }
 
   std::unique_ptr<CompilerOptions> compiler_options_;
@@ -1640,6 +1669,7 @@
   std::string profile_file_;  // Profile file to use
   TimingLogger* timings_;
   std::unique_ptr<CumulativeLogger> compiler_phases_timings_;
+  std::unique_ptr<std::ostream> init_failure_output_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Dex2Oat);
 };
@@ -1672,6 +1702,7 @@
 
   // Create the boot.oat.
   if (!dex2oat.CreateOatFile()) {
+    dex2oat.EraseOatFile();
     return EXIT_FAILURE;
   }
 
@@ -1716,6 +1747,7 @@
 
   // Create the app oat.
   if (!dex2oat.CreateOatFile()) {
+    dex2oat.EraseOatFile();
     return EXIT_FAILURE;
   }
 
@@ -1773,6 +1805,7 @@
   LOG(INFO) << CommandLine();
 
   if (!dex2oat.Setup()) {
+    dex2oat.EraseOatFile();
     return EXIT_FAILURE;
   }
 
diff --git a/disassembler/Android.mk b/disassembler/Android.mk
index f2dd1ee..3ad2941 100644
--- a/disassembler/Android.mk
+++ b/disassembler/Android.mk
@@ -85,7 +85,11 @@
   LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/Android.mk
   include external/libcxx/libcxx.mk
   # For disassembler_arm64.
-  LOCAL_SHARED_LIBRARIES += libvixl
+  ifeq ($$(art_ndebug_or_debug),debug)
+     LOCAL_SHARED_LIBRARIES += libvixld
+  else
+     LOCAL_SHARED_LIBRARIES += libvixl
+  endif
   ifeq ($$(art_target_or_host),target)
     include $(BUILD_SHARED_LIBRARY)
   else # host
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index fe50421..bd3bebf 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -74,7 +74,7 @@
     ++buffer;
   }
 
-  void* data_address = instr->LiteralAddress();
+  void* data_address = instr->LiteralAddress<void*>();
   ptrdiff_t buf_size_remaining = buffer_end - buffer;
   vixl::Instr op = instr->Mask(vixl::LoadLiteralMask);
 
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index d28b626..b048833 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -32,7 +32,6 @@
 #include "dex_instruction.h"
 #include "disassembler.h"
 #include "elf_builder.h"
-#include "field_helper.h"
 #include "gc_map.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
@@ -1459,50 +1458,54 @@
 
   static void PrintField(std::ostream& os, mirror::ArtField* field, mirror::Object* obj)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* descriptor = field->GetTypeDescriptor();
     os << StringPrintf("%s: ", field->GetName());
-    if (descriptor[0] != 'L' && descriptor[0] != '[') {
-      StackHandleScope<1> hs(Thread::Current());
-      FieldHelper fh(hs.NewHandle(field));
-      mirror::Class* type = fh.GetType();
-      DCHECK(type->IsPrimitive());
-      if (type->IsPrimitiveLong()) {
+    switch (field->GetTypeAsPrimitiveType()) {
+      case Primitive::kPrimLong:
         os << StringPrintf("%" PRId64 " (0x%" PRIx64 ")\n", field->Get64(obj), field->Get64(obj));
-      } else if (type->IsPrimitiveDouble()) {
+        break;
+      case Primitive::kPrimDouble:
         os << StringPrintf("%f (%a)\n", field->GetDouble(obj), field->GetDouble(obj));
-      } else if (type->IsPrimitiveFloat()) {
+        break;
+      case Primitive::kPrimFloat:
         os << StringPrintf("%f (%a)\n", field->GetFloat(obj), field->GetFloat(obj));
-      } else if (type->IsPrimitiveInt()) {
+        break;
+      case Primitive::kPrimInt:
         os << StringPrintf("%d (0x%x)\n", field->Get32(obj), field->Get32(obj));
-      } else if (type->IsPrimitiveChar()) {
+        break;
+      case Primitive::kPrimChar:
         os << StringPrintf("%u (0x%x)\n", field->GetChar(obj), field->GetChar(obj));
-      } else if (type->IsPrimitiveShort()) {
+        break;
+      case Primitive::kPrimShort:
         os << StringPrintf("%d (0x%x)\n", field->GetShort(obj), field->GetShort(obj));
-      } else if (type->IsPrimitiveBoolean()) {
+        break;
+      case Primitive::kPrimBoolean:
         os << StringPrintf("%s (0x%x)\n", field->GetBoolean(obj)? "true" : "false",
             field->GetBoolean(obj));
-      } else if (type->IsPrimitiveByte()) {
+        break;
+      case Primitive::kPrimByte:
         os << StringPrintf("%d (0x%x)\n", field->GetByte(obj), field->GetByte(obj));
-      } else {
-        LOG(FATAL) << "Unknown type: " << PrettyClass(type);
-      }
-    } else {
-      // Get the value, don't compute the type unless it is non-null as we don't want
-      // to cause class loading.
-      mirror::Object* value = field->GetObj(obj);
-      if (value == nullptr) {
-        os << StringPrintf("null   %s\n", PrettyDescriptor(descriptor).c_str());
-      } else {
-        // Grab the field type without causing resolution.
-        StackHandleScope<1> hs(Thread::Current());
-        FieldHelper fh(hs.NewHandle(field));
-        mirror::Class* field_type = fh.GetType(false);
-        if (field_type != nullptr) {
-          PrettyObjectValue(os, field_type, value);
+        break;
+      case Primitive::kPrimNot: {
+        // Get the value, don't compute the type unless it is non-null as we don't want
+        // to cause class loading.
+        mirror::Object* value = field->GetObj(obj);
+        if (value == nullptr) {
+          os << StringPrintf("null   %s\n", PrettyDescriptor(field->GetTypeDescriptor()).c_str());
         } else {
-          os << StringPrintf("%p   %s\n", value, PrettyDescriptor(descriptor).c_str());
+          // Grab the field type without causing resolution.
+          mirror::Class* field_type = field->GetType(false);
+          if (field_type != nullptr) {
+            PrettyObjectValue(os, field_type, value);
+          } else {
+            os << StringPrintf("%p   %s\n", value,
+                               PrettyDescriptor(field->GetTypeDescriptor()).c_str());
+          }
         }
+        break;
       }
+      default:
+        os << "unexpected field type: " << field->GetTypeDescriptor() << "\n";
+        break;
     }
   }
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 087c0ea..7dfdb75 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -42,7 +42,6 @@
   dex_file_verifier.cc \
   dex_instruction.cc \
   elf_file.cc \
-  field_helper.cc \
   gc/allocator/dlmalloc.cc \
   gc/allocator/rosalloc.cc \
   gc/accounting/card_table.cc \
@@ -91,7 +90,6 @@
   jobject_comparator.cc \
   mem_map.cc \
   memory_region.cc \
-  method_helper.cc \
   mirror/art_field.cc \
   mirror/art_method.cc \
   mirror/array.cc \
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index fe5b765..e45d3a3 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -24,7 +24,6 @@
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
-#include "field_helper.h"
 #include "gc/space/space.h"
 #include "java_vm_ext.h"
 #include "jni_internal.h"
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 7a8e4a3..ee13e03 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -43,7 +43,6 @@
 #include "intern_table.h"
 #include "interpreter/interpreter.h"
 #include "leb128.h"
-#include "method_helper-inl.h"
 #include "oat.h"
 #include "oat_file.h"
 #include "object_lock.h"
@@ -1671,6 +1670,7 @@
       LOG(FATAL) << "Failed to open dex file " << dex_file_location
                  << " from within oat file " << oat_file.GetLocation()
                  << " error '" << error_msg << "'";
+      UNREACHABLE();
     }
 
     CHECK_EQ(dex_file->GetLocationChecksum(), oat_dex_file->GetDexFileLocationChecksum());
@@ -2393,6 +2393,7 @@
           break;
         default:
           LOG(FATAL) << "Unknown descriptor: " << c;
+          UNREACHABLE();
       }
     }
   }
@@ -2442,7 +2443,7 @@
   }
   DCHECK(!it.HasNext());
   LOG(FATAL) << "Failed to find method index " << method_idx << " in " << dex_file.GetLocation();
-  return 0;
+  UNREACHABLE();
 }
 
 const OatFile::OatMethod ClassLinker::FindOatMethodFor(mirror::ArtMethod* method, bool* found) {
@@ -2780,7 +2781,7 @@
 
   klass->SetDexClassDefIndex(dex_file.GetIndexForClassDef(dex_class_def));
   klass->SetDexTypeIndex(dex_class_def.class_idx_);
-  klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
+  CHECK(klass->GetDexCacheStrings() != nullptr);
 
   const uint8_t* class_data = dex_file.GetClassData(dex_class_def);
   if (class_data == nullptr) {
@@ -3078,7 +3079,7 @@
     LOG(ERROR) << "Registered dex file " << i << " = " << dex_cache->GetDexFile()->GetLocation();
   }
   LOG(FATAL) << "Failed to find DexCache for DexFile " << location;
-  return nullptr;
+  UNREACHABLE();
 }
 
 void ClassLinker::FixupDexCaches(mirror::ArtMethod* resolution_method) {
@@ -3749,8 +3750,7 @@
   LOG(FATAL) << "Unexpected class status: " << oat_file_class_status
              << " " << dex_file.GetLocation() << " " << PrettyClass(klass) << " "
              << klass->GetDescriptor(&temp);
-
-  return false;
+  UNREACHABLE();
 }
 
 void ClassLinker::ResolveClassExceptionHandlerTypes(const DexFile& dex_file,
@@ -4345,6 +4345,41 @@
   UNREACHABLE();
 }
 
+static bool HasSameSignatureWithDifferentClassLoaders(Thread* self,
+                                                      Handle<mirror::ArtMethod> method1,
+                                                      Handle<mirror::ArtMethod> method2)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  {
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> return_type(hs.NewHandle(method1->GetReturnType()));
+    if (UNLIKELY(method2->GetReturnType() != return_type.Get())) {
+      return false;
+    }
+  }
+  const DexFile::TypeList* types1 = method1->GetParameterTypeList();
+  const DexFile::TypeList* types2 = method2->GetParameterTypeList();
+  if (types1 == nullptr) {
+    return (types2 == nullptr) || (types2->Size() == 0);
+  } else if (UNLIKELY(types2 == nullptr)) {
+    return types1->Size() == 0;
+  }
+  uint32_t num_types = types1->Size();
+  if (UNLIKELY(num_types != types2->Size())) {
+    return false;
+  }
+  for (uint32_t i = 0; i < num_types; ++i) {
+    mirror::Class* param_type =
+        method1->GetClassFromTypeIndex(types1->GetTypeItem(i).type_idx_, true);
+    mirror::Class* other_param_type =
+        method2->GetClassFromTypeIndex(types2->GetTypeItem(i).type_idx_, true);
+    if (UNLIKELY(param_type != other_param_type)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+
 bool ClassLinker::ValidateSuperClassDescriptors(Handle<mirror::Class> klass) {
   if (klass->IsInterface()) {
     return true;
@@ -4352,19 +4387,19 @@
   // Begin with the methods local to the superclass.
   Thread* self = Thread::Current();
   StackHandleScope<2> hs(self);
-  MutableMethodHelper mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
-  MutableMethodHelper super_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
+  MutableHandle<mirror::ArtMethod> h_m(hs.NewHandle<mirror::ArtMethod>(nullptr));
+  MutableHandle<mirror::ArtMethod> super_h_m(hs.NewHandle<mirror::ArtMethod>(nullptr));
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
     for (int i = klass->GetSuperClass()->GetVTableLength() - 1; i >= 0; --i) {
-      mh.ChangeMethod(klass->GetVTableEntry(i));
-      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTableEntry(i));
-      if (mh.GetMethod() != super_mh.GetMethod() &&
-          !mh.HasSameSignatureWithDifferentClassLoaders(self, &super_mh)) {
+      h_m.Assign(klass->GetVTableEntry(i));
+      super_h_m.Assign(klass->GetSuperClass()->GetVTableEntry(i));
+      if (h_m.Get() != super_h_m.Get() &&
+          !HasSameSignatureWithDifferentClassLoaders(self, h_m, super_h_m)) {
         ThrowLinkageError(klass.Get(),
                           "Class %s method %s resolves differently in superclass %s",
                           PrettyDescriptor(klass.Get()).c_str(),
-                          PrettyMethod(mh.GetMethod()).c_str(),
+                          PrettyMethod(h_m.Get()).c_str(),
                           PrettyDescriptor(klass->GetSuperClass()).c_str());
         return false;
       }
@@ -4374,14 +4409,14 @@
     if (klass->GetClassLoader() != klass->GetIfTable()->GetInterface(i)->GetClassLoader()) {
       uint32_t num_methods = klass->GetIfTable()->GetInterface(i)->NumVirtualMethods();
       for (uint32_t j = 0; j < num_methods; ++j) {
-        mh.ChangeMethod(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
-        super_mh.ChangeMethod(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
-        if (mh.GetMethod() != super_mh.GetMethod() &&
-            !mh.HasSameSignatureWithDifferentClassLoaders(self, &super_mh)) {
+        h_m.Assign(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
+        super_h_m.Assign(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
+        if (h_m.Get() != super_h_m.Get() &&
+            !HasSameSignatureWithDifferentClassLoaders(self, h_m, super_h_m)) {
           ThrowLinkageError(klass.Get(),
                             "Class %s method %s resolves differently in interface %s",
                             PrettyDescriptor(klass.Get()).c_str(),
-                            PrettyMethod(mh.GetMethod()).c_str(),
+                            PrettyMethod(h_m.Get()).c_str(),
                             PrettyDescriptor(klass->GetIfTable()->GetInterface(i)).c_str());
           return false;
         }
@@ -5566,71 +5601,73 @@
       }
 
       // If we found something, check that it can be accessed by the referrer.
+      bool exception_generated = false;
       if (resolved != nullptr && referrer.Get() != nullptr) {
         mirror::Class* methods_class = resolved->GetDeclaringClass();
         mirror::Class* referring_class = referrer->GetDeclaringClass();
         if (!referring_class->CanAccess(methods_class)) {
           ThrowIllegalAccessErrorClassForMethodDispatch(referring_class, methods_class,
                                                         resolved, type);
-          return nullptr;
+          exception_generated = true;
         } else if (!referring_class->CanAccessMember(methods_class,
                                                      resolved->GetAccessFlags())) {
           ThrowIllegalAccessErrorMethod(referring_class, resolved);
-          return nullptr;
+          exception_generated = true;
         }
       }
-
-      // Otherwise, throw an IncompatibleClassChangeError if we found something, and check interface
-      // methods and throw if we find the method there. If we find nothing, throw a
-      // NoSuchMethodError.
-      switch (type) {
-        case kDirect:
-        case kStatic:
-          if (resolved != nullptr) {
-            ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
-          } else {
-            resolved = klass->FindInterfaceMethod(name, signature);
-            if (resolved != nullptr) {
-              ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
-            } else {
-              ThrowNoSuchMethodError(type, klass, name, signature);
-            }
-          }
-          break;
-        case kInterface:
-          if (resolved != nullptr) {
-            ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
-          } else {
-            resolved = klass->FindVirtualMethod(name, signature);
+      if (!exception_generated) {
+        // Otherwise, throw an IncompatibleClassChangeError if we found something, and check
+        // interface methods and throw if we find the method there. If we find nothing, throw a
+        // NoSuchMethodError.
+        switch (type) {
+          case kDirect:
+          case kStatic:
             if (resolved != nullptr) {
               ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
             } else {
-              ThrowNoSuchMethodError(type, klass, name, signature);
+              resolved = klass->FindInterfaceMethod(name, signature);
+              if (resolved != nullptr) {
+                ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
+              } else {
+                ThrowNoSuchMethodError(type, klass, name, signature);
+              }
             }
-          }
-          break;
-        case kSuper:
-          if (resolved != nullptr) {
-            ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
-          } else {
-            ThrowNoSuchMethodError(type, klass, name, signature);
-          }
-          break;
-        case kVirtual:
-          if (resolved != nullptr) {
-            ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
-          } else {
-            resolved = klass->FindInterfaceMethod(name, signature);
+            break;
+          case kInterface:
             if (resolved != nullptr) {
-              ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
+              ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
+            } else {
+              resolved = klass->FindVirtualMethod(name, signature);
+              if (resolved != nullptr) {
+                ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
+              } else {
+                ThrowNoSuchMethodError(type, klass, name, signature);
+              }
+            }
+            break;
+          case kSuper:
+            if (resolved != nullptr) {
+              ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
             } else {
               ThrowNoSuchMethodError(type, klass, name, signature);
             }
-          }
-          break;
+            break;
+          case kVirtual:
+            if (resolved != nullptr) {
+              ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
+            } else {
+              resolved = klass->FindInterfaceMethod(name, signature);
+              if (resolved != nullptr) {
+                ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
+              } else {
+                ThrowNoSuchMethodError(type, klass, name, signature);
+              }
+            }
+            break;
+        }
       }
     }
-    DCHECK(Thread::Current()->IsExceptionPending());
+    Thread::Current()->AssertPendingException();
     return nullptr;
   }
 }
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index c22c51e..4f09460 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -23,7 +23,6 @@
 #include "common_runtime_test.h"
 #include "dex_file.h"
 #include "entrypoints/entrypoint_utils-inl.h"
-#include "field_helper.h"
 #include "gc/heap.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method.h"
@@ -178,9 +177,7 @@
     EXPECT_TRUE(field->GetClass() != nullptr);
     EXPECT_EQ(klass, field->GetDeclaringClass());
     EXPECT_TRUE(field->GetName() != nullptr);
-    StackHandleScope<1> hs(Thread::Current());
-    FieldHelper fh(hs.NewHandle(field));
-    EXPECT_TRUE(fh.GetType() != nullptr);
+    EXPECT_TRUE(field->GetType(true) != nullptr);
   }
 
   void AssertClass(const std::string& descriptor, Handle<mirror::Class> klass)
@@ -286,8 +283,7 @@
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
       mirror::ArtField* field = klass->GetInstanceField(i);
       fhandle.Assign(field);
-      FieldHelper fh(fhandle);
-      mirror::Class* field_type = fh.GetType();
+      mirror::Class* field_type = fhandle->GetType(true);
       ASSERT_TRUE(field_type != nullptr);
       if (!field->IsPrimitiveType()) {
         ASSERT_TRUE(!field_type->IsPrimitive());
@@ -295,7 +291,7 @@
         if (current_ref_offset.Uint32Value() == end_ref_offset.Uint32Value()) {
           // While Reference.referent is not primitive, the ClassLinker
           // treats it as such so that the garbage collector won't scan it.
-          EXPECT_EQ(PrettyField(fh.GetField()),
+          EXPECT_EQ(PrettyField(fhandle.Get()),
                     "java.lang.Object java.lang.ref.Reference.referent");
         } else {
           current_ref_offset = MemberOffset(current_ref_offset.Uint32Value() +
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 86d027b..a9b70cb 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -25,13 +25,11 @@
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
-#include "field_helper.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "handle_scope.h"
 #include "jdwp/object_registry.h"
-#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
@@ -1924,7 +1922,7 @@
         HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v));
         HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
         HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
-        field_type = FieldHelper(h_f).GetType();
+        field_type = h_f->GetType(true);
       }
       if (!field_type->IsAssignableFrom(v->GetClass())) {
         return JDWP::ERR_INVALID_OBJECT;
@@ -3687,7 +3685,7 @@
 
     {
       StackHandleScope<3> hs(soa.Self());
-      MethodHelper mh(hs.NewHandle(m));
+      HandleWrapper<mirror::ArtMethod> h_m(hs.NewHandleWrapper(&m));
       HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&receiver));
       HandleWrapper<mirror::Class> h_klass(hs.NewHandleWrapper(&c));
       const DexFile::TypeList* types = m->GetParameterTypeList();
@@ -3698,7 +3696,8 @@
 
         if (shorty[i + 1] == 'L') {
           // Did we really get an argument of an appropriate reference type?
-          mirror::Class* parameter_type = mh.GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+          mirror::Class* parameter_type =
+              h_m->GetClassFromTypeIndex(types->GetTypeItem(i).type_idx_, true);
           mirror::Object* argument = gRegistry->Get<mirror::Object*>(arg_values[i], &error);
           if (error != JDWP::ERR_NONE) {
             return JDWP::ERR_INVALID_OBJECT;
@@ -3712,8 +3711,6 @@
           v.l = gRegistry->GetJObject(arg_values[i]);
         }
       }
-      // Update in case it moved.
-      m = mh.GetMethod();
     }
 
     req->receiver = receiver;
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index da2dfe1..c329fe6 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -20,7 +20,6 @@
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
 #include "gc/accounting/card_table-inl.h"
-#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
index 908d3cd..3b47f24 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.cc
@@ -24,9 +24,7 @@
 
 namespace art {
 
-// TODO: Make the MethodHelper here be compaction safe.
-extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper* mh,
-                                                   const DexFile::CodeItem* code_item,
+extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
                                                    ShadowFrame* shadow_frame, JValue* result) {
   mirror::ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
@@ -50,11 +48,11 @@
   }
   uint16_t arg_offset = (code_item == NULL) ? 0 : code_item->registers_size_ - code_item->ins_size_;
   if (kUsePortableCompiler) {
-    InvokeWithShadowFrame(self, shadow_frame, arg_offset, mh, result);
+    InvokeWithShadowFrame(self, shadow_frame, arg_offset, result);
   } else {
     method->Invoke(self, shadow_frame->GetVRegArgs(arg_offset),
                    (shadow_frame->NumberOfVRegs() - arg_offset) * sizeof(uint32_t),
-                   result, mh->GetShorty());
+                   result, method->GetShorty());
   }
 }
 
diff --git a/runtime/entrypoints/interpreter/interpreter_entrypoints.h b/runtime/entrypoints/interpreter/interpreter_entrypoints.h
index 5d646e9..0952214 100644
--- a/runtime/entrypoints/interpreter/interpreter_entrypoints.h
+++ b/runtime/entrypoints/interpreter/interpreter_entrypoints.h
@@ -27,17 +27,14 @@
 namespace art {
 
 union JValue;
-class MethodHelper;
 class ShadowFrame;
 class Thread;
 
 // Pointers to functions that are called by interpreter trampolines via thread-local storage.
 struct PACKED(4) InterpreterEntryPoints {
-  void (*pInterpreterToInterpreterBridge)(Thread* self, MethodHelper* mh,
-                                          const DexFile::CodeItem* code_item,
+  void (*pInterpreterToInterpreterBridge)(Thread* self, const DexFile::CodeItem* code_item,
                                           ShadowFrame* shadow_frame, JValue* result);
-  void (*pInterpreterToCompiledCodeBridge)(Thread* self, MethodHelper* mh,
-                                           const DexFile::CodeItem* code_item,
+  void (*pInterpreterToCompiledCodeBridge)(Thread* self, const DexFile::CodeItem* code_item,
                                            ShadowFrame* shadow_frame, JValue* result);
 };
 
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index e7975f8..2a2771f 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -27,6 +27,56 @@
 
 namespace art {
 
+class ShortyHelper {
+ public:
+  ShortyHelper(const char* shorty, uint32_t shorty_len, bool is_static)
+      : shorty_(shorty), shorty_len_(shorty_len), is_static_(is_static) {
+  }
+
+  const char* GetShorty() const {
+    return shorty_;
+  }
+
+  uint32_t GetShortyLength() const {
+    return shorty_len_;
+  }
+
+  size_t NumArgs() const {
+    // "1 +" because the first in Args is the receiver.
+    // "- 1" because we don't count the return type.
+    return (is_static_ ? 0 : 1) + GetShortyLength() - 1;
+  }
+
+  // Get the primitive type associated with the given parameter.
+  Primitive::Type GetParamPrimitiveType(size_t param) const {
+    CHECK_LT(param, NumArgs());
+    if (is_static_) {
+      param++;  // 0th argument must skip return value at start of the shorty.
+    } else if (param == 0) {
+      return Primitive::kPrimNot;
+    }
+    return Primitive::GetType(shorty_[param]);
+  }
+
+  // Is the specified parameter a long or double, where parameter 0 is 'this' for instance methods.
+  bool IsParamALongOrDouble(size_t param) const {
+    Primitive::Type type = GetParamPrimitiveType(param);
+    return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
+  }
+
+  // Is the specified parameter a reference, where parameter 0 is 'this' for instance methods.
+  bool IsParamAReference(size_t param) const {
+    return GetParamPrimitiveType(param) == Primitive::kPrimNot;
+  }
+
+ private:
+  const char* const shorty_;
+  const uint32_t shorty_len_;
+  const bool is_static_;
+
+  DISALLOW_COPY_AND_ASSIGN(ShortyHelper);
+};
+
 // Visits the arguments as saved to the stack by a Runtime::kRefAndArgs callee save frame.
 class PortableArgumentVisitor {
  public:
@@ -60,7 +110,7 @@
 #define PORTABLE_STACK_ARG_SKIP 0
 #endif
 
-  PortableArgumentVisitor(MethodHelper& caller_mh, mirror::ArtMethod** sp)
+  PortableArgumentVisitor(ShortyHelper& caller_mh, mirror::ArtMethod** sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
     caller_mh_(caller_mh),
     args_in_regs_(ComputeArgsInRegs(caller_mh)),
@@ -119,7 +169,7 @@
   }
 
  private:
-  static size_t ComputeArgsInRegs(MethodHelper& mh) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  static size_t ComputeArgsInRegs(ShortyHelper& mh) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
 #if (defined(__i386__))
     UNUSED(mh);
     return 0;
@@ -136,7 +186,7 @@
     return args_in_regs;
 #endif
   }
-  MethodHelper& caller_mh_;
+  ShortyHelper& caller_mh_;
   const size_t args_in_regs_;
   const size_t num_params_;
   uint8_t* const reg_args_;
@@ -149,7 +199,7 @@
 // Visits arguments on the stack placing them into the shadow frame.
 class BuildPortableShadowFrameVisitor : public PortableArgumentVisitor {
  public:
-  BuildPortableShadowFrameVisitor(MethodHelper& caller_mh, mirror::ArtMethod** sp,
+  BuildPortableShadowFrameVisitor(ShortyHelper& caller_mh, mirror::ArtMethod** sp,
       ShadowFrame& sf, size_t first_arg_reg) :
     PortableArgumentVisitor(caller_mh, sp), sf_(sf), cur_reg_(first_arg_reg) { }
   virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
@@ -198,7 +248,9 @@
   } else {
     const char* old_cause = self->StartAssertNoThreadSuspension("Building interpreter shadow frame");
     StackHandleScope<2> hs(self);
-    MethodHelper mh(hs.NewHandle(method));
+    uint32_t shorty_len;
+    const char* shorty = method->GetShorty(&shorty_len);
+    ShortyHelper mh(shorty, shorty_len, method->IsStatic());
     const DexFile::CodeItem* code_item = method->GetCodeItem();
     uint16_t num_regs = code_item->registers_size_;
     void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
@@ -224,7 +276,7 @@
       }
     }
 
-    JValue result = interpreter::EnterInterpreterFromEntryPoint(self, &mh, code_item, shadow_frame);
+    JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
     return result.GetJ();
@@ -235,7 +287,7 @@
 // to jobjects.
 class BuildPortableArgumentVisitor : public PortableArgumentVisitor {
  public:
-  BuildPortableArgumentVisitor(MethodHelper& caller_mh, mirror::ArtMethod** sp,
+  BuildPortableArgumentVisitor(ShortyHelper& caller_mh, mirror::ArtMethod** sp,
                                ScopedObjectAccessUnchecked& soa, std::vector<jvalue>& args) :
     PortableArgumentVisitor(caller_mh, sp), soa_(soa), args_(args) {}
 
@@ -294,8 +346,9 @@
   jobject rcvr_jobj = soa.AddLocalReference<jobject>(receiver);
 
   // Placing arguments into args vector and remove the receiver.
-  StackHandleScope<1> hs(self);
-  MethodHelper proxy_mh(hs.NewHandle(proxy_method));
+  uint32_t shorty_len;
+  const char* shorty = proxy_method->GetShorty(&shorty_len);
+  ShortyHelper proxy_mh(shorty, shorty_len, false);
   std::vector<jvalue> args;
   BuildPortableArgumentVisitor local_ref_visitor(proxy_mh, sp, soa, args);
   local_ref_visitor.VisitArguments();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 93dc62a..cb81629 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -22,6 +22,7 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/card_table-inl.h"
 #include "interpreter/interpreter.h"
+#include "method_reference.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -510,25 +511,25 @@
     BuildQuickShadowFrameVisitor shadow_frame_builder(sp, method->IsStatic(), shorty, shorty_len,
                                                       shadow_frame, first_arg_reg);
     shadow_frame_builder.VisitArguments();
+    const bool needs_initialization =
+        method->IsStatic() && !method->GetDeclaringClass()->IsInitialized();
     // Push a transition back into managed code onto the linked list in thread.
     ManagedStack fragment;
     self->PushManagedStackFragment(&fragment);
     self->PushShadowFrame(shadow_frame);
     self->EndAssertNoThreadSuspension(old_cause);
 
-    StackHandleScope<1> hs(self);
-    MethodHelper mh(hs.NewHandle(method));
-    if (mh.Get()->IsStatic() && !mh.Get()->GetDeclaringClass()->IsInitialized()) {
+    if (needs_initialization) {
       // Ensure static method's class is initialized.
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> h_class(hs2.NewHandle(mh.Get()->GetDeclaringClass()));
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> h_class(hs.NewHandle(shadow_frame->GetMethod()->GetDeclaringClass()));
       if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-        DCHECK(Thread::Current()->IsExceptionPending()) << PrettyMethod(mh.Get());
+        DCHECK(Thread::Current()->IsExceptionPending()) << PrettyMethod(shadow_frame->GetMethod());
         self->PopManagedStackFragment(fragment);
         return 0;
       }
     }
-    JValue result = interpreter::EnterInterpreterFromEntryPoint(self, &mh, code_item, shadow_frame);
+    JValue result = interpreter::EnterInterpreterFromEntryPoint(self, code_item, shadow_frame);
     // Pop transition.
     self->PopManagedStackFragment(fragment);
     // No need to restore the args since the method has already been run by the interpreter.
@@ -641,7 +642,7 @@
 
   // Convert proxy method into expected interface method.
   mirror::ArtMethod* interface_method = proxy_method->FindOverriddenMethod();
-  DCHECK(interface_method != NULL) << PrettyMethod(proxy_method);
+  DCHECK(interface_method != nullptr) << PrettyMethod(proxy_method);
   DCHECK(!interface_method->IsProxyMethod()) << PrettyMethod(interface_method);
   jobject interface_method_jobj = soa.AddLocalReference<jobject>(interface_method);
 
@@ -710,12 +711,12 @@
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
   mirror::ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp);
   InvokeType invoke_type;
-  const DexFile* dex_file;
-  uint32_t dex_method_idx;
-  if (called->IsRuntimeMethod()) {
+  MethodReference called_method(nullptr, 0);
+  const bool called_method_known_on_entry = !called->IsRuntimeMethod();
+  if (!called_method_known_on_entry) {
     uint32_t dex_pc = caller->ToDexPc(QuickArgumentVisitor::GetCallingPc(sp));
     const DexFile::CodeItem* code;
-    dex_file = caller->GetDexFile();
+    called_method.dex_file = caller->GetDexFile();
     code = caller->GetCodeItem();
     CHECK_LT(dex_pc, code->insns_size_in_code_units_);
     const Instruction* instr = Instruction::At(&code->insns_[dex_pc]);
@@ -763,33 +764,33 @@
         is_range = true;
         break;
       default:
-        LOG(FATAL) << "Unexpected call into trampoline: " << instr->DumpString(NULL);
-        // Avoid used uninitialized warnings.
-        invoke_type = kDirect;
-        is_range = false;
+        LOG(FATAL) << "Unexpected call into trampoline: " << instr->DumpString(nullptr);
+        UNREACHABLE();
     }
-    dex_method_idx = (is_range) ? instr->VRegB_3rc() : instr->VRegB_35c();
+    called_method.dex_method_index = (is_range) ? instr->VRegB_3rc() : instr->VRegB_35c();
   } else {
     invoke_type = kStatic;
-    dex_file = called->GetDexFile();
-    dex_method_idx = called->GetDexMethodIndex();
+    called_method.dex_file = called->GetDexFile();
+    called_method.dex_method_index = called->GetDexMethodIndex();
   }
   uint32_t shorty_len;
   const char* shorty =
-      dex_file->GetMethodShorty(dex_file->GetMethodId(dex_method_idx), &shorty_len);
+      called_method.dex_file->GetMethodShorty(
+          called_method.dex_file->GetMethodId(called_method.dex_method_index), &shorty_len);
   RememberForGcArgumentVisitor visitor(sp, invoke_type == kStatic, shorty, shorty_len, &soa);
   visitor.VisitArguments();
   self->EndAssertNoThreadSuspension(old_cause);
-  bool virtual_or_interface = invoke_type == kVirtual || invoke_type == kInterface;
+  const bool virtual_or_interface = invoke_type == kVirtual || invoke_type == kInterface;
   // Resolve method filling in dex cache.
-  if (UNLIKELY(called->IsRuntimeMethod())) {
+  if (!called_method_known_on_entry) {
     StackHandleScope<1> hs(self);
     mirror::Object* dummy = nullptr;
     HandleWrapper<mirror::Object> h_receiver(
         hs.NewHandleWrapper(virtual_or_interface ? &receiver : &dummy));
-    called = linker->ResolveMethod(self, dex_method_idx, &caller, invoke_type);
+    DCHECK_EQ(caller->GetDexFile(), called_method.dex_file);
+    called = linker->ResolveMethod(self, called_method.dex_method_index, &caller, invoke_type);
   }
-  const void* code = NULL;
+  const void* code = nullptr;
   if (LIKELY(!self->IsExceptionPending())) {
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
@@ -810,20 +811,26 @@
                                << invoke_type << " " << orig_called->GetVtableIndex();
 
       // We came here because of sharpening. Ensure the dex cache is up-to-date on the method index
-      // of the sharpened method.
-      if (called->HasSameDexCacheResolvedMethods(caller)) {
-        caller->SetDexCacheResolvedMethod(called->GetDexMethodIndex(), called);
-      } else {
+      // of the sharpened method avoiding dirtying the dex cache if possible.
+      // Note, called_method.dex_method_index references the dex method before the
+      // FindVirtualMethodFor... This is ok for FindDexMethodIndexInOtherDexFile that only cares
+      // about the name and signature.
+      uint32_t update_dex_cache_method_index = called->GetDexMethodIndex();
+      if (!called->HasSameDexCacheResolvedMethods(caller)) {
         // Calling from one dex file to another, need to compute the method index appropriate to
         // the caller's dex file. Since we get here only if the original called was a runtime
         // method, we've got the correct dex_file and a dex_method_idx from above.
-        DCHECK_EQ(caller->GetDexFile(), dex_file);
-        StackHandleScope<1> hs(self);
-        MethodHelper mh(hs.NewHandle(called));
-        uint32_t method_index = mh.FindDexMethodIndexInOtherDexFile(*dex_file, dex_method_idx);
-        if (method_index != DexFile::kDexNoIndex) {
-          caller->SetDexCacheResolvedMethod(method_index, called);
-        }
+        DCHECK(!called_method_known_on_entry);
+        DCHECK_EQ(caller->GetDexFile(), called_method.dex_file);
+        const DexFile* caller_dex_file = called_method.dex_file;
+        uint32_t caller_method_name_and_sig_index = called_method.dex_method_index;
+        update_dex_cache_method_index =
+            called->FindDexMethodIndexInOtherDexFile(*caller_dex_file,
+                                                     caller_method_name_and_sig_index);
+      }
+      if ((update_dex_cache_method_index != DexFile::kDexNoIndex) &&
+          (caller->GetDexCacheResolvedMethod(update_dex_cache_method_index) != called)) {
+        caller->SetDexCacheResolvedMethod(update_dex_cache_method_index, called);
       }
     }
     // Ensure that the called method's class is initialized.
@@ -845,7 +852,7 @@
       DCHECK(called_class->IsErroneous());
     }
   }
-  CHECK_EQ(code == NULL, self->IsExceptionPending());
+  CHECK_EQ(code == nullptr, self->IsExceptionPending());
   // Fixup any locally saved objects may have moved during a GC.
   visitor.FixupReferences();
   // Place called method in callee-save frame to be placed as first argument to quick method.
@@ -1262,6 +1269,7 @@
           break;
         default:
           LOG(FATAL) << "Unexpected type: " << cur_type_ << " in " << shorty;
+          UNREACHABLE();
       }
     }
 
@@ -1785,7 +1793,7 @@
       visitor.FixupReferences();
     }
 
-    if (UNLIKELY(method == NULL)) {
+    if (UNLIKELY(method == nullptr)) {
       CHECK(self->IsExceptionPending());
       return GetTwoWordFailureValue();  // Failure.
     }
@@ -1794,7 +1802,7 @@
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
-  DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method)
+  DCHECK(code != nullptr) << "Code was null in method: " << PrettyMethod(method)
                           << " location: "
                           << method->GetDexFile()->GetLocation();
 
@@ -1880,7 +1888,7 @@
   mirror::ArtMethod* method;
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
     method = this_object->GetClass()->FindVirtualMethodForInterface(interface_method);
-    if (UNLIKELY(method == NULL)) {
+    if (UNLIKELY(method == nullptr)) {
       ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(interface_method, this_object,
                                                                  caller_method);
       return GetTwoWordFailureValue();  // Failure.
@@ -1900,7 +1908,7 @@
     Instruction::Code instr_code = instr->Opcode();
     CHECK(instr_code == Instruction::INVOKE_INTERFACE ||
           instr_code == Instruction::INVOKE_INTERFACE_RANGE)
-        << "Unexpected call into interface trampoline: " << instr->DumpString(NULL);
+        << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
     uint32_t dex_method_idx;
     if (instr_code == Instruction::INVOKE_INTERFACE) {
       dex_method_idx = instr->VRegB_35c();
@@ -1932,7 +1940,7 @@
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
-  DCHECK(code != nullptr) << "Code was NULL in method: " << PrettyMethod(method)
+  DCHECK(code != nullptr) << "Code was null in method: " << PrettyMethod(method)
                           << " location: " << method->GetDexFile()->GetLocation();
 
   return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 835485c..94753d4 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -177,6 +177,12 @@
 
   Thread* self = Thread::Current();
 
+  // If ART is not running, or the thread is not attached to ART pass the
+  // signal on to the next handler in the chain.
+  if (self == nullptr || Runtime::Current() == nullptr || !Runtime::Current()->IsStarted()) {
+    InvokeUserSignalHandler(sig, info, context);
+    return;
+  }
   // Now set up the nested signal handler.
 
   // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully.
diff --git a/runtime/field_helper.cc b/runtime/field_helper.cc
deleted file mode 100644
index 5c85c46..0000000
--- a/runtime/field_helper.cc
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "field_helper.h"
-
-#include "class_linker-inl.h"
-#include "dex_file.h"
-#include "mirror/dex_cache.h"
-#include "runtime.h"
-#include "thread-inl.h"
-
-namespace art {
-
-mirror::Class* FieldHelper::GetType(bool resolve) {
-  uint32_t field_index = field_->GetDexFieldIndex();
-  if (UNLIKELY(field_->GetDeclaringClass()->IsProxyClass())) {
-    return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
-                                                                 field_->GetTypeDescriptor());
-  }
-  const DexFile* dex_file = field_->GetDexFile();
-  const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
-  mirror::Class* type = field_->GetDexCache()->GetResolvedType(field_id.type_idx_);
-  if (resolve && (type == nullptr)) {
-    type = Runtime::Current()->GetClassLinker()->ResolveType(field_id.type_idx_, field_.Get());
-    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
-  }
-  return type;
-}
-
-const char* FieldHelper::GetDeclaringClassDescriptor() {
-  return field_->GetDeclaringClass()->GetDescriptor(&declaring_class_descriptor_);
-}
-
-}  // namespace art
diff --git a/runtime/field_helper.h b/runtime/field_helper.h
deleted file mode 100644
index 8097025..0000000
--- a/runtime/field_helper.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_FIELD_HELPER_H_
-#define ART_RUNTIME_FIELD_HELPER_H_
-
-#include "base/macros.h"
-#include "handle.h"
-#include "mirror/art_field.h"
-
-namespace art {
-
-class FieldHelper {
- public:
-  explicit FieldHelper(Handle<mirror::ArtField> f) : field_(f) {}
-
-  mirror::ArtField* GetField() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return field_.Get();
-  }
-
-  mirror::Class* GetType(bool resolve = true) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // The returned const char* is only guaranteed to be valid for the lifetime of the FieldHelper.
-  // If you need it longer, copy it into a std::string.
-  const char* GetDeclaringClassDescriptor() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
- private:
-  Handle<mirror::ArtField> field_;
-  std::string declaring_class_descriptor_;
-
-  DISALLOW_COPY_AND_ASSIGN(FieldHelper);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_FIELD_HELPER_H_
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 3101c68..9d2f6d1 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -48,11 +48,20 @@
   }
   // Need to check that we arent the large object allocator since the large object allocation code
   // path this function. If we didn't check we would have an infinite loop.
-  if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
-    return AllocLargeObject<kInstrumented, PreFenceVisitor>(self, klass, byte_count,
-                                                            pre_fence_visitor);
-  }
   mirror::Object* obj;
+  if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
+    obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
+                                                           pre_fence_visitor);
+    if (obj != nullptr) {
+      return obj;
+    } else {
+      // There should be an OOM exception, since we are retrying, clear it.
+      self->ClearException();
+    }
+    // If the large object allocation failed, try to use the normal spaces (main space,
+    // non moving space). This can happen if there is significant virtual address space
+    // fragmentation.
+  }
   AllocationTimer alloc_timer(this, &obj);
   size_t bytes_allocated;
   size_t usable_size;
@@ -171,10 +180,13 @@
 }
 
 template <bool kInstrumented, typename PreFenceVisitor>
-inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class* klass,
+inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class** klass,
                                               size_t byte_count,
                                               const PreFenceVisitor& pre_fence_visitor) {
-  return AllocObjectWithAllocator<kInstrumented, false, PreFenceVisitor>(self, klass, byte_count,
+  // Save and restore the class in case it moves.
+  StackHandleScope<1> hs(self);
+  auto klass_wrapper = hs.NewHandleWrapper(klass);
+  return AllocObjectWithAllocator<kInstrumented, false, PreFenceVisitor>(self, *klass, byte_count,
                                                                          kAllocatorTypeLOS,
                                                                          pre_fence_visitor);
 }
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 0cceaa4..0fd0a9f 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -365,6 +365,7 @@
   uint8_t* heap_end = continuous_spaces_.back()->Limit();
   size_t heap_capacity = heap_end - heap_begin;
   // Remove the main backup space since it slows down the GC to have unused extra spaces.
+  // TODO: Avoid needing to do this.
   if (main_space_backup_.get() != nullptr) {
     RemoveSpace(main_space_backup_.get());
   }
@@ -977,6 +978,22 @@
   Trim();
 }
 
+class TrimIndirectReferenceTableClosure : public Closure {
+ public:
+  explicit TrimIndirectReferenceTableClosure(Barrier* barrier) : barrier_(barrier) {
+  }
+  virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
+    ATRACE_BEGIN("Trimming reference table");
+    thread->GetJniEnv()->locals.Trim();
+    ATRACE_END();
+    barrier_->Pass(Thread::Current());
+  }
+
+ private:
+  Barrier* const barrier_;
+};
+
+
 void Heap::Trim() {
   Thread* self = Thread::Current();
   {
@@ -998,6 +1015,19 @@
     WaitForGcToCompleteLocked(kGcCauseTrim, self);
     collector_type_running_ = kCollectorTypeHeapTrim;
   }
+  // Trim reference tables.
+  {
+    ScopedObjectAccess soa(self);
+    JavaVMExt* vm = soa.Vm();
+    // Trim globals indirect reference table.
+    vm->TrimGlobals();
+    // Trim locals indirect reference tables.
+    Barrier barrier(0);
+    TrimIndirectReferenceTableClosure closure(&barrier);
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    size_t barrier_count = Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
+    barrier.Increment(self, barrier_count);
+  }
   uint64_t start_ns = NanoTime();
   // Trim the managed spaces.
   uint64_t total_alloc_space_allocated = 0;
@@ -1571,6 +1601,8 @@
   to_space->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
   const uint64_t space_size_before_compaction = from_space->Size();
   AddSpace(to_space);
+  // Make sure that we will have enough room to copy.
+  CHECK_GE(to_space->GetFootprintLimit(), from_space->GetFootprintLimit());
   Compact(to_space, from_space, kGcCauseHomogeneousSpaceCompact);
   // Leave as prot read so that we can still run ROSAlloc verification on this space.
   from_space->GetMemMap()->Protect(PROT_READ);
@@ -1689,8 +1721,8 @@
         RemoveSpace(temp_space_);
         temp_space_ = nullptr;
         mem_map->Protect(PROT_READ | PROT_WRITE);
-        CreateMainMallocSpace(mem_map.get(), kDefaultInitialSize, mem_map->Size(),
-                              mem_map->Size());
+        CreateMainMallocSpace(mem_map.get(), kDefaultInitialSize,
+                              std::min(mem_map->Size(), growth_limit_), mem_map->Size());
         mem_map.release();
         // Compact to the main space from the bump pointer space, don't need to swap semispaces.
         AddSpace(main_space_);
@@ -1703,9 +1735,9 @@
         if (kIsDebugBuild && kUseRosAlloc) {
           mem_map->Protect(PROT_READ | PROT_WRITE);
         }
-        main_space_backup_.reset(CreateMallocSpaceFromMemMap(mem_map.get(), kDefaultInitialSize,
-                                                             mem_map->Size(), mem_map->Size(),
-                                                             name, true));
+        main_space_backup_.reset(CreateMallocSpaceFromMemMap(
+            mem_map.get(), kDefaultInitialSize, std::min(mem_map->Size(), growth_limit_),
+            mem_map->Size(), name, true));
         if (kIsDebugBuild && kUseRosAlloc) {
           mem_map->Protect(PROT_NONE);
         }
@@ -1947,7 +1979,8 @@
       MemMap* mem_map = main_space_->ReleaseMemMap();
       RemoveSpace(main_space_);
       space::Space* old_main_space = main_space_;
-      CreateMainMallocSpace(mem_map, kDefaultInitialSize, mem_map->Size(), mem_map->Size());
+      CreateMainMallocSpace(mem_map, kDefaultInitialSize, std::min(mem_map->Size(), growth_limit_),
+                            mem_map->Size());
       delete old_main_space;
       AddSpace(main_space_);
     } else {
@@ -2959,7 +2992,18 @@
 
 void Heap::ClearGrowthLimit() {
   growth_limit_ = capacity_;
-  non_moving_space_->ClearGrowthLimit();
+  for (const auto& space : continuous_spaces_) {
+    if (space->IsMallocSpace()) {
+      gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+      malloc_space->ClearGrowthLimit();
+      malloc_space->SetFootprintLimit(malloc_space->Capacity());
+    }
+  }
+  // This space isn't added for performance reasons.
+  if (main_space_backup_.get() != nullptr) {
+    main_space_backup_->ClearGrowthLimit();
+    main_space_backup_->SetFootprintLimit(main_space_backup_->Capacity());
+  }
 }
 
 void Heap::AddFinalizerReference(Thread* self, mirror::Object** object) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 69a573e..4e1a0ff 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -654,7 +654,7 @@
 
   // We don't force this to be inlined since it is a slow path.
   template <bool kInstrumented, typename PreFenceVisitor>
-  mirror::Object* AllocLargeObject(Thread* self, mirror::Class* klass, size_t byte_count,
+  mirror::Object* AllocLargeObject(Thread* self, mirror::Class** klass, size_t byte_count,
                                    const PreFenceVisitor& pre_fence_visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/globals.h b/runtime/globals.h
index 4d33196..3104229 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -64,6 +64,12 @@
 static constexpr bool kUsePortableCompiler = false;
 #endif
 
+#if defined(ART_USE_OPTIMIZING_COMPILER)
+static constexpr bool kUseOptimizingCompiler = true;
+#else
+static constexpr bool kUseOptimizingCompiler = false;
+#endif
+
 // Garbage collector constants.
 static constexpr bool kMovingCollector = true && !kUsePortableCompiler;
 static constexpr bool kMarkCompactSupport = false && kMovingCollector;
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index 4d177a3..0d84a1e 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -162,13 +162,12 @@
   DCHECK(table_ != NULL);
   DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
-  int idx = ExtractIndex(iref);
-
   if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid &&
       Thread::Current()->HandleScopeContains(reinterpret_cast<jobject>(iref))) {
     LOG(WARNING) << "Attempt to remove local handle scope entry from IRT, ignoring";
     return true;
   }
+  const int idx = ExtractIndex(iref);
   if (idx < bottomIndex) {
     // Wrong segment.
     LOG(WARNING) << "Attempt to remove index outside index area (" << idx
@@ -236,6 +235,13 @@
   return true;
 }
 
+void IndirectReferenceTable::Trim() {
+  const size_t top_index = Capacity();
+  auto* release_start = AlignUp(reinterpret_cast<uint8_t*>(&table_[top_index]), kPageSize);
+  uint8_t* release_end = table_mem_map_->End();
+  madvise(release_start, release_end - release_start, MADV_DONTNEED);
+}
+
 void IndirectReferenceTable::VisitRoots(RootCallback* callback, void* arg, uint32_t tid,
                                         RootType root_type) {
   for (auto ref : *this) {
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 168f9f2..fbd5714 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -331,6 +331,9 @@
     return Offset(OFFSETOF_MEMBER(IndirectReferenceTable, segment_state_));
   }
 
+  // Release pages past the end of the table that may have previously held references.
+  void Trim() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
  private:
   // Extract the table index from an indirect reference.
   static uint32_t ExtractIndex(IndirectRef iref) {
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index b17f303..b04a18b 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -327,37 +327,31 @@
 // Clang 3.4 fails to build the goto interpreter implementation.
 static constexpr InterpreterImplKind kInterpreterImplKind = kSwitchImpl;
 template<bool do_access_check, bool transaction_active>
-JValue ExecuteGotoImpl(Thread*, MethodHelper&, const DexFile::CodeItem*, ShadowFrame&, JValue) {
+JValue ExecuteGotoImpl(Thread*, const DexFile::CodeItem*, ShadowFrame&, JValue) {
   LOG(FATAL) << "UNREACHABLE";
   UNREACHABLE();
 }
 // Explicit definitions of ExecuteGotoImpl.
 template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, false>(Thread* self, MethodHelper& mh,
-                                    const DexFile::CodeItem* code_item,
+JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
 template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, false>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
+JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
                                      ShadowFrame& shadow_frame, JValue result_register);
 template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, true>(Thread* self, MethodHelper& mh,
-                                    const DexFile::CodeItem* code_item,
+JValue ExecuteGotoImpl<true, true>(Thread* self,  const DexFile::CodeItem* code_item,
+                                   ShadowFrame& shadow_frame, JValue result_register);
+template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
-template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, true>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
 #endif
 
-static JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
-                      ShadowFrame& shadow_frame, JValue result_register)
+static JValue Execute(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame,
+                      JValue result_register)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-static inline JValue Execute(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
+static inline JValue Execute(Thread* self, const DexFile::CodeItem* code_item,
                              ShadowFrame& shadow_frame, JValue result_register) {
-  DCHECK(shadow_frame.GetMethod() == mh.GetMethod() ||
-         shadow_frame.GetMethod()->GetDeclaringClass()->IsProxyClass());
   DCHECK(!shadow_frame.GetMethod()->IsAbstract());
   DCHECK(!shadow_frame.GetMethod()->IsNative());
   shadow_frame.GetMethod()->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
@@ -367,32 +361,32 @@
     // Enter the "without access check" interpreter.
     if (kInterpreterImplKind == kSwitchImpl) {
       if (transaction_active) {
-        return ExecuteSwitchImpl<false, true>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<false, true>(self, code_item, shadow_frame, result_register);
       } else {
-        return ExecuteSwitchImpl<false, false>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame, result_register);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
       if (transaction_active) {
-        return ExecuteGotoImpl<false, true>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteGotoImpl<false, true>(self, code_item, shadow_frame, result_register);
       } else {
-        return ExecuteGotoImpl<false, false>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteGotoImpl<false, false>(self, code_item, shadow_frame, result_register);
       }
     }
   } else {
     // Enter the "with access check" interpreter.
     if (kInterpreterImplKind == kSwitchImpl) {
       if (transaction_active) {
-        return ExecuteSwitchImpl<true, true>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, true>(self, code_item, shadow_frame, result_register);
       } else {
-        return ExecuteSwitchImpl<true, false>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteSwitchImpl<true, false>(self, code_item, shadow_frame, result_register);
       }
     } else {
       DCHECK_EQ(kInterpreterImplKind, kComputedGotoImplKind);
       if (transaction_active) {
-        return ExecuteGotoImpl<true, true>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteGotoImpl<true, true>(self, code_item, shadow_frame, result_register);
       } else {
-        return ExecuteGotoImpl<true, false>(self, mh, code_item, shadow_frame, result_register);
+        return ExecuteGotoImpl<true, false>(self, code_item, shadow_frame, result_register);
       }
     }
   }
@@ -473,9 +467,7 @@
     }
   }
   if (LIKELY(!method->IsNative())) {
-    StackHandleScope<1> hs(self);
-    MethodHelper mh(hs.NewHandle(method));
-    JValue r = Execute(self, mh, code_item, *shadow_frame, JValue());
+    JValue r = Execute(self, code_item, *shadow_frame, JValue());
     if (result != NULL) {
       *result = r;
     }
@@ -500,10 +492,8 @@
   value.SetJ(ret_val->GetJ());  // Set value to last known result in case the shadow frame chain is empty.
   while (shadow_frame != NULL) {
     self->SetTopOfShadowStack(shadow_frame);
-    StackHandleScope<1> hs(self);
-    MethodHelper mh(hs.NewHandle(shadow_frame->GetMethod()));
-    const DexFile::CodeItem* code_item = mh.GetMethod()->GetCodeItem();
-    value = Execute(self, mh, code_item, *shadow_frame, value);
+    const DexFile::CodeItem* code_item = shadow_frame->GetMethod()->GetCodeItem();
+    value = Execute(self, code_item, *shadow_frame, value);
     ShadowFrame* old_frame = shadow_frame;
     shadow_frame = shadow_frame->GetLink();
     delete old_frame;
@@ -511,8 +501,7 @@
   ret_val->SetJ(value.GetJ());
 }
 
-JValue EnterInterpreterFromEntryPoint(Thread* self, MethodHelper* mh,
-                                      const DexFile::CodeItem* code_item,
+JValue EnterInterpreterFromEntryPoint(Thread* self, const DexFile::CodeItem* code_item,
                                       ShadowFrame* shadow_frame) {
   DCHECK_EQ(self, Thread::Current());
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
@@ -521,11 +510,10 @@
     return JValue();
   }
 
-  return Execute(self, *mh, code_item, *shadow_frame, JValue());
+  return Execute(self, code_item, *shadow_frame, JValue());
 }
 
-extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper* mh,
-                                                  const DexFile::CodeItem* code_item,
+extern "C" void artInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
                                                   ShadowFrame* shadow_frame, JValue* result) {
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
@@ -534,10 +522,10 @@
   }
 
   self->PushShadowFrame(shadow_frame);
-  DCHECK_EQ(shadow_frame->GetMethod(), mh->Get());
   // Ensure static methods are initialized.
-  if (mh->Get()->IsStatic()) {
-    mirror::Class* declaring_class = mh->Get()->GetDeclaringClass();
+  const bool is_static = shadow_frame->GetMethod()->IsStatic();
+  if (is_static) {
+    mirror::Class* declaring_class = shadow_frame->GetMethod()->GetDeclaringClass();
     if (UNLIKELY(!declaring_class->IsInitialized())) {
       StackHandleScope<1> hs(self);
       HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
@@ -551,15 +539,15 @@
     }
   }
 
-  if (LIKELY(!mh->Get()->IsNative())) {
-    result->SetJ(Execute(self, *mh, code_item, *shadow_frame, JValue()).GetJ());
+  if (LIKELY(!shadow_frame->GetMethod()->IsNative())) {
+    result->SetJ(Execute(self, code_item, *shadow_frame, JValue()).GetJ());
   } else {
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
     CHECK(!Runtime::Current()->IsStarted());
-    Object* receiver = mh->Get()->IsStatic() ? nullptr : shadow_frame->GetVRegReference(0);
-    uint32_t* args = shadow_frame->GetVRegArgs(mh->Get()->IsStatic() ? 0 : 1);
-    UnstartedRuntimeJni(self, mh->Get(), receiver, args, result);
+    Object* receiver = is_static ? nullptr : shadow_frame->GetVRegReference(0);
+    uint32_t* args = shadow_frame->GetVRegArgs(is_static ? 0 : 1);
+    UnstartedRuntimeJni(self, shadow_frame->GetMethod(), receiver, args, result);
   }
 
   self->PopShadowFrame();
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index d327a71..7d634b3 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -27,7 +27,6 @@
 }  // namespace mirror
 
 union JValue;
-class MethodHelper;
 class ShadowFrame;
 class Thread;
 
@@ -42,21 +41,18 @@
                                            JValue* ret_val)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-extern JValue EnterInterpreterFromEntryPoint(Thread* self, MethodHelper* mh,
-                                             const DexFile::CodeItem* code_item,
+extern JValue EnterInterpreterFromEntryPoint(Thread* self, const DexFile::CodeItem* code_item,
                                              ShadowFrame* shadow_frame)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 
 }  // namespace interpreter
 
-extern "C" void artInterpreterToInterpreterBridge(Thread* self, MethodHelper* mh,
-                                                  const DexFile::CodeItem* code_item,
+extern "C" void artInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
                                                   ShadowFrame* shadow_frame, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, MethodHelper* mh,
-                                                   const DexFile::CodeItem* code_item,
+extern "C" void artInterpreterToCompiledCodeBridge(Thread* self, const DexFile::CodeItem* code_item,
                                                    ShadowFrame* shadow_frame, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index eb80c30..3c7db85 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -16,7 +16,6 @@
 
 #include "interpreter_common.h"
 
-#include "field_helper.h"
 #include "mirror/array-inl.h"
 
 namespace art {
@@ -267,8 +266,7 @@
           HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
           HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
           HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
-          FieldHelper fh(h_f);
-          field_class = fh.GetType();
+          field_class = h_f->GetType(true);
         }
         if (!reg->VerifierInstanceOf(field_class)) {
           // This should never happen.
@@ -505,14 +503,14 @@
   return found_dex_pc;
 }
 
-void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh) {
-  LOG(FATAL) << "Unexpected instruction: " << inst->DumpString(mh.GetMethod()->GetDexFile());
-  exit(0);  // Unreachable, keep GCC happy.
+void UnexpectedOpcode(const Instruction* inst, const ShadowFrame& shadow_frame) {
+  LOG(FATAL) << "Unexpected instruction: "
+             << inst->DumpString(shadow_frame.GetMethod()->GetDexFile());
+  UNREACHABLE();
 }
 
-static void UnstartedRuntimeInvoke(Thread* self, MethodHelper* mh,
-                                   const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
-                                   JValue* result, size_t arg_offset)
+static void UnstartedRuntimeInvoke(Thread* self, const DexFile::CodeItem* code_item,
+                                   ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 // Assign register 'src_reg' from shadow_frame to register 'dest_reg' into new_shadow_frame.
@@ -541,29 +539,28 @@
 }
 
 template<bool is_range, bool do_assignability_check>
-bool DoCall(ArtMethod* method, Thread* self, ShadowFrame& shadow_frame,
+bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result) {
   // Compute method information.
-  const DexFile::CodeItem* code_item = method->GetCodeItem();
+  const DexFile::CodeItem* code_item = called_method->GetCodeItem();
   const uint16_t num_ins = (is_range) ? inst->VRegA_3rc(inst_data) : inst->VRegA_35c(inst_data);
   uint16_t num_regs;
   if (LIKELY(code_item != NULL)) {
     num_regs = code_item->registers_size_;
     DCHECK_EQ(num_ins, code_item->ins_size_);
   } else {
-    DCHECK(method->IsNative() || method->IsProxyMethod());
+    DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
     num_regs = num_ins;
   }
 
   // Allocate shadow frame on the stack.
   const char* old_cause = self->StartAssertNoThreadSuspension("DoCall");
   void* memory = alloca(ShadowFrame::ComputeSize(num_regs));
-  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, method, 0, memory));
+  ShadowFrame* new_shadow_frame(ShadowFrame::Create(num_regs, &shadow_frame, called_method, 0,
+                                                    memory));
 
   // Initialize new shadow frame.
   const size_t first_dest_reg = num_regs - num_ins;
-  StackHandleScope<1> hs(self);
-  MethodHelper mh(hs.NewHandle(method));
   if (do_assignability_check) {
     // Slow path.
     // We might need to do class loading, which incurs a thread state change to kNative. So
@@ -573,11 +570,12 @@
 
     // We need to do runtime check on reference assignment. We need to load the shorty
     // to get the exact type of each reference argument.
-    const DexFile::TypeList* params = mh.Get()->GetParameterTypeList();
+    const DexFile::TypeList* params = new_shadow_frame->GetMethod()->GetParameterTypeList();
     uint32_t shorty_len = 0;
-    const char* shorty = mh.Get()->GetShorty(&shorty_len);
+    const char* shorty = new_shadow_frame->GetMethod()->GetShorty(&shorty_len);
 
-    // TODO: find a cleaner way to separate non-range and range information without duplicating code.
+    // TODO: find a cleaner way to separate non-range and range information without duplicating
+    //       code.
     uint32_t arg[5];  // only used in invoke-XXX.
     uint32_t vregC;   // only used in invoke-XXX-range.
     if (is_range) {
@@ -589,7 +587,7 @@
     // Handle receiver apart since it's not part of the shorty.
     size_t dest_reg = first_dest_reg;
     size_t arg_offset = 0;
-    if (!mh.Get()->IsStatic()) {
+    if (!new_shadow_frame->GetMethod()->IsStatic()) {
       size_t receiver_reg = is_range ? vregC : arg[0];
       new_shadow_frame->SetVRegReference(dest_reg, shadow_frame.GetVRegReference(receiver_reg));
       ++dest_reg;
@@ -602,7 +600,9 @@
         case 'L': {
           Object* o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != NULL) {
-            Class* arg_type = mh.GetClassFromTypeIdx(params->GetTypeItem(shorty_pos).type_idx_);
+            Class* arg_type =
+                new_shadow_frame->GetMethod()->GetClassFromTypeIndex(
+                    params->GetTypeItem(shorty_pos).type_idx_, true);
             if (arg_type == NULL) {
               CHECK(self->IsExceptionPending());
               return false;
@@ -613,7 +613,7 @@
               self->ThrowNewExceptionF(self->GetCurrentLocationForThrow(),
                                        "Ljava/lang/VirtualMachineError;",
                                        "Invoking %s with bad arg %d, type '%s' not instance of '%s'",
-                                       mh.Get()->GetName(), shorty_pos,
+                                       new_shadow_frame->GetMethod()->GetName(), shorty_pos,
                                        o->GetClass()->GetDescriptor(&temp1),
                                        arg_type->GetDescriptor(&temp2));
               return false;
@@ -650,7 +650,8 @@
       uint16_t regList = inst->Fetch16(2);
       uint16_t count = num_ins;
       if (count == 5) {
-        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U, (inst_data >> 8) & 0x0f);
+        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + 4U,
+                       (inst_data >> 8) & 0x0f);
         --count;
        }
       for (size_t arg_index = 0; arg_index < count; ++arg_index, regList >>= 4) {
@@ -662,17 +663,24 @@
 
   // Do the call now.
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    if (kIsDebugBuild && mh.Get()->GetEntryPointFromInterpreter() == nullptr) {
-      LOG(FATAL) << "Attempt to invoke non-executable method: " << PrettyMethod(mh.Get());
+    if (kIsDebugBuild && new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter() == nullptr) {
+      LOG(FATAL) << "Attempt to invoke non-executable method: "
+          << PrettyMethod(new_shadow_frame->GetMethod());
+      UNREACHABLE();
     }
     if (kIsDebugBuild && Runtime::Current()->GetInstrumentation()->IsForcedInterpretOnly() &&
-        !mh.Get()->IsNative() && !mh.Get()->IsProxyMethod() &&
-        mh.Get()->GetEntryPointFromInterpreter() == artInterpreterToCompiledCodeBridge) {
-      LOG(FATAL) << "Attempt to call compiled code when -Xint: " << PrettyMethod(mh.Get());
+        !new_shadow_frame->GetMethod()->IsNative() &&
+        !new_shadow_frame->GetMethod()->IsProxyMethod() &&
+        new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter()
+            == artInterpreterToCompiledCodeBridge) {
+      LOG(FATAL) << "Attempt to call compiled code when -Xint: "
+          << PrettyMethod(new_shadow_frame->GetMethod());
+      UNREACHABLE();
     }
-    (mh.Get()->GetEntryPointFromInterpreter())(self, &mh, code_item, new_shadow_frame, result);
+    (new_shadow_frame->GetMethod()->GetEntryPointFromInterpreter())(self, code_item,
+                                                                    new_shadow_frame, result);
   } else {
-    UnstartedRuntimeInvoke(self, &mh, code_item, new_shadow_frame, result, first_dest_reg);
+    UnstartedRuntimeInvoke(self, code_item, new_shadow_frame, result, first_dest_reg);
   }
   return !self->IsExceptionPending();
 }
@@ -813,8 +821,8 @@
   result->SetL(found);
 }
 
-static void UnstartedRuntimeInvoke(Thread* self, MethodHelper* mh,
-                                   const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame,
+static void UnstartedRuntimeInvoke(Thread* self,  const DexFile::CodeItem* code_item,
+                                   ShadowFrame* shadow_frame,
                                    JValue* result, size_t arg_offset) {
   // In a runtime that's not started we intercept certain methods to avoid complicated dependency
   // problems in core libraries.
@@ -934,7 +942,7 @@
     }
   } else {
     // Not special, continue with regular interpreter execution.
-    artInterpreterToInterpreterBridge(self, mh, code_item, shadow_frame, result);
+    artInterpreterToInterpreterBridge(self, code_item, shadow_frame, result);
   }
 }
 
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index f88d56a..ce7c1c3 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -33,7 +33,6 @@
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "handle_scope-inl.h"
-#include "method_helper-inl.h"
 #include "nth_caller_visitor.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method.h"
@@ -70,13 +69,11 @@
 // External references to both interpreter implementations.
 
 template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh,
-                                const DexFile::CodeItem* code_item,
+extern JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                                 ShadowFrame& shadow_frame, JValue result_register);
 
 template<bool do_access_check, bool transaction_active>
-extern JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh,
-                              const DexFile::CodeItem* code_item,
+extern JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item,
                               ShadowFrame& shadow_frame, JValue result_register);
 
 void ThrowNullPointerExceptionFromInterpreter(const ShadowFrame& shadow_frame)
@@ -100,7 +97,7 @@
 // DoInvokeVirtualQuick functions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<bool is_range, bool do_assignability_check>
-bool DoCall(ArtMethod* method, Thread* self, ShadowFrame& shadow_frame,
+bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
 // Handles invoke-XXX/range instructions.
@@ -112,19 +109,20 @@
   const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
   Object* receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
   mirror::ArtMethod* sf_method = shadow_frame.GetMethod();
-  ArtMethod* const method = FindMethodFromCode<type, do_access_check>(
+  ArtMethod* const called_method = FindMethodFromCode<type, do_access_check>(
       method_idx, &receiver, &sf_method, self);
   // The shadow frame should already be pushed, so we don't need to update it.
-  if (UNLIKELY(method == nullptr)) {
+  if (UNLIKELY(called_method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else if (UNLIKELY(method->IsAbstract())) {
-    ThrowAbstractMethodError(method);
+  } else if (UNLIKELY(called_method->IsAbstract())) {
+    ThrowAbstractMethodError(called_method);
     result->SetJ(0);
     return false;
   } else {
-    return DoCall<is_range, do_access_check>(method, self, shadow_frame, inst, inst_data, result);
+    return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
+                                             result);
   }
 }
 
@@ -144,18 +142,18 @@
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
   CHECK(receiver->GetClass()->ShouldHaveEmbeddedImtAndVTable());
-  ArtMethod* const method = receiver->GetClass()->GetEmbeddedVTableEntry(vtable_idx);
-  if (UNLIKELY(method == nullptr)) {
+  ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(vtable_idx);
+  if (UNLIKELY(called_method == nullptr)) {
     CHECK(self->IsExceptionPending());
     result->SetJ(0);
     return false;
-  } else if (UNLIKELY(method->IsAbstract())) {
-    ThrowAbstractMethodError(method);
+  } else if (UNLIKELY(called_method->IsAbstract())) {
+    ThrowAbstractMethodError(called_method);
     result->SetJ(0);
     return false;
   } else {
     // No need to check since we've been quickened.
-    return DoCall<is_range, false>(method, self, shadow_frame, inst, inst_data, result);
+    return DoCall<is_range, false>(called_method, self, shadow_frame, inst, inst_data, result);
   }
 }
 
@@ -351,12 +349,12 @@
     uint32_t dex_pc, const instrumentation::Instrumentation* instrumentation)
         SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-void UnexpectedOpcode(const Instruction* inst, MethodHelper& mh)
+void UnexpectedOpcode(const Instruction* inst, const ShadowFrame& shadow_frame)
   __attribute__((cold, noreturn))
   SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 static inline void TraceExecution(const ShadowFrame& shadow_frame, const Instruction* inst,
-                                  const uint32_t dex_pc, MethodHelper& mh)
+                                  const uint32_t dex_pc)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   constexpr bool kTracing = false;
   if (kTracing) {
@@ -364,7 +362,7 @@
     std::ostringstream oss;
     oss << PrettyMethod(shadow_frame.GetMethod())
         << StringPrintf("\n0x%x: ", dex_pc)
-        << inst->DumpString(mh.GetMethod()->GetDexFile()) << "\n";
+        << inst->DumpString(shadow_frame.GetMethod()->GetDexFile()) << "\n";
     for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
       uint32_t raw_value = shadow_frame.GetVReg(i);
       Object* ref_value = shadow_frame.GetVRegReference(i);
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 6350c56..c332a7b 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -26,7 +26,6 @@
 // - "inst_data" : the current instruction's first 16 bits.
 // - "dex_pc": the current pc.
 // - "shadow_frame": the current shadow frame.
-// - "mh": the current MethodHelper.
 // - "currentHandlersTable": the current table of pointer to each instruction handler.
 
 // Advance to the next instruction and updates interpreter state.
@@ -36,7 +35,7 @@
     inst = inst->RelativeAt(disp);                                          \
     dex_pc = static_cast<uint32_t>(static_cast<int32_t>(dex_pc) + disp);    \
     shadow_frame.SetDexPC(dex_pc);                                          \
-    TraceExecution(shadow_frame, inst, dex_pc, mh);                         \
+    TraceExecution(shadow_frame, inst, dex_pc);                             \
     inst_data = inst->Fetch16(0);                                           \
     goto *currentHandlersTable[inst->Opcode(inst_data)];                    \
   } while (false)
@@ -59,6 +58,7 @@
   do {                                          \
     if (kIsDebugBuild) {                        \
       LOG(FATAL) << "We should not be here !";  \
+      UNREACHABLE();                            \
     }                                           \
   } while (false)
 
@@ -111,8 +111,8 @@
  *
  */
 template<bool do_access_check, bool transaction_active>
-JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
-                       ShadowFrame& shadow_frame, JValue result_register) {
+JValue ExecuteGotoImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame,
+                       JValue result_register) {
   // Define handler tables:
   // - The main handler table contains execution handlers for each instruction.
   // - The alternative handler table contains prelude handlers which check for thread suspend and
@@ -2279,103 +2279,103 @@
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_3E)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_3F)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_40)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_41)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_42)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_43)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_79)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_7A)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_EF)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F0)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F1)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F2)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F3)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F4)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F5)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F6)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F7)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F8)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_F9)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_FA)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_FB)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_FC)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_FD)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_FE)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   HANDLE_INSTRUCTION_START(UNUSED_FF)
-    UnexpectedOpcode(inst, mh);
+    UnexpectedOpcode(inst, shadow_frame);
   HANDLE_INSTRUCTION_END();
 
   exception_pending_label: {
@@ -2430,21 +2430,17 @@
 
 // Explicit definitions of ExecuteGotoImpl.
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
-JValue ExecuteGotoImpl<true, false>(Thread* self, MethodHelper& mh,
-                                    const DexFile::CodeItem* code_item,
+JValue ExecuteGotoImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
-JValue ExecuteGotoImpl<false, false>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
+JValue ExecuteGotoImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
                                      ShadowFrame& shadow_frame, JValue result_register);
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<true, true>(Thread* self, MethodHelper& mh,
-                                    const DexFile::CodeItem* code_item,
+JValue ExecuteGotoImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
+                                   ShadowFrame& shadow_frame, JValue result_register);
+template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+JValue ExecuteGotoImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
                                     ShadowFrame& shadow_frame, JValue result_register);
-template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteGotoImpl<false, true>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
-                                     ShadowFrame& shadow_frame, JValue result_register);
 
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 1b6f53e..f9bbfa1 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -57,7 +57,7 @@
   } while (false)
 
 template<bool do_access_check, bool transaction_active>
-JValue ExecuteSwitchImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
+JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item,
                          ShadowFrame& shadow_frame, JValue result_register) {
   bool do_assignability_check = do_access_check;
   if (UNLIKELY(!shadow_frame.HasReferenceArray())) {
@@ -82,7 +82,7 @@
   while (true) {
     dex_pc = inst->GetDexPc(insns);
     shadow_frame.SetDexPC(dex_pc);
-    TraceExecution(shadow_frame, inst, dex_pc, mh);
+    TraceExecution(shadow_frame, inst, dex_pc);
     inst_data = inst->Fetch16(0);
     switch (inst->Opcode(inst_data)) {
       case Instruction::NOP:
@@ -2140,27 +2140,23 @@
       case Instruction::UNUSED_EF ... Instruction::UNUSED_FF:
       case Instruction::UNUSED_79:
       case Instruction::UNUSED_7A:
-        UnexpectedOpcode(inst, mh);
+        UnexpectedOpcode(inst, shadow_frame);
     }
   }
 }  // NOLINT(readability/fn_size)
 
 // Explicit definitions of ExecuteSwitchImpl.
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
-JValue ExecuteSwitchImpl<true, false>(Thread* self, MethodHelper& mh,
-                                      const DexFile::CodeItem* code_item,
+JValue ExecuteSwitchImpl<true, false>(Thread* self, const DexFile::CodeItem* code_item,
                                       ShadowFrame& shadow_frame, JValue result_register);
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) HOT_ATTR
-JValue ExecuteSwitchImpl<false, false>(Thread* self, MethodHelper& mh,
-                                       const DexFile::CodeItem* code_item,
+JValue ExecuteSwitchImpl<false, false>(Thread* self, const DexFile::CodeItem* code_item,
                                        ShadowFrame& shadow_frame, JValue result_register);
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteSwitchImpl<true, true>(Thread* self, MethodHelper& mh,
-                                     const DexFile::CodeItem* code_item,
+JValue ExecuteSwitchImpl<true, true>(Thread* self, const DexFile::CodeItem* code_item,
                                      ShadowFrame& shadow_frame, JValue result_register);
 template SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-JValue ExecuteSwitchImpl<false, true>(Thread* self, MethodHelper& mh,
-                                      const DexFile::CodeItem* code_item,
+JValue ExecuteSwitchImpl<false, true>(Thread* self, const DexFile::CodeItem* code_item,
                                       ShadowFrame& shadow_frame, JValue result_register);
 
 }  // namespace interpreter
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index a5abce6..5d04fac 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -756,6 +756,11 @@
   }
 }
 
+void JavaVMExt::TrimGlobals() {
+  WriterMutexLock mu(Thread::Current(), globals_lock_);
+  globals_.Trim();
+}
+
 void JavaVMExt::VisitRoots(RootCallback* callback, void* arg) {
   Thread* self = Thread::Current();
   {
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 2957ba3..749b9fb 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -131,6 +131,9 @@
     return unchecked_functions_;
   }
 
+  void TrimGlobals() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      LOCKS_EXCLUDED(globals_lock_);
+
  private:
   Runtime* const runtime_;
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 1dcfcab..4797e69 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -566,7 +566,8 @@
     return soa.AddLocalReference<jobject>(decoded_obj);
   }
 
-  static void DeleteLocalRef(JNIEnv* env, jobject obj) {
+  static void DeleteLocalRef(JNIEnv* env, jobject obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     if (obj == nullptr) {
       return;
     }
diff --git a/runtime/method_helper-inl.h b/runtime/method_helper-inl.h
deleted file mode 100644
index 7a7949e..0000000
--- a/runtime/method_helper-inl.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_METHOD_HELPER_INL_H_
-#define ART_RUNTIME_METHOD_HELPER_INL_H_
-
-#include "method_helper.h"
-
-#include "class_linker.h"
-#include "mirror/object_array.h"
-#include "runtime.h"
-#include "thread-inl.h"
-
-namespace art {
-
-template <template <class T> class HandleKind>
-template <template <class T2> class HandleKind2>
-inline bool MethodHelperT<HandleKind>::HasSameNameAndSignature(MethodHelperT<HandleKind2>* other) {
-  const DexFile* dex_file = method_->GetDexFile();
-  const DexFile::MethodId& mid = dex_file->GetMethodId(GetMethod()->GetDexMethodIndex());
-  if (method_->GetDexCache() == other->method_->GetDexCache()) {
-    const DexFile::MethodId& other_mid =
-        dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
-    return mid.name_idx_ == other_mid.name_idx_ && mid.proto_idx_ == other_mid.proto_idx_;
-  }
-  const DexFile* other_dex_file = other->method_->GetDexFile();
-  const DexFile::MethodId& other_mid =
-      other_dex_file->GetMethodId(other->GetMethod()->GetDexMethodIndex());
-  if (!DexFileStringEquals(dex_file, mid.name_idx_, other_dex_file, other_mid.name_idx_)) {
-    return false;  // Name mismatch.
-  }
-  return dex_file->GetMethodSignature(mid) == other_dex_file->GetMethodSignature(other_mid);
-}
-
-template <template <class T> class HandleKind>
-inline mirror::Class* MethodHelperT<HandleKind>::GetClassFromTypeIdx(uint16_t type_idx,
-                                                                     bool resolve) {
-  mirror::ArtMethod* method = GetMethod();
-  mirror::Class* type = method->GetDexCacheResolvedType(type_idx);
-  if (type == nullptr && resolve) {
-    type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
-  }
-  return type;
-}
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_METHOD_HELPER_INL_H_
diff --git a/runtime/method_helper.cc b/runtime/method_helper.cc
deleted file mode 100644
index 81e1794..0000000
--- a/runtime/method_helper.cc
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "method_helper-inl.h"
-
-#include "class_linker.h"
-#include "dex_file-inl.h"
-#include "handle_scope-inl.h"
-#include "mirror/art_method-inl.h"
-#include "mirror/dex_cache.h"
-#include "runtime.h"
-
-namespace art {
-
-template <template <class T> class HandleKind>
-template <template <class T2> class HandleKind2>
-bool MethodHelperT<HandleKind>::HasSameSignatureWithDifferentClassLoaders(Thread* self,
-    MethodHelperT<HandleKind2>* other) {
-  {
-    StackHandleScope<1> hs(self);
-    Handle<mirror::Class> return_type(hs.NewHandle(GetMethod()->GetReturnType()));
-    if (UNLIKELY(other->GetMethod()->GetReturnType() != return_type.Get())) {
-      return false;
-    }
-  }
-  const DexFile::TypeList* types = method_->GetParameterTypeList();
-  const DexFile::TypeList* other_types = other->method_->GetParameterTypeList();
-  if (types == nullptr) {
-    return (other_types == nullptr) || (other_types->Size() == 0);
-  } else if (UNLIKELY(other_types == nullptr)) {
-    return types->Size() == 0;
-  }
-  uint32_t num_types = types->Size();
-  if (UNLIKELY(num_types != other_types->Size())) {
-    return false;
-  }
-  for (uint32_t i = 0; i < num_types; ++i) {
-    mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
-    mirror::Class* other_param_type =
-        other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
-    if (UNLIKELY(param_type != other_param_type)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <template <class T> class HandleKind>
-uint32_t MethodHelperT<HandleKind>::FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtMethod* method = GetMethod();
-  const DexFile* dexfile = method->GetDexFile();
-  if (dexfile == &other_dexfile) {
-    return method->GetDexMethodIndex();
-  }
-  const DexFile::MethodId& mid = dexfile->GetMethodId(method->GetDexMethodIndex());
-  const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
-  const DexFile::StringId* other_descriptor =
-      other_dexfile.FindStringId(mid_declaring_class_descriptor);
-  if (other_descriptor != nullptr) {
-    const DexFile::TypeId* other_type_id =
-        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
-    if (other_type_id != nullptr) {
-      const char* mid_name = dexfile->GetMethodName(mid);
-      const DexFile::StringId* other_name = other_dexfile.FindStringId(mid_name);
-      if (other_name != nullptr) {
-        uint16_t other_return_type_idx;
-        std::vector<uint16_t> other_param_type_idxs;
-        bool success = other_dexfile.CreateTypeList(
-            dexfile->GetMethodSignature(mid).ToString(), &other_return_type_idx,
-            &other_param_type_idxs);
-        if (success) {
-          const DexFile::ProtoId* other_sig =
-              other_dexfile.FindProtoId(other_return_type_idx, other_param_type_idxs);
-          if (other_sig != nullptr) {
-            const  DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
-                *other_type_id, *other_name, *other_sig);
-            if (other_mid != nullptr) {
-              return other_dexfile.GetIndexForMethodId(*other_mid);
-            }
-          }
-        }
-      }
-    }
-  }
-  return DexFile::kDexNoIndex;
-}
-
-template <template <typename> class HandleKind>
-uint32_t MethodHelperT<HandleKind>::FindDexMethodIndexInOtherDexFile(
-    const DexFile& other_dexfile, uint32_t name_and_signature_idx)
-    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::ArtMethod* method = GetMethod();
-  const DexFile* dexfile = method->GetDexFile();
-  const uint32_t dex_method_idx = method->GetDexMethodIndex();
-  const DexFile::MethodId& mid = dexfile->GetMethodId(dex_method_idx);
-  const DexFile::MethodId& name_and_sig_mid = other_dexfile.GetMethodId(name_and_signature_idx);
-  DCHECK_STREQ(dexfile->GetMethodName(mid), other_dexfile.GetMethodName(name_and_sig_mid));
-  DCHECK_EQ(dexfile->GetMethodSignature(mid), other_dexfile.GetMethodSignature(name_and_sig_mid));
-  if (dexfile == &other_dexfile) {
-    return dex_method_idx;
-  }
-  const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
-  const DexFile::StringId* other_descriptor =
-      other_dexfile.FindStringId(mid_declaring_class_descriptor);
-  if (other_descriptor != nullptr) {
-    const DexFile::TypeId* other_type_id =
-        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
-    if (other_type_id != nullptr) {
-      const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
-          *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
-          other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
-      if (other_mid != nullptr) {
-        return other_dexfile.GetIndexForMethodId(*other_mid);
-      }
-    }
-  }
-  return DexFile::kDexNoIndex;
-}
-
-// Instantiate methods.
-template
-uint32_t MethodHelperT<Handle>::FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile);
-template
-uint32_t MethodHelperT<MutableHandle>::FindDexMethodIndexInOtherDexFile(
-    const DexFile& other_dexfile);
-
-template
-uint32_t MethodHelperT<Handle>::FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
-                                                                 uint32_t name_and_signature_idx);
-template
-uint32_t MethodHelperT<MutableHandle>::FindDexMethodIndexInOtherDexFile(
-    const DexFile& other_dexfile, uint32_t name_and_signature_idx);
-
-template
-bool MethodHelperT<Handle>::HasSameSignatureWithDifferentClassLoaders<Handle>(Thread* self,
-    MethodHelperT<Handle>* other);
-
-template
-bool MethodHelperT<Handle>::HasSameSignatureWithDifferentClassLoaders<MutableHandle>(Thread* self,
-    MethodHelperT<MutableHandle>* other);
-
-template
-bool MethodHelperT<MutableHandle>::HasSameSignatureWithDifferentClassLoaders<Handle>(Thread* self,
-    MethodHelperT<Handle>* other);
-
-template
-bool MethodHelperT<MutableHandle>::HasSameSignatureWithDifferentClassLoaders<MutableHandle>(
-    Thread* self, MethodHelperT<MutableHandle>* other);
-
-}  // namespace art
diff --git a/runtime/method_helper.h b/runtime/method_helper.h
deleted file mode 100644
index dc305d5..0000000
--- a/runtime/method_helper.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_METHOD_HELPER_H_
-#define ART_RUNTIME_METHOD_HELPER_H_
-
-#include "base/macros.h"
-#include "handle.h"
-#include "mirror/art_method.h"
-#include "primitive.h"
-
-namespace art {
-
-template <template <class T> class HandleKind>
-class MethodHelperT {
- public:
-  explicit MethodHelperT(HandleKind<mirror::ArtMethod> m)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) : method_(m), shorty_(nullptr), shorty_len_(0) {
-  }
-
-  mirror::ArtMethod* GetMethod() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_->GetInterfaceMethodIfProxy();
-  }
-
-  // GetMethod() != Get() for proxy methods.
-  mirror::ArtMethod* Get() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return method_.Get();
-  }
-
-  const char* GetShorty() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* result = shorty_;
-    if (result == nullptr) {
-      result = method_->GetShorty(&shorty_len_);
-      shorty_ = result;
-    }
-    return result;
-  }
-
-  uint32_t GetShortyLength() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    if (shorty_ == nullptr) {
-      GetShorty();
-    }
-    return shorty_len_;
-  }
-
-  // Counts the number of references in the parameter list of the corresponding method.
-  // Note: Thus does _not_ include "this" for non-static methods.
-  uint32_t GetNumberOfReferenceArgsWithoutReceiver() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const char* shorty = GetShorty();
-    uint32_t refs = 0;
-    for (uint32_t i = 1; i < shorty_len_ ; ++i) {
-      if (shorty[i] == 'L') {
-        refs++;
-      }
-    }
-
-    return refs;
-  }
-
-  size_t NumArgs() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // "1 +" because the first in Args is the receiver.
-    // "- 1" because we don't count the return type.
-    return (method_->IsStatic() ? 0 : 1) + GetShortyLength() - 1;
-  }
-
-  // Get the primitive type associated with the given parameter.
-  Primitive::Type GetParamPrimitiveType(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    CHECK_LT(param, NumArgs());
-    if (GetMethod()->IsStatic()) {
-      param++;  // 0th argument must skip return value at start of the shorty
-    } else if (param == 0) {
-      return Primitive::kPrimNot;
-    }
-    return Primitive::GetType(GetShorty()[param]);
-  }
-
-  // Is the specified parameter a long or double, where parameter 0 is 'this' for instance methods.
-  bool IsParamALongOrDouble(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    Primitive::Type type = GetParamPrimitiveType(param);
-    return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
-  }
-
-  // Is the specified parameter a reference, where parameter 0 is 'this' for instance methods.
-  bool IsParamAReference(size_t param) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return GetParamPrimitiveType(param) == Primitive::kPrimNot;
-  }
-
-  template <template <class T> class HandleKind2>
-  ALWAYS_INLINE bool HasSameNameAndSignature(MethodHelperT<HandleKind2>* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  template <template <class T> class HandleKind2>
-  bool HasSameSignatureWithDifferentClassLoaders(Thread* self, MethodHelperT<HandleKind2>* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  mirror::Class* GetClassFromTypeIdx(uint16_t type_idx, bool resolve = true)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
-  // The name_and_signature_idx MUST point to a MethodId with the same name and signature in the
-  // other_dexfile, such as the method index used to resolve this method in the other_dexfile.
-  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
-                                            uint32_t name_and_signature_idx)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
- protected:
-  HandleKind<mirror::ArtMethod> method_;
-
-  const char* shorty_;
-  uint32_t shorty_len_;
-
- private:
-  template <template <class T2> class HandleKind2> friend class MethodHelperT;
-
-  DISALLOW_COPY_AND_ASSIGN(MethodHelperT);
-};
-
-class MethodHelper : public MethodHelperT<Handle> {
-  using MethodHelperT<Handle>::MethodHelperT;
- private:
-  DISALLOW_COPY_AND_ASSIGN(MethodHelper);
-};
-
-class MutableMethodHelper : public MethodHelperT<MutableHandle> {
-  using MethodHelperT<MutableHandle>::MethodHelperT;
- public:
-  void ChangeMethod(mirror::ArtMethod* new_m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    DCHECK(new_m != nullptr);
-    SetMethod(new_m);
-    shorty_ = nullptr;
-  }
-
- private:
-  // Set the method_ field, for proxy methods looking up the interface method via the resolved
-  // methods table.
-  void SetMethod(mirror::ArtMethod* method) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    method_.Assign(method);
-  }
-
-  DISALLOW_COPY_AND_ASSIGN(MutableMethodHelper);
-};
-
-}  // namespace art
-
-#endif  // ART_RUNTIME_METHOD_HELPER_H_
diff --git a/runtime/mirror/art_field-inl.h b/runtime/mirror/art_field-inl.h
index 03425cc..2b406bd 100644
--- a/runtime/mirror/art_field-inl.h
+++ b/runtime/mirror/art_field-inl.h
@@ -20,6 +20,7 @@
 #include "art_field.h"
 
 #include "base/logging.h"
+#include "class_linker.h"
 #include "dex_cache.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jvalue.h"
@@ -289,6 +290,22 @@
   return GetTypeAsPrimitiveType() != Primitive::kPrimNot;
 }
 
+inline Class* ArtField::GetType(bool resolve) {
+  uint32_t field_index = GetDexFieldIndex();
+  if (UNLIKELY(GetDeclaringClass()->IsProxyClass())) {
+    return Runtime::Current()->GetClassLinker()->FindSystemClass(Thread::Current(),
+                                                                 GetTypeDescriptor());
+  }
+  const DexFile* dex_file = GetDexFile();
+  const DexFile::FieldId& field_id = dex_file->GetFieldId(field_index);
+  mirror::Class* type = GetDexCache()->GetResolvedType(field_id.type_idx_);
+  if (resolve && (type == nullptr)) {
+    type = Runtime::Current()->GetClassLinker()->ResolveType(field_id.type_idx_, this);
+    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
+  }
+  return type;
+}
+
 inline size_t ArtField::FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   return Primitive::ComponentSize(GetTypeAsPrimitiveType());
 }
diff --git a/runtime/mirror/art_field.h b/runtime/mirror/art_field.h
index 50299b6..a1d8844 100644
--- a/runtime/mirror/art_field.h
+++ b/runtime/mirror/art_field.h
@@ -161,6 +161,8 @@
 
   bool IsPrimitiveType() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  Class* GetType(bool resolve) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   size_t FieldSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 22d55e2..c29276a 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -24,7 +24,6 @@
 #include "class_linker.h"
 #include "dex_cache.h"
 #include "dex_file.h"
-#include "method_helper.h"
 #include "object-inl.h"
 #include "object_array.h"
 #include "oat.h"
@@ -141,6 +140,15 @@
   return GetDexCacheResolvedTypes() == other->GetDexCacheResolvedTypes();
 }
 
+inline mirror::Class* ArtMethod::GetClassFromTypeIndex(uint16_t type_idx, bool resolve) {
+  mirror::Class* type = GetDexCacheResolvedType(type_idx);
+  if (type == nullptr && resolve) {
+    type = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, this);
+    CHECK(type != nullptr || Thread::Current()->IsExceptionPending());
+  }
+  return type;
+}
+
 inline uint32_t ArtMethod::GetCodeSize() {
   DCHECK(!IsRuntimeMethod() && !IsProxyMethod()) << PrettyMethod(this);
   const void* code = EntryPointToCodePointer(GetEntryPointFromQuickCompiledCode());
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 4f5ca3f..1729686 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -29,7 +29,6 @@
 #include "interpreter/interpreter.h"
 #include "jni_internal.h"
 #include "mapping_table.h"
-#include "method_helper-inl.h"
 #include "object_array-inl.h"
 #include "object_array.h"
 #include "object-inl.h"
@@ -93,7 +92,7 @@
 
 void ArtMethod::SetClass(Class* java_lang_reflect_ArtMethod) {
   CHECK(java_lang_reflect_ArtMethod_.IsNull());
-  CHECK(java_lang_reflect_ArtMethod != NULL);
+  CHECK(java_lang_reflect_ArtMethod != nullptr);
   java_lang_reflect_ArtMethod_ = GcRoot<Class>(java_lang_reflect_ArtMethod);
 }
 
@@ -116,14 +115,31 @@
   return num_registers;
 }
 
+static bool HasSameNameAndSignature(ArtMethod* method1, ArtMethod* method2)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(Thread::Current(), "HasSameNameAndSignature");
+  const DexFile* dex_file = method1->GetDexFile();
+  const DexFile::MethodId& mid = dex_file->GetMethodId(method1->GetDexMethodIndex());
+  if (method1->GetDexCache() == method2->GetDexCache()) {
+    const DexFile::MethodId& mid2 = dex_file->GetMethodId(method2->GetDexMethodIndex());
+    return mid.name_idx_ == mid2.name_idx_ && mid.proto_idx_ == mid2.proto_idx_;
+  }
+  const DexFile* dex_file2 = method2->GetDexFile();
+  const DexFile::MethodId& mid2 = dex_file2->GetMethodId(method2->GetDexMethodIndex());
+  if (!DexFileStringEquals(dex_file, mid.name_idx_, dex_file2, mid2.name_idx_)) {
+    return false;  // Name mismatch.
+  }
+  return dex_file->GetMethodSignature(mid) == dex_file2->GetMethodSignature(mid2);
+}
+
 ArtMethod* ArtMethod::FindOverriddenMethod() {
   if (IsStatic()) {
-    return NULL;
+    return nullptr;
   }
   Class* declaring_class = GetDeclaringClass();
   Class* super_class = declaring_class->GetSuperClass();
   uint16_t method_index = GetMethodIndex();
-  ArtMethod* result = NULL;
+  ArtMethod* result = nullptr;
   // Did this method override a super class method? If so load the result from the super class'
   // vtable
   if (super_class->HasVTable() && method_index < super_class->GetVTableLength()) {
@@ -135,16 +151,13 @@
       CHECK_EQ(result,
                Runtime::Current()->GetClassLinker()->FindMethodForProxy(GetDeclaringClass(), this));
     } else {
-      StackHandleScope<2> hs(Thread::Current());
-      MethodHelper mh(hs.NewHandle(this));
-      MutableMethodHelper interface_mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
       IfTable* iftable = GetDeclaringClass()->GetIfTable();
-      for (size_t i = 0; i < iftable->Count() && result == NULL; i++) {
+      for (size_t i = 0; i < iftable->Count() && result == nullptr; i++) {
         Class* interface = iftable->GetInterface(i);
         for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-          interface_mh.ChangeMethod(interface->GetVirtualMethod(j));
-          if (mh.HasSameNameAndSignature(&interface_mh)) {
-            result = interface_mh.GetMethod();
+          mirror::ArtMethod* interface_method = interface->GetVirtualMethod(j);
+          if (HasSameNameAndSignature(this, interface_method)) {
+            result = interface_method;
             break;
           }
         }
@@ -152,14 +165,40 @@
     }
   }
   if (kIsDebugBuild) {
-    StackHandleScope<2> hs(Thread::Current());
-    MethodHelper result_mh(hs.NewHandle(result));
-    MethodHelper this_mh(hs.NewHandle(this));
-    DCHECK(result == nullptr || this_mh.HasSameNameAndSignature(&result_mh));
+    DCHECK(result == nullptr || HasSameNameAndSignature(this, result));
   }
   return result;
 }
 
+uint32_t ArtMethod::FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
+                                                     uint32_t name_and_signature_idx) {
+  const DexFile* dexfile = GetDexFile();
+  const uint32_t dex_method_idx = GetDexMethodIndex();
+  const DexFile::MethodId& mid = dexfile->GetMethodId(dex_method_idx);
+  const DexFile::MethodId& name_and_sig_mid = other_dexfile.GetMethodId(name_and_signature_idx);
+  DCHECK_STREQ(dexfile->GetMethodName(mid), other_dexfile.GetMethodName(name_and_sig_mid));
+  DCHECK_EQ(dexfile->GetMethodSignature(mid), other_dexfile.GetMethodSignature(name_and_sig_mid));
+  if (dexfile == &other_dexfile) {
+    return dex_method_idx;
+  }
+  const char* mid_declaring_class_descriptor = dexfile->StringByTypeIdx(mid.class_idx_);
+  const DexFile::StringId* other_descriptor =
+      other_dexfile.FindStringId(mid_declaring_class_descriptor);
+  if (other_descriptor != nullptr) {
+    const DexFile::TypeId* other_type_id =
+        other_dexfile.FindTypeId(other_dexfile.GetIndexForStringId(*other_descriptor));
+    if (other_type_id != nullptr) {
+      const DexFile::MethodId* other_mid = other_dexfile.FindMethodId(
+          *other_type_id, other_dexfile.GetStringId(name_and_sig_mid.name_idx_),
+          other_dexfile.GetProtoId(name_and_sig_mid.proto_idx_));
+      if (other_mid != nullptr) {
+        return other_dexfile.GetIndexForMethodId(*other_mid);
+      }
+    }
+  }
+  return DexFile::kDexNoIndex;
+}
+
 uint32_t ArtMethod::ToDexPc(const uintptr_t pc, bool abort_on_failure) {
   if (IsPortableCompiled()) {
     // Portable doesn't use the machine pc, we just use dex pc instead.
@@ -228,7 +267,6 @@
 
 uint32_t ArtMethod::FindCatchBlock(Handle<ArtMethod> h_this, Handle<Class> exception_type,
                                    uint32_t dex_pc, bool* has_no_move_exception) {
-  MethodHelper mh(h_this);
   const DexFile::CodeItem* code_item = h_this->GetCodeItem();
   // Set aside the exception while we resolve its type.
   Thread* self = Thread::Current();
@@ -248,7 +286,7 @@
       break;
     }
     // Does this catch exception type apply?
-    Class* iter_exception_type = mh.GetClassFromTypeIdx(iter_type_idx);
+    Class* iter_exception_type = h_this->GetClassFromTypeIndex(iter_type_idx, true);
     if (UNLIKELY(iter_exception_type == nullptr)) {
       // Now have a NoClassDefFoundError as exception. Ignore in case the exception class was
       // removed by a pro-guard like tool.
@@ -420,7 +458,7 @@
       }
     } else {
       LOG(INFO) << "Not invoking '" << PrettyMethod(this) << "' code=null";
-      if (result != NULL) {
+      if (result != nullptr) {
         result->SetJ(0);
       }
     }
@@ -430,6 +468,21 @@
   self->PopManagedStackFragment(fragment);
 }
 
+// Counts the number of references in the parameter list of the corresponding method.
+// Note: Thus does _not_ include "this" for non-static methods.
+static uint32_t GetNumberOfReferenceArgsWithoutReceiver(ArtMethod* method)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  uint32_t shorty_len;
+  const char* shorty = method->GetShorty(&shorty_len);
+  uint32_t refs = 0;
+  for (uint32_t i = 1; i < shorty_len ; ++i) {
+    if (shorty[i] == 'L') {
+      refs++;
+    }
+  }
+  return refs;
+}
+
 QuickMethodFrameInfo ArtMethod::GetQuickFrameInfo() {
   if (UNLIKELY(IsPortableCompiled())) {
     // Portable compiled dex bytecode or jni stub.
@@ -472,8 +525,7 @@
     // Generic JNI frame.
     DCHECK(IsNative());
     StackHandleScope<1> hs(Thread::Current());
-    uint32_t handle_refs =
-        MethodHelper(hs.NewHandle(this)).GetNumberOfReferenceArgsWithoutReceiver() + 1;
+    uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(this) + 1;
     size_t scope_size = HandleScope::SizeOf(handle_refs);
     QuickMethodFrameInfo callee_info = runtime->GetCalleeSaveMethodFrameInfo(Runtime::kRefsAndArgs);
 
@@ -493,7 +545,7 @@
 void ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
   CHECK(IsNative()) << PrettyMethod(this);
   CHECK(!IsFastNative()) << PrettyMethod(this);
-  CHECK(native_method != NULL) << PrettyMethod(this);
+  CHECK(native_method != nullptr) << PrettyMethod(this);
   if (is_fast) {
     SetAccessFlags(GetAccessFlags() | kAccFastNative);
   }
diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h
index da494e0..2107944 100644
--- a/runtime/mirror/art_method.h
+++ b/runtime/mirror/art_method.h
@@ -32,15 +32,14 @@
 struct ArtMethodOffsets;
 struct ConstructorMethodOffsets;
 union JValue;
-class MethodHelper;
 class ScopedObjectAccessAlreadyRunnable;
 class StringPiece;
 class ShadowFrame;
 
 namespace mirror {
 
-typedef void (EntryPointFromInterpreter)(Thread* self, MethodHelper* mh,
-    const DexFile::CodeItem* code_item, ShadowFrame* shadow_frame, JValue* result);
+typedef void (EntryPointFromInterpreter)(Thread* self, const DexFile::CodeItem* code_item,
+                                         ShadowFrame* shadow_frame, JValue* result);
 
 #define ART_METHOD_HAS_PADDING_FIELD_ON_64_BIT
 
@@ -238,9 +237,21 @@
   bool HasSameDexCacheResolvedTypes(ObjectArray<Class>* other_cache)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  // Find the method that this method overrides
+  // Get the Class* from the type index into this method's dex cache.
+  mirror::Class* GetClassFromTypeIndex(uint16_t type_idx, bool resolve)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Find the method that this method overrides.
   ArtMethod* FindOverriddenMethod() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Find the method index for this method within other_dexfile. If this method isn't present then
+  // return DexFile::kDexNoIndex. The name_and_signature_idx MUST refer to a MethodId with the same
+  // name and signature in the other_dexfile, such as the method index used to resolve this method
+  // in the other_dexfile.
+  uint32_t FindDexMethodIndexInOtherDexFile(const DexFile& other_dexfile,
+                                            uint32_t name_and_signature_idx)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   void Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result, const char* shorty)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 599f178..1662ebf 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -575,6 +575,10 @@
                                                              allocator_type, VoidFunctor());
   if (add_finalizer && LIKELY(obj != nullptr)) {
     heap->AddFinalizerReference(self, &obj);
+    if (UNLIKELY(self->IsExceptionPending())) {
+      // Failed to allocate finalizer reference, it means that the whole allocation failed.
+      obj = nullptr;
+    }
   }
   return obj;
 }
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 5665059..bd3bfbf 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -149,6 +149,7 @@
 
 void Class::SetDexCache(DexCache* new_dex_cache) {
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache);
+  SetDexCacheStrings(new_dex_cache != nullptr ? new_dex_cache->GetStrings() : nullptr);
 }
 
 void Class::SetClassSize(uint32_t new_class_size) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 2fc5ffb..bd49754 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -29,6 +29,10 @@
 #include "read_barrier_option.h"
 #include "utils.h"
 
+#ifndef IMT_SIZE
+#error IMT_SIZE not defined
+#endif
+
 namespace art {
 
 struct ClassOffsets;
@@ -58,7 +62,7 @@
   // Interface method table size. Increasing this value reduces the chance of two interface methods
   // colliding in the interface method table but increases the size of classes that implement
   // (non-marker) interfaces.
-  static constexpr size_t kImtSize = 64;
+  static constexpr size_t kImtSize = IMT_SIZE;
 
   // imtable entry embedded in class object.
   struct MANAGED ImTableEntry {
@@ -654,6 +658,7 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   DexCache* GetDexCache() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Also updates the dex_cache_strings_ variable from new_dex_cache.
   void SetDexCache(DexCache* new_dex_cache) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ObjectArray<ArtMethod>* GetDirectMethods()
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index fa1f226..65d6ade 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -24,7 +24,6 @@
 #include "class.h"
 #include "class-inl.h"
 #include "class_linker-inl.h"
-#include "field_helper.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
 #include "iftable-inl.h"
@@ -202,12 +201,16 @@
     if (fields != NULL) {
       size_t num_ifields = fields->GetLength();
       for (size_t i = 0; i < num_ifields; ++i) {
+        StackHandleScope<1> hs(Thread::Current());
+        Handle<Object> h_object(hs.NewHandle(new_value));
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
           CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
-          StackHandleScope<1> hs(Thread::Current());
-          FieldHelper fh(hs.NewHandle(field));
-          CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
+          // TODO: resolve the field type for moving GC.
+          mirror::Class* field_type = field->GetType(!kMovingCollector);
+          if (field_type != nullptr) {
+            CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
+          }
           return;
         }
       }
@@ -225,9 +228,11 @@
         ArtField* field = fields->Get(i);
         if (field->GetOffset().Int32Value() == field_offset.Int32Value()) {
           CHECK_NE(field->GetTypeAsPrimitiveType(), Primitive::kPrimNot);
-          StackHandleScope<1> hs(Thread::Current());
-          FieldHelper fh(hs.NewHandle(field));
-          CHECK(fh.GetType()->IsAssignableFrom(new_value->GetClass()));
+          // TODO: resolve the field type for moving GC.
+          mirror::Class* field_type = field->GetType(!kMovingCollector);
+          if (field_type != nullptr) {
+            CHECK(field_type->IsAssignableFrom(new_value->GetClass()));
+          }
           return;
         }
       }
@@ -235,6 +240,7 @@
   }
   LOG(FATAL) << "Failed to find field for assignment to " << reinterpret_cast<void*>(this)
       << " of type " << PrettyDescriptor(c) << " at offset " << field_offset;
+  UNREACHABLE();
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 4402031..9d789cd 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -34,7 +34,6 @@
 #include "gc/heap.h"
 #include "handle_scope-inl.h"
 #include "iftable-inl.h"
-#include "method_helper-inl.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
 #include "scoped_thread_state_change.h"
@@ -528,26 +527,6 @@
   EXPECT_STREQ(m3_2->GetName(), "m3");
   ArtMethod* m4_2 = klass2->GetVirtualMethod(3);
   EXPECT_STREQ(m4_2->GetName(), "m4");
-
-  MutableMethodHelper mh(hs.NewHandle(m1_1));
-  MutableMethodHelper mh2(hs.NewHandle(m1_2));
-  EXPECT_TRUE(mh.HasSameNameAndSignature(&mh2));
-  EXPECT_TRUE(mh2.HasSameNameAndSignature(&mh));
-
-  mh.ChangeMethod(m2_1);
-  mh2.ChangeMethod(m2_2);
-  EXPECT_TRUE(mh.HasSameNameAndSignature(&mh2));
-  EXPECT_TRUE(mh2.HasSameNameAndSignature(&mh));
-
-  mh.ChangeMethod(m3_1);
-  mh2.ChangeMethod(m3_2);
-  EXPECT_TRUE(mh.HasSameNameAndSignature(&mh2));
-  EXPECT_TRUE(mh2.HasSameNameAndSignature(&mh));
-
-  mh.ChangeMethod(m4_1);
-  mh2.ChangeMethod(m4_2);
-  EXPECT_TRUE(mh.HasSameNameAndSignature(&mh2));
-  EXPECT_TRUE(mh2.HasSameNameAndSignature(&mh));
 }
 
 TEST_F(ObjectTest, StringHashCode) {
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index a042620..2cebf02 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -20,7 +20,6 @@
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
-#include "field_helper.h"
 #include "jni_internal.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
@@ -294,9 +293,8 @@
     StackHandleScope<2> hs(soa.Self());
     HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o));
     HandleWrapper<mirror::ArtField> h_f(hs.NewHandleWrapper(&f));
-    FieldHelper fh(h_f);
     // May cause resolution.
-    field_type = fh.GetType(true);
+    field_type = h_f->GetType(true);
     if (field_type == nullptr) {
       DCHECK(soa.Self()->IsExceptionPending());
       return;
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index ffadfc6..1775468 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -28,10 +28,8 @@
 
 static const char* GetMethodShorty(JNIEnv* env, jmethodID mid) {
   ScopedObjectAccess soa(env);
-  StackHandleScope<1> scope(soa.Self());
   mirror::ArtMethod* m = soa.DecodeMethod(mid);
-  MethodHelper mh(scope.NewHandle(m));
-  return mh.GetShorty();
+  return m->GetShorty();
 }
 
 static uint32_t GetNativeMethodCount(JNIEnv* env, jclass clazz) {
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 1eded62..3260992 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -18,7 +18,6 @@
 #include <vector>
 
 #include "common_compiler_test.h"
-#include "field_helper.h"
 #include "mirror/art_field-inl.h"
 #include "scoped_thread_state_change.h"
 
@@ -184,21 +183,20 @@
 
   // Test "Class[] interfaces" field.
   MutableHandle<mirror::ArtField> fhandle = hs.NewHandle(static_fields->Get(0));
-  FieldHelper fh(fhandle);
-  EXPECT_EQ("interfaces", std::string(fh.GetField()->GetName()));
-  EXPECT_EQ("[Ljava/lang/Class;", std::string(fh.GetField()->GetTypeDescriptor()));
-  EXPECT_EQ(interfacesFieldClass.Get(), fh.GetType());
-  EXPECT_EQ("L$Proxy1234;", std::string(fh.GetDeclaringClassDescriptor()));
-  EXPECT_FALSE(fh.GetField()->IsPrimitiveType());
+  EXPECT_EQ("interfaces", std::string(fhandle->GetName()));
+  EXPECT_EQ("[Ljava/lang/Class;", std::string(fhandle->GetTypeDescriptor()));
+  EXPECT_EQ(interfacesFieldClass.Get(), fhandle->GetType(true));
+  std::string temp;
+  EXPECT_EQ("L$Proxy1234;", std::string(fhandle->GetDeclaringClass()->GetDescriptor(&temp)));
+  EXPECT_FALSE(fhandle->IsPrimitiveType());
 
   // Test "Class[][] throws" field.
   fhandle.Assign(static_fields->Get(1));
-  FieldHelper fh2(fhandle);
-  EXPECT_EQ("throws", std::string(fh2.GetField()->GetName()));
-  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fh2.GetField()->GetTypeDescriptor()));
-  EXPECT_EQ(throwsFieldClass.Get(), fh2.GetType());
-  EXPECT_EQ("L$Proxy1234;", std::string(fh2.GetDeclaringClassDescriptor()));
-  EXPECT_FALSE(fh2.GetField()->IsPrimitiveType());
+  EXPECT_EQ("throws", std::string(fhandle->GetName()));
+  EXPECT_EQ("[[Ljava/lang/Class;", std::string(fhandle->GetTypeDescriptor()));
+  EXPECT_EQ(throwsFieldClass.Get(), fhandle->GetType(true));
+  EXPECT_EQ("L$Proxy1234;", std::string(fhandle->GetDeclaringClass()->GetDescriptor(&temp)));
+  EXPECT_FALSE(fhandle->IsPrimitiveType());
 }
 
 }  // namespace art
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 44d1bc4..85f9938 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -21,7 +21,6 @@
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
 #include "jni_internal.h"
-#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
@@ -220,9 +219,10 @@
   }
 
   bool BuildArgArrayFromObjectArray(mirror::Object* receiver,
-                                    mirror::ObjectArray<mirror::Object>* args, MethodHelper& mh)
+                                    mirror::ObjectArray<mirror::Object>* args,
+                                    Handle<mirror::ArtMethod> h_m)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    const DexFile::TypeList* classes = mh.GetMethod()->GetParameterTypeList();
+    const DexFile::TypeList* classes = h_m->GetParameterTypeList();
     // Set receiver if non-null (method is not static)
     if (receiver != nullptr) {
       Append(receiver);
@@ -231,11 +231,11 @@
       mirror::Object* arg = args->Get(args_offset);
       if (((shorty_[i] == 'L') && (arg != nullptr)) || ((arg == nullptr && shorty_[i] != 'L'))) {
         mirror::Class* dst_class =
-            mh.GetClassFromTypeIdx(classes->GetTypeItem(args_offset).type_idx_);
+            h_m->GetClassFromTypeIndex(classes->GetTypeItem(args_offset).type_idx_, true);
         if (UNLIKELY(arg == nullptr || !arg->InstanceOf(dst_class))) {
           ThrowIllegalArgumentException(nullptr,
               StringPrintf("method %s argument %zd has type %s, got %s",
-                  PrettyMethod(mh.GetMethod(), false).c_str(),
+                  PrettyMethod(h_m.Get(), false).c_str(),
                   args_offset + 1,  // Humans don't count from 0.
                   PrettyDescriptor(dst_class).c_str(),
                   PrettyTypeOf(arg).c_str()).c_str());
@@ -263,7 +263,7 @@
             } else { \
               ThrowIllegalArgumentException(nullptr, \
                   StringPrintf("method %s argument %zd has type %s, got %s", \
-                      PrettyMethod(mh.GetMethod(), false).c_str(), \
+                      PrettyMethod(h_m.Get(), false).c_str(), \
                       args_offset + 1, \
                       expected, \
                       PrettyTypeOf(arg).c_str()).c_str()); \
@@ -329,6 +329,7 @@
 #ifndef NDEBUG
         default:
           LOG(FATAL) << "Unexpected shorty character: " << shorty_[i];
+          UNREACHABLE();
 #endif
       }
 #undef DO_FIRST_ARG
@@ -360,14 +361,13 @@
   if (!m->IsStatic()) {
     offset = 1;
   }
-  // TODO: If args contain object references, it may cause problems
+  // TODO: If args contain object references, it may cause problems.
   Thread* self = Thread::Current();
   StackHandleScope<1> hs(self);
   Handle<mirror::ArtMethod> h_m(hs.NewHandle(m));
-  MethodHelper mh(h_m);
   for (uint32_t i = 0; i < num_params; i++) {
     uint16_t type_idx = params->GetTypeItem(i).type_idx_;
-    mirror::Class* param_type = mh.GetClassFromTypeIdx(type_idx);
+    mirror::Class* param_type = h_m->GetClassFromTypeIndex(type_idx, true);
     if (param_type == nullptr) {
       CHECK(self->IsExceptionPending());
       LOG(ERROR) << "Internal error: unresolvable type for argument type in JNI invoke: "
@@ -528,7 +528,7 @@
 }
 
 void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
-                           MethodHelper* mh, JValue* result) {
+                           JValue* result) {
   // We want to make sure that the stack is not within a small distance from the
   // protected region in case we are calling into a leaf function whose stack
   // check has been elided.
@@ -536,11 +536,12 @@
     ThrowStackOverflowError(self);
     return;
   }
-
-  ArgArray arg_array(mh->GetShorty(), mh->GetShortyLength());
+  uint32_t shorty_len;
+  const char* shorty = shadow_frame->GetMethod()->GetShorty(&shorty_len);
+  ArgArray arg_array(shorty, shorty_len);
   arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
   shadow_frame->GetMethod()->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result,
-                                    mh->GetShorty());
+                                    shorty);
 }
 
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
@@ -571,7 +572,7 @@
     // Check that the receiver is non-null and an instance of the field's declaring class.
     receiver = soa.Decode<mirror::Object*>(javaReceiver);
     if (!VerifyObjectIsClass(receiver, declaring_class)) {
-      return NULL;
+      return nullptr;
     }
 
     // Find the actual implementation of the virtual method.
@@ -585,10 +586,10 @@
   uint32_t classes_size = (classes == nullptr) ? 0 : classes->Size();
   uint32_t arg_count = (objects != nullptr) ? objects->GetLength() : 0;
   if (arg_count != classes_size) {
-    ThrowIllegalArgumentException(NULL,
+    ThrowIllegalArgumentException(nullptr,
                                   StringPrintf("Wrong number of arguments; expected %d, got %d",
                                                classes_size, arg_count).c_str());
-    return NULL;
+    return nullptr;
   }
 
   // If method is not set to be accessible, verify it can be accessed by the caller.
@@ -611,8 +612,8 @@
   const char* shorty = m->GetShorty(&shorty_len);
   ArgArray arg_array(shorty, shorty_len);
   StackHandleScope<1> hs(soa.Self());
-  MethodHelper mh(hs.NewHandle(m));
-  if (!arg_array.BuildArgArrayFromObjectArray(receiver, objects, mh)) {
+  Handle<mirror::ArtMethod> h_m(hs.NewHandle(m));
+  if (!arg_array.BuildArgArrayFromObjectArray(receiver, objects, h_m)) {
     CHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
@@ -627,22 +628,21 @@
     jmethodID mid = soa.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
     jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th);
     soa.Env()->Throw(reinterpret_cast<jthrowable>(exception_instance));
-    return NULL;
+    return nullptr;
   }
 
   // Box if necessary and return.
-  return soa.AddLocalReference<jobject>(
-      BoxPrimitive(Primitive::GetType(mh.GetMethod()->GetReturnTypeDescriptor()[0]), result));
+  return soa.AddLocalReference<jobject>(BoxPrimitive(Primitive::GetType(shorty[0]), result));
 }
 
 bool VerifyObjectIsClass(mirror::Object* o, mirror::Class* c) {
-  if (o == NULL) {
-    ThrowNullPointerException(NULL, "null receiver");
+  if (o == nullptr) {
+    ThrowNullPointerException(nullptr, "null receiver");
     return false;
   } else if (!o->InstanceOf(c)) {
     std::string expected_class_name(PrettyDescriptor(c));
     std::string actual_class_name(PrettyTypeOf(o));
-    ThrowIllegalArgumentException(NULL,
+    ThrowIllegalArgumentException(nullptr,
                                   StringPrintf("Expected receiver of type %s, but got %s",
                                                expected_class_name.c_str(),
                                                actual_class_name.c_str()).c_str());
diff --git a/runtime/reflection.h b/runtime/reflection.h
index f9a7951..1a64871 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -29,7 +29,6 @@
   class Object;
 }  // namespace mirror
 union JValue;
-class MethodHelper;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class ThrowLocation;
@@ -65,7 +64,7 @@
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
-                           MethodHelper* mh, JValue* result)
+                           JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject method, jobject receiver,
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index e30e745..7aed8b0 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -69,7 +69,12 @@
   // Cannot use this code to change into Runnable as changing to Runnable should fail if
   // old_state_and_flags.suspend_request is true.
   DCHECK_NE(new_state, kRunnable);
-  DCHECK_EQ(this, Thread::Current());
+  if (kIsDebugBuild && this != Thread::Current()) {
+    std::string name;
+    GetThreadName(name);
+    LOG(FATAL) << "Thread \"" << name << "\"(" << this << " != Thread::Current()="
+               << Thread::Current() << ") changing state to " << new_state;
+  }
   union StateAndFlags old_state_and_flags;
   old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
   tls32_.state_and_flags.as_struct.state = new_state;
diff --git a/runtime/thread.h b/runtime/thread.h
index b69d2f4..5b3e746 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1189,7 +1189,7 @@
 
  private:
   Thread* const self_;
-  const char* old_cause_;
+  const char* const old_cause_;
 };
 
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 5ff90d6..beafcda 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -168,7 +168,9 @@
     const uint32_t kWaitTimeoutMs = 10000;
     bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kWaitTimeoutMs);
     if (timed_out) {
-      LOG(kIsDebugBuild ? FATAL : ERROR) << "Unexpected time out during dump checkpoint.";
+      // Avoid a recursive abort.
+      LOG((kIsDebugBuild && (gAborting == 0)) ? FATAL : ERROR)
+          << "Unexpected time out during dump checkpoint.";
     }
   }
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 2cc50b3..b510844 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -735,7 +735,9 @@
   if (the_trace_ != nullptr) {
     std::string name;
     thread->GetThreadName(name);
-    the_trace_->exited_threads_.Put(thread->GetTid(), name);
+    // The same thread/tid may be used multiple times. As SafeMap::Put does not allow to override
+    // a previous mapping, use SafeMap::Overwrite.
+    the_trace_->exited_threads_.Overwrite(thread->GetTid(), name);
   }
 }
 
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ad46be6..1211547 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -28,7 +28,6 @@
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
-#include "field_helper.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class-inl.h"
@@ -324,8 +323,8 @@
     result += PrettyDescriptor(f->GetTypeDescriptor());
     result += ' ';
   }
-  StackHandleScope<1> hs(Thread::Current());
-  result += PrettyDescriptor(FieldHelper(hs.NewHandle(f)).GetDeclaringClassDescriptor());
+  std::string temp;
+  result += PrettyDescriptor(f->GetDeclaringClass()->GetDescriptor(&temp));
   result += '.';
   result += f->GetName();
   return result;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index a10c7cb..1b3cc8f 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -25,12 +25,10 @@
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex_instruction_visitor.h"
-#include "field_helper.h"
 #include "gc/accounting/card_table-inl.h"
 #include "indenter.h"
 #include "intern_table.h"
 #include "leb128.h"
-#include "method_helper-inl.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/art_method-inl.h"
 #include "mirror/class.h"
@@ -3827,7 +3825,7 @@
     {
       StackHandleScope<1> hs(self_);
       HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
-      field_type_class = FieldHelper(h_field).GetType(can_load_classes_);
+      field_type_class = h_field->GetType(can_load_classes_);
     }
     if (field_type_class != nullptr) {
       field_type = &reg_types_.FromClass(field->GetTypeDescriptor(), field_type_class,
@@ -3951,7 +3949,7 @@
     {
       StackHandleScope<1> hs(Thread::Current());
       HandleWrapper<mirror::ArtField> h_field(hs.NewHandleWrapper(&field));
-      field_type_class = FieldHelper(h_field).GetType(can_load_classes_);
+      field_type_class = h_field->GetType(can_load_classes_);
     }
 
     if (field_type_class != nullptr) {
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index c2877be..544cbc5 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -294,20 +294,20 @@
     assert(!env->ExceptionCheck());
 
     // Create a string object.
-    jobject library_string = env->NewStringUTF("arttest");
+    jobject library_string = env->NewStringUTF("non_existing_library");
     assert(library_string != nullptr);
     assert(!env->ExceptionCheck());
 
     env->CallStaticVoidMethod(system_clazz, loadLibraryMethodId, library_string);
-    if (env->ExceptionCheck()) {
-      // At most we expect UnsatisfiedLinkError.
-      jthrowable thrown = env->ExceptionOccurred();
-      env->ExceptionClear();
+    assert(env->ExceptionCheck());
 
-      jclass unsatisfied_link_error_clazz = env->FindClass("java/lang/UnsatisfiedLinkError");
-      jclass thrown_class = env->GetObjectClass(thrown);
-      assert(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
-    }
+    // We expect UnsatisfiedLinkError.
+    jthrowable thrown = env->ExceptionOccurred();
+    env->ExceptionClear();
+
+    jclass unsatisfied_link_error_clazz = env->FindClass("java/lang/UnsatisfiedLinkError");
+    jclass thrown_class = env->GetObjectClass(thrown);
+    assert(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
   }
 }
 
diff --git a/test/080-oom-throw-with-finalizer/expected.txt b/test/080-oom-throw-with-finalizer/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/080-oom-throw-with-finalizer/expected.txt
diff --git a/test/080-oom-throw-with-finalizer/info.txt b/test/080-oom-throw-with-finalizer/info.txt
new file mode 100644
index 0000000..37091ef
--- /dev/null
+++ b/test/080-oom-throw-with-finalizer/info.txt
@@ -0,0 +1 @@
+Regression test on correct processing of OOM thrown while adding a finalizer reference.
diff --git a/test/080-oom-throw-with-finalizer/src/Main.java b/test/080-oom-throw-with-finalizer/src/Main.java
new file mode 100644
index 0000000..57e9721
--- /dev/null
+++ b/test/080-oom-throw-with-finalizer/src/Main.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Vector;
+
+public class Main {
+    static char [][] holder;
+
+    static class ArrayMemEater {
+        static boolean sawOome;
+
+        static void blowup(char[][] holder) {
+            try {
+                for (int i = 0; i < holder.length; ++i) {
+                    holder[i] = new char[1024 * 1024];
+                }
+            } catch (OutOfMemoryError oome) {
+                ArrayMemEater.sawOome = true;
+            }
+        }
+    }
+
+    static class InstanceFinalizerMemEater {
+        public void finalize() {}
+    }
+
+    static boolean triggerArrayOOM(char[][] holder) {
+        ArrayMemEater.blowup(holder);
+        return ArrayMemEater.sawOome;
+    }
+
+    static boolean triggerInstanceFinalizerOOM() {
+        boolean sawOome = false;
+        try {
+            Vector v = new Vector();
+            while (true) {
+                v.add(new InstanceFinalizerMemEater());
+            }
+        } catch (OutOfMemoryError e) {
+            sawOome = true;
+        }
+        return sawOome;
+    }
+
+    public static void main(String[] args) {
+        // Keep holder alive to make instance OOM happen faster.
+        holder = new char[128 * 1024][];
+        if (!triggerArrayOOM(holder)) {
+            System.out.println("NEW_ARRAY did not throw OOME");
+        }
+
+        if (!triggerInstanceFinalizerOOM()) {
+            System.out.println("NEW_INSTANCE (finalize) did not throw OOME");
+        }
+
+        System.runFinalization();
+    }
+}
diff --git a/test/415-optimizing-arith-neg/src/Main.java b/test/415-optimizing-arith-neg/src/Main.java
index d9f8bcf..bd8a158 100644
--- a/test/415-optimizing-arith-neg/src/Main.java
+++ b/test/415-optimizing-arith-neg/src/Main.java
@@ -36,12 +36,24 @@
     }
   }
 
+  public static void assertEquals(String expected, float result) {
+    if (!expected.equals(new Float(result).toString())) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void assertEquals(double expected, double result) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
 
+  public static void assertEquals(String expected, double result) {
+    if (!expected.equals(new Double(result).toString())) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
   public static void assertIsNaN(float result) {
     if (!Float.isNaN(result)) {
       throw new Error("Expected NaN: " + result);
@@ -116,9 +128,10 @@
   }
 
   private static void negFloat() {
+     assertEquals("-0.0", $opt$NegFloat(0F));
+     assertEquals("0.0", $opt$NegFloat(-0F));
      assertEquals(-1F, $opt$NegFloat(1F));
      assertEquals(1F, $opt$NegFloat(-1F));
-     assertEquals(0F, $opt$NegFloat(0F));
      assertEquals(51F, $opt$NegFloat(-51F));
      assertEquals(-51F, $opt$NegFloat(51F));
 
@@ -140,9 +153,10 @@
   }
 
   private static void negDouble() {
+     assertEquals("-0.0", $opt$NegDouble(0D));
+     assertEquals("0.0", $opt$NegDouble(-0D));
      assertEquals(-1D, $opt$NegDouble(1D));
      assertEquals(1D, $opt$NegDouble(-1D));
-     assertEquals(0D, $opt$NegDouble(0D));
      assertEquals(51D, $opt$NegDouble(-51D));
      assertEquals(-51D, $opt$NegDouble(51D));
 
diff --git a/test/421-large-frame/src/Main.java b/test/421-large-frame/src/Main.java
index 01b89ba..dae72fb 100644
--- a/test/421-large-frame/src/Main.java
+++ b/test/421-large-frame/src/Main.java
@@ -2029,6 +2029,11 @@
     l997 += l996;
     l998 += l997;
     l999 += l998;
-    return l999;
+    // Create a branch to beat the large method check.
+    if (l998 == l999) {
+      return l998;
+    } else {
+      return l999;
+    }
   }
 }
diff --git a/test/422-type-conversion/src/Main.java b/test/422-type-conversion/src/Main.java
index 37bc777..e7dbe24 100644
--- a/test/422-type-conversion/src/Main.java
+++ b/test/422-type-conversion/src/Main.java
@@ -85,6 +85,15 @@
     // Generate, compile and check long-to-int Dex instructions.
     longToInt();
 
+    // Generate, compile and check long-to-float Dex instructions.
+    longToFloat();
+
+    // Generate, compile and check long-to-double Dex instructions.
+    longToDouble();
+
+    // Generate, compile and check float-to-int Dex instructions.
+    floatToInt();
+
     // Generate, compile and check int-to-byte Dex instructions.
     shortToByte();
     intToByte();
@@ -267,6 +276,72 @@
     assertLongEquals(-1, $opt$IntToLong($opt$LongToInt(-4294967297L)));  // -(2^32 + 1)
   }
 
+  private static void longToFloat() {
+    assertFloatEquals(1F, $opt$LongToFloat(1L));
+    assertFloatEquals(0F, $opt$LongToFloat(0L));
+    assertFloatEquals(-1F, $opt$LongToFloat(-1L));
+    assertFloatEquals(51F, $opt$LongToFloat(51L));
+    assertFloatEquals(-51F, $opt$LongToFloat(-51L));
+    assertFloatEquals(2147483647F, $opt$LongToFloat(2147483647L));  // 2^31 - 1
+    assertFloatEquals(-2147483647F, $opt$LongToFloat(-2147483647L));  // -(2^31 - 1)
+    assertFloatEquals(-2147483648F, $opt$LongToFloat(-2147483648L));  // -(2^31)
+    assertFloatEquals(2147483648F, $opt$LongToFloat(2147483648L));  // (2^31)
+    assertFloatEquals(-2147483649F, $opt$LongToFloat(-2147483649L));  // -(2^31 + 1)
+    assertFloatEquals(4294967296F, $opt$LongToFloat(4294967296L));  // (2^32)
+    assertFloatEquals(-4294967296F, $opt$LongToFloat(-4294967296L));  // -(2^32)
+    assertFloatEquals(140739635871745F, $opt$LongToFloat(140739635871745L));  // 1 + 2^15 + 2^31 + 2^47
+    assertFloatEquals(-140739635871745F, $opt$LongToFloat(-140739635871745L));  // -(1 + 2^15 + 2^31 + 2^47)
+    assertFloatEquals(9223372036854775807F, $opt$LongToFloat(9223372036854775807L));  // 2^63 - 1
+    assertFloatEquals(-9223372036854775807F, $opt$LongToFloat(-9223372036854775807L));  // -(2^63 - 1)
+    assertFloatEquals(-9223372036854775808F, $opt$LongToFloat(-9223372036854775808L));  // -(2^63)
+  }
+
+  private static void longToDouble() {
+    assertDoubleEquals(1D, $opt$LongToDouble(1L));
+    assertDoubleEquals(0D, $opt$LongToDouble(0L));
+    assertDoubleEquals(-1D, $opt$LongToDouble(-1L));
+    assertDoubleEquals(51D, $opt$LongToDouble(51L));
+    assertDoubleEquals(-51D, $opt$LongToDouble(-51L));
+    assertDoubleEquals(2147483647D, $opt$LongToDouble(2147483647L));  // 2^31 - 1
+    assertDoubleEquals(-2147483647D, $opt$LongToDouble(-2147483647L));  // -(2^31 - 1)
+    assertDoubleEquals(-2147483648D, $opt$LongToDouble(-2147483648L));  // -(2^31)
+    assertDoubleEquals(2147483648D, $opt$LongToDouble(2147483648L));  // (2^31)
+    assertDoubleEquals(-2147483649D, $opt$LongToDouble(-2147483649L));  // -(2^31 + 1)
+    assertDoubleEquals(4294967296D, $opt$LongToDouble(4294967296L));  // (2^32)
+    assertDoubleEquals(-4294967296D, $opt$LongToDouble(-4294967296L));  // -(2^32)
+    assertDoubleEquals(140739635871745D, $opt$LongToDouble(140739635871745L));  // 1 + 2^15 + 2^31 + 2^47
+    assertDoubleEquals(-140739635871745D, $opt$LongToDouble(-140739635871745L));  // -(1 + 2^15 + 2^31 + 2^47)
+    assertDoubleEquals(9223372036854775807D, $opt$LongToDouble(9223372036854775807L));  // 2^63 - 1
+    assertDoubleEquals(-9223372036854775807D, $opt$LongToDouble(-9223372036854775807L));  // -(2^63 - 1)
+    assertDoubleEquals(-9223372036854775808D, $opt$LongToDouble(-9223372036854775808L));  // -(2^63)
+  }
+
+  private static void floatToInt() {
+    assertIntEquals(1, $opt$FloatToInt(1F));
+    assertIntEquals(0, $opt$FloatToInt(0F));
+    assertIntEquals(0, $opt$FloatToInt(-0F));
+    assertIntEquals(-1, $opt$FloatToInt(-1F));
+    assertIntEquals(51, $opt$FloatToInt(51F));
+    assertIntEquals(-51, $opt$FloatToInt(-51F));
+    assertIntEquals(0, $opt$FloatToInt(0.5F));
+    assertIntEquals(0, $opt$FloatToInt(0.4999999F));
+    assertIntEquals(0, $opt$FloatToInt(-0.4999999F));
+    assertIntEquals(0, $opt$FloatToInt(-0.5F));
+    assertIntEquals(42, $opt$FloatToInt(42.199F));
+    assertIntEquals(-42, $opt$FloatToInt(-42.199F));
+    assertIntEquals(2147483647, $opt$FloatToInt(2147483647F));  // 2^31 - 1
+    assertIntEquals(-2147483648, $opt$FloatToInt(-2147483647F));  // -(2^31 - 1)
+    assertIntEquals(-2147483648, $opt$FloatToInt(-2147483648F));  // -(2^31)
+    assertIntEquals(2147483647, $opt$FloatToInt(2147483648F));  // (2^31)
+    assertIntEquals(-2147483648, $opt$FloatToInt(-2147483649F));  // -(2^31 + 1)
+    assertIntEquals(2147483647, $opt$FloatToInt(9223372036854775807F));  // 2^63 - 1
+    assertIntEquals(-2147483648, $opt$FloatToInt(-9223372036854775807F));  // -(2^63 - 1)
+    assertIntEquals(-2147483648, $opt$FloatToInt(-9223372036854775808F));  // -(2^63)
+    assertIntEquals(0, $opt$FloatToInt(Float.NaN));
+    assertIntEquals(2147483647, $opt$FloatToInt(Float.POSITIVE_INFINITY));
+    assertIntEquals(-2147483648, $opt$FloatToInt(Float.NEGATIVE_INFINITY));
+  }
+
   private static void shortToByte() {
     assertByteEquals((byte)1, $opt$ShortToByte((short)1));
     assertByteEquals((byte)0, $opt$ShortToByte((short)0));
@@ -416,6 +491,15 @@
   static int $opt$LongToInt(long a){ return (int)a; }
   static int $opt$LongLiteralToInt(){ return (int)42L; }
 
+  // This method produces a long-to-float Dex instruction.
+  static float $opt$LongToFloat(long a){ return (float)a; }
+
+  // This method produces a long-to-double Dex instruction.
+  static double $opt$LongToDouble(long a){ return (double)a; }
+
+  // This method produces a float-to-int Dex instruction.
+  static int $opt$FloatToInt(float a){ return (int)a; }
+
   // These methods produce int-to-byte Dex instructions.
   static byte $opt$ShortToByte(short a){ return (byte)a; }
   static byte $opt$IntToByte(int a){ return (byte)a; }
diff --git a/test/432-optimizing-cmp/expected.txt b/test/432-optimizing-cmp/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/432-optimizing-cmp/expected.txt
diff --git a/test/432-optimizing-cmp/info.txt b/test/432-optimizing-cmp/info.txt
new file mode 100644
index 0000000..fad6cee
--- /dev/null
+++ b/test/432-optimizing-cmp/info.txt
@@ -0,0 +1 @@
+Tests for compare operations.
diff --git a/test/432-optimizing-cmp/smali/cmp.smali b/test/432-optimizing-cmp/smali/cmp.smali
new file mode 100644
index 0000000..470d940
--- /dev/null
+++ b/test/432-optimizing-cmp/smali/cmp.smali
@@ -0,0 +1,33 @@
+.class public LTestCmp;
+
+.super Ljava/lang/Object;
+
+.method public static $opt$CmpLong(JJ)I
+   .registers 5
+   cmp-long v0, v1, v3
+   return v0
+.end method
+
+.method public static $opt$CmpGtFloat(FF)I
+   .registers 3
+   cmpg-float v0, v1, v2
+   return v0
+.end method
+
+.method public static $opt$CmpLtFloat(FF)I
+   .registers 3
+   cmpl-float v0, v1, v2
+   return v0
+.end method
+
+.method public static $opt$CmpGtDouble(DD)I
+   .registers 5
+   cmpg-double v0, v1, v3
+   return v0
+.end method
+
+.method public static $opt$CmpLtDouble(DD)I
+   .registers 5
+   cmpl-double v0, v1, v3
+   return v0
+.end method
diff --git a/test/432-optimizing-cmp/src/Main.java b/test/432-optimizing-cmp/src/Main.java
new file mode 100644
index 0000000..3c7b13f
--- /dev/null
+++ b/test/432-optimizing-cmp/src/Main.java
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Exception {
+    cmpLong();
+    cmpFloat();
+    cmpDouble();
+  }
+
+  private static void cmpLong() throws Exception {
+    expectLt(3L, 5L);
+    expectGt(5L, 3L);
+    expectLt(Long.MIN_VALUE, Long.MAX_VALUE);
+    expectGt(Long.MAX_VALUE, Long.MIN_VALUE);
+
+    expectEquals(0, smaliCmpLong(0L, 0L));
+    expectEquals(0, smaliCmpLong(1L, 1L));
+    expectEquals(-1, smaliCmpLong(1L, 2L));
+    expectEquals(1, smaliCmpLong(2L, 1L));
+    expectEquals(-1, smaliCmpLong(Long.MIN_VALUE, Long.MAX_VALUE));
+    expectEquals(1, smaliCmpLong(Long.MAX_VALUE, Long.MIN_VALUE));
+    expectEquals(0, smaliCmpLong(Long.MIN_VALUE, Long.MIN_VALUE));
+    expectEquals(0, smaliCmpLong(Long.MAX_VALUE, Long.MAX_VALUE));
+  }
+
+  private static void cmpFloat() throws Exception {
+    expectLt(3.1F, 5.1F);
+    expectGt(5.1F, 3.1F);
+    expectLt(Float.MIN_VALUE, Float.MAX_VALUE);
+    expectGt(Float.MAX_VALUE, Float.MIN_VALUE);
+    expectFalse(3.1F, Float.NaN);
+    expectFalse(Float.NaN, 3.1F);
+
+    expectEquals(0, smaliCmpGtFloat(0F, 0F));
+    expectEquals(0, smaliCmpGtFloat(1F, 1F));
+    expectEquals(-1, smaliCmpGtFloat(1.1F, 2.1F));
+    expectEquals(1, smaliCmpGtFloat(2.1F, 1.1F));
+    expectEquals(-1, smaliCmpGtFloat(Float.MIN_VALUE, Float.MAX_VALUE));
+    expectEquals(1, smaliCmpGtFloat(Float.MAX_VALUE, Float.MIN_VALUE));
+    expectEquals(0, smaliCmpGtFloat(Float.MIN_VALUE, Float.MIN_VALUE));
+    expectEquals(0, smaliCmpGtFloat(Float.MAX_VALUE, Float.MAX_VALUE));
+    expectEquals(1, smaliCmpGtFloat(5F, Float.NaN));
+    expectEquals(1, smaliCmpGtFloat(Float.NaN, 5F));
+
+    expectEquals(0, smaliCmpLtFloat(0F, 0F));
+    expectEquals(0, smaliCmpLtFloat(1F, 1F));
+    expectEquals(-1, smaliCmpLtFloat(1.1F, 2.1F));
+    expectEquals(1, smaliCmpLtFloat(2.1F, 1.1F));
+    expectEquals(-1, smaliCmpLtFloat(Float.MIN_VALUE, Float.MAX_VALUE));
+    expectEquals(1, smaliCmpLtFloat(Float.MAX_VALUE, Float.MIN_VALUE));
+    expectEquals(0, smaliCmpLtFloat(Float.MIN_VALUE, Float.MIN_VALUE));
+    expectEquals(0, smaliCmpLtFloat(Float.MAX_VALUE, Float.MAX_VALUE));
+    expectEquals(-1, smaliCmpLtFloat(5F, Float.NaN));
+    expectEquals(-1, smaliCmpLtFloat(Float.NaN, 5F));
+  }
+
+  private static void cmpDouble() throws Exception {
+    expectLt(3.1D, 5.1D);
+    expectGt(5.1D, 3.1D);
+    expectLt(Double.MIN_VALUE, Double.MAX_VALUE);
+    expectGt(Double.MAX_VALUE, Double.MIN_VALUE);
+    expectFalse(3.1D, Double.NaN);
+    expectFalse(Double.NaN, 3.1D);
+
+    expectEquals(0, smaliCmpGtDouble(0D, 0D));
+    expectEquals(0, smaliCmpGtDouble(1D, 1D));
+    expectEquals(-1, smaliCmpGtDouble(1.1D, 2.1D));
+    expectEquals(1, smaliCmpGtDouble(2.1D, 1.1D));
+    expectEquals(-1, smaliCmpGtDouble(Double.MIN_VALUE, Double.MAX_VALUE));
+    expectEquals(1, smaliCmpGtDouble(Double.MAX_VALUE, Double.MIN_VALUE));
+    expectEquals(0, smaliCmpGtDouble(Double.MIN_VALUE, Double.MIN_VALUE));
+    expectEquals(0, smaliCmpGtDouble(Double.MAX_VALUE, Double.MAX_VALUE));
+    expectEquals(1, smaliCmpGtDouble(5D, Double.NaN));
+    expectEquals(1, smaliCmpGtDouble(Double.NaN, 5D));
+
+    expectEquals(0, smaliCmpLtDouble(0D, 0D));
+    expectEquals(0, smaliCmpLtDouble(1D, 1D));
+    expectEquals(-1, smaliCmpLtDouble(1.1D, 2.1D));
+    expectEquals(1, smaliCmpLtDouble(2.1D, 1.1D));
+    expectEquals(-1, smaliCmpLtDouble(Double.MIN_VALUE, Double.MAX_VALUE));
+    expectEquals(1, smaliCmpLtDouble(Double.MAX_VALUE, Double.MIN_VALUE));
+    expectEquals(0, smaliCmpLtDouble(Double.MIN_VALUE, Double.MIN_VALUE));
+    expectEquals(0, smaliCmpLtDouble(Double.MAX_VALUE, Double.MAX_VALUE));
+    expectEquals(-1, smaliCmpLtDouble(5D, Double.NaN));
+    expectEquals(-1, smaliCmpLtDouble(Float.NaN, 5D));
+  }
+
+ static boolean $opt$lt(long a, long b) {
+    return a < b;
+  }
+
+  static boolean $opt$lt(float a, float b) {
+    return a < b;
+  }
+
+  static boolean $opt$lt(double a, double b) {
+    return a < b;
+  }
+
+  static boolean $opt$gt(long a, long b) {
+    return a > b;
+  }
+
+  static boolean $opt$gt(float a, float b) {
+    return a > b;
+  }
+
+  static boolean $opt$gt(double a, double b) {
+    return a > b;
+  }
+
+  // Wrappers around methods located in file cmp.smali.
+
+  private static int smaliCmpLong(long a, long b) throws Exception {
+    Class<?> c = Class.forName("TestCmp");
+    Method m = c.getMethod("$opt$CmpLong", long.class, long.class);
+    int result = (Integer)m.invoke(null, a, b);
+    return result;
+  }
+
+  private static int smaliCmpGtFloat(float a, float b) throws Exception {
+    Class<?> c = Class.forName("TestCmp");
+    Method m = c.getMethod("$opt$CmpGtFloat", float.class, float.class);
+    int result = (Integer)m.invoke(null, a, b);
+    return result;
+  }
+
+  private static int smaliCmpLtFloat(float a, float b) throws Exception {
+    Class<?> c = Class.forName("TestCmp");
+    Method m = c.getMethod("$opt$CmpLtFloat", float.class, float.class);
+    int result = (Integer)m.invoke(null, a, b);
+    return result;
+  }
+
+  private static int smaliCmpGtDouble(double a, double b) throws Exception {
+    Class<?> c = Class.forName("TestCmp");
+    Method m = c.getMethod("$opt$CmpGtDouble", double.class, double.class);
+    int result = (Integer)m.invoke(null, a, b);
+    return result;
+  }
+
+  private static int smaliCmpLtDouble(double a, double b) throws Exception {
+    Class<?> c = Class.forName("TestCmp");
+    Method m = c.getMethod("$opt$CmpLtDouble", double.class, double.class);
+    int result = (Integer)m.invoke(null, a, b);
+    return result;
+  }
+
+    public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectLt(long a, long b) {
+    if (!$opt$lt(a, b)) {
+      throw new Error("Expected: " + a + " < " + b);
+    }
+  }
+
+  public static void expectGt(long a, long b) {
+    if (!$opt$gt(a, b)) {
+      throw new Error("Expected: " + a + " > " + b);
+    }
+  }
+
+  public static void expectLt(float a, float b) {
+    if (!$opt$lt(a, b)) {
+      throw new Error("Expected: " + a + " < " + b);
+    }
+  }
+
+  public static void expectGt(float a, float b) {
+    if (!$opt$gt(a, b)) {
+      throw new Error("Expected: " + a + " > " + b);
+    }
+  }
+
+  public static void expectFalse(float a, float b) {
+    if ($opt$lt(a, b)) {
+      throw new Error("Not expecting: " + a + " < " + b);
+    }
+    if ($opt$gt(a, b)) {
+      throw new Error("Not expecting: " + a + " > " + b);
+    }
+  }
+
+  public static void expectLt(double a, double b) {
+    if (!$opt$lt(a, b)) {
+      throw new Error("Expected: " + a + " < " + b);
+    }
+  }
+
+  public static void expectGt(double a, double b) {
+    if (!$opt$gt(a, b)) {
+      throw new Error("Expected: " + a + " > " + b);
+    }
+  }
+
+  public static void expectFalse(double a, double b) {
+    if ($opt$lt(a, b)) {
+      throw new Error("Not expecting: " + a + " < " + b);
+    }
+    if ($opt$gt(a, b)) {
+      throw new Error("Not expecting: " + a + " > " + b);
+    }
+  }
+
+}
+
diff --git a/test/433-gvn/expected.txt b/test/433-gvn/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/433-gvn/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/433-gvn/info.txt b/test/433-gvn/info.txt
new file mode 100644
index 0000000..bcdab15
--- /dev/null
+++ b/test/433-gvn/info.txt
@@ -0,0 +1,3 @@
+Regression test for the optimizing compiler's GVN, that
+used to not take into account all side effects between
+a dominator and its dominated blocks.
diff --git a/test/433-gvn/src/Main.java b/test/433-gvn/src/Main.java
new file mode 100644
index 0000000..f9cb594
--- /dev/null
+++ b/test/433-gvn/src/Main.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println(foo());
+  }
+
+  public static int foo() {
+    Main m = new Main();
+    int a = m.field;
+    if (a == 0) {
+      m.field = 42;
+      if (m.test) {
+        a = 3;
+      }
+    }
+    // The compiler used to GVN this field get with the one line 24,
+    // even though the field is updated in the if.
+    return m.field;
+  }
+
+  public int field;
+  public boolean test = true;
+}
diff --git a/test/434-invoke-direct/expected.txt b/test/434-invoke-direct/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/434-invoke-direct/expected.txt
diff --git a/test/434-invoke-direct/info.txt b/test/434-invoke-direct/info.txt
new file mode 100644
index 0000000..eae1ef0
--- /dev/null
+++ b/test/434-invoke-direct/info.txt
@@ -0,0 +1,2 @@
+Tests that IllegalAccessError is thrown when a subclass invokes-direct a
+private method from the super class.
diff --git a/test/434-invoke-direct/smali/invoke.smali b/test/434-invoke-direct/smali/invoke.smali
new file mode 100644
index 0000000..b3cdd1e
--- /dev/null
+++ b/test/434-invoke-direct/smali/invoke.smali
@@ -0,0 +1,30 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LInvokeDirect;
+.super LInvokeDirectSuper;
+
+.method public constructor <init>()V
+      .registers 2
+       invoke-direct {v1}, LInvokeDirectSuper;-><init>()V
+       return-void
+.end method
+
+.method public run()I
+       .registers 3
+       invoke-super {v2}, LInvokeDirectSuper;->privateMethod()I
+       move-result v0
+       return v0
+.end method
diff --git a/test/434-invoke-direct/src/InvokeDirectSuper.java b/test/434-invoke-direct/src/InvokeDirectSuper.java
new file mode 100644
index 0000000..c80a18b
--- /dev/null
+++ b/test/434-invoke-direct/src/InvokeDirectSuper.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class InvokeDirectSuper {
+  public int val;
+
+  private int privateMethod() {
+    return val;
+  }
+}
diff --git a/test/434-invoke-direct/src/Main.java b/test/434-invoke-direct/src/Main.java
new file mode 100644
index 0000000..c363e1a
--- /dev/null
+++ b/test/434-invoke-direct/src/Main.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Throwable {
+    $opt$InvokeDirect();
+  }
+
+  private static void $opt$InvokeDirect() throws Throwable {
+    try {
+      Class<?> c = Class.forName("InvokeDirect");
+      Method m = c.getMethod("run");
+      m.invoke(c.newInstance());
+      throw new RuntimeException("Failed to throw IllegalAccessError");
+    } catch (InvocationTargetException e) {
+      if (!(e.getCause() instanceof IllegalAccessError)) {
+        throw new RuntimeException("Failed to throw IllegalAccessError");
+      }
+    }
+  }
+
+}
+
diff --git a/test/434-shifter-operand/expected.txt b/test/434-shifter-operand/expected.txt
new file mode 100644
index 0000000..52289c6
--- /dev/null
+++ b/test/434-shifter-operand/expected.txt
@@ -0,0 +1,3 @@
+false
+false
+true
diff --git a/test/434-shifter-operand/info.txt b/test/434-shifter-operand/info.txt
new file mode 100644
index 0000000..1ec9adc
--- /dev/null
+++ b/test/434-shifter-operand/info.txt
@@ -0,0 +1,2 @@
+Regression test for the arm backend of the optimizing
+compiler, that used to misuse ShifterOperand::CanHold.
diff --git a/test/434-shifter-operand/src/Main.java b/test/434-shifter-operand/src/Main.java
new file mode 100644
index 0000000..4d188eb
--- /dev/null
+++ b/test/434-shifter-operand/src/Main.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println(foo(42));
+    System.out.println(foo(0xffffffff));
+    System.out.println(foo(0xf0000000));
+  }
+
+  public static boolean foo(int a) {
+    return a < 0xf000000b;
+  }
+}
diff --git a/test/435-new-instance/expected.txt b/test/435-new-instance/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/435-new-instance/expected.txt
diff --git a/test/435-new-instance/info.txt b/test/435-new-instance/info.txt
new file mode 100644
index 0000000..3f30c1b
--- /dev/null
+++ b/test/435-new-instance/info.txt
@@ -0,0 +1,7 @@
+Tests that new-instance throws:
+- InstantiationError on interfaces and abstract classes
+- IllegalAccessError on inaccessible classes
+- NoClassDefFoundError on unknown classes
+
+This also verifies that we don't remove dead (code) new-instances which may
+throw.
diff --git a/test/435-new-instance/smali/instance.smali b/test/435-new-instance/smali/instance.smali
new file mode 100644
index 0000000..2189498
--- /dev/null
+++ b/test/435-new-instance/smali/instance.smali
@@ -0,0 +1,55 @@
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LNewInstance;
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+      .registers 1
+       invoke-direct {v0}, Ljava/lang/Object;-><init>()V
+       return-void
+.end method
+
+.method public newInstanceInterface()Ljava/lang/Object;
+      .registers 5
+      new-instance v1, LTestInterface;
+      # invoke-direct {v3}, LTestInterface;-><init>()V
+      # intentionally return v4 ("this")
+      return-object v4
+.end method
+
+.method public newInstanceClass()Ljava/lang/Object;
+      .registers 5
+      new-instance v1, LTestClass;
+      # invoke-direct {v3}, LTestClass;-><init>()V
+      # intentionally return v4 ("this")
+      return-object v4
+.end method
+
+.method public newInstancePrivateClass()Ljava/lang/Object;
+      .registers 5
+      new-instance v1, Lpkg/ProtectedClass;
+      # invoke-direct {v3}, Lpck/ProtectedClass;-><init>()V
+      # intentionally return v4 ("this")
+      return-object v4
+.end method
+
+.method public newInstanceUnknownClass()Ljava/lang/Object;
+      .registers 5
+      new-instance v1, LUnknownClass;
+      # invoke-direct {v3}, LUnknownClass;-><init>()V
+      # intentionally return v4 ("this")
+      return-object v4
+.end method
diff --git a/test/435-new-instance/src/Main.java b/test/435-new-instance/src/Main.java
new file mode 100644
index 0000000..b5431ad
--- /dev/null
+++ b/test/435-new-instance/src/Main.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+public class Main {
+
+  public static void main(String[] args) throws Throwable {
+    // Attempt to instantiate an interface.
+    $opt$NewInstance("newInstanceInterface", InstantiationError.class.getCanonicalName());
+    // Attempt to instantiate an abstract class.
+    $opt$NewInstance("newInstanceClass", InstantiationError.class.getCanonicalName());
+    // Attempt to instantiate an interface.
+    $opt$NewInstance("newInstancePrivateClass", IllegalAccessError.class.getCanonicalName());
+    // Attempt to instantiate an abstract class.
+    $opt$NewInstance("newInstanceUnknownClass", NoClassDefFoundError.class.getCanonicalName());
+  }
+
+  private static void $opt$NewInstance(String method, String errorName) throws Throwable {
+    try {
+      Class<?> c = Class.forName("NewInstance");
+      Method m = c.getMethod(method);
+      m.invoke(c.newInstance());
+      throw new RuntimeException("Failed to throw " + errorName);
+    } catch (InvocationTargetException e) {
+      if (!e.getCause().getClass().getCanonicalName().equals(errorName)) {
+        throw new RuntimeException("Failed to throw " + errorName
+            + ". Threw: " + e.getCause());
+      }
+    }
+  }
+}
diff --git a/test/435-new-instance/src/TestClass.java b/test/435-new-instance/src/TestClass.java
new file mode 100644
index 0000000..15b0a0d
--- /dev/null
+++ b/test/435-new-instance/src/TestClass.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public abstract class TestClass {
+  // Bogus fields to satisfy dex merger. See b/18051191
+  public int val;
+  public void method() {};
+}
diff --git a/test/435-new-instance/src/TestInterface.java b/test/435-new-instance/src/TestInterface.java
new file mode 100644
index 0000000..db6f3e0
--- /dev/null
+++ b/test/435-new-instance/src/TestInterface.java
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public interface TestInterface {
+  public void method();
+}
diff --git a/test/435-new-instance/src/pkg/ProtectedClass.java b/test/435-new-instance/src/pkg/ProtectedClass.java
new file mode 100644
index 0000000..b262155
--- /dev/null
+++ b/test/435-new-instance/src/pkg/ProtectedClass.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package pkg;
+
+class ProtectedClass {
+}
diff --git a/test/435-try-finally-without-catch/expected.txt b/test/435-try-finally-without-catch/expected.txt
new file mode 100644
index 0000000..8a67802
--- /dev/null
+++ b/test/435-try-finally-without-catch/expected.txt
@@ -0,0 +1,3 @@
+In finally
+In finally
+In finally
diff --git a/test/435-try-finally-without-catch/info.txt b/test/435-try-finally-without-catch/info.txt
new file mode 100644
index 0000000..46217c5
--- /dev/null
+++ b/test/435-try-finally-without-catch/info.txt
@@ -0,0 +1,26 @@
+Exercise a method containing a `try' statement with several
+instructions with a `finally' clause but without any `catch' block,
+enclosed in a loop.
+
+When dx processes an integer division (or modulo) enclosing a `try'
+block and whose result is assigned to a local value, it is smart
+enough not to emit a `div-int' (or `rem-int') instruction when the
+divisor is non-null, as it wouldn't be used.  However, dx is not
+that clever regarding exception handling: if the divisor is known to
+be non-null at compile-time (as is the case in this test), it will
+still emit a block with the exception catching and rethrowing
+mechanism, even if it is not used.
+
+This used to be a problem for a `try' block followed by a `finally'
+clause but with no `catch' block: in that case, the generated Dex code
+item would list zero catch block for this method (see
+art::CodeItem::tries_size_) and the optimizing compiler would have no
+clue that it contains a `try' statement, which it cannot optimize
+(yet).  With no hint that this method might contain one (or several)
+special block(s) related to `catch'-less `try' statement(s), the
+optimizing compiler considered this (these) as dead block(s) and
+improperly tried to remove its (their) instructions, sometimes
+removing instructions used by others instructions, thus triggering
+assertions.  The optimizing compiler was thus adjusted to remove these
+instructions in a proper fashion, by removing them as users first, and
+then by suppressing them for good.
diff --git a/test/435-try-finally-without-catch/src/Main.java b/test/435-try-finally-without-catch/src/Main.java
new file mode 100644
index 0000000..3c29ce8
--- /dev/null
+++ b/test/435-try-finally-without-catch/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args){
+    foo();
+  }
+
+  // Reduced test case inspired by constantPropagationTest() from
+  // test/083-compiler-regressions.
+  static void foo() {
+    int a = 0;
+    int b = 1;
+
+    for (int i = 0; i < 3; i++) {
+      try {
+        a = 1;
+        // Would throw an ArithmeticException if b were null (hence
+        // the enclosing `try' statement).
+        int c = a % b;
+      }
+      finally {
+        System.out.println("In finally");
+      }
+    }
+  }
+}
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 7ec3168..0f7001f 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -7,4 +7,5 @@
 b/18380491
 invoke-super abstract
 BadCaseInOpRegRegReg
+CmpLong
 Done!
diff --git a/test/800-smali/smali/CmpLong.smali b/test/800-smali/smali/CmpLong.smali
new file mode 100644
index 0000000..d54812f
--- /dev/null
+++ b/test/800-smali/smali/CmpLong.smali
@@ -0,0 +1,18 @@
+.class public LCmpLong;
+.super Ljava/lang/Object;
+
+
+.method public constructor <init>()V
+.registers 1
+       invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+       return-void
+.end method
+
+.method public static run()I
+.registers 5000
+       const-wide v100, 5678233453L
+       move-wide/from16 v101, v100
+       const-wide v4, 5678233453L
+       cmp-long v0, v101, v4
+       return v0
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index abb53de..f2c1ab5 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -63,6 +63,7 @@
         testCases.add(new TestCase("invoke-super abstract", "B18380491ConcreteClass", "foo",
             new Object[]{0}, new AbstractMethodError(), null));
         testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2));
+        testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0));
     }
 
     public void runTests() {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 47d186a..b85685b 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -299,15 +299,11 @@
 # Known broken tests for the arm64 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := \
   003-omnibus-opcodes \
-  004-NativeAllocations \
   004-ReferenceMap \
   005-annotations \
   009-instanceof \
   010-instance \
-  012-math \
   023-many-interfaces \
-  027-arithmetic \
-  037-inherit \
   044-proxy \
   045-reflect-array \
   046-reflect \
@@ -317,21 +313,15 @@
   068-classloader \
   069-field-type \
   071-dexfile \
-  083-compiler-regressions \
   106-exceptions2 \
   107-int-math2 \
-  114-ParallelGC \
   201-built-in-exception-detail-messages \
   407-arrays \
   412-new-array \
   422-instanceof \
-  422-type-conversion \
   424-checkcast \
   427-bounds \
-  428-optimizing-arith-rem \
   430-live-register-slow-path \
-  431-optimizing-arith-shifts \
-  701-easy-div-rem \
   800-smali \
 
 ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
@@ -482,6 +472,7 @@
     else
       ifeq ($(4),default)
         test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEFAULT_RULES
+        run_test_options += --quick
       else
         $$(error found $(4) expected $(COMPILER_TYPES))
       endif
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index d2cd8ab..5c0f83f 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -108,6 +108,11 @@
         DEV_MODE="y"
         TIME_OUT="n"
         shift
+    elif [ "x$1" = "x--gdb-arg" ]; then
+        shift
+        gdb_arg="$1"
+        GDB_ARGS="${GDB_ARGS} $gdb_arg"
+        shift
     elif [ "x$1" = "x--zygote" ]; then
         ZYGOTE="-Xzygote"
         msg "Spawning from zygote"
@@ -229,11 +234,11 @@
   else
     if [ `uname` = "Darwin" ]; then
         GDB=lldb
-        GDB_ARGS="-- $DALVIKVM"
+        GDB_ARGS="$GDB_ARGS -- $DALVIKVM"
         DALVIKVM=
     else
         GDB=gdb
-        GDB_ARGS="--args $DALVIKVM"
+        GDB_ARGS="$GDB_ARGS --args $DALVIKVM"
         # Enable for Emacs "M-x gdb" support. TODO: allow extra gdb arguments on command line.
         # gdbargs="--annotate=3 $gdbargs"
     fi
diff --git a/test/run-test b/test/run-test
index e9dd86a..2abc1fa 100755
--- a/test/run-test
+++ b/test/run-test
@@ -171,6 +171,11 @@
         option="$1"
         run_args="${run_args} --runtime-option $option"
         shift
+    elif [ "x$1" = "x--gdb-arg" ]; then
+        shift
+        gdb_arg="$1"
+        run_args="${run_args} --gdb-arg $gdb_arg"
+        shift
     elif [ "x$1" = "x--debug" ]; then
         run_args="${run_args} --debug"
         shift
@@ -189,6 +194,9 @@
         run_args="${run_args} -Xcompiler-option --compiler-backend=Optimizing"
         image_suffix="-optimizing"
         shift
+    elif [ "x$1" = "x--quick" ]; then
+        run_args="${run_args} -Xcompiler-option --compiler-backend=Quick"
+        shift
     elif [ "x$1" = "x--no-verify" ]; then
         run_args="${run_args} --no-verify"
         shift
@@ -416,6 +424,7 @@
         echo "    --build-only          Build test files only (off by default)."
         echo "    --interpreter         Enable interpreter only mode (off by default)."
         echo "    --optimizing          Enable optimizing compiler (off by default)."
+        echo "    --quick               Use Quick compiler (default)."
         echo "    --no-verify           Turn off verification (on by default)."
         echo "    --no-optimize         Turn off optimization (on by default)."
         echo "    --no-precise          Turn off precise GC (on by default)."
diff --git a/tools/analyze-init-failures.py b/tools/analyze-init-failures.py
new file mode 100755
index 0000000..f803ea3
--- /dev/null
+++ b/tools/analyze-init-failures.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Analyzes the dump of initialization failures and creates a Graphviz dot file
+   representing dependencies."""
+
+import codecs
+import os
+import re
+import string
+import sys
+
+
+_CLASS_RE = re.compile(r'^L(.*);$')
+_ERROR_LINE_RE = re.compile(r'^java.lang.InternalError: (.*)')
+_STACK_LINE_RE = re.compile(r'^\s*at\s[^\s]*\s([^\s]*)')
+
+def Confused(filename, line_number, line):
+  sys.stderr.write('%s:%d: confused by:\n%s\n' % (filename, line_number, line))
+  raise Exception("giving up!")
+  sys.exit(1)
+
+
+def ProcessFile(filename):
+  lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n')
+  it = iter(lines)
+
+  class_fail_class = {}
+  class_fail_method = {}
+  root_failures = set()
+  root_errors = {}
+
+  while True:
+    try:
+      # We start with a class descriptor.
+      raw_line = it.next()
+      m = _CLASS_RE.search(raw_line)
+      # print(raw_line)
+      if m is None:
+        continue
+      # Found a class.
+      failed_clazz = m.group(1).replace('/','.')
+      # print('Is a class %s' % failed_clazz)
+      # The error line should be next.
+      raw_line = it.next()
+      m = _ERROR_LINE_RE.search(raw_line)
+      # print(raw_line)
+      if m is None:
+        Confused(filename, -1, raw_line)
+        continue
+      # Found an error line.
+      error = m.group(1)
+      # print('Is an error %s' % error)
+      # Get the top of the stack
+      raw_line = it.next()
+      m = _STACK_LINE_RE.search(raw_line)
+      if m is None:
+        continue
+      # Found a stack line. Get the method.
+      method = m.group(1)
+      # print('Is a stack element %s' % method)
+      (left_of_paren,paren,right_of_paren) = method.partition('(')
+      (root_err_class,dot,root_method_name) = left_of_paren.rpartition('.')
+      # print('Error class %s' % err_class)
+      # print('Error method %s' % method_name)
+      # Record the root error.
+      root_failures.add(root_err_class)
+      # Parse all the trace elements to find the "immediate" cause.
+      immediate_class = root_err_class
+      immediate_method = root_method_name
+      root_errors[root_err_class] = error
+      # Now go "up" the stack.
+      while True:
+        raw_line = it.next()
+        m = _STACK_LINE_RE.search(raw_line)
+        if m is None:
+          break  # Nothing more to see here.
+        method = m.group(1)
+        (left_of_paren,paren,right_of_paren) = method.partition('(')
+        (err_class,dot,err_method_name) = left_of_paren.rpartition('.')
+        if err_method_name == "<clinit>":
+          # A class initializer is on the stack...
+          class_fail_class[err_class] = immediate_class
+          class_fail_method[err_class] = immediate_method
+          immediate_class = err_class
+          immediate_method = err_method_name
+    except StopIteration:
+      # print('Done')
+      break  # Done
+
+  # Assign IDs.
+  fail_sources = set(class_fail_class.values());
+  all_classes = fail_sources | set(class_fail_class.keys())
+  i = 0
+  class_index = {}
+  for clazz in all_classes:
+    class_index[clazz] = i
+    i = i + 1
+
+  # Now create the nodes.
+  for (r_class, r_id) in class_index.items():
+    error_string = ''
+    if r_class in root_failures:
+      error_string = ',color=Red,tooltip="' + root_errors[r_class] + '",URL="' + root_errors[r_class] + '"'
+    print('  n%d [shape=box,label="%s"%s];' % (r_id, r_class, error_string))
+
+  # Some space.
+  print('')
+
+  # Connections.
+  for (failed_class,error_class) in class_fail_class.items():
+    print('  n%d -> n%d;' % (class_index[failed_class], class_index[error_class]))
+
+
+def main():
+  print('digraph {')
+  print('  overlap=false;')
+  print('  splines=true;')
+  ProcessFile(sys.argv[1])
+  print('}')
+  sys.exit(0)
+
+
+if __name__ == '__main__':
+  main()
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
new file mode 100644
index 0000000..8eac1d3
--- /dev/null
+++ b/tools/libcore_failures.txt
@@ -0,0 +1,21 @@
+/*
+ * This file contains expectations for ART's buildbot. The purpose of this file is
+ * to temporary and quickly list failing tests and not break the bots, until the
+ * libcore expectation files get properly updated. The script that uses this file
+ * is art/tools/run-libcore-tests.sh.
+ *
+ * It is also used to enable AOSP experiments, and not mess up with CTS's expectations.
+ */
+
+[
+{
+  description: "Assert.java differences between vogar and junit.",
+  result: EXEC_FAILED,
+  name: "libcore.java.math.RunCSVTests#test_csv"
+},
+{
+  description: "Test is currently being updated.",
+  result: EXEC_FAILED,
+  name: "libcore.java.util.OldTimeZoneTest#test_getDisplayNameZILjava_util_Locale"
+}
+]
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
new file mode 100755
index 0000000..9fa3fda
--- /dev/null
+++ b/tools/run-libcore-tests.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ ! -d libcore ]; then
+  echo "Script needs to be run at the root of the android tree"
+  exit 1
+fi
+
+# Jar containing all the tests.
+test_jar=out/target/common/obj/JAVA_LIBRARIES/core-tests_intermediates/javalib.jar
+
+if [ ! -f $test_jar ]; then
+  echo "Before running, you must build core-tests and vogar: make core-tests vogar vogar.jar"
+  exit 1
+fi
+
+# Packages that currently work correctly with the expectation files.
+working_packages=("libcore.java.lang"
+                  "libcore.java.math"
+                  "libcore.java.util"
+                  "org.apache.harmony.annotation"
+                  "org.apache.harmony.regex"
+                  "org.apache.harmony.tests.java.lang"
+                  "org.apache.harmony.tests.java.math"
+                  "org.apache.harmony.tests.java.util"
+                  "tests.java.lang.String")
+
+# Run the tests using vogar.
+echo "Running tests for the following test packages:"
+echo ${working_packages[@]} | tr " " "\n"
+vogar $@ --expectations art/tools/libcore_failures.txt --classpath $test_jar ${working_packages[@]}