Merge "Quick: Fix wide Phi detection in GVN, clean up INVOKEs."
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 3e76d91..ca718f1 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -78,6 +78,9 @@
 # Do you want run-tests with relocation disabled run?
 ART_TEST_RUN_TEST_NO_RELOCATE ?= $(ART_TEST_FULL)
 
+# Do you want run-tests with prebuilding?
+ART_TEST_RUN_TEST_PREBUILD ?= true
+
 # Do you want run-tests with no prebuilding enabled run?
 ART_TEST_RUN_TEST_NO_PREBUILD ?= $(ART_TEST_FULL)
 
@@ -96,6 +99,9 @@
 # Do you want run-tests with libart.so?
 ART_TEST_RUN_TEST_NDEBUG ?= $(ART_TEST_FULL)
 
+# Do you want run-tests with the host/target's second arch?
+ART_TEST_RUN_TEST_2ND_ARCH ?= true
+
 # Do you want failed tests to have their artifacts cleaned up?
 ART_TEST_RUN_TEST_ALWAYS_CLEAN ?= true
 
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 25b23a2..172c96c 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -88,6 +88,7 @@
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
 	optimizing/code_generator_arm.cc \
+	optimizing/code_generator_arm64.cc \
 	optimizing/code_generator_x86.cc \
 	optimizing/code_generator_x86_64.cc \
 	optimizing/constant_folding.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 359d6af..7e19e15 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -111,7 +111,7 @@
 #else
   // Only warn if not Intel as Intel doesn't have cache flush instructions.
 #if !defined(__i386__) && !defined(__x86_64__)
-  LOG(WARNING) << "UNIMPLEMENTED: cache flush";
+  UNIMPLEMENTED(WARNING) << "cache flush";
 #endif
 #endif
 }
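
For context: ART's UNIMPLEMENTED(severity) macro routes through LOG(severity) and prefixes the
current function name, which is why the message above no longer hand-writes "UNIMPLEMENTED: ".
A rough sketch of the shape, with a stand-in LOG macro for illustration (the real definitions
live in art/runtime/base/logging.h, and __PRETTY_FUNCTION__ is a GCC/Clang extension):

    #include <iostream>
    // Stand-in LOG for illustration only; not ART's definition.
    #define LOG(severity) (std::cerr << #severity << ": ")
    #define UNIMPLEMENTED(severity) LOG(severity) << __PRETTY_FUNCTION__ << " unimplemented "

    void FlushInstructionCache() {
      UNIMPLEMENTED(WARNING) << "cache flush";  // prints the enclosing function name automatically
    }
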
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index fbfd8e6..36213ca 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -75,8 +75,8 @@
 
     default:
       LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
-  return nullptr;
 }
 
 }  // namespace art
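
The UNREACHABLE() marker tells the compiler that control never flows past LOG(FATAL), which is
what lets the dead "return nullptr;" be deleted without tripping a missing-return warning. A
minimal, self-contained sketch of the idiom, using abort() in place of LOG(FATAL) and assuming
the GCC/Clang builtin:

    #include <cstdio>
    #include <cstdlib>

    #define UNREACHABLE() __builtin_unreachable()  // assumption: GCC/Clang builtin

    int Pick(int kind) {
      switch (kind) {
        case 0: return 42;
        default:
          std::fprintf(stderr, "UNREACHABLE\n");
          std::abort();    // stands in for LOG(FATAL), which never returns
          UNREACHABLE();   // silences "control reaches end of non-void function"
      }
      // No trailing return needed.
    }
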
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 78da420..beeb3ad 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -60,6 +60,14 @@
   kFArg5,
   kFArg6,
   kFArg7,
+  kFArg8,
+  kFArg9,
+  kFArg10,
+  kFArg11,
+  kFArg12,
+  kFArg13,
+  kFArg14,
+  kFArg15,
   kRet0,
   kRet1,
   kInvokeTgt,
@@ -306,6 +314,7 @@
   kMIRIgnoreRangeCheck,
   kMIRRangeCheckOnly,
   kMIRIgnoreClInitCheck,
+  kMirIgnoreDivZeroCheck,
   kMIRInlined,                        // Invoke is inlined (ie dead).
   kMIRInlinedPred,                    // Invoke is inlined via prediction.
   kMIRCallee,                         // Instruction is inlined from callee.
diff --git a/compiler/dex/compiler_ir.cc b/compiler/dex/compiler_ir.cc
index 909c995..a2b3fe4 100644
--- a/compiler/dex/compiler_ir.cc
+++ b/compiler/dex/compiler_ir.cc
@@ -16,6 +16,7 @@
 
 #include "compiler_ir.h"
 
+#include "base/dumpable.h"
 #include "backend.h"
 #include "frontend.h"
 #include "mir_graph.h"
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 2e21d05..3dc5655 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -19,6 +19,7 @@
 #include <cstdint>
 
 #include "backend.h"
+#include "base/dumpable.h"
 #include "compiler.h"
 #include "compiler_internals.h"
 #include "driver/compiler_driver.h"
@@ -134,15 +135,8 @@
         (cu.enable_debug & (1 << kDebugVerbose));
   }
 
-  if (gVerboseMethods.size() != 0) {
-    cu.verbose = false;
-    for (size_t i = 0; i < gVerboseMethods.size(); ++i) {
-      if (PrettyMethod(method_idx, dex_file).find(gVerboseMethods[i])
-          != std::string::npos) {
-        cu.verbose = true;
-        break;
-      }
-    }
+  if (driver.GetCompilerOptions().HasVerboseMethods()) {
+    cu.verbose = driver.GetCompilerOptions().IsVerboseMethod(PrettyMethod(method_idx, dex_file));
   }
 
   if (cu.verbose) {
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index a405af1..5c74e9e 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -147,6 +147,7 @@
 #define MIR_IGNORE_RANGE_CHECK          (1 << kMIRIgnoreRangeCheck)
 #define MIR_RANGE_CHECK_ONLY            (1 << kMIRRangeCheckOnly)
 #define MIR_IGNORE_CLINIT_CHECK         (1 << kMIRIgnoreClInitCheck)
+#define MIR_IGNORE_DIV_ZERO_CHECK       (1 << kMirIgnoreDivZeroCheck)
 #define MIR_INLINED                     (1 << kMIRInlined)
 #define MIR_INLINED_PRED                (1 << kMIRInlinedPred)
 #define MIR_CALLEE                      (1 << kMIRCallee)
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index d935bc3..36cb7a4 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -297,19 +297,20 @@
 constexpr RegStorage rs_dr31(RegStorage::kValid | dr31);
 #endif
 
-// RegisterLocation templates return values (r0, or r0/r1).
-const RegLocation arm_loc_c_return
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_wide
+// RegisterLocation templates for return values (r0, r0/r1, s0, or d0).
+// Note: The return locations are shared between quick code and the quick helpers and follow the
+// quick ABI. The quick helper assembly routines need to handle any ABI differences.
+const RegLocation arm_loc_c_return =
+    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_wide =
     {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_float
-    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k32BitSolo, r0), INVALID_SREG, INVALID_SREG};
-const RegLocation arm_loc_c_return_double
-    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
-     RegStorage(RegStorage::k64BitPair, r0, r1), INVALID_SREG, INVALID_SREG};
+     RegStorage::MakeRegPair(rs_r0, rs_r1), INVALID_SREG, INVALID_SREG};
+const RegLocation arm_loc_c_return_float = kArm32QuickCodeUseSoftFloat
+    ? arm_loc_c_return
+    : RegLocation({kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, rs_fr0, INVALID_SREG, INVALID_SREG});
+const RegLocation arm_loc_c_return_double = kArm32QuickCodeUseSoftFloat
+    ? arm_loc_c_return_wide
+    : RegLocation({kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, rs_dr0, INVALID_SREG, INVALID_SREG});
 
 enum ArmShiftEncodings {
   kArmLsl = 0x0,
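
The hard-float configuration above selects the float/double return locations at compile time:
soft float keeps results in core registers (r0, r0/r1), hard float moves them to s0/d0. A
simplified sketch of the selection pattern, with assumed stand-in types (not ART's RegLocation):

    struct RegLoc { bool fp; bool wide; int reg; };
    constexpr bool kUseSoftFloat = false;  // assumption: mirrors kArm32QuickCodeUseSoftFloat

    constexpr RegLoc kFloatReturn  = kUseSoftFloat ? RegLoc{false, false, 0}   // r0
                                                   : RegLoc{true,  false, 0};  // s0
    constexpr RegLoc kDoubleReturn = kUseSoftFloat ? RegLoc{false, true,  0}   // r0/r1 pair
                                                   : RegLoc{true,  true,  0};  // d0
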
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 1c87a03..442c4fc 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -25,6 +25,64 @@
 namespace art {
 
 class ArmMir2Lir FINAL : public Mir2Lir {
+ protected:
+  // TODO: Consolidate hard float target support.
+  // InToRegStorageMapper and InToRegStorageMapping can be shared with all backends.
+  // Base class used to get RegStorage for next argument.
+  class InToRegStorageMapper {
+   public:
+    virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+    virtual ~InToRegStorageMapper() {
+    }
+  };
+
+  // Derived class for the ARM backend.
+  class InToRegStorageArmMapper FINAL : public InToRegStorageMapper {
+   public:
+    InToRegStorageArmMapper()
+        : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) {
+    }
+
+    virtual ~InToRegStorageArmMapper() {
+    }
+
+    RegStorage GetNextReg(bool is_double_or_float, bool is_wide) OVERRIDE;
+
+   private:
+    uint32_t cur_core_reg_;
+    uint32_t cur_fp_reg_;
+    uint32_t cur_fp_double_reg_;
+  };
+
+  // Class mapping arguments to RegStorage. The mapping object is initialized by a mapper.
+  class InToRegStorageMapping FINAL {
+   public:
+    InToRegStorageMapping()
+        : max_mapped_in_(0), is_there_stack_mapped_(false), initialized_(false) {
+    }
+
+    int GetMaxMappedIn() const {
+      return max_mapped_in_;
+    }
+
+    bool IsThereStackMapped() const {
+      return is_there_stack_mapped_;
+    }
+
+    bool IsInitialized() const {
+      return initialized_;
+    }
+
+    void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+    RegStorage Get(int in_position) const;
+
+   private:
+    std::map<int, RegStorage> mapping_;
+    int max_mapped_in_;
+    bool is_there_stack_mapped_;
+    bool initialized_;
+  };
+
   public:
     ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
 
@@ -47,15 +105,30 @@
     void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg);
 
     // Required for target - register utilities.
-    RegStorage TargetReg(SpecialTargetRegister reg);
-    RegStorage GetArgMappingToPhysicalReg(int arg_num);
-    RegLocation GetReturnAlt();
-    RegLocation GetReturnWideAlt();
-    RegLocation LocCReturn();
-    RegLocation LocCReturnRef();
-    RegLocation LocCReturnDouble();
-    RegLocation LocCReturnFloat();
-    RegLocation LocCReturnWide();
+    RegStorage TargetReg(SpecialTargetRegister reg) OVERRIDE;
+    RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) OVERRIDE {
+      if (wide_kind == kWide) {
+        DCHECK((kArg0 <= reg && reg < kArg3) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg));
+        RegStorage ret_reg = RegStorage::MakeRegPair(TargetReg(reg),
+            TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
+        if (ret_reg.IsFloat()) {
+          // Treat the pair as a double, to be consistent with register allocation.
+          ret_reg = As64BitFloatReg(ret_reg);
+        }
+        return ret_reg;
+      } else {
+        return TargetReg(reg);
+      }
+    }
+
+    RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE;
+    RegLocation GetReturnAlt() OVERRIDE;
+    RegLocation GetReturnWideAlt() OVERRIDE;
+    RegLocation LocCReturn() OVERRIDE;
+    RegLocation LocCReturnRef() OVERRIDE;
+    RegLocation LocCReturnDouble() OVERRIDE;
+    RegLocation LocCReturnFloat() OVERRIDE;
+    RegLocation LocCReturnWide() OVERRIDE;
     ResourceMask GetRegMaskCommon(const RegStorage& reg) const OVERRIDE;
     void AdjustSpillMask();
     void ClobberCallerSave();
@@ -87,15 +160,15 @@
 
     // Required for target - Dalvik-level generators.
     void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2) OVERRIDE;
+                        RegLocation rl_src2, int flags) OVERRIDE;
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2);
+                           RegLocation rl_src1, RegLocation rl_src2, int flags);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                      RegLocation rl_index, RegLocation rl_dest, int scale);
     void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
                      RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_shift);
+                           RegLocation rl_src1, RegLocation rl_shift, int flags);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -210,6 +283,19 @@
     LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE;
     size_t GetInstructionOffset(LIR* lir);
 
+    int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                             NextCallInsn next_call_insn,
+                             const MethodReference& target_method,
+                             uint32_t vtable_idx,
+                             uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                             bool skip_this) OVERRIDE;
+    int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+                           NextCallInsn next_call_insn,
+                           const MethodReference& target_method,
+                           uint32_t vtable_idx,
+                           uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+                           bool skip_this) OVERRIDE;
+
   private:
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
     void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -224,12 +310,12 @@
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                          bool is_div, bool check_zero);
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
-    typedef struct {
+                          bool is_div, int flags) OVERRIDE;
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
+    struct EasyMultiplyOp {
       OpKind op;
       uint32_t shift;
-    } EasyMultiplyOp;
+    };
     bool GetEasyMultiplyOp(int lit, EasyMultiplyOp* op);
     bool GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops);
     void GenEasyMultiplyTwoOps(RegStorage r_dest, RegStorage r_src, EasyMultiplyOp* ops);
@@ -239,6 +325,36 @@
     static constexpr ResourceMask EncodeArmRegFpcsList(int reg_list);
 
     ArenaVector<LIR*> call_method_insns_;
+
+    /**
+     * @brief Given a float register pair, returns the Solo64 float register.
+     * @param reg #RegStorage containing a float register pair (e.g. @c s2 and @c s3).
+     * @return A Solo64 float register mapping to the register pair (e.g. @c d1).
+     */
+    static RegStorage As64BitFloatReg(RegStorage reg) {
+      DCHECK(reg.IsFloat());
+
+      RegStorage low = reg.GetLow();
+      RegStorage high = reg.GetHigh();
+      DCHECK((low.GetRegNum() % 2 == 0) && (low.GetRegNum() + 1 == high.GetRegNum()));
+
+      return RegStorage::FloatSolo64(low.GetRegNum() / 2);
+    }
+
+    /**
+     * @brief Given a Solo64 float register, returns the float register pair.
+     * @param reg #RegStorage containing a Solo64 float register (e.g. @c d1).
+     * @return A float register pair mapping to the Solo64 float register (e.g. @c s2 and @c s3).
+     */
+    static RegStorage As64BitFloatRegPair(RegStorage reg) {
+      DCHECK(reg.IsDouble() && reg.Is64BitSolo());
+
+      int reg_num = reg.GetRegNum();
+      return RegStorage::MakeRegPair(RegStorage::FloatSolo32(reg_num * 2),
+                                     RegStorage::FloatSolo32(reg_num * 2 + 1));
+    }
+
+    InToRegStorageMapping in_to_reg_storage_mapping_;
 };
 
 }  // namespace art
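
A condensed sketch of how the two new classes cooperate (simplified stand-in types, not the ART
API): Initialize() walks the argument locations once through a mapper, recording only the
positions that landed in registers; Get() then answers per-position queries, with an invalid
register meaning "passed on the stack".

    #include <map>

    struct Reg { int id; bool Valid() const { return id >= 0; } };
    struct ArgLoc { bool fp; bool wide; };

    struct Mapper {  // hands out the next free argument register
      int next_core = 1;  // r1..r3 carry the first core args (r0 holds the method)
      Reg Next(const ArgLoc& /*arg*/) { return next_core <= 3 ? Reg{next_core++} : Reg{-1}; }
    };

    struct Mapping {
      std::map<int, Reg> m;
      void Initialize(const ArgLoc* args, int n, Mapper* mapper) {
        for (int i = 0; i < n; ++i) {
          Reg r = mapper->Next(args[i]);
          if (r.Valid()) m[i] = r;  // unmapped positions stay on the stack
        }
      }
      Reg Get(int i) const { auto it = m.find(i); return it == m.end() ? Reg{-1} : it->second; }
    };
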
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 018dc1c..8e08f5f 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -442,6 +442,15 @@
     bool src_fp = r_src.IsFloat();
     DCHECK(r_dest.Is64Bit());
     DCHECK(r_src.Is64Bit());
+    // Note: If the register was allocated by the register allocator, it should never be a pair.
+    // But some functions in Mir2Lir assume 64-bit registers are 32-bit register pairs.
+    // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
+    if (dest_fp && r_dest.IsPair()) {
+      r_dest = As64BitFloatReg(r_dest);
+    }
+    if (src_fp && r_src.IsPair()) {
+      r_src = As64BitFloatReg(r_src);
+    }
     if (dest_fp) {
       if (src_fp) {
         OpRegCopy(r_dest, r_src);
@@ -678,7 +687,7 @@
 }
 
 RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2, bool is_div, bool check_zero) {
+                      RegLocation rl_src2, bool is_div, int flags) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
   return rl_dest;
 }
@@ -1264,7 +1273,7 @@
 }
 
 void ArmMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                RegLocation rl_src2) {
+                                RegLocation rl_src2, int flags) {
   switch (opcode) {
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
@@ -1279,7 +1288,7 @@
   }
 
   // Fallback for all other ops.
-  Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
 }
 
 /*
@@ -1464,7 +1473,8 @@
 
 
 void ArmMir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
+                                   int flags) {
   rl_src = LoadValueWide(rl_src, kCoreReg);
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
@@ -1537,11 +1547,12 @@
 }
 
 void ArmMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                                   int flags) {
   if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
     if (!rl_src2.is_const) {
       // Don't bother with special handling for subtract from immediate.
-      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
       return;
     }
   } else {
@@ -1552,7 +1563,7 @@
     }
   }
   if (PartiallyIntersects(rl_src1, rl_dest)) {
-    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
     return;
   }
   DCHECK(rl_src2.is_const);
@@ -1569,7 +1580,7 @@
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
-        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
         return;
       }
       break;
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index dd8f7fe..7100a28 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -89,7 +89,7 @@
 
 // Return a target-dependent special register.
 RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) {
-  RegStorage res_reg = RegStorage::InvalidReg();
+  RegStorage res_reg;
   switch (reg) {
     case kSelf: res_reg = rs_rARM_SELF; break;
 #ifdef ARM_R4_SUSPEND_FLAG
@@ -104,10 +104,22 @@
     case kArg1: res_reg = rs_r1; break;
     case kArg2: res_reg = rs_r2; break;
     case kArg3: res_reg = rs_r3; break;
-    case kFArg0: res_reg = rs_r0; break;
-    case kFArg1: res_reg = rs_r1; break;
-    case kFArg2: res_reg = rs_r2; break;
-    case kFArg3: res_reg = rs_r3; break;
+    case kFArg0: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r0 : rs_fr0; break;
+    case kFArg1: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r1 : rs_fr1; break;
+    case kFArg2: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r2 : rs_fr2; break;
+    case kFArg3: res_reg = kArm32QuickCodeUseSoftFloat ? rs_r3 : rs_fr3; break;
+    case kFArg4: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr4; break;
+    case kFArg5: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr5; break;
+    case kFArg6: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr6; break;
+    case kFArg7: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr7; break;
+    case kFArg8: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr8; break;
+    case kFArg9: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr9; break;
+    case kFArg10: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr10; break;
+    case kFArg11: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr11; break;
+    case kFArg12: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr12; break;
+    case kFArg13: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr13; break;
+    case kFArg14: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr14; break;
+    case kFArg15: res_reg = kArm32QuickCodeUseSoftFloat ? RegStorage::InvalidReg() : rs_fr15; break;
     case kRet0: res_reg = rs_r0; break;
     case kRet1: res_reg = rs_r1; break;
     case kInvokeTgt: res_reg = rs_rARM_LR; break;
@@ -119,20 +131,6 @@
   return res_reg;
 }
 
-RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
-  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
-  switch (arg_num) {
-    case 0:
-      return rs_r1;
-    case 1:
-      return rs_r2;
-    case 2:
-      return rs_r3;
-    default:
-      return RegStorage::InvalidReg();
-  }
-}
-
 /*
  * Decode the register id.
  */
@@ -718,6 +716,32 @@
   LockTemp(rs_r1);
   LockTemp(rs_r2);
   LockTemp(rs_r3);
+  if (!kArm32QuickCodeUseSoftFloat) {
+    LockTemp(rs_fr0);
+    LockTemp(rs_fr1);
+    LockTemp(rs_fr2);
+    LockTemp(rs_fr3);
+    LockTemp(rs_fr4);
+    LockTemp(rs_fr5);
+    LockTemp(rs_fr6);
+    LockTemp(rs_fr7);
+    LockTemp(rs_fr8);
+    LockTemp(rs_fr9);
+    LockTemp(rs_fr10);
+    LockTemp(rs_fr11);
+    LockTemp(rs_fr12);
+    LockTemp(rs_fr13);
+    LockTemp(rs_fr14);
+    LockTemp(rs_fr15);
+    LockTemp(rs_dr0);
+    LockTemp(rs_dr1);
+    LockTemp(rs_dr2);
+    LockTemp(rs_dr3);
+    LockTemp(rs_dr4);
+    LockTemp(rs_dr5);
+    LockTemp(rs_dr6);
+    LockTemp(rs_dr7);
+  }
 }
 
 /* To be used when explicitly managing register use */
@@ -726,6 +750,32 @@
   FreeTemp(rs_r1);
   FreeTemp(rs_r2);
   FreeTemp(rs_r3);
+  if (!kArm32QuickCodeUseSoftFloat) {
+    FreeTemp(rs_fr0);
+    FreeTemp(rs_fr1);
+    FreeTemp(rs_fr2);
+    FreeTemp(rs_fr3);
+    FreeTemp(rs_fr4);
+    FreeTemp(rs_fr5);
+    FreeTemp(rs_fr6);
+    FreeTemp(rs_fr7);
+    FreeTemp(rs_fr8);
+    FreeTemp(rs_fr9);
+    FreeTemp(rs_fr10);
+    FreeTemp(rs_fr11);
+    FreeTemp(rs_fr12);
+    FreeTemp(rs_fr13);
+    FreeTemp(rs_fr14);
+    FreeTemp(rs_fr15);
+    FreeTemp(rs_dr0);
+    FreeTemp(rs_dr1);
+    FreeTemp(rs_dr2);
+    FreeTemp(rs_dr3);
+    FreeTemp(rs_dr4);
+    FreeTemp(rs_dr5);
+    FreeTemp(rs_dr6);
+    FreeTemp(rs_dr7);
+  }
 }
 
 RegStorage ArmMir2Lir::LoadHelper(QuickEntrypointEnum trampoline) {
@@ -847,4 +897,313 @@
   Mir2Lir::InstallLiteralPools();
 }
 
+RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+  const RegStorage coreArgMappingToPhysicalReg[] =
+      {rs_r1, rs_r2, rs_r3};
+  const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg);
+  const RegStorage fpArgMappingToPhysicalReg[] =
+      {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+       rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15};
+  const uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg);
+  COMPILE_ASSERT(fpArgMappingToPhysicalRegSize % 2 == 0, knum_of_fp_arg_regs_not_even);
+
+  if (kArm32QuickCodeUseSoftFloat) {
+    is_double_or_float = false;  // Regard double as long, float as int.
+    is_wide = false;  // Map long separately.
+  }
+
+  RegStorage result = RegStorage::InvalidReg();
+  if (is_double_or_float) {
+    // TODO: Remove "cur_fp_double_reg_ % 2 != 0" when we return double as double.
+    if (is_wide || cur_fp_double_reg_ % 2 != 0) {
+      cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2));
+      if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) {
+        // TODO: Replace with the following code when FlushIns() supports 64-bit registers.
+        // result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_],
+        //                                  fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]);
+        // result = As64BitFloatReg(result);
+        // cur_fp_double_reg_ += 2;
+        result = fpArgMappingToPhysicalReg[cur_fp_double_reg_];
+        cur_fp_double_reg_++;
+      }
+    } else {
+      // TODO: Remove the check when we return double as double.
+      DCHECK_EQ(cur_fp_double_reg_ % 2, 0U);
+      if (cur_fp_reg_ % 2 == 0) {
+        cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_);
+      }
+      if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+        result = fpArgMappingToPhysicalReg[cur_fp_reg_];
+        cur_fp_reg_++;
+      }
+    }
+  } else {
+    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
+      // TODO: Enable the following code when FlushIns() supports 64-bit registers.
+      // if (is_wide && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+      //   result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]);
+      // }
+    }
+  }
+  return result;
+}
+
+RegStorage ArmMir2Lir::InToRegStorageMapping::Get(int in_position) const {
+  DCHECK(IsInitialized());
+  auto res = mapping_.find(in_position);
+  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
+void ArmMir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
+                                                   InToRegStorageMapper* mapper) {
+  DCHECK(mapper != nullptr);
+  max_mapped_in_ = -1;
+  is_there_stack_mapped_ = false;
+  for (int in_position = 0; in_position < count; in_position++) {
+     RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
+                                         arg_locs[in_position].wide);
+     if (reg.Valid()) {
+       mapping_[in_position] = reg;
+       // TODO: Enable the following code when FlushIns() supports 64-bit argument registers.
+       // if (arg_locs[in_position].wide) {
+       //  if (reg.Is32Bit()) {
+       //    // As it is a split long, the hi-part is on stack.
+       //    is_there_stack_mapped_ = true;
+       //  }
+       //  // We covered 2 v-registers, so skip the next one
+       //  in_position++;
+       // }
+       max_mapped_in_ = std::max(max_mapped_in_, in_position);
+     } else {
+       is_there_stack_mapped_ = true;
+     }
+  }
+  initialized_ = true;
+}
+
+// TODO: Should be able to return long and double registers.
+// Need to check some common code first, as this will break some assumptions.
+RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+  if (!in_to_reg_storage_mapping_.IsInitialized()) {
+    int start_vreg = mir_graph_->GetFirstInVR();
+    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
+
+    InToRegStorageArmMapper mapper;
+    in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper);
+  }
+  return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
+int ArmMir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+                                     int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+                                     const MethodReference& target_method,
+                                     uint32_t vtable_idx, uintptr_t direct_code,
+                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
+  if (kArm32QuickCodeUseSoftFloat) {
+    return Mir2Lir::GenDalvikArgsNoRange(info, call_state, pcrLabel, next_call_insn, target_method,
+                                         vtable_idx, direct_code, direct_method, type, skip_this);
+  } else {
+    return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, vtable_idx,
+                              direct_code, direct_method, type, skip_this);
+  }
+}
+
+int ArmMir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+                                   LIR** pcrLabel, NextCallInsn next_call_insn,
+                                   const MethodReference& target_method,
+                                   uint32_t vtable_idx, uintptr_t direct_code,
+                                   uintptr_t direct_method, InvokeType type, bool skip_this) {
+  if (kArm32QuickCodeUseSoftFloat) {
+    return Mir2Lir::GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method,
+                                       vtable_idx, direct_code, direct_method, type, skip_this);
+  }
+
+  // TODO: Rework the implementation when argument registers can be long or double.
+
+  /* If no arguments, just return */
+  if (info->num_arg_words == 0) {
+    return call_state;
+  }
+
+  const int start_index = skip_this ? 1 : 0;
+
+  InToRegStorageArmMapper mapper;
+  InToRegStorageMapping in_to_reg_storage_mapping;
+  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+  int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1);
+
+  // First of all, check whether it makes sense to use bulk copying.
+  // Bulk copying is done only for the range case.
+  // TODO: Make the threshold of 2 a named constant.
+  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+    // Scan the rest of the args - if in phys_reg flush to memory
+    for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) {
+      RegLocation loc = info->args[next_arg];
+      if (loc.wide) {
+        // TODO: Only flush hi-part.
+        if (loc.high_word) {
+          loc = info->args[--next_arg];
+        }
+        loc = UpdateLocWide(loc);
+        if (loc.location == kLocPhysReg) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
+        }
+        next_arg += 2;
+      } else {
+        loc = UpdateLoc(loc);
+        if (loc.location == kLocPhysReg) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (loc.ref) {
+            StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+          } else {
+            StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32,
+                          kNotVolatile);
+          }
+        }
+        next_arg++;
+      }
+    }
+
+    // The rest can be copied together
+    int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low);
+    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1,
+                                                   cu_->instruction_set);
+
+    int current_src_offset = start_offset;
+    int current_dest_offset = outs_offset;
+
+    // Only Dalvik regs are accessed in this loop; no next_call_insn() calls.
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+    while (regs_left_to_pass_via_stack > 0) {
+      /*
+       * TODO: Improve by adding block copy for large numbers of arguments.  This
+       * should be done, if possible, as a target-dependent helper.  For now, just
+       * copy a Dalvik vreg at a time.
+       */
+      // Moving 32-bits via general purpose register.
+      size_t bytes_to_move = sizeof(uint32_t);
+
+      // Instead of allocating a new temp, simply reuse one of the registers being used
+      // for argument passing.
+      RegStorage temp = TargetReg(kArg3, kNotWide);
+
+      // Now load the argument VR and store to the outs.
+      Load32Disp(TargetPtrReg(kSp), current_src_offset, temp);
+      Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp);
+
+      current_src_offset += bytes_to_move;
+      current_dest_offset += bytes_to_move;
+      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+    }
+    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+  }
+
+  // Now handle the arguments that were not mapped to registers, if any.
+  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+    RegStorage regWide = TargetReg(kArg2, kWide);
+    for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+      RegLocation rl_arg = info->args[i];
+      rl_arg = UpdateRawLoc(rl_arg);
+      RegStorage reg = in_to_reg_storage_mapping.Get(i);
+      // TODO: Only pass split wide hi-part via stack.
+      if (!reg.Valid() || rl_arg.wide) {
+        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+        {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          if (rl_arg.wide) {
+            if (rl_arg.location == kLocPhysReg) {
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
+            } else {
+              LoadValueDirectWideFixed(rl_arg, regWide);
+              StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile);
+            }
+          } else {
+            if (rl_arg.location == kLocPhysReg) {
+              if (rl_arg.ref) {
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile);
+              } else {
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
+              }
+            } else {
+              if (rl_arg.ref) {
+                RegStorage regSingle = TargetReg(kArg2, kRef);
+                LoadValueDirectFixed(rl_arg, regSingle);
+                StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile);
+              } else {
+                RegStorage regSingle = TargetReg(kArg2, kNotWide);
+                LoadValueDirectFixed(rl_arg, regSingle);
+                StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile);
+              }
+            }
+          }
+        }
+
+        call_state = next_call_insn(cu_, info, call_state, target_method,
+                                    vtable_idx, direct_code, direct_method, type);
+      }
+      if (rl_arg.wide) {
+        i++;
+      }
+    }
+  }
+
+  // Finish with mapped registers
+  for (int i = start_index; i <= last_mapped_in; i++) {
+    RegLocation rl_arg = info->args[i];
+    rl_arg = UpdateRawLoc(rl_arg);
+    RegStorage reg = in_to_reg_storage_mapping.Get(i);
+    if (reg.Valid()) {
+      if (reg.Is64Bit()) {
+        LoadValueDirectWideFixed(rl_arg, reg);
+      } else {
+        // TODO: A split long should be the only case we need to care about.
+        if (rl_arg.wide) {
+          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
+          int high_word = rl_arg.high_word ? 1 : 0;
+          rl_arg = high_word ? info->args[i - 1] : rl_arg;
+          if (rl_arg.location == kLocPhysReg) {
+            RegStorage rs_arg = rl_arg.reg;
+            if (rs_arg.IsDouble() && rs_arg.Is64BitSolo()) {
+              rs_arg = As64BitFloatRegPair(rs_arg);
+            }
+            RegStorage rs_arg_low = rs_arg.GetLow();
+            RegStorage rs_arg_high = rs_arg.GetHigh();
+            OpRegCopy(reg, high_word ? rs_arg_high : rs_arg_low);
+          } else {
+            Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + high_word), reg);
+          }
+        } else {
+          LoadValueDirectFixed(rl_arg, reg);
+        }
+      }
+      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                                  direct_code, direct_method, type);
+    }
+    if (reg.Is64Bit()) {
+      i++;
+    }
+  }
+
+  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+                           direct_code, direct_method, type);
+  if (pcrLabel) {
+    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) {
+      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags);
+    } else {
+      *pcrLabel = nullptr;
+      // In lieu of generating a check for kArg1 being null, we need to
+      // perform a load when doing implicit checks.
+      RegStorage tmp = AllocTemp();
+      Load32Disp(TargetReg(kArg1, kRef), 0, tmp);
+      MarkPossibleNullPointerException(info->opt_flags);
+      FreeTemp(tmp);
+    }
+  }
+  return call_state;
+}
+
 }  // namespace art
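
The interleaving of single and double FP argument registers in GetNextReg() is easier to see in
isolation. Below is a hedged sketch of the policy it approximates, under a simplified register
model (plain s-register indices 0..15, doubles occupying even-aligned pairs); note the code above
still hands doubles out one 32-bit half at a time, per its TODOs:

    #include <algorithm>

    struct VfpArgAlloc {
      unsigned next_s = 0;  // next candidate single-precision slot
      unsigned next_d = 0;  // next candidate even slot for doubles (as an s-reg index)

      // Returns the starting s-register index, or -1 when the argument goes on the stack.
      int Next(bool is_double) {
        if (is_double) {
          next_d = std::max(next_d, (next_s + 1u) & ~1u);  // round up to an even s-reg
          if (next_d + 2 > 16) return -1;
          int reg = static_cast<int>(next_d);
          next_d += 2;  // consumes sN/sN+1, i.e. d(N/2)
          return reg;
        }
        if (next_s % 2 == 0) next_s = std::max(next_s, next_d);  // no odd hole to back-fill
        if (next_s >= 16) return -1;
        return static_cast<int>(next_s++);
      }
    };

With arguments (float, double, float) this hands out s0, then s2/s3 (= d1), then back-fills s1,
mirroring how cur_fp_reg_ and cur_fp_double_reg_ chase each other above.
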
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 09acf4c..ce2de65 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -1007,6 +1007,12 @@
     // Intentional fall-though.
     case k64:
       if (r_src.IsFloat()) {
+        // Note: If the register was allocated by the register allocator, it should never be a pair.
+        // But some functions in Mir2Lir assume 64-bit registers are 32-bit register pairs.
+        // TODO: Rework Mir2Lir::LoadArg() and Mir2Lir::LoadArgDirect().
+        if (r_src.IsPair()) {
+          r_src = As64BitFloatReg(r_src);
+        }
         DCHECK(!r_src.IsPair());
         store = LoadStoreUsingInsnWithOffsetImm8Shl2(kThumb2Vstrd, r_base, displacement, r_src);
       } else {
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 510bd4c..9f02606 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -141,13 +141,13 @@
   void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                       RegLocation lr_shift) OVERRIDE;
   void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2) OVERRIDE;
+                         RegLocation rl_src2, int flags) OVERRIDE;
   void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
                    RegLocation rl_dest, int scale) OVERRIDE;
   void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation rl_index,
                    RegLocation rl_src, int scale, bool card_mark) OVERRIDE;
   void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_shift) OVERRIDE;
+                         RegLocation rl_shift, int flags) OVERRIDE;
   void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                         RegLocation rl_src2) OVERRIDE;
   void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -173,7 +173,7 @@
   bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE;
   void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2) OVERRIDE;
+                      RegLocation rl_src2, int flags) OVERRIDE;
   RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div)
       OVERRIDE;
   RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div)
@@ -363,8 +363,8 @@
   void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
   void AssignDataOffsets();
   RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        bool is_div, bool check_zero);
-  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+                        bool is_div, int flags) OVERRIDE;
+  RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
   size_t GetLoadStoreSize(LIR* lir);
 
   bool SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
@@ -413,7 +413,7 @@
   void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
   void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
   void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                     RegLocation rl_src2, bool is_div);
+                     RegLocation rl_src2, bool is_div, int flags);
 
   InToRegStorageMapping in_to_reg_storage_mapping_;
   static const A64EncodingMap EncodingMap[kA64Last];
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index abcb30f..6e7241d 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -614,7 +614,7 @@
 }
 
 RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                    RegLocation rl_src2, bool is_div, bool check_zero) {
+                                    RegLocation rl_src2, bool is_div, int flags) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
   return rl_dest;
 }
@@ -1020,7 +1020,7 @@
 }
 
 void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
-                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div) {
+                                 RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) {
   if (rl_src2.is_const) {
     DCHECK(rl_src2.wide);
     int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
@@ -1032,7 +1032,9 @@
   RegLocation rl_result;
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
-  GenDivZeroCheck(rl_src2.reg);
+  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
+    GenDivZeroCheck(rl_src2.reg);
+  }
   rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
   StoreValueWide(rl_dest, rl_result);
 }
@@ -1067,7 +1069,7 @@
 }
 
 void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                  RegLocation rl_src1, RegLocation rl_src2) {
+                                  RegLocation rl_src1, RegLocation rl_src2, int flags) {
   switch (opcode) {
     case Instruction::NOT_LONG:
       GenNotLong(rl_dest, rl_src2);
@@ -1086,11 +1088,11 @@
       return;
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
+      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
       return;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
+      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
       return;
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
@@ -1312,7 +1314,8 @@
 }
 
 void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
-                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift) {
+                                     RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift,
+                                     int flags) {
   OpKind op = kOpBkpt;
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
@@ -1344,7 +1347,7 @@
 }
 
 void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                     RegLocation rl_src1, RegLocation rl_src2) {
+                                     RegLocation rl_src1, RegLocation rl_src2, int flags) {
   OpKind op = kOpBkpt;
   switch (opcode) {
     case Instruction::ADD_LONG:
@@ -1373,7 +1376,7 @@
 
   if (op == kOpSub) {
     if (!rl_src2.is_const) {
-      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
     }
   } else {
     // Associativity.
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index 0883694..6985b73 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -768,8 +768,8 @@
       opcode = kA64Sub4RRre;
       break;
     default:
-      LOG(FATAL) << "Unimplemented opcode: " << op;
-      break;
+      UNIMPLEMENTED(FATAL) << "Unimplemented opcode: " << op;
+      UNREACHABLE();
   }
   A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
 
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index a33d15f..2abfcc3 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1501,7 +1501,7 @@
 
 
 void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                            RegLocation rl_src1, RegLocation rl_src2) {
+                            RegLocation rl_src1, RegLocation rl_src2, int flags) {
   DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64);
   OpKind op = kOpBkpt;
   bool is_div_rem = false;
@@ -1600,7 +1600,7 @@
     if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) {
       rl_src1 = LoadValue(rl_src1, kCoreReg);
       rl_src2 = LoadValue(rl_src2, kCoreReg);
-      if (check_zero) {
+      if (check_zero && (flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
         GenDivZeroCheck(rl_src2.reg);
       }
       rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
@@ -1612,7 +1612,7 @@
         // calculate using a MUL and subtract.
         rl_src1 = LoadValue(rl_src1, kCoreReg);
         rl_src2 = LoadValue(rl_src2, kCoreReg);
-        if (check_zero) {
+        if (check_zero && (flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
           GenDivZeroCheck(rl_src2.reg);
         }
         rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, op == kOpDiv);
@@ -1626,7 +1626,7 @@
       LoadValueDirectFixed(rl_src2, TargetReg(kArg1, kNotWide));
       RegStorage r_tgt = CallHelperSetup(kQuickIdivmod);
       LoadValueDirectFixed(rl_src1, TargetReg(kArg0, kNotWide));
-      if (check_zero) {
+      if (check_zero && (flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
         GenDivZeroCheck(TargetReg(kArg1, kNotWide));
       }
       // NOTE: callout here is not a safepoint.
@@ -1914,7 +1914,7 @@
 }
 
 void Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                             RegLocation rl_src1, RegLocation rl_src2) {
+                             RegLocation rl_src1, RegLocation rl_src2, int flags) {
   RegLocation rl_result;
   OpKind first_op = kOpBkpt;
   OpKind second_op = kOpBkpt;
@@ -1999,7 +1999,9 @@
       RegStorage r_tmp2 = TargetReg(kArg2, kWide);
       LoadValueDirectWideFixed(rl_src2, r_tmp2);
       RegStorage r_tgt = CallHelperSetup(target);
-      GenDivZeroCheckWide(r_tmp2);
+      if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
+        GenDivZeroCheckWide(r_tmp2);
+      }
       LoadValueDirectWideFixed(rl_src1, r_tmp1);
       // NOTE: callout here is not a safepoint
       CallHelper(r_tgt, target, false /* not safepoint */);
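
The new int flags parameter threads the MIR optimization flags down to the division generators so
that a divisor the optimizer has proven non-zero skips its runtime check. The pattern in
miniature (illustrative flag value and types, not ART's):

    #include <cstdint>
    #include <stdexcept>

    constexpr uint32_t kIgnoreDivZeroCheck = 1u << 0;  // assumption: illustrative bit position

    int32_t Div(int32_t num, int32_t den, uint32_t flags) {
      if ((flags & kIgnoreDivZeroCheck) == 0 && den == 0) {
        throw std::runtime_error("ArithmeticException: divide by zero");
      }
      return num / den;  // check elided when the optimizer set the flag
    }
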
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 2bef7c5..bc4d00b 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -248,13 +248,13 @@
         if (cu_->instruction_set == kMips) {
           LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg1, kNotWide));
         } else {
-          LoadValueDirectFixed(arg1, TargetReg(kArg1, kNotWide));
+          LoadValueDirectFixed(arg1, TargetReg(arg1.fp ? kFArg1 : kArg1, kNotWide));
         }
       } else {
         if (cu_->instruction_set == kMips) {
           LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg2 : kArg2, kWide));
         } else {
-          LoadValueDirectWideFixed(arg1, TargetReg(kArg1, kWide));
+          LoadValueDirectWideFixed(arg1, TargetReg(arg1.fp ? kFArg1 : kArg1, kWide));
         }
       }
     } else {
@@ -365,6 +365,7 @@
  * ArgLocs is an array of location records describing the incoming arguments
  * with one location record per word of argument.
  */
+// TODO: Support 64-bit argument registers.
 void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
   /*
    * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod>
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index bd709f3..508d474 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -86,13 +86,13 @@
 
     // Required for target - Dalvik-level generators.
     void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2);
+                           RegLocation rl_src1, RegLocation rl_src2, int flags);
     void GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                      RegLocation rl_index, RegLocation rl_dest, int scale);
     void GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                           RegLocation rl_shift);
+                           RegLocation rl_shift, int flags);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                           RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
@@ -108,7 +108,7 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                        RegLocation rl_src2) OVERRIDE;
+                        RegLocation rl_src2, int flags) OVERRIDE;
     RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div);
     RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div);
     void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
@@ -190,8 +190,8 @@
 
     void ConvertShortToLongBranch(LIR* lir);
     RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                          RegLocation rl_src2, bool is_div, bool check_zero);
-    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
+                          RegLocation rl_src2, bool is_div, int flags) OVERRIDE;
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) OVERRIDE;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 30aa611..baf7311 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -263,7 +263,7 @@
 }
 
 RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2, bool is_div, bool check_zero) {
+                      RegLocation rl_src2, bool is_div, int flags) {
   LOG(FATAL) << "Unexpected use of GenDivRem for Mips";
   return rl_dest;
 }
@@ -437,7 +437,7 @@
 }
 
 void MipsMir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                 RegLocation rl_src2) {
+                                 RegLocation rl_src2, int flags) {
   switch (opcode) {
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
@@ -456,7 +456,7 @@
   }
 
   // Fallback for all other ops.
-  Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
 }
 
 void MipsMir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
@@ -628,15 +628,16 @@
 }
 
 void MipsMir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                    RegLocation rl_src1, RegLocation rl_shift) {
+                                    RegLocation rl_src1, RegLocation rl_shift, int flags) {
   // Default implementation is just to ignore the constant case.
   GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift);
 }
 
 void MipsMir2Lir::GenArithImmOpLong(Instruction::Code opcode,
-                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                                    int flags) {
   // Default - bail to non-const handler.
-  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 4399981..408606d 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -928,12 +928,12 @@
 
     case Instruction::NEG_INT:
     case Instruction::NOT_INT:
-      GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[0]);
+      GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[0], opt_flags);
       break;
 
     case Instruction::NEG_LONG:
     case Instruction::NOT_LONG:
-      GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[0]);
+      GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[0], opt_flags);
       break;
 
     case Instruction::NEG_FLOAT:
@@ -993,7 +993,7 @@
         GenArithOpIntLit(opcode, rl_dest, rl_src[0],
                              mir_graph_->ConstantValue(rl_src[1].orig_sreg));
       } else {
-        GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1]);
+        GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
       }
       break;
 
@@ -1013,7 +1013,7 @@
           InexpensiveConstantInt(mir_graph_->ConstantValue(rl_src[1]), opcode)) {
         GenArithOpIntLit(opcode, rl_dest, rl_src[0], mir_graph_->ConstantValue(rl_src[1]));
       } else {
-        GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1]);
+        GenArithOpInt(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
       }
       break;
 
@@ -1028,7 +1028,7 @@
     case Instruction::OR_LONG_2ADDR:
     case Instruction::XOR_LONG_2ADDR:
       if (rl_src[0].is_const || rl_src[1].is_const) {
-        GenArithImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1]);
+        GenArithImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
         break;
       }
       FALLTHROUGH_INTENDED;
@@ -1038,7 +1038,7 @@
     case Instruction::MUL_LONG_2ADDR:
     case Instruction::DIV_LONG_2ADDR:
     case Instruction::REM_LONG_2ADDR:
-      GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[1]);
+      GenArithOpLong(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
       break;
 
     case Instruction::SHL_LONG:
@@ -1048,7 +1048,7 @@
     case Instruction::SHR_LONG_2ADDR:
     case Instruction::USHR_LONG_2ADDR:
       if (rl_src[1].is_const) {
-        GenShiftImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1]);
+        GenShiftImmOpLong(opcode, rl_dest, rl_src[0], rl_src[1], opt_flags);
       } else {
         GenShiftOpLong(opcode, rl_dest, rl_src[0], rl_src[1]);
       }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ea93bbe..f4e6dfe 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -857,7 +857,7 @@
     void GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src, int lit);
     virtual void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                RegLocation rl_src1, RegLocation rl_src2);
+                                RegLocation rl_src1, RegLocation rl_src2, int flags);
     void GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_dest, RegLocation rl_src);
     virtual void GenSuspendTest(int opt_flags);
     virtual void GenSuspendTestAndBranch(int opt_flags, LIR* target);
@@ -865,7 +865,7 @@
     // This will be overridden by x86 implementation.
     virtual void GenConstWide(RegLocation rl_dest, int64_t value);
     virtual void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                       RegLocation rl_src1, RegLocation rl_src2);
+                       RegLocation rl_src1, RegLocation rl_src2, int flags);
 
     // Shared by all targets - implemented in gen_invoke.cc.
     LIR* CallHelper(RegStorage r_tgt, QuickEntrypointEnum trampoline, bool safepoint_pc,
@@ -1191,13 +1191,17 @@
      */
     virtual RegStorage TargetReg(SpecialTargetRegister reg, WideKind wide_kind) {
       if (wide_kind == kWide) {
-        DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg7) || (kRet0 == reg));
+        DCHECK((kArg0 <= reg && reg < kArg7) || (kFArg0 <= reg && reg < kFArg15) || (kRet0 == reg));
         COMPILE_ASSERT((kArg1 == kArg0 + 1) && (kArg2 == kArg1 + 1) && (kArg3 == kArg2 + 1) &&
                        (kArg4 == kArg3 + 1) && (kArg5 == kArg4 + 1) && (kArg6 == kArg5 + 1) &&
                        (kArg7 == kArg6 + 1), kargs_range_unexpected);
         COMPILE_ASSERT((kFArg1 == kFArg0 + 1) && (kFArg2 == kFArg1 + 1) && (kFArg3 == kFArg2 + 1) &&
                        (kFArg4 == kFArg3 + 1) && (kFArg5 == kFArg4 + 1) && (kFArg6 == kFArg5 + 1) &&
-                       (kFArg7 == kFArg6 + 1), kfargs_range_unexpected);
+                       (kFArg7 == kFArg6 + 1) && (kFArg8 == kFArg7 + 1) && (kFArg9 == kFArg8 + 1) &&
+                       (kFArg10 == kFArg9 + 1) && (kFArg11 == kFArg10 + 1) &&
+                       (kFArg12 == kFArg11 + 1) && (kFArg13 == kFArg12 + 1) &&
+                       (kFArg14 == kFArg13 + 1) && (kFArg15 == kFArg14 + 1),
+                       kfargs_range_unexpected);
         COMPILE_ASSERT(kRet1 == kRet0 + 1, kret_range_unexpected);
         return RegStorage::MakeRegPair(TargetReg(reg),
                                        TargetReg(static_cast<SpecialTargetRegister>(reg + 1)));
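
The wide-pair return above relies on consecutive enumerator values, which is exactly what
the COMPILE_ASSERT chains pin down. A minimal standalone sketch with toy names (not ART's
real SpecialTargetRegister or RegStorage):

#include <cassert>

enum ToySpecialReg { kA0, kA1, kA2, kA3 };
static_assert(kA1 == kA0 + 1 && kA2 == kA1 + 1 && kA3 == kA2 + 1,
              "pairing as (reg, reg + 1) requires contiguous enumerators");

struct ToyRegPair { ToySpecialReg lo, hi; };

ToyRegPair WidePair(ToySpecialReg reg) {
  assert(reg < kA3);  // Mirrors the DCHECK: reg + 1 must stay in range.
  return { reg, static_cast<ToySpecialReg>(reg + 1) };
}
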
@@ -1259,7 +1263,7 @@
 
     // Required for target - Dalvik-level generators.
     virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_src2) = 0;
+                                   RegLocation rl_src1, RegLocation rl_src2, int flags) = 0;
     virtual void GenArithOpDouble(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1,
                                   RegLocation rl_src2) = 0;
@@ -1297,10 +1301,11 @@
      * @param rl_src1 Numerator Location.
      * @param rl_src2 Divisor Location.
      * @param is_div 'true' if this is a division, 'false' for a remainder.
-     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     * @param flags The instruction optimization flags; they may indicate that the
+     * divide-by-zero exception check can be elided.
      */
     virtual RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                  RegLocation rl_src2, bool is_div, bool check_zero) = 0;
+                                  RegLocation rl_src2, bool is_div, int flags) = 0;
     /*
      * @brief Generate an integer div or rem operation by a literal.
      * @param rl_dest Destination Location.
@@ -1382,7 +1387,7 @@
                              RegLocation rl_index, RegLocation rl_src, int scale,
                              bool card_mark) = 0;
     virtual void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src1, RegLocation rl_shift) = 0;
+                                   RegLocation rl_src1, RegLocation rl_shift, int flags) = 0;
 
     // Required for target - single operation generators.
     virtual LIR* OpUnconditionalBranch(LIR* target) = 0;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 6f2a647..8f7bd30 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -425,6 +425,21 @@
     kMirOpSelect,
 };
 
+static const int kInvokeOpcodes[] = {
+    Instruction::INVOKE_VIRTUAL,
+    Instruction::INVOKE_SUPER,
+    Instruction::INVOKE_DIRECT,
+    Instruction::INVOKE_STATIC,
+    Instruction::INVOKE_INTERFACE,
+    Instruction::INVOKE_VIRTUAL_RANGE,
+    Instruction::INVOKE_SUPER_RANGE,
+    Instruction::INVOKE_DIRECT_RANGE,
+    Instruction::INVOKE_STATIC_RANGE,
+    Instruction::INVOKE_INTERFACE_RANGE,
+    Instruction::INVOKE_VIRTUAL_QUICK,
+    Instruction::INVOKE_VIRTUAL_RANGE_QUICK,
+};
+
 // Unsupported opcodes. nullptr can be used when everything is supported. Size of the lists is
 // recorded below.
 static const int* kUnsupportedOpcodes[] = {
@@ -523,8 +538,8 @@
     for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
       int opcode = mir->dalvikInsn.opcode;
       // Check if we support the byte code.
-      if (std::find(unsupport_list, unsupport_list + unsupport_list_size,
-                    opcode) != unsupport_list + unsupport_list_size) {
+      if (std::find(unsupport_list, unsupport_list + unsupport_list_size, opcode)
+          != unsupport_list + unsupport_list_size) {
         if (!MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
           VLOG(compiler) << "Unsupported dalvik byte code : "
               << mir->dalvikInsn.opcode;
@@ -535,11 +550,8 @@
         return false;
       }
       // Check if it invokes a prototype that we cannot support.
-      if (Instruction::INVOKE_VIRTUAL == opcode ||
-          Instruction::INVOKE_SUPER == opcode ||
-          Instruction::INVOKE_DIRECT == opcode ||
-          Instruction::INVOKE_STATIC == opcode ||
-          Instruction::INVOKE_INTERFACE == opcode) {
+      if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode)
+          != kInvokeOpcodes + arraysize(kInvokeOpcodes)) {
         uint32_t invoke_method_idx = mir->dalvikInsn.vB;
         const char* invoke_method_shorty = dex_file.GetMethodShorty(
             dex_file.GetMethodId(invoke_method_idx));
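
The cleanup here collapses the chained equality tests into a table plus std::find. A
self-contained sketch of the same pattern with a toy opcode enum (not the real
Instruction::Code):

#include <algorithm>
#include <cassert>

enum ToyOpcode { kNop, kInvokeVirtual, kInvokeStatic, kReturnVoid };

static const ToyOpcode kToyInvokeOpcodes[] = { kInvokeVirtual, kInvokeStatic };

static bool IsInvoke(ToyOpcode op) {
  const ToyOpcode* end = kToyInvokeOpcodes +
      sizeof(kToyInvokeOpcodes) / sizeof(kToyInvokeOpcodes[0]);
  return std::find(kToyInvokeOpcodes, end, op) != end;
}

int main() {
  assert(IsInvoke(kInvokeStatic));
  assert(!IsInvoke(kReturnVoid));
  return 0;
}
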
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index b3544da..7b5b831 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -180,11 +180,11 @@
 
   // Long instructions.
   void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                      RegLocation rl_src2) OVERRIDE;
+                      RegLocation rl_src2, int flags) OVERRIDE;
   void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2) OVERRIDE;
+                         RegLocation rl_src2, int flags) OVERRIDE;
   void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                         RegLocation rl_src1, RegLocation rl_shift) OVERRIDE;
+                         RegLocation rl_src1, RegLocation rl_shift, int flags) OVERRIDE;
   void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE;
   void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
@@ -314,9 +314,10 @@
    * @param rl_dest Destination for the result.
    * @param rl_lhs Left hand operand.
    * @param rl_rhs Right hand operand.
+   * @param flags The instruction optimization flags.
    */
   void GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_lhs,
-                     RegLocation rl_rhs) OVERRIDE;
+                     RegLocation rl_rhs, int flags) OVERRIDE;
 
   /*
    * @brief Load the Method* of a dex method into the register.
@@ -768,10 +769,11 @@
    * @param rl_src1 Numerator Location.
    * @param rl_src2 Divisor Location.
    * @param is_div 'true' if this is a division, 'false' for a remainder.
-   * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+   * @param flags The instruction optimization flags; they may indicate that the
+   * divide-by-zero exception check can be elided.
    */
   RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
-                        bool is_div, bool check_zero);
+                        bool is_div, int flags);
 
   /*
    * @brief Generate an integer div or rem operation by a literal.
@@ -788,10 +790,11 @@
    * @param rl_dest The destination.
    * @param rl_src The value to be shifted.
    * @param shift_amount How much to shift.
+   * @param flags The instruction optimization flags.
    * @returns the RegLocation of the result.
    */
   RegLocation GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                RegLocation rl_src, int shift_amount);
+                                RegLocation rl_src, int shift_amount, int flags);
   /*
    * Generate an imul of a register by a constant or a better sequence.
    * @param dest Destination Register.
@@ -858,13 +861,13 @@
 
   // Try to do a long multiplication where rl_src2 is a constant. This simplified setup might fail,
   // in which case false will be returned.
-  bool GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val);
+  bool GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags);
   void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                  RegLocation rl_src2);
+                  RegLocation rl_src2, int flags);
   void GenNotLong(RegLocation rl_dest, RegLocation rl_src);
   void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
   void GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                     RegLocation rl_src2, bool is_div);
+                     RegLocation rl_src2, bool is_div, int flags);
 
   void SpillCoreRegs();
   void UnSpillCoreRegs();
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index acf5599..aa1bf7f 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -768,7 +768,7 @@
 }
 
 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
-                                  RegLocation rl_src2, bool is_div, bool check_zero) {
+                                  RegLocation rl_src2, bool is_div, int flags) {
   // We have to use fixed registers, so flush all the temps.
 
   // Prepare for explicit register usage.
@@ -783,7 +783,7 @@
   // Copy LHS sign bit into EDX.
   NewLIR0(kx86Cdq32Da);
 
-  if (check_zero) {
+  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
     // Handle division by zero case.
     GenDivZeroCheck(rs_r1);
   }
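
The check_zero boolean is now derived from the optimization flags instead of being passed
separately. A toy model of the flag test, with an assumed constant standing in for
MIR_IGNORE_DIV_ZERO_CHECK (not the real headers):

#include <cassert>

constexpr int kToyIgnoreDivZeroCheck = 1 << 0;  // Stand-in for MIR_IGNORE_DIV_ZERO_CHECK.

static bool NeedsDivZeroCheck(int flags) {
  return (flags & kToyIgnoreDivZeroCheck) == 0;
}

int main() {
  assert(NeedsDivZeroCheck(0));                        // No elision info: emit the check.
  assert(!NeedsDivZeroCheck(kToyIgnoreDivZeroCheck));  // Optimizer proved divisor != 0.
  return 0;
}
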
@@ -1506,7 +1506,7 @@
 }
 
 void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
-                                RegLocation rl_src2) {
+                                RegLocation rl_src2, int flags) {
   if (!cu_->target64) {
     // Some x86 32b ops are fallback.
     switch (opcode) {
@@ -1515,7 +1515,7 @@
       case Instruction::DIV_LONG_2ADDR:
       case Instruction::REM_LONG:
       case Instruction::REM_LONG_2ADDR:
-        Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+        Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
         return;
 
       default:
@@ -1541,17 +1541,17 @@
 
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
-      GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
+      GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags);
       return;
 
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true);
+      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
       return;
 
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
-      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false);
+      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
       return;
 
     case Instruction::AND_LONG_2ADDR:
@@ -1579,7 +1579,7 @@
   }
 }
 
-bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val) {
+bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) {
   // All memory accesses below reference dalvik regs.
   ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
 
@@ -1597,14 +1597,14 @@
     StoreValueWide(rl_dest, rl_src1);
     return true;
   } else if (val == 2) {
-    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
+    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags);
     return true;
   } else if (IsPowerOfTwo(val)) {
     int shift_amount = LowestSetBit(val);
     if (!PartiallyIntersects(rl_src1, rl_dest)) {
       rl_src1 = LoadValueWide(rl_src1, kCoreReg);
       RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1,
-                                                shift_amount);
+                                                shift_amount, flags);
       StoreValueWide(rl_dest, rl_result);
       return true;
     }
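
The power-of-two fast path rewrites x * 2^k as x << k. A plain C++ check of the identity
(not LIR; the shift is done on the unsigned representation so the sketch avoids
signed-shift pitfalls, and two's-complement wraparound matches Java long semantics):

#include <cassert>
#include <cstdint>

static int64_t MulPow2(int64_t x, int shift_amount) {
  return static_cast<int64_t>(static_cast<uint64_t>(x) << shift_amount);
}

int main() {
  assert(MulPow2(3, 4) == 3 * 16);
  assert(MulPow2(-5, 3) == -5 * 8);
  return 0;
}
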
@@ -1658,13 +1658,13 @@
 }
 
 void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+                            RegLocation rl_src2, int flags) {
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
   }
 
   if (rl_src2.is_const) {
-    if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2))) {
+    if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) {
       return;
     }
   }
@@ -2164,7 +2164,7 @@
 }
 
 void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
-                               RegLocation rl_src2, bool is_div) {
+                               RegLocation rl_src2, bool is_div, int flags) {
   if (!cu_->target64) {
     LOG(FATAL) << "Unexpected use GenDivRemLong()";
     return;
@@ -2191,7 +2191,9 @@
   NewLIR0(kx86Cqo64Da);
 
   // Handle division by zero case.
-  GenDivZeroCheckWide(rs_r1q);
+  if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
+    GenDivZeroCheckWide(rs_r1q);
+  }
 
   // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
   NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
@@ -2392,7 +2394,7 @@
 }
 
 RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                          RegLocation rl_src, int shift_amount) {
+                                          RegLocation rl_src, int shift_amount, int flags) {
   RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
   if (cu_->target64) {
     OpKind op = static_cast<OpKind>(0);    /* Make gcc happy */
@@ -2477,7 +2479,7 @@
 }
 
 void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
-                                   RegLocation rl_src, RegLocation rl_shift) {
+                                   RegLocation rl_src, RegLocation rl_shift, int flags) {
   // Per spec, we only care about low 6 bits of shift amount.
   int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
   if (shift_amount == 0) {
@@ -2487,7 +2489,7 @@
   } else if (shift_amount == 1 &&
             (opcode ==  Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
     // Need to handle this here to avoid calling StoreValueWide twice.
-    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
+    GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags);
     return;
   }
   if (PartiallyIntersects(rl_src, rl_dest)) {
@@ -2495,12 +2497,13 @@
     return;
   }
   rl_src = LoadValueWide(rl_src, kCoreReg);
-  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
+  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags);
   StoreValueWide(rl_dest, rl_result);
 }
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
-                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
+                                   int flags) {
   bool isConstSuccess = false;
   switch (opcode) {
     case Instruction::ADD_LONG:
@@ -2519,7 +2522,7 @@
       if (rl_src2.is_const) {
         isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
       } else {
-        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+        GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
         isConstSuccess = true;
       }
       break;
@@ -2545,7 +2548,7 @@
 
   if (!isConstSuccess) {
     // Default - bail to non-const handler.
-    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+    GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
   }
 }
 
@@ -2917,7 +2920,7 @@
 }
 
 void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
-                            RegLocation rl_lhs, RegLocation rl_rhs) {
+                            RegLocation rl_lhs, RegLocation rl_rhs, int flags) {
   OpKind op = kOpBkpt;
   bool is_div_rem = false;
   bool unary = false;
@@ -3022,7 +3025,7 @@
 
   // Get the div/rem stuff out of the way.
   if (is_div_rem) {
-    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
+    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags);
     StoreValue(rl_dest, rl_result);
     return;
   }
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 2ef4c21..79d5eeb 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -2166,7 +2166,7 @@
       NewLIR2(kX86MovdrxRR, temp_loc.reg.GetHighReg(), vector_src.GetReg());
     }
 
-    GenArithOpLong(Instruction::ADD_LONG_2ADDR, rl_dest, temp_loc, temp_loc);
+    GenArithOpLong(Instruction::ADD_LONG_2ADDR, rl_dest, temp_loc, temp_loc, mir->optimization_flags);
   } else if (opsize == kSignedByte || opsize == kUnsignedByte) {
     RegStorage rs_tmp = Get128BitRegister(AllocTempDouble());
     NewLIR2(kX86PxorRR, rs_tmp.GetReg(), rs_tmp.GetReg());
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index bdfab13..f6c7d52 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -378,7 +378,20 @@
         changed |= SetWide(defs[1]);
         changed |= SetHigh(defs[1]);
       }
+
+      bool has_ins = (GetNumOfInVRs() > 0);
+
       for (int i = 0; i < ssa_rep->num_uses; i++) {
+        if (has_ins && IsInVReg(uses[i])) {
+          // NB: The SSA name for the first def of an in-reg will be the same as
+          // the reg's actual name.
+          if (!reg_location_[uses[i]].fp && defined_fp) {
+            // If we were about to infer that this first def of an in-reg is a float
+            // when it was not previously marked as one (float/int is fixed during SSA
+            // initialization), skip the update rather than retype the incoming argument.
+            continue;
+          }
+        }
         changed |= SetFp(uses[i], defined_fp);
         changed |= SetCore(uses[i], defined_core);
         changed |= SetRef(uses[i], defined_ref);
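
The guard added above boils down to a single predicate; a condensed sketch with a
hypothetical helper name (not a MIRGraph API):

// Skip the float-inference update when the use is the first SSA def of an
// incoming argument that is not already marked fp: in-reg SSA names alias the
// argument's declared type, which was fixed during SSA initialization.
static bool ShouldSkipFpInference(bool use_is_in_vreg, bool use_is_fp, bool defined_fp) {
  return use_is_in_vreg && !use_is_fp && defined_fp;
}
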
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 4528688..a60c5bc 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -347,15 +347,11 @@
       image_(image),
       image_classes_(image_classes),
       thread_count_(thread_count),
-      start_ns_(0),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
       timings_logger_(timer),
-      compiler_library_(nullptr),
       compiler_context_(nullptr),
-      compiler_enable_auto_elf_loading_(nullptr),
-      compiler_get_method_code_addr_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips),
       dedupe_code_("dedupe code"),
       dedupe_src_mapping_table_("dedupe source mapping table"),
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 3d59ef1..0796f48 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -503,7 +503,6 @@
   std::unique_ptr<std::set<std::string>> image_classes_;
 
   size_t thread_count_;
-  uint64_t start_ns_;
 
   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
@@ -516,8 +515,6 @@
   typedef void (*CompilerCallbackFn)(CompilerDriver& driver);
   typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver);
 
-  void* compiler_library_;
-
   typedef void (*DexToDexCompilerFn)(CompilerDriver& driver,
                                      const DexFile::CodeItem* code_item,
                                      uint32_t access_flags, InvokeType invoke_type,
@@ -533,13 +530,6 @@
   // Arena pool used by the compiler.
   ArenaPool arena_pool_;
 
-  typedef void (*CompilerEnableAutoElfLoadingFn)(CompilerDriver& driver);
-  CompilerEnableAutoElfLoadingFn compiler_enable_auto_elf_loading_;
-
-  typedef const void* (*CompilerGetMethodCodeAddrFn)
-      (const CompilerDriver& driver, const CompiledMethod* cm, const mirror::ArtMethod* method);
-  CompilerGetMethodCodeAddrFn compiler_get_method_code_addr_;
-
   bool support_boot_image_fixup_;
 
   // DeDuplication data structures, these own the corresponding byte arrays.
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 3a50bfd..fb7aeb9 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -17,9 +17,14 @@
 #ifndef ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_
 #define ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_
 
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+
 namespace art {
 
-class CompilerOptions {
+class CompilerOptions FINAL {
  public:
   enum CompilerFilter {
     kVerifyNone,          // Skip verification and compile nothing except JNI stubs.
@@ -60,11 +65,12 @@
     implicit_null_checks_(false),
     implicit_so_checks_(false),
     implicit_suspend_checks_(false),
-    compile_pic_(false)
+    compile_pic_(false),
 #ifdef ART_SEA_IR_MODE
-    , sea_ir_mode_(false)
+    sea_ir_mode_(false),
 #endif
-    {}
+    verbose_methods_(nullptr) {
+  }
 
   CompilerOptions(CompilerFilter compiler_filter,
                   size_t huge_method_threshold,
@@ -79,10 +85,11 @@
                   bool implicit_null_checks,
                   bool implicit_so_checks,
                   bool implicit_suspend_checks,
-                  bool compile_pic
+                  bool compile_pic,
 #ifdef ART_SEA_IR_MODE
-                  , bool sea_ir_mode
+                  bool sea_ir_mode,
 #endif
+                  const std::vector<std::string>* verbose_methods
                   ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -97,11 +104,12 @@
     implicit_null_checks_(implicit_null_checks),
     implicit_so_checks_(implicit_so_checks),
     implicit_suspend_checks_(implicit_suspend_checks),
-    compile_pic_(compile_pic)
+    compile_pic_(compile_pic),
 #ifdef ART_SEA_IR_MODE
-    , sea_ir_mode_(sea_ir_mode)
+    sea_ir_mode_(sea_ir_mode),
 #endif
-    {}
+    verbose_methods_(verbose_methods) {
+  }
 
   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -168,28 +176,18 @@
     return implicit_null_checks_;
   }
 
-  void SetImplicitNullChecks(bool new_val) {
-    implicit_null_checks_ = new_val;
-  }
-
   bool GetImplicitStackOverflowChecks() const {
     return implicit_so_checks_;
   }
 
-  void SetImplicitStackOverflowChecks(bool new_val) {
-    implicit_so_checks_ = new_val;
-  }
-
   bool GetImplicitSuspendChecks() const {
     return implicit_suspend_checks_;
   }
 
-  void SetImplicitSuspendChecks(bool new_val) {
-    implicit_suspend_checks_ = new_val;
-  }
-
 #ifdef ART_SEA_IR_MODE
-  bool GetSeaIrMode();
+  bool GetSeaIrMode() const {
+    return sea_ir_mode_;
+  }
 #endif
 
   bool GetGenerateGDBInformation() const {
@@ -205,25 +203,44 @@
     return compile_pic_;
   }
 
+  bool HasVerboseMethods() const {
+    return verbose_methods_ != nullptr && !verbose_methods_->empty();
+  }
+
+  bool IsVerboseMethod(const std::string& pretty_method) const {
+    for (const std::string& cur_method : *verbose_methods_) {
+      if (pretty_method.find(cur_method) != std::string::npos) {
+        return true;
+      }
+    }
+    return false;
+  }
+
  private:
   CompilerFilter compiler_filter_;
-  size_t huge_method_threshold_;
-  size_t large_method_threshold_;
-  size_t small_method_threshold_;
-  size_t tiny_method_threshold_;
-  size_t num_dex_methods_threshold_;
-  bool generate_gdb_information_;
-  bool include_patch_information_;
+  const size_t huge_method_threshold_;
+  const size_t large_method_threshold_;
+  const size_t small_method_threshold_;
+  const size_t tiny_method_threshold_;
+  const size_t num_dex_methods_threshold_;
+  const bool generate_gdb_information_;
+  const bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
-  double top_k_profile_threshold_;
-  bool include_debug_symbols_;
-  bool implicit_null_checks_;
-  bool implicit_so_checks_;
-  bool implicit_suspend_checks_;
-  bool compile_pic_;
+  const double top_k_profile_threshold_;
+  const bool include_debug_symbols_;
+  const bool implicit_null_checks_;
+  const bool implicit_so_checks_;
+  const bool implicit_suspend_checks_;
+  const bool compile_pic_;
+
 #ifdef ART_SEA_IR_MODE
-  bool sea_ir_mode_;
+  const bool sea_ir_mode_;
 #endif
+
+  // Vector of methods to have verbose output enabled for.
+  const std::vector<std::string>* const verbose_methods_;
+
+  DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
 };
 
 }  // namespace art
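
For reference, IsVerboseMethod() above does substring matching against the pretty-printed
method, so either a bare method name or a full signature selects a method; it also assumes
HasVerboseMethods() was consulted first, since it dereferences the list unconditionally.
A usage sketch with assumed values:

#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> verbose_methods = { "toString" };
  std::string pretty = "java.lang.String java.lang.Object.toString()";

  bool verbose = false;
  for (const std::string& cur : verbose_methods) {
    if (pretty.find(cur) != std::string::npos) {
      verbose = true;
      break;
    }
  }
  assert(verbose);
  return 0;
}
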
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 35a3d4b..235aba8 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -206,7 +206,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable.";
-      break;
+      UNREACHABLE();
   }
   object->SetLockWord(LockWord::FromForwardingAddress(offset), false);
   DCHECK(IsImageOffsetAssigned(object));
diff --git a/compiler/jni/portable/jni_compiler.cc b/compiler/jni/portable/jni_compiler.cc
index d2f54f8..ff37d85 100644
--- a/compiler/jni/portable/jni_compiler.cc
+++ b/compiler/jni/portable/jni_compiler.cc
@@ -298,6 +298,7 @@
     case 'D': ret_type =  irb_.getJDoubleTy(); break;
     case 'L': ret_type =  irb_.getJObjectTy(); break;
     default: LOG(FATAL)  << "Unreachable: unexpected return type in shorty " << shorty;
+      UNREACHABLE();
   }
   // Get argument type
   std::vector< ::llvm::Type*> args_type;
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index f0c0ed7..9545896 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -21,6 +21,22 @@
 namespace art {
 namespace arm {
 
+// Used by hard float.
+static const Register kHFCoreArgumentRegisters[] = {
+  R0, R1, R2, R3
+};
+
+static const SRegister kHFSArgumentRegisters[] = {
+  S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
+};
+
+static const DRegister kHFDArgumentRegisters[] = {
+  D0, D1, D2, D3, D4, D5, D6, D7
+};
+
+COMPILE_ASSERT(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
+    ks_d_argument_registers_mismatch);
+
 // Calling convention
 
 ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
@@ -31,26 +47,43 @@
   return ArmManagedRegister::FromCoreRegister(IP);  // R12
 }
 
-static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
-  if (shorty[0] == 'F') {
-    return ArmManagedRegister::FromCoreRegister(R0);
-  } else if (shorty[0] == 'D') {
-    return ArmManagedRegister::FromRegisterPair(R0_R1);
-  } else if (shorty[0] == 'J') {
-    return ArmManagedRegister::FromRegisterPair(R0_R1);
-  } else if (shorty[0] == 'V') {
-    return ArmManagedRegister::NoRegister();
+ManagedRegister ArmManagedRuntimeCallingConvention::ReturnRegister() {
+  if (kArm32QuickCodeUseSoftFloat) {
+    switch (GetShorty()[0]) {
+    case 'V':
+      return ArmManagedRegister::NoRegister();
+    case 'D':
+    case 'J':
+      return ArmManagedRegister::FromRegisterPair(R0_R1);
+    default:
+      return ArmManagedRegister::FromCoreRegister(R0);
+    }
   } else {
-    return ArmManagedRegister::FromCoreRegister(R0);
+    switch (GetShorty()[0]) {
+    case 'V':
+      return ArmManagedRegister::NoRegister();
+    case 'D':
+      return ArmManagedRegister::FromDRegister(D0);
+    case 'F':
+      return ArmManagedRegister::FromSRegister(S0);
+    case 'J':
+      return ArmManagedRegister::FromRegisterPair(R0_R1);
+    default:
+      return ArmManagedRegister::FromCoreRegister(R0);
+    }
   }
 }
 
-ManagedRegister ArmManagedRuntimeCallingConvention::ReturnRegister() {
-  return ReturnRegisterForShorty(GetShorty());
-}
-
 ManagedRegister ArmJniCallingConvention::ReturnRegister() {
-  return ReturnRegisterForShorty(GetShorty());
+  switch (GetShorty()[0]) {
+  case 'V':
+    return ArmManagedRegister::NoRegister();
+  case 'D':
+  case 'J':
+    return ArmManagedRegister::FromRegisterPair(R0_R1);
+  default:
+    return ArmManagedRegister::FromCoreRegister(R0);
+  }
 }
 
 ManagedRegister ArmJniCallingConvention::IntReturnRegister() {
@@ -88,17 +121,70 @@
 const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpills() {
   // We spill the argument registers on ARM to free them up for scratch use; we then assume
   // all arguments are on the stack.
-  if (entry_spills_.size() == 0) {
-    size_t num_spills = NumArgs() + NumLongOrDoubleArgs();
-    if (num_spills > 0) {
-      entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R1));
-      if (num_spills > 1) {
-        entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R2));
-        if (num_spills > 2) {
-          entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R3));
+  if (kArm32QuickCodeUseSoftFloat) {
+    if (entry_spills_.size() == 0) {
+      size_t num_spills = NumArgs() + NumLongOrDoubleArgs();
+      if (num_spills > 0) {
+        entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R1));
+        if (num_spills > 1) {
+          entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R2));
+          if (num_spills > 2) {
+            entry_spills_.push_back(ArmManagedRegister::FromCoreRegister(R3));
+          }
         }
       }
     }
+  } else {
+    if ((entry_spills_.size() == 0) && (NumArgs() > 0)) {
+      uint32_t gpr_index = 1;  // R0 ~ R3. Reserve r0 for ArtMethod*.
+      uint32_t fpr_index = 0;  // S0 ~ S15.
+      uint32_t fpr_double_index = 0;  // D0 ~ D7.
+
+      ResetIterator(FrameOffset(0));
+      while (HasNext()) {
+        if (IsCurrentParamAFloatOrDouble()) {
+          if (IsCurrentParamADouble()) {  // Double.
+            // Double should not overlap with float.
+            fpr_double_index = (std::max(fpr_double_index * 2, RoundUp(fpr_index, 2))) / 2;
+            if (fpr_double_index < arraysize(kHFDArgumentRegisters)) {
+              entry_spills_.push_back(
+                  ArmManagedRegister::FromDRegister(kHFDArgumentRegisters[fpr_double_index++]));
+            } else {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
+            }
+          } else {  // Float.
+            // Float should not overlap with double.
+            if (fpr_index % 2 == 0) {
+              fpr_index = std::max(fpr_double_index * 2, fpr_index);
+            }
+            if (fpr_index < arraysize(kHFSArgumentRegisters)) {
+              entry_spills_.push_back(
+                  ArmManagedRegister::FromSRegister(kHFSArgumentRegisters[fpr_index++]));
+            } else {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
+            }
+          }
+        } else {
+          // FIXME: Pointer this returns as both reference and long.
+          if (IsCurrentParamALong() && !IsCurrentParamAReference()) {  // Long.
+            if (gpr_index < arraysize(kHFCoreArgumentRegisters)) {
+              entry_spills_.push_back(
+                  ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++]));
+            } else {
+              entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
+            }
+          }
+          // High part of long or 32-bit argument.
+          if (gpr_index < arraysize(kHFCoreArgumentRegisters)) {
+            entry_spills_.push_back(
+                ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++]));
+          } else {
+            entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
+          }
+        }
+        Next();
+      }
+    }
   }
   return entry_spills_;
 }
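
The fpr_index/fpr_double_index arithmetic above enforces the AAPCS-VFP overlay rule
(Dn aliases S2n/S2n+1) while still letting a later float back-fill the S-register hole a
double leaves behind. A toy replay of the index bookkeeping for an assumed
(float, double, float) parameter list:

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint32_t RoundUpTo2(uint32_t x) { return (x + 1) & ~1u; }

int main() {
  uint32_t fpr_index = 0;         // Next free S register.
  uint32_t fpr_double_index = 0;  // Next free D register.

  // 1st param, float: takes S0.
  if (fpr_index % 2 == 0) fpr_index = std::max(fpr_double_index * 2, fpr_index);
  uint32_t p1 = fpr_index++;  // S0

  // 2nd param, double: must not overlap S0, so it skips D0 and takes D1 (= S2/S3).
  fpr_double_index = std::max(fpr_double_index * 2, RoundUpTo2(fpr_index)) / 2;
  uint32_t p2 = fpr_double_index++;  // D1

  // 3rd param, float: back-fills the hole at S1 left by the double.
  if (fpr_index % 2 == 0) fpr_index = std::max(fpr_double_index * 2, fpr_index);
  uint32_t p3 = fpr_index++;  // S1

  assert(p1 == 0 && p2 == 1 && p3 == 1);
  return 0;
}
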
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 4d575cb..e4dee46 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -550,7 +550,7 @@
 }
 
 void HGraphBuilder::BuildFillWideArrayData(HInstruction* object,
-                                           const uint64_t* data,
+                                           const int64_t* data,
                                            uint32_t element_count,
                                            uint32_t dex_offset) {
   for (uint32_t i = 0; i < element_count; ++i) {
@@ -748,6 +748,16 @@
       break;
     }
 
+    case Instruction::NEG_LONG: {
+      Unop_12x<HNeg>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
+    case Instruction::NOT_INT: {
+      Unop_12x<HNot>(instruction, Primitive::kPrimInt);
+      break;
+    }
+
     case Instruction::ADD_INT: {
       Binop_23x<HAdd>(instruction, Primitive::kPrimInt);
       break;
@@ -778,6 +788,16 @@
       break;
     }
 
+    case Instruction::SUB_FLOAT: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE: {
+      Binop_23x<HSub>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
     case Instruction::ADD_INT_2ADDR: {
       Binop_12x<HAdd>(instruction, Primitive::kPrimInt);
       break;
@@ -828,6 +848,16 @@
       break;
     }
 
+    case Instruction::SUB_FLOAT_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimFloat);
+      break;
+    }
+
+    case Instruction::SUB_DOUBLE_2ADDR: {
+      Binop_12x<HSub>(instruction, Primitive::kPrimDouble);
+      break;
+    }
+
     case Instruction::MUL_INT_2ADDR: {
       Binop_12x<HMul>(instruction, Primitive::kPrimInt);
       break;
@@ -934,25 +964,29 @@
 
       switch (payload->element_width) {
         case 1:
-          BuildFillArrayData(null_check, data, element_count, Primitive::kPrimByte, dex_offset);
+          BuildFillArrayData(null_check,
+                             reinterpret_cast<const int8_t*>(data),
+                             element_count,
+                             Primitive::kPrimByte,
+                             dex_offset);
           break;
         case 2:
           BuildFillArrayData(null_check,
-                             reinterpret_cast<const uint16_t*>(data),
+                             reinterpret_cast<const int16_t*>(data),
                              element_count,
                              Primitive::kPrimShort,
                              dex_offset);
           break;
         case 4:
           BuildFillArrayData(null_check,
-                             reinterpret_cast<const uint32_t*>(data),
+                             reinterpret_cast<const int32_t*>(data),
                              element_count,
                              Primitive::kPrimInt,
                              dex_offset);
           break;
         case 8:
           BuildFillWideArrayData(null_check,
-                                 reinterpret_cast<const uint64_t*>(data),
+                                 reinterpret_cast<const int64_t*>(data),
                                  element_count,
                                  dex_offset);
           break;
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c5e02db..b55ef07 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -154,7 +154,7 @@
   // Fills the given object with data as specified in the fill-array-data
   // instruction. The data must be for long and double arrays.
   void BuildFillWideArrayData(HInstruction* object,
-                              const uint64_t* data,
+                              const int64_t* data,
                               uint32_t element_count,
                               uint32_t dex_offset);
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d5cd490..c4286a4 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -17,6 +17,7 @@
 #include "code_generator.h"
 
 #include "code_generator_arm.h"
+#include "code_generator_arm64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
 #include "compiled_method.h"
@@ -281,6 +282,9 @@
     case kThumb2: {
       return new (allocator) arm::CodeGeneratorARM(graph);
     }
+    case kArm64: {
+      return new (allocator) arm64::CodeGeneratorARM64(graph);
+    }
     case kMips:
       return nullptr;
     case kX86: {
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 7ed802e..a3b31d8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1043,11 +1043,13 @@
       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      bool output_overlaps = (neg->GetResultType() == Primitive::kPrimLong);
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), output_overlaps);
       break;
+    }
 
-    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
@@ -1069,6 +1071,26 @@
       break;
 
     case Primitive::kPrimLong:
+      DCHECK(in.IsRegisterPair());
+      // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
+      __ rsbs(out.AsRegisterPairLow<Register>(),
+              in.AsRegisterPairLow<Register>(),
+              ShifterOperand(0));
+      // We cannot emit an RSC (Reverse Subtract with Carry)
+      // instruction here, as it does not exist in the Thumb-2
+      // instruction set.  We use the following approach
+      // using SBC and SUB instead.
+      //
+      // out.hi = -C
+      __ sbc(out.AsRegisterPairHigh<Register>(),
+             out.AsRegisterPairHigh<Register>(),
+             ShifterOperand(out.AsRegisterPairHigh<Register>()));
+      // out.hi = out.hi - in.hi
+      __ sub(out.AsRegisterPairHigh<Register>(),
+             out.AsRegisterPairHigh<Register>(),
+             ShifterOperand(in.AsRegisterPairHigh<Register>()));
+      break;
+
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
@@ -1157,53 +1179,60 @@
       locations->SetOut(Location::RequiresRegister(), output_overlaps);
       break;
     }
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
       break;
-
+    }
     default:
-      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   }
 }
 
 void InstructionCodeGeneratorARM::VisitSub(HSub* sub) {
   LocationSummary* locations = sub->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (locations->InAt(1).IsRegister()) {
-        __ sub(locations->Out().As<Register>(),
-               locations->InAt(0).As<Register>(),
-               ShifterOperand(locations->InAt(1).As<Register>()));
+      if (second.IsRegister()) {
+        __ sub(out.As<Register>(), first.As<Register>(), ShifterOperand(second.As<Register>()));
       } else {
-        __ AddConstant(locations->Out().As<Register>(),
-                       locations->InAt(0).As<Register>(),
-                       -locations->InAt(1).GetConstant()->AsIntConstant()->GetValue());
+        __ AddConstant(out.As<Register>(),
+                       first.As<Register>(),
+                       -second.GetConstant()->AsIntConstant()->GetValue());
       }
       break;
     }
 
-    case Primitive::kPrimLong:
-      __ subs(locations->Out().AsRegisterPairLow<Register>(),
-              locations->InAt(0).AsRegisterPairLow<Register>(),
-              ShifterOperand(locations->InAt(1).AsRegisterPairLow<Register>()));
-      __ sbc(locations->Out().AsRegisterPairHigh<Register>(),
-             locations->InAt(0).AsRegisterPairHigh<Register>(),
-             ShifterOperand(locations->InAt(1).AsRegisterPairHigh<Register>()));
+    case Primitive::kPrimLong: {
+      __ subs(out.AsRegisterPairLow<Register>(),
+              first.AsRegisterPairLow<Register>(),
+              ShifterOperand(second.AsRegisterPairLow<Register>()));
+      __ sbc(out.AsRegisterPairHigh<Register>(),
+             first.AsRegisterPairHigh<Register>(),
+             ShifterOperand(second.AsRegisterPairHigh<Register>()));
       break;
+    }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+    case Primitive::kPrimFloat: {
+      __ vsubs(FromDToLowS(out.As<DRegister>()),
+               FromDToLowS(first.As<DRegister>()),
+               FromDToLowS(second.As<DRegister>()));
       break;
+    }
+
+    case Primitive::kPrimDouble: {
+      __ vsubd(out.As<DRegister>(), first.As<DRegister>(), second.As<DRegister>());
+      break;
+    }
+
 
     default:
-      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   }
 }
 
@@ -1351,17 +1380,33 @@
   // Nothing to do, the parameter is already at its location.
 }
 
-void LocationsBuilderARM::VisitNot(HNot* instruction) {
+void LocationsBuilderARM::VisitNot(HNot* not_) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
-void InstructionCodeGeneratorARM::VisitNot(HNot* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  __ eor(locations->Out().As<Register>(),
-         locations->InAt(0).As<Register>(), ShifterOperand(1));
+void InstructionCodeGeneratorARM::VisitNot(HNot* not_) {
+  LocationSummary* locations = not_->GetLocations();
+  Location out = locations->Out();
+  Location in = locations->InAt(0);
+  switch (not_->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+      __ eor(out.As<Register>(), in.As<Register>(), ShifterOperand(1));
+      break;
+
+    case Primitive::kPrimInt:
+      __ mvn(out.As<Register>(), ShifterOperand(in.As<Register>()));
+      break;
+
+    case Primitive::kPrimLong:
+      LOG(FATAL) << "Not yet implemented type for not operation " << not_->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
+  }
 }
 
 void LocationsBuilderARM::VisitCompare(HCompare* compare) {
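
VisitNot now dispatches on the operand type: boolean-not only needs to flip bit 0, since
booleans are 0/1, while integer-not must invert all 32 bits; hence EOR with 1 versus MVN.
A toy check of the two encodings:

#include <cassert>
#include <cstdint>

static uint32_t BoolNot(uint32_t b) { return b ^ 1u; }  // eor out, in, #1
static uint32_t IntNot(uint32_t v)  { return ~v; }      // mvn out, in

int main() {
  assert(BoolNot(0) == 1 && BoolNot(1) == 0);
  assert(IntNot(0) == 0xFFFFFFFFu);
  assert(static_cast<int32_t>(IntNot(5)) == -6);  // Java ~5 == -6.
  return 0;
}
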
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
new file mode 100644
index 0000000..79528ac
--- /dev/null
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -0,0 +1,1205 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm64.h"
+
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/class.h"
+#include "thread.h"
+#include "utils/arm64/assembler_arm64.h"
+#include "utils/assembler.h"
+#include "utils/stack_checks.h"
+
+
+using namespace vixl;   // NOLINT(build/namespaces)
+
+#ifdef __
+#error "ARM64 Codegen VIXL macro-assembler macro already defined."
+#endif
+
+
+namespace art {
+
+namespace arm64 {
+
+static bool IsFPType(Primitive::Type type) {
+  return type == Primitive::kPrimFloat || type == Primitive::kPrimDouble;
+}
+
+// TODO: Clean up some of the constant definitions.
+static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
+static constexpr int kCurrentMethodStackOffset = 0;
+
+namespace {
+// Convenience helpers to ease conversion to and from VIXL operands.
+
+int VIXLRegCodeFromART(int code) {
+  // TODO: static check?
+  DCHECK_EQ(SP, 31);
+  DCHECK_EQ(WSP, 31);
+  DCHECK_EQ(XZR, 32);
+  DCHECK_EQ(WZR, 32);
+  if (code == SP) {
+    return vixl::kSPRegInternalCode;
+  }
+  if (code == XZR) {
+    return vixl::kZeroRegCode;
+  }
+  return code;
+}
+
+int ARTRegCodeFromVIXL(int code) {
+  // TODO: static check?
+  DCHECK_EQ(SP, 31);
+  DCHECK_EQ(WSP, 31);
+  DCHECK_EQ(XZR, 32);
+  DCHECK_EQ(WZR, 32);
+  if (code == vixl::kSPRegInternalCode) {
+    return SP;
+  }
+  if (code == vixl::kZeroRegCode) {
+    return XZR;
+  }
+  return code;
+}
+
+Register XRegisterFrom(Location location) {
+  return Register::XRegFromCode(VIXLRegCodeFromART(location.reg()));
+}
+
+Register WRegisterFrom(Location location) {
+  return Register::WRegFromCode(VIXLRegCodeFromART(location.reg()));
+}
+
+Register RegisterFrom(Location location, Primitive::Type type) {
+  DCHECK(type != Primitive::kPrimVoid && !IsFPType(type));
+  return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
+}
+
+Register OutputRegister(HInstruction* instr) {
+  return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
+}
+
+Register InputRegisterAt(HInstruction* instr, int input_index) {
+  return RegisterFrom(instr->GetLocations()->InAt(input_index),
+                      instr->InputAt(input_index)->GetType());
+}
+
+int64_t Int64ConstantFrom(Location location) {
+  HConstant* instr = location.GetConstant();
+  return instr->IsIntConstant() ? instr->AsIntConstant()->GetValue()
+                                : instr->AsLongConstant()->GetValue();
+}
+
+Operand OperandFrom(Location location, Primitive::Type type) {
+  if (location.IsRegister()) {
+    return Operand(RegisterFrom(location, type));
+  } else {
+    return Operand(Int64ConstantFrom(location));
+  }
+}
+
+Operand InputOperandAt(HInstruction* instr, int input_index) {
+  return OperandFrom(instr->GetLocations()->InAt(input_index),
+                     instr->InputAt(input_index)->GetType());
+}
+
+MemOperand StackOperandFrom(Location location) {
+  return MemOperand(sp, location.GetStackIndex());
+}
+
+MemOperand HeapOperand(const Register& base, Offset offset) {
+  // A heap reference must be 32 bits, so it fits in a W register.
+  DCHECK(base.IsW());
+  return MemOperand(base.X(), offset.SizeValue());
+}
+
+MemOperand HeapOperandFrom(Location location, Primitive::Type type, Offset offset) {
+  return HeapOperand(RegisterFrom(location, type), offset);
+}
+
+Location LocationFrom(const Register& reg) {
+  return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.code()));
+}
+
+}  // namespace
+
+inline Condition ARM64Condition(IfCondition cond) {
+  switch (cond) {
+    case kCondEQ: return eq;
+    case kCondNE: return ne;
+    case kCondLT: return lt;
+    case kCondLE: return le;
+    case kCondGT: return gt;
+    case kCondGE: return ge;
+    default:
+      LOG(FATAL) << "Unknown if condition";
+  }
+  return nv;  // Unreachable.
+}
+
+static const Register kRuntimeParameterCoreRegisters[] = { x0, x1, x2, x3, x4, x5, x6, x7 };
+static constexpr size_t kRuntimeParameterCoreRegistersLength =
+    arraysize(kRuntimeParameterCoreRegisters);
+static const FPRegister kRuntimeParameterFpuRegisters[] = { };
+static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+
+class InvokeRuntimeCallingConvention : public CallingConvention<Register, FPRegister> {
+ public:
+  static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+  InvokeRuntimeCallingConvention()
+      : CallingConvention(kRuntimeParameterCoreRegisters,
+                          kRuntimeParameterCoreRegistersLength,
+                          kRuntimeParameterFpuRegisters,
+                          kRuntimeParameterFpuRegistersLength) {}
+
+  Location GetReturnLocation(Primitive::Type return_type);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
+};
+
+Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) {
+  DCHECK_NE(return_type, Primitive::kPrimVoid);
+  if (return_type == Primitive::kPrimFloat || return_type == Primitive::kPrimDouble) {
+    LOG(FATAL) << "Unimplemented return type " << return_type;
+  }
+  return LocationFrom(x0);
+}
+
+#define __ reinterpret_cast<Arm64Assembler*>(codegen->GetAssembler())->vixl_masm_->
+
+class SlowPathCodeARM64 : public SlowPathCode {
+ public:
+  SlowPathCodeARM64() : entry_label_(), exit_label_() {}
+
+  vixl::Label* GetEntryLabel() { return &entry_label_; }
+  vixl::Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+  vixl::Label entry_label_;
+  vixl::Label exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
+};
+
+class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction,
+                                    Location index_location,
+                                    Location length_location)
+      : instruction_(instruction),
+        index_location_(index_location),
+        length_location_(length_location) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = reinterpret_cast<CodeGeneratorARM64*>(codegen);
+    __ Bind(GetEntryLabel());
+    InvokeRuntimeCallingConvention calling_convention;
+    arm64_codegen->MoveHelper(LocationFrom(calling_convention.GetRegisterAt(0)),
+                              index_location_, Primitive::kPrimInt);
+    arm64_codegen->MoveHelper(LocationFrom(calling_convention.GetRegisterAt(1)),
+                              length_location_, Primitive::kPrimInt);
+    size_t offset = QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pThrowArrayBounds).SizeValue();
+    __ Ldr(lr, MemOperand(tr, offset));
+    __ Blr(lr);
+    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+  }
+
+ private:
+  HBoundsCheck* const instruction_;
+  const Location index_location_;
+  const Location length_location_;
+
+  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
+};
+
+class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit NullCheckSlowPathARM64(HNullCheck* instr) : instruction_(instr) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    __ Bind(GetEntryLabel());
+    int32_t offset = QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pThrowNullPointer).Int32Value();
+    __ Ldr(lr, MemOperand(tr, offset));
+    __ Blr(lr);
+    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+  }
+
+ private:
+  HNullCheck* const instruction_;
+
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
+};
+
+class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction,
+                                     HBasicBlock* successor)
+      : instruction_(instruction), successor_(successor) {}
+
+  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    size_t offset = QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pTestSuspend).SizeValue();
+    __ Bind(GetEntryLabel());
+    __ Ldr(lr, MemOperand(tr, offset));
+    __ Blr(lr);
+    codegen->RecordPcInfo(instruction_, instruction_->GetDexPc());
+    __ B(GetReturnLabel());
+  }
+
+  vixl::Label* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
+
+
+ private:
+  HSuspendCheck* const instruction_;
+  // If not null, the block to branch to after the suspend check.
+  HBasicBlock* const successor_;
+
+  // If `successor_` is null, the label to branch to after the suspend check.
+  vixl::Label return_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
+};
+
+#undef __
+
+Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+  Location next_location;
+  if (type == Primitive::kPrimVoid) {
+    LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  if (type == Primitive::kPrimFloat || type == Primitive::kPrimDouble) {
+    LOG(FATAL) << "Unimplemented type " << type;
+  }
+
+  if (gp_index_ < calling_convention.GetNumberOfRegisters()) {
+    next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_));
+    if (type == Primitive::kPrimLong) {
+      // Double stack slot reserved on the stack.
+      stack_index_++;
+    }
+  } else {  // Stack.
+    if (type == Primitive::kPrimLong) {
+      next_location = Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_));
+      // Double stack slot reserved on the stack.
+      stack_index_++;
+    } else {
+      next_location = Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_));
+    }
+  }
+  // Move to the next register/stack slot.
+  gp_index_++;
+  stack_index_++;
+  return next_location;
+}
+
+CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph)
+    : CodeGenerator(graph,
+                    kNumberOfAllocatableRegisters,
+                    kNumberOfAllocatableFloatingPointRegisters,
+                    kNumberOfAllocatableRegisterPairs),
+      block_labels_(nullptr),
+      location_builder_(graph, this),
+      instruction_visitor_(graph, this) {}
+
+#define __ reinterpret_cast<Arm64Assembler*>(GetAssembler())->vixl_masm_->
+
+void CodeGeneratorARM64::GenerateFrameEntry() {
+  // TODO: Add proper support for the stack overflow check.
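+  // Note: the load below from (sp - reserved bytes) acts as an implicit
+  // stack-overflow probe; touching the guard page faults before the frame is
+  // committed, and RecordPcInfo associates that fault with the method entry.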
+  UseScratchRegisterScope temps(assembler_.vixl_masm_);
+  Register temp = temps.AcquireX();
+  __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+  __ Ldr(temp, MemOperand(temp, 0));
+  RecordPcInfo(nullptr, 0);
+
+  CPURegList preserved_regs = GetFramePreservedRegisters();
+  int frame_size = GetFrameSize();
+  core_spill_mask_ |= preserved_regs.list();
+
+  __ Str(w0, MemOperand(sp, -frame_size, PreIndex));
+  __ PokeCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes());
+
+  // Stack layout:
+  // sp[frame_size - 8]        : lr.
+  // ...                       : other preserved registers.
+  // sp[frame_size - regs_size]: first preserved register.
+  // ...                       : reserved frame space.
+  // sp[0]                     : context pointer.
+}
+
+void CodeGeneratorARM64::GenerateFrameExit() {
+  int frame_size = GetFrameSize();
+  CPURegList preserved_regs = GetFramePreservedRegisters();
+  __ PeekCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes());
+  __ Drop(frame_size);
+}
+
+void CodeGeneratorARM64::Bind(HBasicBlock* block) {
+  __ Bind(GetLabelOf(block));
+}
+
+void CodeGeneratorARM64::MoveHelper(Location destination,
+                                    Location source,
+                                    Primitive::Type type) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    Register dst = RegisterFrom(destination, type);
+    if (source.IsRegister()) {
+      Register src = RegisterFrom(source, type);
+      DCHECK(dst.IsSameSizeAndType(src));
+      __ Mov(dst, src);
+    } else {
+      DCHECK(dst.Is64Bits() || !source.IsDoubleStackSlot());
+      __ Ldr(dst, StackOperandFrom(source));
+    }
+  } else {
+    DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
+    if (source.IsRegister()) {
+      __ Str(RegisterFrom(source, type), StackOperandFrom(destination));
+    } else {
+      UseScratchRegisterScope temps(assembler_.vixl_masm_);
+      Register temp = destination.IsDoubleStackSlot() ? temps.AcquireX() : temps.AcquireW();
+      __ Ldr(temp, StackOperandFrom(source));
+      __ Str(temp, StackOperandFrom(destination));
+    }
+  }
+}
+
+void CodeGeneratorARM64::Move(HInstruction* instruction,
+                              Location location,
+                              HInstruction* move_for) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (locations != nullptr && locations->Out().Equals(location)) {
+    return;
+  }
+
+  Primitive::Type type = instruction->GetType();
+
+  if (instruction->IsIntConstant() || instruction->IsLongConstant()) {
+    int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue()
+                                                 : instruction->AsLongConstant()->GetValue();
+    if (location.IsRegister()) {
+      Register dst = RegisterFrom(location, type);
+      DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) ||
+             (instruction->IsLongConstant() && dst.Is64Bits()));
+      __ Mov(dst, value);
+    } else {
+      DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
+      UseScratchRegisterScope temps(assembler_.vixl_masm_);
+      Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+      __ Mov(temp, value);
+      __ Str(temp, StackOperandFrom(location));
+    }
+
+  } else if (instruction->IsLoadLocal()) {
+    uint32_t stack_slot = GetStackSlot(instruction->AsLoadLocal()->GetLocal());
+    switch (type) {
+      case Primitive::kPrimNot:
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+        MoveHelper(location, Location::StackSlot(stack_slot), type);
+        break;
+      case Primitive::kPrimLong:
+        MoveHelper(location, Location::DoubleStackSlot(stack_slot), type);
+        break;
+      default:
+        LOG(FATAL) << "Unimplemented type" << type;
+    }
+
+  } else {
+    DCHECK((instruction->GetNext() == move_for) || instruction->GetNext()->IsTemporary());
+    MoveHelper(location, locations->Out(), type);
+  }
+}
+
+size_t CodeGeneratorARM64::FrameEntrySpillSize() const {
+  return GetFramePreservedRegistersSize();
+}
+
+Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
+  Primitive::Type type = load->GetType();
+  switch (type) {
+    case Primitive::kPrimNot:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      return Location::StackSlot(GetStackSlot(load->GetLocal()));
+    case Primitive::kPrimLong:
+      return Location::DoubleStackSlot(GetStackSlot(load->GetLocal()));
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented type " << type;
+      break;
+    case Primitive::kPrimVoid:
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+  }
+  LOG(FATAL) << "Unreachable";
+  return Location::NoLocation();
+}
+
+void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
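+  // Dirty the card covering `object` when `value` is a non-null reference.
+  // The card address is card_table_base + (object >> kCardShift); the byte
+  // stored is the low byte of the card table base register itself, which
+  // the runtime biases so that it equals the dirty-card value.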
+  UseScratchRegisterScope temps(assembler_.vixl_masm_);
+  Register card = temps.AcquireX();
+  Register temp = temps.AcquireX();
+  vixl::Label done;
+  __ Cbz(value, &done);
+  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64WordSize>().Int32Value()));
+  __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
+  __ Strb(card, MemOperand(card, temp));
+  __ Bind(&done);
+}
+
+void CodeGeneratorARM64::SetupBlockedRegisters() const {
+  // Block reserved registers:
+  //   ip0 (VIXL temporary)
+  //   ip1 (VIXL temporary)
+  //   xSuspend (Suspend counter)
+  //   lr
+  // sp is not part of the allocatable registers, so we don't need to block it.
+  CPURegList reserved_core_registers = vixl_reserved_core_registers;
+  reserved_core_registers.Combine(runtime_reserved_core_registers);
+  // TODO: See if we should instead allow allocating but preserve those if used.
+  reserved_core_registers.Combine(quick_callee_saved_registers);
+  while (!reserved_core_registers.IsEmpty()) {
+    blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
+  }
+}
+
+Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
+  if (type == Primitive::kPrimVoid) {
+    LOG(FATAL) << "Unreachable type " << type;
+  }
+
+  if (type == Primitive::kPrimFloat || type == Primitive::kPrimDouble) {
+    LOG(FATAL) << "Unimplemented support for floating-point";
+  }
+
+  ssize_t reg = FindFreeEntry(blocked_core_registers_, kNumberOfXRegisters);
+  DCHECK_NE(reg, -1);
+  blocked_core_registers_[reg] = true;
+
+  if (IsFPType(type)) {
+    return Location::FpuRegisterLocation(reg);
+  } else {
+    return Location::RegisterLocation(reg);
+  }
+}
+
+void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
+  stream << Arm64ManagedRegister::FromXRegister(XRegister(reg));
+}
+
+void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
+  stream << Arm64ManagedRegister::FromDRegister(DRegister(reg));
+}
+
+#undef __
+#define __ assembler_->vixl_masm_->
+
+InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
+                                                             CodeGeneratorARM64* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),
+        codegen_(codegen) {}
+
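+// Unimplemented instructions are compiled to a BRK with a per-instruction
+// code, so hitting one at run time identifies exactly which visitor is
+// missing.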
+#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)              \
+  M(ArrayGet)                                              \
+  M(ArraySet)                                              \
+  M(DoubleConstant)                                        \
+  M(FloatConstant)                                         \
+  M(Mul)                                                   \
+  M(Neg)                                                   \
+  M(NewArray)                                              \
+  M(ParallelMove)
+
+#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode
+
+enum UnimplementedInstructionBreakCode {
+#define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name),
+  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION)
+#undef ENUM_UNIMPLEMENTED_INSTRUCTION
+};
+
+#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name)                               \
+  void InstructionCodeGeneratorARM64::Visit##name(H##name* instr) {                   \
+    __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name));                               \
+  }                                                                                   \
+  void LocationsBuilderARM64::Visit##name(H##name* instr) {                           \
+    LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \
+    locations->SetOut(Location::Any());                                               \
+  }
+  FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS)
+#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS
+
+#undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE
+
+void LocationsBuilderARM64::HandleAddSub(HBinaryOperation* instr) {
+  DCHECK(instr->IsAdd() || instr->IsSub());
+  DCHECK_EQ(instr->InputCount(), 2U);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+  Primitive::Type type = instr->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected " << instr->DebugName() <<  " type " << type;
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented " << instr->DebugName() << " type " << type;
+  }
+}
+
+void InstructionCodeGeneratorARM64::HandleAddSub(HBinaryOperation* instr) {
+  DCHECK(instr->IsAdd() || instr->IsSub());
+
+  Primitive::Type type = instr->GetType();
+  Register dst = OutputRegister(instr);
+  Register lhs = InputRegisterAt(instr, 0);
+  Operand rhs = InputOperandAt(instr, 1);
+
+  switch (type) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      if (instr->IsAdd()) {
+        __ Add(dst, lhs, rhs);
+      } else {
+        __ Sub(dst, lhs, rhs);
+      }
+      break;
+
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      LOG(FATAL) << "Unexpected add/sub type " << type;
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented add/sub type " << type;
+  }
+}
+
+void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
+  HandleAddSub(instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
+  HandleAddSub(instruction);
+}
+
+void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
+  __ Ldr(OutputRegister(instruction),
+         HeapOperand(InputRegisterAt(instruction, 0), mirror::Array::LengthOffset()));
+}
+
+void LocationsBuilderARM64::VisitCompare(HCompare* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitCompare(HCompare* instruction) {
+  Primitive::Type in_type = instruction->InputAt(0)->GetType();
+
+  DCHECK_EQ(in_type, Primitive::kPrimLong);
+  switch (in_type) {
+    case Primitive::kPrimLong: {
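+      // Produce -1, 0 or 1. The subtraction sets the flags and already
+      // yields 0 on equality; otherwise materialize 1 and negate it when
+      // the comparison was strictly "less than" (le here, as the equal
+      // case has branched away).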
+      vixl::Label done;
+      Register result = OutputRegister(instruction);
+      Register left = InputRegisterAt(instruction, 0);
+      Operand right = InputOperandAt(instruction, 1);
+      __ Subs(result, left, right);
+      __ B(eq, &done);
+      __ Mov(result, 1);
+      __ Cneg(result, result, le);
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << in_type;
+  }
+}
+
+void LocationsBuilderARM64::VisitCondition(HCondition* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (instruction->NeedsMaterialization()) {
+    locations->SetOut(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) {
+  if (!instruction->NeedsMaterialization()) {
+    return;
+  }
+
+  LocationSummary* locations = instruction->GetLocations();
+  Register lhs = InputRegisterAt(instruction, 0);
+  Operand rhs = InputOperandAt(instruction, 1);
+  Register res = RegisterFrom(locations->Out(), instruction->GetType());
+  Condition cond = ARM64Condition(instruction->GetCondition());
+
+  __ Cmp(lhs, rhs);
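+  // Materialize the condition: res = cond ? 1 : 0, by selecting the zero
+  // register when the inverted condition holds.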
+  __ Csel(res, vixl::Assembler::AppropriateZeroRegFor(res), Operand(1), InvertCondition(cond));
+}
+
+#define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
+  M(Equal)                                                                               \
+  M(NotEqual)                                                                            \
+  M(LessThan)                                                                            \
+  M(LessThanOrEqual)                                                                     \
+  M(GreaterThan)                                                                         \
+  M(GreaterThanOrEqual)
+#define DEFINE_CONDITION_VISITORS(Name)                                                  \
+void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }         \
+void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); }
+FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
+#undef DEFINE_CONDITION_VISITORS
+#undef FOR_EACH_CONDITION_INSTRUCTION
+
+void LocationsBuilderARM64::VisitExit(HExit* exit) {
+  exit->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitExit(HExit* exit) {
+  if (kIsDebugBuild) {
+    down_cast<Arm64Assembler*>(GetAssembler())->Comment("Unreachable");
+    __ Brk(0);    // TODO: Introduce special markers for such code locations.
+  }
+}
+
+void LocationsBuilderARM64::VisitGoto(HGoto* got) {
+  got->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
+  HBasicBlock* successor = got->GetSuccessor();
+  // TODO: Support for suspend checks emission.
+  if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
+    __ B(codegen_->GetLabelOf(successor));
+  }
+}
+
+void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  if (cond->AsCondition()->NeedsMaterialization()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
+  HInstruction* cond = if_instr->InputAt(0);
+  DCHECK(cond->IsCondition());
+  HCondition* condition = cond->AsCondition();
+  vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
+  vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
+
+  // TODO: Support constant condition input in VisitIf.
+
+  if (condition->NeedsMaterialization()) {
+    // The condition instruction has been materialized, compare the output to 0.
+    Location cond_val = if_instr->GetLocations()->InAt(0);
+    DCHECK(cond_val.IsRegister());
+    __ Cbnz(InputRegisterAt(if_instr, 0), true_target);
+
+  } else {
+    // The condition instruction has not been materialized, use its inputs as
+    // the comparison and its condition as the branch condition.
+    Register lhs = InputRegisterAt(condition, 0);
+    Operand rhs = InputOperandAt(condition, 1);
+    Condition cond = ARM64Condition(condition->GetCondition());
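+    // Comparisons with zero for (in)equality can use a single
+    // compare-and-branch instruction instead of Cmp followed by B.cond.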
+    if ((cond == eq || cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+      if (cond == eq) {
+        __ Cbz(lhs, true_target);
+      } else {
+        __ Cbnz(lhs, true_target);
+      }
+    } else {
+      __ Cmp(lhs, rhs);
+      __ B(cond, true_target);
+    }
+  }
+
+  if (!codegen_->GoesToNextBlock(if_instr->GetBlock(), if_instr->IfFalseSuccessor())) {
+    __ B(false_target);
+  }
+}
+
+void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  Primitive::Type res_type = instruction->GetType();
+  Register res = OutputRegister(instruction);
+  Register obj = InputRegisterAt(instruction, 0);
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+
+  switch (res_type) {
+    case Primitive::kPrimBoolean: {
+      __ Ldrb(res, MemOperand(obj, offset));
+      break;
+    }
+    case Primitive::kPrimByte: {
+      __ Ldrsb(res, MemOperand(obj, offset));
+      break;
+    }
+    case Primitive::kPrimShort: {
+      __ Ldrsh(res, MemOperand(obj, offset));
+      break;
+    }
+    case Primitive::kPrimChar: {
+      __ Ldrh(res, MemOperand(obj, offset));
+      break;
+    }
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong: {  // TODO: support volatile.
+      DCHECK(res.IsX() == (res_type == Primitive::kPrimLong));
+      __ Ldr(res, MemOperand(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register res_type " << res_type;
+      break;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable res_type " << res_type;
+  }
+}
+
+void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  Register obj = InputRegisterAt(instruction, 0);
+  Register value = InputRegisterAt(instruction, 1);
+  Primitive::Type field_type = instruction->InputAt(1)->GetType();
+  uint32_t offset = instruction->GetFieldOffset().Uint32Value();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      __ Strb(value, MemOperand(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      __ Strh(value, MemOperand(obj, offset));
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+    case Primitive::kPrimLong: {
+      DCHECK(value.IsX() == (field_type == Primitive::kPrimLong));
+      __ Str(value, MemOperand(obj, offset));
+
+      if (field_type == Primitive::kPrimNot) {
+        codegen_->MarkGCCard(obj, value);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      LOG(FATAL) << "Unimplemented register type " << field_type;
+      break;
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+  }
+}
+
+void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderARM64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  HandleInvoke(invoke);
+}
+
+void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
+  locations->AddTemp(LocationFrom(x0));
+
+  InvokeDexCallingConventionVisitor calling_convention_visitor;
+  for (size_t i = 0; i < invoke->InputCount(); i++) {
+    HInstruction* input = invoke->InputAt(i);
+    locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
+  }
+
+  Primitive::Type return_type = invoke->GetType();
+  if (return_type != Primitive::kPrimVoid) {
+    locations->SetOut(calling_convention_visitor.GetReturnLocation(return_type));
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitInvokeStatic(HInvokeStatic* invoke) {
+  Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  // Make sure that ArtMethod* is passed in W0 as per the calling convention
+  DCHECK(temp.Is(w0));
+  size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
+    invoke->GetIndexInDexCache() * kHeapRefSize;
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  __ Ldr(temp, MemOperand(sp, kCurrentMethodStackOffset));
+  // temp = temp->dex_cache_resolved_methods_;
+  __ Ldr(temp, MemOperand(temp.X(), mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // temp = temp[index_in_cache];
+  __ Ldr(temp, MemOperand(temp.X(), index_in_cache));
+  // lr = temp->entry_point_from_quick_compiled_code_;
+  __ Ldr(lr, MemOperand(temp.X(), mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset().SizeValue()));
+  // lr();
+  __ Blr(lr);
+
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
+}
+
+void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location receiver = locations->InAt(0);
+  Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
+    invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+  Offset class_offset = mirror::Object::ClassOffset();
+  Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset();
+
+  // temp = object->GetClass();
+  if (receiver.IsStackSlot()) {
+    __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex()));
+    __ Ldr(temp.W(), MemOperand(temp, class_offset.SizeValue()));
+  } else {
+    DCHECK(receiver.IsRegister());
+    __ Ldr(temp.W(), HeapOperandFrom(receiver, Primitive::kPrimNot,
+                                     class_offset));
+  }
+  // temp = temp->GetMethodAt(method_offset);
+  __ Ldr(temp.W(), MemOperand(temp, method_offset));
+  // lr = temp->GetEntryPoint();
+  __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
+  // lr();
+  __ Blr(lr);
+  DCHECK(!codegen_->IsLeafMethod());
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderARM64::VisitLoadLocal(HLoadLocal* load) {
+  load->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitLoadLocal(HLoadLocal* load) {
+  // Nothing to do, this is driven by the code generator.
+}
+
+void LocationsBuilderARM64::VisitLocal(HLocal* local) {
+  local->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitLocal(HLocal* local) {
+  DCHECK_EQ(local->GetBlock(), GetGraph()->GetEntryBlock());
+}
+
+void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
+  DCHECK(type_index.Is(w0));
+  Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot);
+  DCHECK(current_method.Is(w1));
+  __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
+  __ Mov(type_index, instruction->GetTypeIndex());
+  __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocObjectWithAccessCheck).Int32Value()));
+  __ Blr(lr);
+  codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  DCHECK(!codegen_->IsLeafMethod());
+}
+
+void LocationsBuilderARM64::VisitNot(HNot* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
+  switch (instruction->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+      __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), Operand(1));
+      break;
+
+    case Primitive::kPrimInt:
+      __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
+      break;
+
+    case Primitive::kPrimLong:
+      LOG(FATAL) << "Not yet implemented type for not operation " << instruction->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
+  }
+}
+
+void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
+  SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj = locations->InAt(0);
+  if (obj.IsRegister()) {
+    __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
+  } else {
+    DCHECK(obj.IsConstant()) << obj;
+    DCHECK_EQ(obj.GetConstant()->AsIntConstant()->GetValue(), 0);
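+    // The input is the null constant, so the check is known to fail;
+    // branch unconditionally to the slow path.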
+    __ B(slow_path->GetEntryLabel());
+  }
+}
+
+void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
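+  // Calling-convention stack locations are relative to the caller's frame;
+  // rebase them by this method's frame size so they address the incoming
+  // arguments above it.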
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  }
+  locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorARM64::VisitParameterValue(HParameterValue* instruction) {
+  // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction) {
+  LOG(FATAL) << "Unreachable";
+}
+
+void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  Primitive::Type return_type = instruction->InputAt(0)->GetType();
+
+  if (return_type == Primitive::kPrimFloat || return_type == Primitive::kPrimDouble) {
+    LOG(FATAL) << "Unimplemented return type " << return_type;
+  }
+
+  locations->SetInAt(0, LocationFrom(x0));
+}
+
+void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
+  if (kIsDebugBuild) {
+    Primitive::Type type = instruction->InputAt(0)->GetType();
+    switch (type) {
+      case Primitive::kPrimBoolean:
+      case Primitive::kPrimByte:
+      case Primitive::kPrimChar:
+      case Primitive::kPrimShort:
+      case Primitive::kPrimInt:
+      case Primitive::kPrimNot:
+        DCHECK(InputRegisterAt(instruction, 0).Is(w0));
+        break;
+
+      case Primitive::kPrimLong:
+        DCHECK(InputRegisterAt(instruction, 0).Is(x0));
+        break;
+
+      default:
+        LOG(FATAL) << "Unimplemented return type " << type;
+    }
+  }
+  codegen_->GenerateFrameExit();
+  __ Br(lr);
+}
+
+void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
+  instruction->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
+  codegen_->GenerateFrameExit();
+  __ Br(lr);
+}
+
+void LocationsBuilderARM64::VisitStoreLocal(HStoreLocal* store) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(store);
+  Primitive::Type field_type = store->InputAt(1)->GetType();
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot:
+      locations->SetInAt(1, Location::StackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    case Primitive::kPrimLong:
+      locations->SetInAt(1, Location::DoubleStackSlot(codegen_->GetStackSlot(store->GetLocal())));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented local type " << field_type;
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitStoreLocal(HStoreLocal* store) {
+}
+
+void LocationsBuilderARM64::VisitSub(HSub* instruction) {
+  HandleAddSub(instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
+  HandleAddSub(instruction);
+}
+
+void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  BoundsCheckSlowPathARM64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(
+      instruction, locations->InAt(0), locations->InAt(1));
+  codegen_->AddSlowPath(slow_path);
+
+  __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1));
+  __ B(slow_path->GetEntryLabel(), hs);
+}
+
+void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
+  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+}
+
+void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
+  // TODO: Improve support for suspend checks.
+  SuspendCheckSlowPathARM64* slow_path =
+      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, nullptr);
+  codegen_->AddSlowPath(slow_path);
+
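+  // Decrement the suspend counter; once it reaches zero, take the slow path
+  // to perform the actual runtime suspend check.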
+  __ Subs(wSuspend, wSuspend, 1);
+  __ B(slow_path->GetEntryLabel(), le);
+  __ Bind(slow_path->GetReturnLabel());
+}
+
+void LocationsBuilderARM64::VisitTemporary(HTemporary* temp) {
+  temp->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitTemporary(HTemporary* temp) {
+  // Nothing to do, this is driven by the code generator.
+}
+
+}  // namespace arm64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
new file mode 100644
index 0000000..a4003ff
--- /dev/null
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
+
+#include "code_generator.h"
+#include "nodes.h"
+#include "parallel_move_resolver.h"
+#include "utils/arm64/assembler_arm64.h"
+#include "a64/disasm-a64.h"
+#include "a64/macro-assembler-a64.h"
+#include "arch/arm64/quick_method_frame_info_arm64.h"
+
+namespace art {
+namespace arm64 {
+
+class CodeGeneratorARM64;
+
+static constexpr size_t kArm64WordSize = 8;
+static const vixl::Register kParameterCoreRegisters[] = {
+  vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7
+};
+static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+static const vixl::FPRegister kParameterFPRegisters[] = {
+  vixl::d0, vixl::d1, vixl::d2, vixl::d3, vixl::d4, vixl::d5, vixl::d6, vixl::d7
+};
+static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
+
+const vixl::Register tr = vixl::x18;        // Thread Register
+const vixl::Register wSuspend = vixl::w19;  // Suspend Register
+const vixl::Register xSuspend = vixl::x19;
+
+const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
+const vixl::CPURegList runtime_reserved_core_registers(tr, xSuspend, vixl::lr);
+const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
+                                                    vixl::kXRegSize,
+                                                    kArm64CalleeSaveRefSpills);
+
+class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl::FPRegister> {
+ public:
+  InvokeDexCallingConvention()
+      : CallingConvention(kParameterCoreRegisters,
+                          kParameterCoreRegistersLength,
+                          kParameterFPRegisters,
+                          kParameterFPRegistersLength) {}
+
+  Location GetReturnLocation(Primitive::Type return_type) {
+    DCHECK_NE(return_type, Primitive::kPrimVoid);
+    if (return_type == Primitive::kPrimFloat || return_type == Primitive::kPrimDouble) {
+      LOG(FATAL) << "Unimplemented return type " << return_type;
+    }
+    return Location::RegisterLocation(X0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
+};
+
+class InvokeDexCallingConventionVisitor {
+ public:
+  InvokeDexCallingConventionVisitor() : gp_index_(0), stack_index_(0) {}
+
+  Location GetNextLocation(Primitive::Type type);
+  Location GetReturnLocation(Primitive::Type return_type) {
+    return calling_convention.GetReturnLocation(return_type);
+  }
+
+ private:
+  InvokeDexCallingConvention calling_convention;
+  // The current index for core registers.
+  uint32_t gp_index_;
+  // The current stack index.
+  uint32_t stack_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
+class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+ public:
+  InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+  virtual void Visit##name(H##name* instr);
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+#undef DECLARE_VISIT_INSTRUCTION
+
+  void LoadCurrentMethod(XRegister reg);
+
+  Arm64Assembler* GetAssembler() const { return assembler_; }
+
+ private:
+  void HandleAddSub(HBinaryOperation* instr);
+
+  Arm64Assembler* const assembler_;
+  CodeGeneratorARM64* const codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARM64);
+};
+
+class LocationsBuilderARM64 : public HGraphVisitor {
+ public:
+  explicit LocationsBuilderARM64(HGraph* graph, CodeGeneratorARM64* codegen)
+      : HGraphVisitor(graph), codegen_(codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+  virtual void Visit##name(H##name* instr);
+  FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleAddSub(HBinaryOperation* instr);
+  void HandleInvoke(HInvoke* instr);
+
+  CodeGeneratorARM64* const codegen_;
+  InvokeDexCallingConventionVisitor parameter_visitor_;
+
+  DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
+};
+
+class CodeGeneratorARM64 : public CodeGenerator {
+ public:
+  explicit CodeGeneratorARM64(HGraph* graph);
+  virtual ~CodeGeneratorARM64() { }
+
+  virtual void GenerateFrameEntry() OVERRIDE;
+  virtual void GenerateFrameExit() OVERRIDE;
+
+  static const vixl::CPURegList& GetFramePreservedRegisters() {
+    static const vixl::CPURegList frame_preserved_regs =
+        vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit());
+    return frame_preserved_regs;
+  }
+  static int GetFramePreservedRegistersSize() {
+    return GetFramePreservedRegisters().TotalSizeInBytes();
+  }
+
+  virtual void Bind(HBasicBlock* block) OVERRIDE;
+
+  vixl::Label* GetLabelOf(HBasicBlock* block) const {
+    return block_labels_ + block->GetBlockId();
+  }
+
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
+
+  virtual size_t GetWordSize() const OVERRIDE {
+    return kArm64WordSize;
+  }
+
+  virtual size_t FrameEntrySpillSize() const OVERRIDE;
+
+  virtual HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
+  virtual HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
+  virtual Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
+
+  // Emit a write barrier.
+  void MarkGCCard(vixl::Register object, vixl::Register value);
+
+  // Register allocation.
+
+  virtual void SetupBlockedRegisters() const OVERRIDE;
+  // AllocateFreeRegister() is only used when allocating registers locally
+  // during CompileBaseline().
+  virtual Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
+
+  virtual Location GetStackLocation(HLoadLocal* load) const OVERRIDE;
+
+  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE {
+    UNIMPLEMENTED(INFO) << "TODO: SaveCoreRegister";
+    return 0;
+  }
+
+  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE {
+    UNIMPLEMENTED(INFO) << "TODO: RestoreCoreRegister";
+    return 0;
+  }
+
+  // The number of registers that can be allocated. The register allocator may
+  // decide to reserve and not use a few of them.
+  // We do not consider registers sp, xzr, wzr. They are either not allocatable
+  // (xzr, wzr), or make for poor allocatable registers (sp alignment
+  // requirements, etc.). This also facilitates our task, as all other
+  // registers can easily be mapped to or from their type and index or code.
+  static const int kNumberOfAllocatableCoreRegisters = vixl::kNumberOfRegisters - 1;
+  static const int kNumberOfAllocatableFloatingPointRegisters = vixl::kNumberOfFPRegisters;
+  static const int kNumberOfAllocatableRegisters =
+      kNumberOfAllocatableCoreRegisters + kNumberOfAllocatableFloatingPointRegisters;
+  static constexpr int kNumberOfAllocatableRegisterPairs = 0;
+
+  virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
+  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kArm64;
+  }
+
+  void MoveHelper(Location destination, Location source, Primitive::Type type);
+
+  virtual void Initialize() OVERRIDE {
+    HGraph* graph = GetGraph();
+    int length = graph->GetBlocks().Size();
+    block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length);
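+    // The arena returns raw storage, so run each label's constructor with
+    // placement new.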
+    for (int i = 0; i < length; ++i) {
+      new(block_labels_ + i) vixl::Label();
+    }
+  }
+
+ private:
+  // Labels for each block that will be compiled.
+  vixl::Label* block_labels_;
+
+  LocationsBuilderARM64 location_builder_;
+  InstructionCodeGeneratorARM64 instruction_visitor_;
+  Arm64Assembler assembler_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
+};
+
+}  // namespace arm64
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 5f01265..aa0f06b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -990,11 +990,11 @@
       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetOut(Location::SameAsFirstInput());
       break;
 
-    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
@@ -1016,6 +1016,17 @@
       break;
 
     case Primitive::kPrimLong:
+      DCHECK(in.IsRegisterPair());
+      __ negl(out.AsRegisterPairLow<Register>());
+      // Negation is similar to subtraction from zero. The low 32 bits
+      // trigger a borrow when they are different from zero; to take that
+      // into account, add 1 to the high 32 bits if the carry flag (CF) is
+      // set to 1 after the first NEGL operation.
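+      // For example, with hi:lo = 0x00000000:0x00000001 (the 64-bit
+      // value 1):
+      //   negl lo  ->  lo = 0xFFFFFFFF, CF = 1
+      //   adcl hi  ->  hi = 0x00000001
+      //   negl hi  ->  hi = 0xFFFFFFFF, i.e. the result is -1.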
+      __ adcl(out.AsRegisterPairHigh<Register>(), Immediate(0));
+      __ negl(out.AsRegisterPairHigh<Register>());
+      break;
+
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
@@ -1056,16 +1067,13 @@
   LocationSummary* locations = add->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-
+  DCHECK(first.Equals(locations->Out()));
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
-      DCHECK_EQ(first.As<Register>(), locations->Out().As<Register>());
       if (second.IsRegister()) {
         __ addl(first.As<Register>(), second.As<Register>());
       } else if (second.IsConstant()) {
-        HConstant* instruction = second.GetConstant();
-        Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ addl(first.As<Register>(), imm);
+        __ addl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       } else {
         __ addl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
       }
@@ -1073,10 +1081,6 @@
     }
 
     case Primitive::kPrimLong: {
-      DCHECK_EQ(first.AsRegisterPairLow<Register>(),
-                locations->Out().AsRegisterPairLow<Register>());
-      DCHECK_EQ(first.AsRegisterPairHigh<Register>(),
-                locations->Out().AsRegisterPairHigh<Register>());
       if (second.IsRegister()) {
         __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
         __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
@@ -1122,16 +1126,16 @@
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   }
 }
 
@@ -1139,52 +1143,43 @@
   LocationSummary* locations = sub->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
+  DCHECK(first.Equals(locations->Out()));
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      DCHECK_EQ(first.As<Register>(),
-                locations->Out().As<Register>());
       if (second.IsRegister()) {
-        __ subl(first.As<Register>(),
-                second.As<Register>());
+        __ subl(first.As<Register>(), second.As<Register>());
       } else if (second.IsConstant()) {
-        HConstant* instruction = second.GetConstant();
-        Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ subl(first.As<Register>(), imm);
+        __ subl(first.As<Register>(), Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
       } else {
-        __ subl(first.As<Register>(),
-                Address(ESP, second.GetStackIndex()));
+        __ subl(first.As<Register>(), Address(ESP, second.GetStackIndex()));
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      DCHECK_EQ(first.AsRegisterPairLow<Register>(),
-                locations->Out().AsRegisterPairLow<Register>());
-      DCHECK_EQ(first.AsRegisterPairHigh<Register>(),
-                locations->Out().AsRegisterPairHigh<Register>());
       if (second.IsRegister()) {
-        __ subl(first.AsRegisterPairLow<Register>(),
-                second.AsRegisterPairLow<Register>());
-        __ sbbl(first.AsRegisterPairHigh<Register>(),
-                second.AsRegisterPairHigh<Register>());
+        __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>());
+        __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>());
       } else {
-        __ subl(first.AsRegisterPairLow<Register>(),
-                Address(ESP, second.GetStackIndex()));
+        __ subl(first.AsRegisterPairLow<Register>(), Address(ESP, second.GetStackIndex()));
         __ sbbl(first.AsRegisterPairHigh<Register>(),
                 Address(ESP, second.GetHighStackIndex(kX86WordSize)));
       }
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+    case Primitive::kPrimFloat: {
+      __ subss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
+    }
+
+    case Primitive::kPrimDouble: {
+      __ subsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   }
 }
 
@@ -1356,18 +1351,34 @@
 void InstructionCodeGeneratorX86::VisitParameterValue(HParameterValue* instruction) {
 }
 
-void LocationsBuilderX86::VisitNot(HNot* instruction) {
+void LocationsBuilderX86::VisitNot(HNot* not_) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::SameAsFirstInput());
 }
 
-void InstructionCodeGeneratorX86::VisitNot(HNot* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
+void InstructionCodeGeneratorX86::VisitNot(HNot* not_) {
+  LocationSummary* locations = not_->GetLocations();
   Location out = locations->Out();
   DCHECK_EQ(locations->InAt(0).As<Register>(), out.As<Register>());
-  __ xorl(out.As<Register>(), Immediate(1));
+  switch (not_->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+      __ xorl(out.As<Register>(), Immediate(1));
+      break;
+
+    case Primitive::kPrimInt:
+      __ notl(out.As<Register>());
+      break;
+
+    case Primitive::kPrimLong:
+      LOG(FATAL) << "Not yet implemented type for not operation " << not_->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
+  }
 }
 
 void LocationsBuilderX86::VisitCompare(HCompare* compare) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 38a40dc..892ca9d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -976,11 +976,11 @@
       new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
   switch (neg->GetResultType()) {
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetOut(Location::SameAsFirstInput());
       break;
 
-    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
@@ -1002,6 +1002,10 @@
       break;
 
     case Primitive::kPrimLong:
+      DCHECK(in.IsRegister());
+      __ negq(out.As<CpuRegister>());
+      break;
+
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       LOG(FATAL) << "Not yet implemented neg type " << neg->GetResultType();
@@ -1047,19 +1051,17 @@
   LocationSummary* locations = add->GetLocations();
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
-
   DCHECK(first.Equals(locations->Out()));
+
   switch (add->GetResultType()) {
     case Primitive::kPrimInt: {
       if (second.IsRegister()) {
         __ addl(first.As<CpuRegister>(), second.As<CpuRegister>());
       } else if (second.IsConstant()) {
-        HConstant* instruction = second.GetConstant();
-        Immediate imm(instruction->AsIntConstant()->GetValue());
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
         __ addl(first.As<CpuRegister>(), imm);
       } else {
-        __ addl(first.As<CpuRegister>(),
-                Address(CpuRegister(RSP), second.GetStackIndex()));
+        __ addl(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
@@ -1100,53 +1102,52 @@
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
-
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
       break;
-
+    }
     default:
-      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   }
 }
 
 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
   LocationSummary* locations = sub->GetLocations();
-  DCHECK_EQ(locations->InAt(0).As<CpuRegister>().AsRegister(),
-            locations->Out().As<CpuRegister>().AsRegister());
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  DCHECK(first.Equals(locations->Out()));
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (locations->InAt(1).IsRegister()) {
-        __ subl(locations->InAt(0).As<CpuRegister>(),
-                locations->InAt(1).As<CpuRegister>());
-      } else if (locations->InAt(1).IsConstant()) {
-        HConstant* instruction = locations->InAt(1).GetConstant();
-        Immediate imm(instruction->AsIntConstant()->GetValue());
-        __ subl(locations->InAt(0).As<CpuRegister>(), imm);
+      if (second.IsRegister()) {
+        __ subl(first.As<CpuRegister>(), second.As<CpuRegister>());
+      } else if (second.IsConstant()) {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
+        __ subl(first.As<CpuRegister>(), imm);
       } else {
-        __ subl(locations->InAt(0).As<CpuRegister>(),
-                Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+        __ subl(first.As<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
       }
       break;
     }
     case Primitive::kPrimLong: {
-      __ subq(locations->InAt(0).As<CpuRegister>(),
-              locations->InAt(1).As<CpuRegister>());
+      __ subq(first.As<CpuRegister>(), second.As<CpuRegister>());
       break;
     }
 
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
+    case Primitive::kPrimFloat: {
+      __ subss(first.As<XmmRegister>(), second.As<XmmRegister>());
       break;
+    }
+
+    case Primitive::kPrimDouble: {
+      __ subsd(first.As<XmmRegister>(), second.As<XmmRegister>());
+      break;
+    }
 
     default:
-      LOG(FATAL) << "Unimplemented sub type " << sub->GetResultType();
+      LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
   }
 }
 
@@ -1276,18 +1277,34 @@
   // Nothing to do, the parameter is already at its location.
 }
 
-void LocationsBuilderX86_64::VisitNot(HNot* instruction) {
+void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetOut(Location::SameAsFirstInput());
 }
 
-void InstructionCodeGeneratorX86_64::VisitNot(HNot* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
+void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
+  LocationSummary* locations = not_->GetLocations();
   DCHECK_EQ(locations->InAt(0).As<CpuRegister>().AsRegister(),
             locations->Out().As<CpuRegister>().AsRegister());
-  __ xorq(locations->Out().As<CpuRegister>(), Immediate(1));
+  Location out = locations->Out();
+  switch (not_->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+      __ xorq(out.As<CpuRegister>(), Immediate(1));
+      break;
+
+    case Primitive::kPrimInt:
+      __ notl(out.As<CpuRegister>());
+      break;
+
+    case Primitive::kPrimLong:
+      LOG(FATAL) << "Not yet implemented type for not operation " << not_->GetResultType();
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
+  }
 }
 
 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index af4cf73..03951e2 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -16,8 +16,10 @@
 
 #include <functional>
 
+#include "base/macros.h"
 #include "builder.h"
 #include "code_generator_arm.h"
+#include "code_generator_arm64.h"
 #include "code_generator_x86.h"
 #include "code_generator_x86_64.h"
 #include "common_compiler_test.h"
@@ -93,6 +95,12 @@
   if (kRuntimeISA == kX86_64) {
     Run(allocator, codegenX86_64, has_result, expected);
   }
+
+  arm64::CodeGeneratorARM64 codegenARM64(graph);
+  codegenARM64.CompileBaseline(&allocator, true);
+  if (kRuntimeISA == kArm64) {
+    Run(allocator, codegenARM64, has_result, expected);
+  }
 }
 
 static void RunCodeOptimized(CodeGenerator* codegen,
@@ -134,8 +142,8 @@
   HGraphBuilder builder(&arena);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   HGraph* graph = builder.BuildGraph(*item);
-  // Remove suspend checks, they cannot be executed in this context.
   ASSERT_NE(graph, nullptr);
+  // Remove suspend checks, they cannot be executed in this context.
   RemoveSuspendChecks(graph);
   RunCodeBaseline(graph, has_result, expected);
 }
@@ -260,6 +268,31 @@
   TestCode(data, true, 0);
 }
 
+// Exercise bit-wise (one's complement) not-int instruction.
+#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \
+TEST(CodegenTest, TEST_NAME) {                          \
+  const int32_t input = INPUT;                          \
+  const uint16_t input_lo = input & 0x0000FFFF;         \
+  const uint16_t input_hi = input >> 16;                \
+  const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(      \
+      Instruction::CONST | 0 << 8, input_lo, input_hi,  \
+      Instruction::NOT_INT | 1 << 8 | 0 << 12,          \
+      Instruction::RETURN | 1 << 8);                    \
+                                                        \
+  TestCode(data, true, EXPECTED_OUTPUT);                \
+}
+
+NOT_INT_TEST(ReturnNotIntMinus2, -2, 1)
+NOT_INT_TEST(ReturnNotIntMinus1, -1, 0)
+NOT_INT_TEST(ReturnNotInt0, 0, -1)
+NOT_INT_TEST(ReturnNotInt1, 1, -2)
+NOT_INT_TEST(ReturnNotIntINT_MIN, -2147483648, 2147483647)  // (2^31) - 1
+NOT_INT_TEST(ReturnNotIntINT_MINPlus1, -2147483647, 2147483646)  // (2^31) - 2
+NOT_INT_TEST(ReturnNotIntINT_MAXMinus1, 2147483646, -2147483647)  // -(2^31) + 1
+NOT_INT_TEST(ReturnNotIntINT_MAX, 2147483647, -2147483648)  // -(2^31)
+
+#undef NOT_INT_TEST
+
 TEST(CodegenTest, ReturnAdd1) {
   const uint16_t data[] = TWO_REGISTERS_CODE_ITEM(
     Instruction::CONST_4 | 3 << 12 | 0,
@@ -370,12 +403,16 @@
     TestCode(data, true, 12);                         \
   }
 
+#if !defined(__aarch64__)
 MUL_TEST(INT, MulInt);
 MUL_TEST(LONG, MulLong);
-// MUL_TEST(FLOAT, Float);
-// MUL_TEST(DOUBLE, Double);
+#endif
 
+#if defined(__aarch64__)
+TEST(CodegenTest, DISABLED_ReturnMulIntLit8) {
+#else
 TEST(CodegenTest, ReturnMulIntLit8) {
+#endif
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT8, 3 << 8 | 0,
@@ -384,7 +421,11 @@
   TestCode(data, true, 12);
 }
 
+#if defined(__aarch64__)
+TEST(CodegenTest, DISABLED_ReturnMulIntLit16) {
+#else
 TEST(CodegenTest, ReturnMulIntLit16) {
+#endif
   const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
     Instruction::CONST_4 | 4 << 12 | 0 << 8,
     Instruction::MUL_INT_LIT16, 3,
@@ -393,5 +434,121 @@
   TestCode(data, true, 12);
 }
 
+TEST(CodegenTest, MaterializedCondition1) {
+  // Check that conditions are materialized correctly. A materialized condition
+  // should yield `1` if it evaluates to true, and `0` otherwise.
+  // We force the materialization of comparisons for different combinations of
+  // inputs and check the results.
+
+  int lhs[] = {1, 2, -1, 2, 0xabc};
+  int rhs[] = {2, 1, 2, -1, 0xabc};
+
+  for (size_t i = 0; i < arraysize(lhs); i++) {
+    ArenaPool pool;
+    ArenaAllocator allocator(&pool);
+    HGraph* graph = new (&allocator) HGraph(&allocator);
+
+    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(entry_block);
+    graph->SetEntryBlock(entry_block);
+    entry_block->AddInstruction(new (&allocator) HGoto());
+    HBasicBlock* code_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(code_block);
+    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(exit_block);
+    exit_block->AddInstruction(new (&allocator) HExit());
+
+    entry_block->AddSuccessor(code_block);
+    code_block->AddSuccessor(exit_block);
+    graph->SetExitBlock(exit_block);
+
+    HIntConstant cst_lhs(lhs[i]);
+    code_block->AddInstruction(&cst_lhs);
+    HIntConstant cst_rhs(rhs[i]);
+    code_block->AddInstruction(&cst_rhs);
+    HLessThan cmp_lt(&cst_lhs, &cst_rhs);
+    code_block->AddInstruction(&cmp_lt);
+    HReturn ret(&cmp_lt);
+    code_block->AddInstruction(&ret);
+
+    auto hook_before_codegen = [](HGraph* graph) {
+      HBasicBlock* block = graph->GetEntryBlock()->GetSuccessors().Get(0);
+      HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+      block->InsertInstructionBefore(move, block->GetLastInstruction());
+    };
+
+    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+  }
+}
+
+TEST(CodegenTest, MaterializedCondition2) {
+  // Check that HIf correctly interprets a materialized condition.
+  // We force the materialization of comparisons for different combinations of
+  // inputs. An HIf takes the materialized condition as input and returns a
+  // value that we verify.
+
+  int lhs[] = {1, 2, -1, 2, 0xabc};
+  int rhs[] = {2, 1, 2, -1, 0xabc};
+
+  for (size_t i = 0; i < arraysize(lhs); i++) {
+    ArenaPool pool;
+    ArenaAllocator allocator(&pool);
+    HGraph* graph = new (&allocator) HGraph(&allocator);
+
+    HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(entry_block);
+    graph->SetEntryBlock(entry_block);
+    entry_block->AddInstruction(new (&allocator) HGoto());
+
+    HBasicBlock* if_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(if_block);
+    HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(if_true_block);
+    HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(if_false_block);
+    HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph);
+    graph->AddBlock(exit_block);
+    exit_block->AddInstruction(new (&allocator) HExit());
+
+    entry_block->AddSuccessor(if_block);
+    if_block->AddSuccessor(if_true_block);
+    if_block->AddSuccessor(if_false_block);
+    if_true_block->AddSuccessor(exit_block);
+    if_false_block->AddSuccessor(exit_block);
+    graph->SetExitBlock(exit_block);
+
+    HIntConstant cst_lhs(lhs[i]);
+    if_block->AddInstruction(&cst_lhs);
+    HIntConstant cst_rhs(rhs[i]);
+    if_block->AddInstruction(&cst_rhs);
+    HLessThan cmp_lt(&cst_lhs, &cst_rhs);
+    if_block->AddInstruction(&cmp_lt);
+    // We insert a temporary to separate the HIf from the HLessThan and force
+    // the materialization of the condition.
+    HTemporary force_materialization(0);
+    if_block->AddInstruction(&force_materialization);
+    HIf if_lt(&cmp_lt);
+    if_block->AddInstruction(&if_lt);
+
+    HIntConstant cst_lt(1);
+    if_true_block->AddInstruction(&cst_lt);
+    HReturn ret_lt(&cst_lt);
+    if_true_block->AddInstruction(&ret_lt);
+    HIntConstant cst_ge(0);
+    if_false_block->AddInstruction(&cst_ge);
+    HReturn ret_ge(&cst_ge);
+    if_false_block->AddInstruction(&ret_ge);
+
+    auto hook_before_codegen = [](HGraph* graph) {
+      HBasicBlock* block = graph->GetEntryBlock()->GetSuccessors().Get(0);
+      HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+      block->InsertInstructionBefore(move, block->GetLastInstruction());
+    };
+
+    RunCodeOptimized(graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+  }
+}
 
 }  // namespace art
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index db31306..badf21d 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -53,7 +53,7 @@
   }
 
   // Print detected errors on output stream `os`.
-  void Dump(std::ostream& os) {
+  void Dump(std::ostream& os) const {
     for (size_t i = 0, e = errors_.Size(); i < e; ++i) {
       os << dump_prefix_ << errors_.Get(i) << std::endl;
     }
@@ -68,7 +68,7 @@
 
  private:
   // String displayed before dumped errors.
-  const char* dump_prefix_;
+  const char* const dump_prefix_;
 
   DISALLOW_COPY_AND_ASSIGN(GraphChecker);
 };
diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h
index 05984a0..4d8bec2 100644
--- a/compiler/optimizing/graph_visualizer.h
+++ b/compiler/optimizing/graph_visualizer.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_GRAPH_VISUALIZER_H_
 #define ART_COMPILER_OPTIMIZING_GRAPH_VISUALIZER_H_
 
+#include <ostream>
+
 #include "base/value_object.h"
 
 namespace art {
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2d9e35c..29eabe7 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -50,7 +50,7 @@
       // Replace (bool_value == 0) with !bool_value
       DCHECK_EQ(input2->AsIntConstant()->GetValue(), 0);
       equal->GetBlock()->ReplaceAndRemoveInstructionWith(
-          equal, new (GetGraph()->GetArena()) HNot(input1));
+          equal, new (GetGraph()->GetArena()) HNot(Primitive::kPrimBoolean, input1));
     }
   }
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 9b7ff88..7adb840 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -485,7 +485,7 @@
   M(Local, Instruction)                                                 \
   M(LongConstant, Constant)                                             \
   M(NewInstance, Instruction)                                           \
-  M(Not, Instruction)                                                   \
+  M(Not, UnaryOperation)                                                \
   M(ParameterValue, Instruction)                                        \
   M(ParallelMove, Instruction)                                          \
   M(Phi, Instruction)                                                   \
@@ -1708,15 +1708,17 @@
   DISALLOW_COPY_AND_ASSIGN(HParameterValue);
 };
 
-class HNot : public HExpression<1> {
+class HNot : public HUnaryOperation {
  public:
-  explicit HNot(HInstruction* input) : HExpression(Primitive::kPrimBoolean, SideEffects::None()) {
-    SetRawInputAt(0, input);
-  }
+  explicit HNot(Primitive::Type result_type, HInstruction* input)
+      : HUnaryOperation(result_type, input) {}
 
   virtual bool CanBeMoved() const { return true; }
   virtual bool InstructionDataEquals(HInstruction* other) const { return true; }
 
+  virtual int32_t Evaluate(int32_t x) const OVERRIDE { return ~x; }
+  virtual int64_t Evaluate(int64_t x) const OVERRIDE { return ~x; }
+
   DECLARE_INSTRUCTION(Not);
 
  private:
@@ -2123,7 +2125,7 @@
 #undef DECLARE_VISIT_INSTRUCTION
 
  private:
-  HGraph* graph_;
+  HGraph* const graph_;
 
   DISALLOW_COPY_AND_ASSIGN(HGraphVisitor);
 };
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 33dc040..ea98186 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -16,6 +16,7 @@
 
 #include "optimization.h"
 
+#include "base/dumpable.h"
 #include "graph_checker.h"
 
 namespace art {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index dce8e6d..0555c00 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -215,7 +215,10 @@
   }
 
   // Do not attempt to compile on architectures we do not support.
-  if (instruction_set != kX86 && instruction_set != kX86_64 && instruction_set != kThumb2) {
+  if (instruction_set != kArm64 &&
+      instruction_set != kThumb2 &&
+      instruction_set != kX86 &&
+      instruction_set != kX86_64) {
     return nullptr;
   }
 
@@ -230,23 +233,30 @@
   bool shouldOptimize =
       dex_compilation_unit.GetSymbol().find("00024reg_00024") != std::string::npos;
 
+  if (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat) {
+    uint32_t shorty_len;
+    const char* shorty = dex_compilation_unit.GetShorty(&shorty_len);
+    for (uint32_t i = 0; i < shorty_len; ++i) {
+      if (shorty[i] == 'D' || shorty[i] == 'F') {
+        CHECK(!shouldCompile) << "Hard float ARM32 parameters are not yet supported";
+        return nullptr;
+      }
+    }
+  }
+
   ArenaPool pool;
   ArenaAllocator arena(&pool);
   HGraphBuilder builder(&arena, &dex_compilation_unit, &dex_file, GetCompilerDriver());
 
   HGraph* graph = builder.BuildGraph(*code_item);
   if (graph == nullptr) {
-    if (shouldCompile) {
-      LOG(FATAL) << "Could not build graph in optimizing compiler";
-    }
+    CHECK(!shouldCompile) << "Could not build graph in optimizing compiler";
     return nullptr;
   }
 
   CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set);
   if (codegen == nullptr) {
-    if (shouldCompile) {
-      LOG(FATAL) << "Could not find code generator for optimizing compiler";
-    }
+    CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler";
     return nullptr;
   }
 
@@ -302,7 +312,7 @@
                               stack_map);
   } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
     LOG(FATAL) << "Could not allocate registers in optimizing compiler";
-    return nullptr;
+    UNREACHABLE();
   } else {
     unoptimized_compiled_methods_++;
     codegen->CompileBaseline(&allocator);
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index fc65f97..f95c4a4 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -16,6 +16,8 @@
 
 #include "register_allocator.h"
 
+#include <sstream>
+
 #include "base/bit_vector-inl.h"
 #include "code_generator.h"
 #include "ssa_liveness_analysis.h"
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 8f71848..7dda4f6 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -32,6 +32,7 @@
         live_in_(allocator, number_of_ssa_values, false),
         live_out_(allocator, number_of_ssa_values, false),
         kill_(allocator, number_of_ssa_values, false) {
+    UNUSED(block_);
     live_in_.ClearAllBits();
     live_out_.ClearAllBits();
     kill_.ClearAllBits();
diff --git a/compiler/utils/arena_object.h b/compiler/utils/arena_object.h
index 50909f75..8f6965e 100644
--- a/compiler/utils/arena_object.h
+++ b/compiler/utils/arena_object.h
@@ -31,6 +31,7 @@
 
   void operator delete(void*, size_t) {
     LOG(FATAL) << "UNREACHABLE";
+    UNREACHABLE();
   }
 };
 
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index b430c7e..e9788f9 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -324,7 +324,7 @@
       return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     default:
       LOG(FATAL) << "UNREACHABLE";
-      return false;
+      UNREACHABLE();
   }
 }
 
@@ -342,7 +342,7 @@
       return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     default:
       LOG(FATAL) << "UNREACHABLE";
-      return false;
+      UNREACHABLE();
   }
 }
 
@@ -359,9 +359,9 @@
       return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     case kLoadWordPair:
       return IsAbsoluteUint(10, offset);
-  default:
+    default:
       LOG(FATAL) << "UNREACHABLE";
-      return false;
+      UNREACHABLE();
   }
 }
 
@@ -377,9 +377,9 @@
       return IsAbsoluteUint(10, offset);  // VFP addressing mode.
     case kStoreWordPair:
       return IsAbsoluteUint(10, offset);
-  default:
+    default:
       LOG(FATAL) << "UNREACHABLE";
-      return false;
+      UNREACHABLE();
   }
 }
 
@@ -417,9 +417,23 @@
   StoreToOffset(kStoreWord, R0, SP, 0);
 
   // Write out entry spills.
+  int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>);
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    Register reg = entry_spills.at(i).AsArm().AsCoreRegister();
-    StoreToOffset(kStoreWord, reg, SP, frame_size + kFramePointerSize + (i * kFramePointerSize));
+    ArmManagedRegister reg = entry_spills.at(i).AsArm();
+    if (reg.IsNoRegister()) {
+      // Only increment the stack offset.
+      ManagedRegisterSpill spill = entry_spills.at(i);
+      offset += spill.getSize();
+    } else if (reg.IsCoreRegister()) {
+      StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsSRegister()) {
+      StoreSToOffset(reg.AsSRegister(), SP, offset);
+      offset += 4;
+    } else if (reg.IsDRegister()) {
+      StoreDToOffset(reg.AsDRegister(), SP, offset);
+      offset += 8;
+    }
   }
 }
 
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 6af69c8..3d46617 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1356,6 +1356,7 @@
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 }
 
@@ -1427,6 +1428,7 @@
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 7968a77..37478c4 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2406,6 +2406,7 @@
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 }
 
@@ -2477,6 +2478,7 @@
       break;
     default:
       LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
 }
 
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index c144991..1b1d121 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -219,12 +219,13 @@
   void AddConstant(XRegister rd, int32_t value, vixl::Condition cond = vixl::al);
   void AddConstant(XRegister rd, XRegister rn, int32_t value, vixl::Condition cond = vixl::al);
 
-  // Vixl assembler.
-  vixl::MacroAssembler* const vixl_masm_;
-
   // List of exception blocks to generate at the end of the code cache.
   std::vector<Arm64Exception*> exception_blocks_;
 
+ public:
+  // Vixl assembler.
+  vixl::MacroAssembler* const vixl_masm_;
+
   // Used for testing.
   friend class Arm64ManagedRegister_VixlRegisters_Test;
 };
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index 91b8d8ab..2b0c94c 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -118,6 +118,7 @@
   friend class arm::ArmAssembler;
   friend class arm::Arm32Assembler;
   friend class arm::Thumb2Assembler;
+  friend class arm64::Arm64Assembler;
   friend class mips::MipsAssembler;
   friend class x86::X86Assembler;
   friend class x86_64::X86_64Assembler;
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 5bfa462..91237ae 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -24,7 +24,6 @@
 #include <cstdio>
 #include <cstdlib>
 #include <fstream>
-#include <iostream>
 #include <iterator>
 #include <sys/stat.h>
 
@@ -118,9 +117,8 @@
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
     for (auto reg : registers) {
       for (int64_t imm : imms) {
-        Imm* new_imm = CreateImmediate(imm);
-        (assembler_.get()->*f)(*reg, *new_imm);
-        delete new_imm;
+        Imm new_imm = CreateImmediate(imm);
+        (assembler_.get()->*f)(*reg, new_imm);
         std::string base = fmt;
 
         size_t reg_index = base.find("{reg}");
@@ -154,9 +152,8 @@
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
     for (int64_t imm : imms) {
-      Imm* new_imm = CreateImmediate(imm);
-      (assembler_.get()->*f)(*new_imm);
-      delete new_imm;
+      Imm new_imm = CreateImmediate(imm);
+      (assembler_.get()->*f)(new_imm);
       std::string base = fmt;
 
       size_t imm_index = base.find("{imm}");
@@ -333,7 +330,7 @@
   }
 
   // Create an immediate from the specific value.
-  virtual Imm* CreateImmediate(int64_t imm_value) = 0;
+  virtual Imm CreateImmediate(int64_t imm_value) = 0;
 
  private:
   // Driver() assembles and compares the results. If the results are not equal and we have a
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index c7eada3..b5bf31b 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -29,7 +29,7 @@
 namespace art {
 namespace x86 {
 
-class Immediate {
+class Immediate : public ValueObject {
  public:
   explicit Immediate(int32_t value) : value_(value) {}
 
@@ -47,7 +47,7 @@
 };
 
 
-class Operand {
+class Operand : public ValueObject {
  public:
   uint8_t mod() const {
     return (encoding_at(0) >> 6) & 3;
@@ -129,8 +129,6 @@
   }
 
   friend class X86Assembler;
-
-  DISALLOW_COPY_AND_ASSIGN(Operand);
 };
 
 
@@ -168,7 +166,6 @@
     }
   }
 
-
   Address(Register index, ScaleFactor scale, int32_t disp) {
     CHECK_NE(index, ESP);  // Illegal addressing mode.
     SetModRM(0, ESP);
@@ -205,14 +202,12 @@
 
  private:
   Address() {}
-
-  DISALLOW_COPY_AND_ASSIGN(Address);
 };
 
 
 class X86Assembler FINAL : public Assembler {
  public:
-  explicit X86Assembler() {}
+  explicit X86Assembler() : cfi_cfa_offset_(0), cfi_pc_(0) {}
   virtual ~X86Assembler() {}
 
   /*
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index f4c9862..bf58b16 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1222,7 +1222,7 @@
 
 void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  EmitOptionalRex32(reg);
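+  // 0x69 is imul r32, r/m32, imm32; `reg` is both the destination (ModRM reg
+  // field) and the source (ModRM r/m field), so a REX prefix may be needed
+  // for either operand.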
+  EmitOptionalRex32(reg, reg);
   EmitUint8(0x69);
   EmitOperand(reg.LowBits(), Operand(reg));
   EmitImmediate(imm);
@@ -1340,6 +1340,13 @@
   EmitOperand(3, Operand(reg));
 }
 
+void X86_64Assembler::negq(CpuRegister reg) {
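+  // NEG r/m64 is REX.W + F7 /3; the '3' passed to EmitOperand below is the
+  // opcode extension in the ModRM reg field.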
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(reg);
+  EmitUint8(0xF7);
+  EmitOperand(3, Operand(reg));
+}
+
 
 void X86_64Assembler::notl(CpuRegister reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 7e5859c..ec29271 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -36,7 +36,7 @@
 //
 // Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
 // conversion rules in expressions regarding negation, especially size_t on 32b.
-class Immediate {
+class Immediate : public ValueObject {
  public:
   explicit Immediate(int64_t value) : value_(value) {}
 
@@ -54,12 +54,10 @@
 
  private:
   const int64_t value_;
-
-  DISALLOW_COPY_AND_ASSIGN(Immediate);
 };
 
 
-class Operand {
+class Operand : public ValueObject {
  public:
   uint8_t mod() const {
     return (encoding_at(0) >> 6) & 3;
@@ -157,8 +155,6 @@
   }
 
   friend class X86_64Assembler;
-
-  DISALLOW_COPY_AND_ASSIGN(Operand);
 };
 
 
@@ -247,8 +243,6 @@
 
  private:
   Address() {}
-
-  DISALLOW_COPY_AND_ASSIGN(Address);
 };
 
 
@@ -456,6 +450,7 @@
   void shrq(CpuRegister reg, const Immediate& imm);
 
   void negl(CpuRegister reg);
+  void negq(CpuRegister reg);
   void notl(CpuRegister reg);
 
   void enter(const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 37a0932..0e8ea5b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -72,8 +72,8 @@
     return registers_;
   }
 
-  x86_64::Immediate* CreateImmediate(int64_t imm_value) OVERRIDE {
-    return new x86_64::Immediate(imm_value);
+  x86_64::Immediate CreateImmediate(int64_t imm_value) OVERRIDE {
+    return x86_64::Immediate(imm_value);
   }
 
  private:
@@ -186,6 +186,15 @@
   DriverStr(expected, "movw");
 }
 
+TEST_F(AssemblerX86_64Test, IMulImmediate) {
+  GetAssembler()->imull(x86_64::CpuRegister(x86_64::RAX), x86_64::Immediate(0x40000));
+  GetAssembler()->imull(x86_64::CpuRegister(x86_64::R8), x86_64::Immediate(0x40000));
+  const char* expected =
+    "imull $0x40000,%eax,%eax\n"
+    "imull $0x40000,%r8d,%r8d\n";
+  DriverStr(expected, "imul");
+}
+
 
 std::string setcc_test_fn(x86_64::X86_64Assembler* assembler) {
   // From Condition
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 326fa72..98712cd 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -30,6 +30,10 @@
 #include <sys/utsname.h>
 #endif
 
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include "cutils/trace.h"
+
+#include "base/dumpable.h"
 #include "base/stl_util.h"
 #include "base/stringpiece.h"
 #include "base/timing_logger.h"
@@ -832,6 +836,7 @@
   int small_method_threshold = CompilerOptions::kDefaultSmallMethodThreshold;
   int tiny_method_threshold = CompilerOptions::kDefaultTinyMethodThreshold;
   int num_dex_methods_threshold = CompilerOptions::kDefaultNumDexMethodsThreshold;
+  std::vector<std::string> verbose_methods;
 
   // Initialize ISA and ISA features to default values.
   InstructionSet instruction_set = kRuntimeISA;
@@ -1058,6 +1063,11 @@
       include_patch_information = true;
     } else if (option == "--no-include-patch-information") {
       include_patch_information = false;
+    } else if (option.starts_with("--verbose-methods=")) {
+      // TODO: rather than switch off compiler logging, make all VLOG(compiler) messages conditional
+      //       on having verbose methods.
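+      // The option value is a comma-separated list of method names.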
+      gLogVerbosity.compiler = false;
+      Split(option.substr(strlen("--verbose-methods=")).ToString(), ',', &verbose_methods);
     } else {
       Usage("Unknown argument %s", option.data());
     }
@@ -1213,25 +1223,25 @@
     PassDriverMEOpts::PrintPassOptions();
   }
 
-  std::unique_ptr<CompilerOptions> compiler_options(new CompilerOptions(compiler_filter,
-                                                                        huge_method_threshold,
-                                                                        large_method_threshold,
-                                                                        small_method_threshold,
-                                                                        tiny_method_threshold,
-                                                                        num_dex_methods_threshold,
-                                                                        generate_gdb_information,
-                                                                        include_patch_information,
-                                                                        top_k_profile_threshold,
-                                                                        include_debug_symbols,
-                                                                        implicit_null_checks,
-                                                                        implicit_so_checks,
-                                                                        implicit_suspend_checks,
-                                                                        compile_pic
+  std::unique_ptr<CompilerOptions> compiler_options(
+      new CompilerOptions(compiler_filter,
+                          huge_method_threshold,
+                          large_method_threshold,
+                          small_method_threshold,
+                          tiny_method_threshold,
+                          num_dex_methods_threshold,
+                          generate_gdb_information,
+                          include_patch_information,
+                          top_k_profile_threshold,
+                          include_debug_symbols,
+                          implicit_null_checks,
+                          implicit_so_checks,
+                          implicit_suspend_checks,
+                          compile_pic,
 #ifdef ART_SEA_IR_MODE
-                                                                        , compiler_options.sea_ir_ =
-                                                                              true;
+                          true,
 #endif
-  ));  // NOLINT(whitespace/parens)
+                          verbose_methods.empty() ? nullptr : &verbose_methods));
 
   // Done with usage checks, enable watchdog if requested
   WatchDog watch_dog(watch_dog_enabled);
diff --git a/disassembler/disassembler.cc b/disassembler/disassembler.cc
index c97bf64..bf68204 100644
--- a/disassembler/disassembler.cc
+++ b/disassembler/disassembler.cc
@@ -16,7 +16,7 @@
 
 #include "disassembler.h"
 
-#include <iostream>
+#include <ostream>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index ac883fe..ee652b3 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -18,7 +18,8 @@
 
 #include <inttypes.h>
 
-#include <iostream>
+#include <ostream>
+#include <sstream>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
diff --git a/disassembler/disassembler_arm64.cc b/disassembler/disassembler_arm64.cc
index fc1065a..229ac97 100644
--- a/disassembler/disassembler_arm64.cc
+++ b/disassembler/disassembler_arm64.cc
@@ -18,7 +18,7 @@
 
 #include <inttypes.h>
 
-#include <iostream>
+#include <ostream>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index bd5fac7..97c06f1 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -16,7 +16,8 @@
 
 #include "disassembler_mips.h"
 
-#include <iostream>
+#include <ostream>
+#include <sstream>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 63a74c7..b9e98f6 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -16,12 +16,14 @@
 
 #include "disassembler_x86.h"
 
-#include <iostream>
+#include <inttypes.h>
+
+#include <ostream>
+#include <sstream>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
 #include "thread.h"
-#include <inttypes.h>
 
 namespace art {
 namespace x86 {
@@ -506,7 +508,7 @@
           case 0x5D: opcode << "min"; break;
           case 0x5E: opcode << "div"; break;
           case 0x5F: opcode << "max"; break;
-          default: LOG(FATAL) << "Unreachable";
+          default: LOG(FATAL) << "Unreachable"; UNREACHABLE();
         }
         if (prefix[2] == 0x66) {
           opcode << "pd";
@@ -1237,7 +1239,7 @@
     case 0xF2: prefixed_opcode << "repne "; break;
     case 0xF3: prefixed_opcode << "repe "; break;
     case 0: break;
-    default: LOG(FATAL) << "Unreachable";
+    default: LOG(FATAL) << "Unreachable"; UNREACHABLE();
   }
   prefixed_opcode << opcode.str();
   os << FormatInstructionPointer(begin_instr)
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index c0c96e5..8e5af53 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -24,6 +24,7 @@
 #include <string>
 #include <vector>
 
+#include "base/dumpable.h"
 #include "base/scoped_flock.h"
 #include "base/stringpiece.h"
 #include "base/stringprintf.h"
diff --git a/runtime/Android.mk b/runtime/Android.mk
index d9b4139..6f6dcbc 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -209,7 +209,6 @@
 
 LIBART_TARGET_SRC_FILES := \
   $(LIBART_COMMON_SRC_FILES) \
-  base/logging_android.cc \
   jdwp/jdwp_adb.cc \
   monitor_android.cc \
   runtime_android.cc \
@@ -223,6 +222,7 @@
   arch/arm/memcmp16_arm.S \
   arch/arm/portable_entrypoints_arm.S \
   arch/arm/quick_entrypoints_arm.S \
+  arch/arm/quick_entrypoints_cc_arm.cc \
   arch/arm/thread_arm.cc \
   arch/arm/fault_handler_arm.cc
 
@@ -282,7 +282,6 @@
 
 LIBART_HOST_SRC_FILES := \
   $(LIBART_COMMON_SRC_FILES) \
-  base/logging_linux.cc \
   monitor_linux.cc \
   runtime_linux.cc \
   thread_linux.cc
@@ -477,7 +476,7 @@
     endif
   endif
   LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
-#  LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
+  LOCAL_ADDITIONAL_DEPENDENCIES += $$(LOCAL_PATH)/Android.mk
 
   ifeq ($$(art_target_or_host),target)
     LOCAL_MODULE_TARGET_ARCH := $$(ART_TARGET_SUPPORTED_ARCH)
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 5388cc0..8cd2a27 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -19,9 +19,9 @@
 
 #include "asm_support.h"
 
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 48
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112
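+// Both 112-byte frames are layout-checked at compile time in
+// quick_entrypoints_arm.S (36 + 64 + 12 and 40 + 64 + 8 respectively).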
 
 // Flag for enabling R4 optimization in arm runtime
 #define ARM_R4_SUSPEND_FLAG
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 96ffc93..fd9c626 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -97,6 +97,23 @@
   gprs_[R1] = const_cast<uint32_t*>(&gZero);
   gprs_[R2] = nullptr;
   gprs_[R3] = nullptr;
+
+  fprs_[S0] = nullptr;
+  fprs_[S1] = nullptr;
+  fprs_[S2] = nullptr;
+  fprs_[S3] = nullptr;
+  fprs_[S4] = nullptr;
+  fprs_[S5] = nullptr;
+  fprs_[S6] = nullptr;
+  fprs_[S7] = nullptr;
+  fprs_[S8] = nullptr;
+  fprs_[S9] = nullptr;
+  fprs_[S10] = nullptr;
+  fprs_[S11] = nullptr;
+  fprs_[S12] = nullptr;
+  fprs_[S13] = nullptr;
+  fprs_[S14] = nullptr;
+  fprs_[S15] = nullptr;
 }
 
 extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*);
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index ff0eb4a..24e9b1d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -77,23 +77,17 @@
 extern "C" void art_quick_lock_object(void*);
 extern "C" void art_quick_unlock_object(void*);
 
-// Math entrypoints.
-extern int32_t CmpgDouble(double a, double b);
-extern int32_t CmplDouble(double a, double b);
-extern int32_t CmpgFloat(float a, float b);
-extern int32_t CmplFloat(float a, float b);
-
-// Math conversions.
-extern "C" int32_t __aeabi_f2iz(float op1);        // FLOAT_TO_INT
-extern "C" int32_t __aeabi_d2iz(double op1);       // DOUBLE_TO_INT
-extern "C" float __aeabi_l2f(int64_t op1);         // LONG_TO_FLOAT
-extern "C" double __aeabi_l2d(int64_t op1);        // LONG_TO_DOUBLE
-
+// Used by soft float.
 // Single-precision FP arithmetics.
-extern "C" float fmodf(float a, float b);          // REM_FLOAT[_2ADDR]
-
+extern "C" float fmodf(float a, float b);              // REM_FLOAT[_2ADDR]
 // Double-precision FP arithmetics.
-extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
+extern "C" double fmod(double a, double b);            // REM_DOUBLE[_2ADDR]
+
+// Used by hard float.
+extern "C" int64_t art_quick_f2l(float f);             // FLOAT_TO_LONG
+extern "C" int64_t art_quick_d2l(double d);            // DOUBLE_TO_LONG
+extern "C" float art_quick_fmodf(float a, float b);    // REM_FLOAT[_2ADDR]
+extern "C" double art_quick_fmod(double a, double b);  // REM_DOUBLE[_2ADDR]
 
 // Integer arithmetics.
 extern "C" int __aeabi_idivmod(int32_t, int32_t);  // [DIV|REM]_INT[_2ADDR|_LIT8|_LIT16]
@@ -205,25 +199,24 @@
   qpoints->pUnlockObject = art_quick_unlock_object;
 
   // Math
-  qpoints->pCmpgDouble = CmpgDouble;
-  qpoints->pCmpgFloat = CmpgFloat;
-  qpoints->pCmplDouble = CmplDouble;
-  qpoints->pCmplFloat = CmplFloat;
-  qpoints->pFmod = fmod;
-  qpoints->pL2d = __aeabi_l2d;
-  qpoints->pFmodf = fmodf;
-  qpoints->pL2f = __aeabi_l2f;
-  qpoints->pD2iz = __aeabi_d2iz;
-  qpoints->pF2iz = __aeabi_f2iz;
   qpoints->pIdivmod = __aeabi_idivmod;
-  qpoints->pD2l = art_d2l;
-  qpoints->pF2l = art_f2l;
   qpoints->pLdiv = __aeabi_ldivmod;
   qpoints->pLmod = __aeabi_ldivmod;  // result returned in r2:r3
   qpoints->pLmul = art_quick_mul_long;
   qpoints->pShlLong = art_quick_shl_long;
   qpoints->pShrLong = art_quick_shr_long;
   qpoints->pUshrLong = art_quick_ushr_long;
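+  // For hard float, fmod/fmodf/d2l/f2l need assembly shims that move VFP
+  // arguments into core registers; see quick_entrypoints_arm.S.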
+  if (kArm32QuickCodeUseSoftFloat) {
+    qpoints->pFmod = fmod;
+    qpoints->pFmodf = fmodf;
+    qpoints->pD2l = art_d2l;
+    qpoints->pF2l = art_f2l;
+  } else {
+    qpoints->pFmod = art_quick_fmod;
+    qpoints->pFmodf = art_quick_fmodf;
+    qpoints->pD2l = art_quick_d2l;
+    qpoints->pF2l = art_quick_f2l;
+  }
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index aae0c94..632b414 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -40,10 +40,10 @@
     .cfi_rel_offset r10, 24
     .cfi_rel_offset r11, 28
     .cfi_rel_offset lr, 32
-    vpush {s0-s31}                                @ 32 words (128 bytes) of floats.
-    .pad #128
-    .cfi_adjust_cfa_offset 128
-    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*.
+    vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
+    .pad #64
+    .cfi_adjust_cfa_offset 64
+    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     .pad #12
     .cfi_adjust_cfa_offset 12
     RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
@@ -53,7 +53,7 @@
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 
      // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 128 + 12)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12)
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected."
 #endif
 .endm
@@ -101,15 +101,7 @@
 .endm
 
 .macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
-    add sp, #4               @ bottom word holds Method*
-    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-    .cfi_restore r8
-    .cfi_restore r10
-    .cfi_restore r11
-    .cfi_adjust_cfa_offset -FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     bx  lr                   @ return
 .endm
 
@@ -117,9 +109,10 @@
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
      */
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
-    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
     .save {r1-r3, r5-r8, r10-r11, lr}
+    .cfi_adjust_cfa_offset 40
     .cfi_rel_offset r1, 0
     .cfi_rel_offset r2, 4
     .cfi_rel_offset r3, 8
@@ -130,47 +123,39 @@
     .cfi_rel_offset r10, 28
     .cfi_rel_offset r11, 32
     .cfi_rel_offset lr, 36
-    .cfi_adjust_cfa_offset 40
+    vpush {s0-s15}                     @ 16 words of float args.
+    .pad #64
+    .cfi_adjust_cfa_offset 64
     sub sp, #8                         @ 2 words of space, bottom word will hold Method*
     .pad #8
     .cfi_adjust_cfa_offset 8
+    // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8)
+#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#endif
+.endm
+
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
     THIS_LOAD_REQUIRES_READ_BARRIER
      @ rTemp1 is kRefsAndArgs Method*.
     ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
     str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
-
-    // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
-#endif
 .endm
 
 .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
-    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves
-    .save {r1-r3, r5-r8, r10-r11, lr}
-    .cfi_rel_offset r1, 0
-    .cfi_rel_offset r2, 4
-    .cfi_rel_offset r3, 8
-    .cfi_rel_offset r5, 12
-    .cfi_rel_offset r6, 16
-    .cfi_rel_offset r7, 20
-    .cfi_rel_offset r8, 24
-    .cfi_rel_offset r10, 28
-    .cfi_rel_offset r11, 32
-    .cfi_rel_offset lr, 36
-    .cfi_adjust_cfa_offset 40
-    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
-    .pad #8
-    .cfi_adjust_cfa_offset 8
-
+    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
     str r0, [sp, #0]                   @ Store ArtMethod* to bottom of stack.
     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
 .endm
 
 .macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
     add  sp, #8                      @ rewind sp
+    .cfi_adjust_cfa_offset -8
+    vpop {s0-s15}
+    .cfi_adjust_cfa_offset -64
     pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
     .cfi_restore r1
     .cfi_restore r2
@@ -181,7 +166,7 @@
     .cfi_restore r8
     .cfi_restore r10
     .cfi_restore r11
-    .cfi_adjust_cfa_offset -48
+    .cfi_adjust_cfa_offset -40
 .endm
 
 
@@ -373,60 +358,91 @@
 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
 
     /*
-     * Quick invocation stub.
+     * Quick invocation stub internal.
      * On entry:
      *   r0 = method pointer
      *   r1 = argument array or NULL for no argument methods
      *   r2 = size of argument array in bytes
      *   r3 = (managed) thread pointer
      *   [sp] = JValue* result
-     *   [sp + 4] = shorty
+     *   [sp + 4] = result_in_float
+     *   [sp + 8] = core register argument array
+     *   [sp + 12] = fp register argument array
+     *  +-------------------------+
+     *  | uint32_t* fp_reg_args   |
+     *  | uint32_t* core_reg_args |
+     *  |   result_in_float       | <- Caller frame
+     *  |   JValue* result        |
+     *  +-------------------------+
+     *  |          lr             |
+     *  |          r11            |
+     *  |          r9             |
+     *  |          r4             | <- r11
+     *  +-------------------------+
+     *  | uint32_t out[n-1]       |
+     *  |    :      :             |        Outs
+     *  | uint32_t out[0]         |
+     *  | StackRef<ArtMethod>     | <- SP  value=null
+     *  +-------------------------+
      */
-ENTRY art_quick_invoke_stub
-    push   {r0, r4, r5, r9, r11, lr}       @ spill regs
-    .save  {r0, r4, r5, r9, r11, lr}
-    .pad #24
-    .cfi_adjust_cfa_offset 24
-    .cfi_rel_offset r0, 0
-    .cfi_rel_offset r4, 4
-    .cfi_rel_offset r5, 8
-    .cfi_rel_offset r9, 12
-    .cfi_rel_offset r11, 16
-    .cfi_rel_offset lr, 20
+ENTRY art_quick_invoke_stub_internal
+    push   {r4, r9, r11, lr}               @ spill regs
+    .save  {r4, r9, r11, lr}
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    .cfi_rel_offset r4, 0
+    .cfi_rel_offset r9, 4
+    .cfi_rel_offset r11, 8
+    .cfi_rel_offset lr, 12
     mov    r11, sp                         @ save the stack pointer
     .cfi_def_cfa_register r11
+
     mov    r9, r3                          @ move managed thread pointer into r9
+
+    add    r4, r2, #4                      @ create space for method pointer in frame
+    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
+    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
+    mov    sp, r4                          @ 16B alignment ourselves.
+
+    mov    r4, r0                          @ save method*
+    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
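+    @ r1 (argument array) and r2 (size in bytes) still hold memcpy's src and count from entry.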
+    bl     memcpy                          @ memcpy (dest, src, bytes)
+    mov    ip, #0                          @ set ip to 0
+    str    ip, [sp]                        @ store NULL for method* at bottom of frame
+
+    ldr    ip, [r11, #28]                  @ load fp register argument array pointer
+    vldm   ip, {s0-s15}                    @ copy s0 - s15
+
+    ldr    ip, [r11, #24]                  @ load core register argument array pointer
+    mov    r0, r4                          @ restore method*
+    add    ip, ip, #4                      @ skip r0
+    ldm    ip, {r1-r3}                     @ copy r1 - r3
+
 #ifdef ARM_R4_SUSPEND_FLAG
     mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
 #endif
-    add    r5, r2, #4                      @ create space for method pointer in frame
 
-    sub    r5, sp, r5                      @ reserve & align *stack* to 16 bytes: native calling
-    and    r5, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
-    mov    sp, r5                          @ 16B alignment ourselves.
-
-    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
-    bl     memcpy                          @ memcpy (dest, src, bytes)
-    ldr    r0, [r11]                       @ restore method*
-    ldr    r1, [sp, #4]                    @ copy arg value for r1
-    ldr    r2, [sp, #8]                    @ copy arg value for r2
-    ldr    r3, [sp, #12]                   @ copy arg value for r3
-    mov    ip, #0                          @ set ip to 0
-    str    ip, [sp]                        @ store NULL for method* at bottom of frame
     ldr    ip, [r0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET]  @ get pointer to the code
     blx    ip                              @ call the method
+
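+  // With the hard float quick ABI, 'F'/'D' entries in the shorty are
+  // parameters passed in VFP registers, which this path cannot handle yet.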
     mov    sp, r11                         @ restore the stack pointer
-    ldr    ip, [sp, #24]                   @ load the result pointer
-    strd   r0, [ip]                        @ store r0/r1 into result pointer
-    pop    {r0, r4, r5, r9, r11, lr}       @ restore spill regs
-    .cfi_restore r0
+    .cfi_def_cfa_register sp
+
+    ldr    r4, [sp, #20]                   @ load result_in_float
+    ldr    r9, [sp, #16]                   @ load the result pointer
+    cmp    r4, #0
+    ite    eq
+    strdeq r0, [r9]                        @ store r0/r1 into result pointer
+    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer
+
+    pop    {r4, r9, r11, lr}               @ restore spill regs
     .cfi_restore r4
-    .cfi_restore r5
     .cfi_restore r9
+    .cfi_restore r11
     .cfi_restore lr
-    .cfi_adjust_cfa_offset -24
+    .cfi_adjust_cfa_offset -16
     bx     lr
-END art_quick_invoke_stub
+END art_quick_invoke_stub_internal
 
     /*
      * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
@@ -869,13 +885,14 @@
     mov     r3, sp                 @ pass SP
     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    add     sp, #16                @ skip r1-r3, 4 bytes padding.
-    .cfi_adjust_cfa_offset -16
-    cbnz    r2, 1f                 @ success if no exception is pending
+    // Tear down the callee-save frame. Skip arg registers.
+    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+    cbnz    r2, 1f                 @ success if no exception is pending
+    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
     bx      lr                     @ return on success
 1:
-    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     DELIVER_PENDING_EXCEPTION
 END art_quick_proxy_invoke_handler
 
@@ -977,20 +994,13 @@
     ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
     cbnz r2, .Lexception_in_native
 
-    // Tear down the callee-save frame.
-    add  sp, #12                      @ rewind sp
-    // Do not pop r0 and r1, they contain the return value.
-    pop {r2-r3, r5-r8, r10-r11, lr}  @ 9 words of callee saves
-    .cfi_restore r2
-    .cfi_restore r3
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-    .cfi_restore r8
-    .cfi_restore r10
-    .cfi_restore r11
-    .cfi_adjust_cfa_offset -48
+    // Tear down the callee-save frame. Skip arg registers.
+    add     sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
 
+    // store into fpr, for when it's a fpr return...
+    vmov d0, r0, r1
     bx lr      // ret
 
 .Lentry_error:
@@ -1010,11 +1020,13 @@
     mov     r2, sp                 @ pass SP
     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
-    add     sp, #16                @ skip r1-r3, 4 bytes padding.
-    .cfi_adjust_cfa_offset -16
+    // Tear down the callee-save frame. Skip arg registers.
+    add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+    .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     cbnz    r2, 1f                 @ success if no exception is pending
-    bx    lr                       @ return on success
+    vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
+    bx      lr                     @ return on success
 1:
     DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
@@ -1435,3 +1447,54 @@
 .Ldone:
     pop   {r4, r7-r12, pc}
 END art_quick_string_compareto
+
+    /* Assembly routines used to handle ABI differences. */
+
+    /* double fmod(double a, double b) */
+    .extern fmod
+ENTRY art_quick_fmod
+    push  {lr}
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset lr, 0
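+    @ push {lr} left sp only 4-byte aligned; drop 4 more bytes to keep the
+    @ 8-byte stack alignment AAPCS requires for the call below.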
+    sub   sp, #4
+    .cfi_adjust_cfa_offset 4
+    vmov  r0, r1, d0
+    vmov  r2, r3, d1
+    bl    fmod
+    vmov  d0, r0, r1
+    add   sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop   {pc}
+    .cfi_adjust_cfa_offset -4
+END art_quick_fmod
+
+    /* float fmodf(float a, float b) */
+     .extern fmodf
+ENTRY art_quick_fmodf
+    push  {lr}
+    .cfi_adjust_cfa_offset 4
+    .cfi_rel_offset lr, 0
+    sub   sp, #4
+    .cfi_adjust_cfa_offset 4
+    vmov  r0, r1, d0
+    bl    fmodf
+    vmov  s0, r0
+    add   sp, #4
+    .cfi_adjust_cfa_offset -4
+    pop   {pc}
+    .cfi_adjust_cfa_offset -4
+END art_quick_fmodf
+
+    /* int64_t art_d2l(double d) */
+    .extern art_d2l
+ENTRY art_quick_d2l
+    vmov  r0, r1, d0
+    b     art_d2l
+END art_quick_d2l
+
+    /* int64_t art_f2l(float f) */
+    .extern art_f2l
+ENTRY art_quick_f2l
+    vmov  r0, s0
+    b     art_f2l
+END art_quick_f2l
diff --git a/runtime/arch/arm/quick_entrypoints_cc_arm.cc b/runtime/arch/arm/quick_entrypoints_cc_arm.cc
new file mode 100644
index 0000000..e21e6c1
--- /dev/null
+++ b/runtime/arch/arm/quick_entrypoints_cc_arm.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mirror/art_method.h"
+#include "utils.h"  // For RoundUp().
+
+namespace art {
+
+// Assembly stub that does the final part of the up-call into Java.
+extern "C" void art_quick_invoke_stub_internal(mirror::ArtMethod*, uint32_t*, uint32_t,
+                                               Thread* self, JValue* result, uint32_t, uint32_t*,
+                                               uint32_t*);
+
+template <bool kIsStatic>
+static void quick_invoke_reg_setup(mirror::ArtMethod* method, uint32_t* args, uint32_t args_size,
+                                   Thread* self, JValue* result, const char* shorty) {
+  // Note: We do not follow the AAPCS ABI in quick code, for either softfp or hardfp.
+  uint32_t core_reg_args[4];  // r0 ~ r3
+  uint32_t fp_reg_args[16];  // s0 ~ s15 (d0 ~ d7)
+  uint32_t gpr_index = 1;  // Index into core registers. Reserve r0 for mirror::ArtMethod*.
+  uint32_t fpr_index = 0;  // Index into float registers.
+  uint32_t fpr_double_index = 0;  // Index into float registers for doubles.
+  uint32_t arg_index = 0;  // Index into argument array.
+  const uint32_t result_in_float = kArm32QuickCodeUseSoftFloat ? 0 :
+      (shorty[0] == 'F' || shorty[0] == 'D') ? 1 : 0;
+
+  if (!kIsStatic) {
+    // Copy receiver for non-static methods.
+    core_reg_args[gpr_index++] = args[arg_index++];
+  }
+
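+  // shorty[0] is the return type; parameter types start at shorty[1].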
+  for (uint32_t shorty_index = 1; shorty[shorty_index] != '\0'; ++shorty_index, ++arg_index) {
+    char arg_type = shorty[shorty_index];
+    if (kArm32QuickCodeUseSoftFloat) {
+      arg_type = (arg_type == 'D') ? 'J' : arg_type;  // Regard double as long.
+      arg_type = (arg_type == 'F') ? 'I' : arg_type;  // Regard float as int.
+    }
+    switch (arg_type) {
+      case 'D': {
+        // Copy double argument into fp_reg_args if there are still floating point reg arguments.
+        // Double should not overlap with float.
+        fpr_double_index = std::max(fpr_double_index, RoundUp(fpr_index, 2));
+        if (fpr_double_index < arraysize(fp_reg_args)) {
+          fp_reg_args[fpr_double_index++] = args[arg_index];
+          fp_reg_args[fpr_double_index++] = args[arg_index + 1];
+        }
+        ++arg_index;
+        break;
+      }
+      case 'F':
+        // Copy float argument into fp_reg_args if there are still floating point reg arguments.
+        // If fpr_index is odd then its pointing at a hole next to an existing float argument. If we
+        // encounter a float argument then pick it up from that hole. In the case fpr_index is even,
+        // ensure that we don't pick up an argument that overlaps with with a double from
+        // fpr_double_index. In either case, take care not to go beyond the maximum number of
+        // floating point arguments.
+        if (fpr_index % 2 == 0) {
+          fpr_index = std::max(fpr_double_index, fpr_index);
+        }
+        if (fpr_index < arraysize(fp_reg_args)) {
+          fp_reg_args[fpr_index++] = args[arg_index];
+        }
+        break;
+      case 'J':
+        if (gpr_index < arraysize(core_reg_args)) {
+          core_reg_args[gpr_index++] = args[arg_index];
+        }
+        ++arg_index;
+        FALLTHROUGH_INTENDED;  // Fall-through to take care of the high part.
+      default:
+        if (gpr_index < arraysize(core_reg_args)) {
+          core_reg_args[gpr_index++] = args[arg_index];
+        }
+        break;
+    }
+  }
+
+  art_quick_invoke_stub_internal(method, args, args_size, self, result, result_in_float,
+      core_reg_args, fp_reg_args);
+}
+
+// Called by art::mirror::ArtMethod::Invoke to do entry into a non-static method.
+// TODO: migrate into an assembly implementation as with ARM64.
+extern "C" void art_quick_invoke_stub(mirror::ArtMethod* method, uint32_t* args, uint32_t args_size,
+                                      Thread* self, JValue* result, const char* shorty) {
+  quick_invoke_reg_setup<false>(method, args, args_size, self, result, shorty);
+}
+
+// Called by art::mirror::ArtMethod::Invoke to do entry into a static method.
+// TODO: migrate into an assembly implementation as with ARM64.
+extern "C" void art_quick_invoke_static_stub(mirror::ArtMethod* method, uint32_t* args,
+                                             uint32_t args_size, Thread* self, JValue* result,
+                                             const char* shorty) {
+  quick_invoke_reg_setup<true>(method, args, args_size, self, result, shorty);
+}
+
+}  // namespace art
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 7595e94..c1f3fc2 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -25,6 +25,8 @@
 namespace art {
 namespace arm {
 
+static constexpr uint32_t kArmCalleeSaveAlwaysSpills =
+    (1 << art::arm::LR);
 static constexpr uint32_t kArmCalleeSaveRefSpills =
     (1 << art::arm::R5) | (1 << art::arm::R6)  | (1 << art::arm::R7) | (1 << art::arm::R8) |
     (1 << art::arm::R10) | (1 << art::arm::R11);
@@ -32,23 +34,30 @@
     (1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
 static constexpr uint32_t kArmCalleeSaveAllSpills =
     (1 << art::arm::R4) | (1 << art::arm::R9);
-static constexpr uint32_t kArmCalleeSaveFpAllSpills =
+
+static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
+static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
+static constexpr uint32_t kArmCalleeSaveFpArgSpills =
     (1 << art::arm::S0)  | (1 << art::arm::S1)  | (1 << art::arm::S2)  | (1 << art::arm::S3)  |
     (1 << art::arm::S4)  | (1 << art::arm::S5)  | (1 << art::arm::S6)  | (1 << art::arm::S7)  |
     (1 << art::arm::S8)  | (1 << art::arm::S9)  | (1 << art::arm::S10) | (1 << art::arm::S11) |
-    (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15) |
+    (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15);
+static constexpr uint32_t kArmCalleeSaveFpAllSpills =
     (1 << art::arm::S16) | (1 << art::arm::S17) | (1 << art::arm::S18) | (1 << art::arm::S19) |
     (1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
     (1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
     (1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
 
 constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
-  return kArmCalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0) | (1 << art::arm::LR);
+  return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
+      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-  return type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0;
+  return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
+      (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills : 0) |

+      (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
 }
 
 constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
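
To see what the refactored mask composition yields, here is a hedged, standalone rendering of the kRefsAndArgs core mask and a plausible frame-size computation. The register numbers mirror the masks above (LR is r14, refs are r5-r8/r10/r11, args are r1-r3), but the rounding formula is an assumption for illustration; the real ArmCalleeSaveFrameSize is not part of this hunk:

#include <cstdint>
#include <cstdio>

int main() {
  constexpr uint32_t kAlwaysSpills = 1u << 14;  // LR.
  constexpr uint32_t kRefSpills =
      (1u << 5) | (1u << 6) | (1u << 7) | (1u << 8) | (1u << 10) | (1u << 11);
  constexpr uint32_t kArgSpills = (1u << 1) | (1u << 2) | (1u << 3);
  constexpr uint32_t core = kAlwaysSpills | kRefSpills | kArgSpills;  // kRefsAndArgs.
  // Assumed layout: one word per spilled core register plus one for the
  // Method*, rounded up to a 16-byte aligned frame.
  uint32_t frame = ((__builtin_popcount(core) + 1) * 4 + 15) & ~15u;
  printf("spilled cores: %d, frame bytes: %u\n", __builtin_popcount(core), frame);
  return 0;  // Prints: spilled cores: 10, frame bytes: 48.
}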
diff --git a/runtime/arch/arm64/context_arm64.h b/runtime/arch/arm64/context_arm64.h
index 7b6aac9..d9a433b 100644
--- a/runtime/arch/arm64/context_arm64.h
+++ b/runtime/arch/arm64/context_arm64.h
@@ -34,7 +34,7 @@
 
   void Reset() OVERRIDE;
 
-  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE;
+  void FillCalleeSaves(const StackVisitor& fr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void SetSP(uintptr_t new_sp) OVERRIDE {
     bool success = SetGPR(SP, new_sp);
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 0fb96d7..147d434 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1285,7 +1285,7 @@
 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
 THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
-THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
 THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
 
 // This is separated out as the argument order is different.
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index 15c6c07..0e1e32b 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -54,7 +54,7 @@
     (1 << art::arm64::D0) | (1 << art::arm64::D1) | (1 << art::arm64::D2) |
     (1 << art::arm64::D3) | (1 << art::arm64::D4) | (1 << art::arm64::D5) |
     (1 << art::arm64::D6) | (1 << art::arm64::D7);
-static constexpr uint32_t kArm64FpAllSpills =
+static constexpr uint32_t kArm64CalleeSaveFpAllSpills =
     (1 << art::arm64::D8)  | (1 << art::arm64::D9)  | (1 << art::arm64::D10) |
     (1 << art::arm64::D11)  | (1 << art::arm64::D12)  | (1 << art::arm64::D13) |
     (1 << art::arm64::D14)  | (1 << art::arm64::D15);
@@ -68,7 +68,7 @@
 constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
       (type == Runtime::kRefsAndArgs ? kArm64CalleeSaveFpArgSpills: 0) |
-      (type == Runtime::kSaveAll ? kArm64FpAllSpills : 0);
+      (type == Runtime::kSaveAll ? kArm64CalleeSaveFpAllSpills : 0);
 }
 
 constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 905b867..aff2da7 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -65,9 +65,10 @@
     .cfi_rel_offset 16, 20
     # 1 word for alignment, 4 open words for args $a0-$a3, bottom will hold Method*
 
-    ld $t0, _ZN3art7Runtime9instance_E
+    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+    lw $t0, 0($t0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -107,9 +108,10 @@
     .cfi_rel_offset 18, 28
     # 3 words for alignment and extra args, 4 open words for args $a0-$a3, bottom will hold Method*
 
-    ld $t0, _ZN3art7Runtime9instance_E
+    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+    lw $t0, 0($t0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
@@ -201,9 +203,10 @@
     .cfi_rel_offset 5, 4
     # bottom will hold Method*
 
-    ld $t0, _ZN3art7Runtime9instance_E
+    lw $t0, %got(_ZN3art7Runtime9instance_E)($gp)
+    lw $t0, 0($t0)
     THIS_LOAD_REQUIRES_READ_BARRIER
-    ld $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
+    lw $t0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t0)
     sw $t0, 0($sp)                                # Place Method* at bottom of stack.
     sw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)  # Place sp in Thread::Current()->top_quick_frame.
 .endm
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index ea586b8..c5a0f6c 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1148,7 +1148,7 @@
   // For some reason this does not work, as the type_idx is artificial and outside what the
   // resolved types of c_obj allow...
 
-  if (false) {
+  if ((false)) {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
diff --git a/runtime/base/bit_vector.cc b/runtime/base/bit_vector.cc
index 1d67962..5b8b6e2 100644
--- a/runtime/base/bit_vector.cc
+++ b/runtime/base/bit_vector.cc
@@ -16,6 +16,8 @@
 
 #include "bit_vector.h"
 
+#include <sstream>
+
 #include "allocator.h"
 #include "bit_vector-inl.h"
 
diff --git a/runtime/base/casts.h b/runtime/base/casts.h
index be94c2e..138c2fd 100644
--- a/runtime/base/casts.h
+++ b/runtime/base/casts.h
@@ -19,6 +19,8 @@
 
 #include <assert.h>
 #include <string.h>
+#include <type_traits>
+
 #include "base/macros.h"
 
 namespace art {
@@ -65,16 +67,9 @@
 
 template<typename To, typename From>     // use like this: down_cast<T*>(foo);
 inline To down_cast(From* f) {                   // so we only accept pointers
-  // Ensures that To is a sub-type of From *.  This test is here only
-  // for compile-time type checking, and has no overhead in an
-  // optimized build at run-time, as it will be optimized away
-  // completely.
-  if (false) {
-    implicit_cast<From*, To>(0);
-  }
+  static_assert(std::is_base_of<From, typename std::remove_pointer<To>::type>::value,
+                "down_cast unsafe as To is not a subtype of From");
 
-  //
-  // assert(f == NULL || dynamic_cast<To>(f) != NULL);  // RTTI: debug mode only!
   return static_cast<To>(f);
 }
 
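The static_assert rejects bad down-casts at the point of instantiation, rather than relying on a dead implicit_cast call being type-checked. A small self-contained illustration; Base and Derived are hypothetical types, not part of this change:

#include <type_traits>

template<typename To, typename From>
inline To down_cast(From* f) {
  static_assert(std::is_base_of<From, typename std::remove_pointer<To>::type>::value,
                "down_cast unsafe as To is not a subtype of From");
  return static_cast<To>(f);
}

struct Base { virtual ~Base() {} };
struct Derived : Base {};

int main() {
  Base* b = new Derived;
  Derived* d = down_cast<Derived*>(b);  // OK: Derived is a subtype of Base.
  // int* i = down_cast<int*>(b);       // Now fails to compile via the static_assert.
  delete d;
  return 0;
}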
diff --git a/runtime/base/dumpable.h b/runtime/base/dumpable.h
new file mode 100644
index 0000000..3c316cc
--- /dev/null
+++ b/runtime/base/dumpable.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_DUMPABLE_H_
+#define ART_RUNTIME_BASE_DUMPABLE_H_
+
+#include "base/macros.h"
+
+namespace art {
+
+// A convenience to allow any class with a "Dump(std::ostream& os)" member function
+// but without an operator<< to be used as if it had an operator<<. Use like this:
+//
+//   os << Dumpable<MyType>(my_type_instance);
+//
+template<typename T>
+class Dumpable FINAL {
+ public:
+  explicit Dumpable(const T& value) : value_(value) {
+  }
+
+  void Dump(std::ostream& os) const {
+    value_.Dump(os);
+  }
+
+ private:
+  const T& value_;
+
+  DISALLOW_COPY_AND_ASSIGN(Dumpable);
+};
+
+template<typename T>
+std::ostream& operator<<(std::ostream& os, const Dumpable<T>& rhs) {
+  rhs.Dump(os);
+  return os;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_DUMPABLE_H_
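
Usage is as the header comment above describes; a self-contained sketch, with MyType as a hypothetical class and the template body repeated here so the snippet stands alone:

#include <iostream>
#include <ostream>

template<typename T>
class Dumpable {
 public:
  explicit Dumpable(const T& value) : value_(value) {}
  void Dump(std::ostream& os) const { value_.Dump(os); }
 private:
  const T& value_;
};

template<typename T>
std::ostream& operator<<(std::ostream& os, const Dumpable<T>& rhs) {
  rhs.Dump(os);
  return os;
}

struct MyType {
  void Dump(std::ostream& os) const { os << "MyType state"; }
};

int main() {
  MyType my_type_instance;
  std::cout << Dumpable<MyType>(my_type_instance) << std::endl;
  return 0;
}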
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 5af597b..46c3538 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -16,17 +16,25 @@
 
 #include "logging.h"
 
+#include <sstream>
+
 #include "base/mutex.h"
 #include "runtime.h"
 #include "thread-inl.h"
 #include "utils.h"
 
+// Headers for LogMessage::LogLine.
+#ifdef HAVE_ANDROID_OS
+#include "cutils/log.h"
+#else
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
 namespace art {
 
 LogVerbosity gLogVerbosity;
 
-std::vector<std::string> gVerboseMethods;
-
 unsigned int gAborting = 0;
 
 static LogSeverity gMinimumLogSeverity = INFO;
@@ -47,14 +55,6 @@
                                                         : "art";
 }
 
-// Configure logging based on ANDROID_LOG_TAGS environment variable.
-// We need to parse a string that looks like
-//
-//      *:v jdwp:d dalvikvm:d dalvikvm-gc:i dalvikvmi:i
-//
-// The tag (or '*' for the global level) comes first, followed by a colon
-// and a letter indicating the minimum priority level we're expected to log.
-// This can be used to reveal or conceal logs with specific tags.
 void InitLogging(char* argv[]) {
   if (gCmdLine.get() != nullptr) {
     return;
@@ -65,22 +65,22 @@
   // Stash the command line for later use. We can use /proc/self/cmdline on Linux to recover this,
   // but we don't have that luxury on the Mac, and there are a couple of argv[0] variants that are
   // commonly used.
-  if (argv != NULL) {
+  if (argv != nullptr) {
     gCmdLine.reset(new std::string(argv[0]));
-    for (size_t i = 1; argv[i] != NULL; ++i) {
+    for (size_t i = 1; argv[i] != nullptr; ++i) {
       gCmdLine->append(" ");
       gCmdLine->append(argv[i]);
     }
     gProgramInvocationName.reset(new std::string(argv[0]));
     const char* last_slash = strrchr(argv[0], '/');
-    gProgramInvocationShortName.reset(new std::string((last_slash != NULL) ? last_slash + 1
+    gProgramInvocationShortName.reset(new std::string((last_slash != nullptr) ? last_slash + 1
                                                                            : argv[0]));
   } else {
-    // TODO: fall back to /proc/self/cmdline when argv is NULL on Linux
+    // TODO: fall back to /proc/self/cmdline when argv is NULL on Linux.
     gCmdLine.reset(new std::string("<unset>"));
   }
   const char* tags = getenv("ANDROID_LOG_TAGS");
-  if (tags == NULL) {
+  if (tags == nullptr) {
     return;
   }
 
@@ -119,47 +119,121 @@
   }
 }
 
-LogMessageData::LogMessageData(const char* file, int line, LogSeverity severity, int error)
-    : file(file),
-      line_number(line),
-      severity(severity),
-      error(error) {
-  const char* last_slash = strrchr(file, '/');
-  file = (last_slash == NULL) ? file : last_slash + 1;
-}
+// This indirection greatly reduces the stack impact of having
+// lots of checks/logging in a function.
+class LogMessageData {
+ public:
+  LogMessageData(const char* file, unsigned int line, LogSeverity severity, int error)
+      // Keep only the basename of the file for logging; file_ is const, so
+      // the path must be stripped before the member is initialized.
+      : file_(strrchr(file, '/') != nullptr ? strrchr(file, '/') + 1 : file),
+        line_number_(line),
+        severity_(severity),
+        error_(error) {
+  }
 
+  const char* GetFile() const {
+    return file_;
+  }
+
+  unsigned int GetLineNumber() const {
+    return line_number_;
+  }
+
+  LogSeverity GetSeverity() const {
+    return severity_;
+  }
+
+  int GetError() const {
+    return error_;
+  }
+
+  std::ostream& GetBuffer() {
+    return buffer_;
+  }
+
+  std::string ToString() const {
+    return buffer_.str();
+  }
+
+ private:
+  std::ostringstream buffer_;
+  const char* const file_;
+  const unsigned int line_number_;
+  const LogSeverity severity_;
+  const int error_;
+
+  DISALLOW_COPY_AND_ASSIGN(LogMessageData);
+};
+
+LogMessage::LogMessage(const char* file, unsigned int line, LogSeverity severity, int error)
+  : data_(new LogMessageData(file, line, severity, error)) {
+}
+
 LogMessage::~LogMessage() {
-  if (data_->severity < gMinimumLogSeverity) {
+  if (data_->GetSeverity() < gMinimumLogSeverity) {
     return;  // No need to format something we're not going to output.
   }
 
   // Finish constructing the message.
-  if (data_->error != -1) {
-    data_->buffer << ": " << strerror(data_->error);
+  if (data_->GetError() != -1) {
+    data_->GetBuffer() << ": " << strerror(data_->GetError());
   }
-  std::string msg(data_->buffer.str());
+  std::string msg(data_->ToString());
 
   // Do the actual logging with the lock held.
   {
     MutexLock mu(Thread::Current(), *Locks::logging_lock_);
     if (msg.find('\n') == std::string::npos) {
-      LogLine(*data_, msg.c_str());
+      LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetSeverity(), msg.c_str());
     } else {
       msg += '\n';
       size_t i = 0;
       while (i < msg.size()) {
         size_t nl = msg.find('\n', i);
         msg[nl] = '\0';
-        LogLine(*data_, &msg[i]);
+        LogLine(data_->GetFile(), data_->GetLineNumber(), data_->GetSeverity(), &msg[i]);
         i = nl + 1;
       }
     }
   }
 
   // Abort if necessary.
-  if (data_->severity == FATAL) {
+  if (data_->GetSeverity() == FATAL) {
     Runtime::Abort();
   }
 }
 
+std::ostream& LogMessage::stream() {
+  return data_->GetBuffer();
+}
+
+#ifdef HAVE_ANDROID_OS
+static const android_LogPriority kLogSeverityToAndroidLogPriority[] = {
+  ANDROID_LOG_VERBOSE, ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, ANDROID_LOG_WARN,
+  ANDROID_LOG_ERROR, ANDROID_LOG_FATAL, ANDROID_LOG_FATAL
+};
+COMPILE_ASSERT(arraysize(kLogSeverityToAndroidLogPriority) == INTERNAL_FATAL + 1,
+               mismatch_in_size_of_kLogSeverityToAndroidLogPriority_and_values_in_LogSeverity);
+#endif
+
+void LogMessage::LogLine(const char* file, unsigned int line, LogSeverity log_severity,
+                         const char* message) {
+#ifdef HAVE_ANDROID_OS
+  const char* tag = ProgramInvocationShortName();
+  int priority = kLogSeverityToAndroidLogPriority[log_severity];
+  if (priority == ANDROID_LOG_FATAL) {
+    LOG_PRI(priority, tag, "%s:%u] %s", file, line, message);
+  } else {
+    LOG_PRI(priority, tag, "%s", message);
+  }
+#else
+  static const char* log_characters = "VDIWEFF";
+  CHECK_EQ(strlen(log_characters), INTERNAL_FATAL + 1U);
+  char severity = log_characters[log_severity];
+  fprintf(stderr, "%s %c %5d %5d %s:%u] %s\n",
+          ProgramInvocationShortName(), severity, getpid(), ::art::GetTid(), file, line, message);
+#endif
+}
+
 }  // namespace art
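
The destructor's newline handling above is worth calling out: a multi-line message is split and handed to LogLine once per line, so each line gets its own file:line prefix. A standalone sketch of that loop, with a trivial LogLine stand-in:

#include <cstdio>
#include <string>

static void LogLine(const char* file, unsigned int line, const char* msg) {
  fprintf(stderr, "%s:%u] %s\n", file, line, msg);
}

int main() {
  std::string msg = "first line\nsecond line";
  if (msg.find('\n') == std::string::npos) {
    LogLine("example.cc", 42, msg.c_str());
  } else {
    msg += '\n';
    size_t i = 0;
    while (i < msg.size()) {
      size_t nl = msg.find('\n', i);
      msg[nl] = '\0';                   // Terminate this line in place.
      LogLine("example.cc", 42, &msg[i]);
      i = nl + 1;
    }
  }
  return 0;
}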
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index cf3e763..baa83e3 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -17,29 +17,113 @@
 #ifndef ART_RUNTIME_BASE_LOGGING_H_
 #define ART_RUNTIME_BASE_LOGGING_H_
 
-#include <cerrno>
-#include <cstring>
-#include <iostream>  // NOLINT
 #include <memory>
-#include <sstream>
-#include <signal.h>
-#include <vector>
+#include <ostream>
 
 #include "base/macros.h"
-#include "log_severity.h"
 
+namespace art {
+
+enum LogSeverity {
+  VERBOSE,
+  DEBUG,
+  INFO,
+  WARNING,
+  ERROR,
+  FATAL,
+  INTERNAL_FATAL,  // For Runtime::Abort.
+};
+
+// The members of this struct are the valid arguments to VLOG and VLOG_IS_ON in code,
+// and the "-verbose:" command line argument.
+struct LogVerbosity {
+  bool class_linker;  // Enabled with "-verbose:class".
+  bool compiler;
+  bool gc;
+  bool heap;
+  bool jdwp;
+  bool jni;
+  bool monitor;
+  bool profiler;
+  bool signals;
+  bool startup;
+  bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
+  bool threads;
+  bool verifier;
+};
+
+// Global log verbosity setting, initialized by InitLogging.
+extern LogVerbosity gLogVerbosity;
+
+// Zero if no abort is in progress, non-zero otherwise. Used on fatal exit to prevent recursive
+// aborts. The global declaration allows us to disable some error checking to ensure fatal shutdown
+// makes forward progress.
+extern unsigned int gAborting;
+
+// Configure logging based on ANDROID_LOG_TAGS environment variable.
+// We need to parse a string that looks like
+//
+//      *:v jdwp:d dalvikvm:d dalvikvm-gc:i dalvikvmi:i
+//
+// The tag (or '*' for the global level) comes first, followed by a colon
+// and a letter indicating the minimum priority level we're expected to log.
+// This can be used to reveal or conceal logs with specific tags.
+extern void InitLogging(char* argv[]);
+
+// Returns the command line used to invoke the current tool or nullptr if InitLogging hasn't been
+// performed.
+extern const char* GetCmdLine();
+
+// The command used to start the ART runtime, such as "/system/bin/dalvikvm". If InitLogging hasn't
+// been performed then just returns "art".
+extern const char* ProgramInvocationName();
+
+// A short version of the command used to start the ART runtime, such as "dalvikvm". If InitLogging
+// hasn't been performed then just returns "art".
+extern const char* ProgramInvocationShortName();
+
+// Logs a message to logcat on Android, or to stderr otherwise. If the severity is FATAL it also
+// causes an abort. For example: LOG(FATAL) << "We didn't expect to reach here";
+#define LOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, -1).stream()
+
+// A variant of LOG that also logs the current errno value. To be used when library calls fail.
+#define PLOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, errno).stream()
+
+// Marker that code is yet to be implemented.
+#define UNIMPLEMENTED(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
+
+// Is verbose logging enabled for the given module? The valid modules are defined in LogVerbosity.
+#define VLOG_IS_ON(module) UNLIKELY(::art::gLogVerbosity.module)
+
+// Variant of LOG that logs when verbose logging is enabled for a module. For example,
+// VLOG(jni) << "A JNI operation was performed";
+#define VLOG(module) \
+  if (VLOG_IS_ON(module)) \
+    ::art::LogMessage(__FILE__, __LINE__, INFO, -1).stream()
+
+// Return the stream associated with logging for the given module.
+#define VLOG_STREAM(module) ::art::LogMessage(__FILE__, __LINE__, INFO, -1).stream()
+
+// Check whether condition x holds and LOG(FATAL) if not. The value of the expression x is only
+// evaluated once. Extra logging can be appended using << after. For example,
+// CHECK(false == true) results in a log message of "Check failed: false == true".
 #define CHECK(x) \
   if (UNLIKELY(!(x))) \
-    ::art::LogMessage(__FILE__, __LINE__, FATAL, -1).stream() \
+    ::art::LogMessage(__FILE__, __LINE__, ::art::FATAL, -1).stream() \
         << "Check failed: " #x << " "
 
+// Helper for CHECK_xx(x,y) macros.
 #define CHECK_OP(LHS, RHS, OP) \
   for (auto _values = ::art::MakeEagerEvaluator(LHS, RHS); \
        UNLIKELY(!(_values.lhs OP _values.rhs)); /* empty */) \
-    ::art::LogMessage(__FILE__, __LINE__, FATAL, -1).stream() \
+    ::art::LogMessage(__FILE__, __LINE__, ::art::FATAL, -1).stream() \
         << "Check failed: " << #LHS << " " << #OP << " " << #RHS \
         << " (" #LHS "=" << _values.lhs << ", " #RHS "=" << _values.rhs << ") "
 
+
+// Check whether a condition holds between x and y, LOG(FATAL) if not. The values of the
+// expressions x and y are each evaluated once. Extra logging can be appended using << after, e.g.
+// CHECK_NE(0 == 1, false) results in "Check failed: false != false (0==1=false, false=false) ".
 #define CHECK_EQ(x, y) CHECK_OP(x, y, ==)
 #define CHECK_NE(x, y) CHECK_OP(x, y, !=)
 #define CHECK_LE(x, y) CHECK_OP(x, y, <=)
@@ -47,22 +131,25 @@
 #define CHECK_GE(x, y) CHECK_OP(x, y, >=)
 #define CHECK_GT(x, y) CHECK_OP(x, y, >)
 
+// Helper for CHECK_STRxx(s1,s2) macros.
 #define CHECK_STROP(s1, s2, sense) \
   if (UNLIKELY((strcmp(s1, s2) == 0) != sense)) \
-    LOG(FATAL) << "Check failed: " \
-               << "\"" << s1 << "\"" \
-               << (sense ? " == " : " != ") \
-               << "\"" << s2 << "\""
+    LOG(::art::FATAL) << "Check failed: " \
+        << "\"" << s1 << "\"" \
+        << (sense ? " == " : " != ") \
+        << "\"" << s2 << "\""
 
+// Check for string (const char*) equality between s1 and s2, LOG(FATAL) if not.
 #define CHECK_STREQ(s1, s2) CHECK_STROP(s1, s2, true)
 #define CHECK_STRNE(s1, s2) CHECK_STROP(s1, s2, false)
 
+// Perform the pthread function call(args), LOG(FATAL) on error.
 #define CHECK_PTHREAD_CALL(call, args, what) \
   do { \
     int rc = call args; \
     if (rc != 0) { \
       errno = rc; \
-      PLOG(FATAL) << # call << " failed for " << what; \
+      PLOG(::art::FATAL) << # call << " failed for " << what; \
     } \
   } while (false)
 
@@ -74,81 +161,34 @@
 //          n / 2;
 //    }
 #define CHECK_CONSTEXPR(x, out, dummy) \
-  (UNLIKELY(!(x))) ? (LOG(FATAL) << "Check failed: " << #x out, dummy) :
+  (UNLIKELY(!(x))) ? (LOG(::art::FATAL) << "Check failed: " << #x out, dummy) :
 
-#ifndef NDEBUG
 
-#define DCHECK(x) CHECK(x)
-#define DCHECK_EQ(x, y) CHECK_EQ(x, y)
-#define DCHECK_NE(x, y) CHECK_NE(x, y)
-#define DCHECK_LE(x, y) CHECK_LE(x, y)
-#define DCHECK_LT(x, y) CHECK_LT(x, y)
-#define DCHECK_GE(x, y) CHECK_GE(x, y)
-#define DCHECK_GT(x, y) CHECK_GT(x, y)
-#define DCHECK_STREQ(s1, s2) CHECK_STREQ(s1, s2)
-#define DCHECK_STRNE(s1, s2) CHECK_STRNE(s1, s2)
-#define DCHECK_CONSTEXPR(x, out, dummy) CHECK_CONSTEXPR(x, out, dummy)
-
-#else  // NDEBUG
-
-#define DCHECK(condition) \
-  while (false) \
-    CHECK(condition)
-
-#define DCHECK_EQ(val1, val2) \
-  while (false) \
-    CHECK_EQ(val1, val2)
-
-#define DCHECK_NE(val1, val2) \
-  while (false) \
-    CHECK_NE(val1, val2)
-
-#define DCHECK_LE(val1, val2) \
-  while (false) \
-    CHECK_LE(val1, val2)
-
-#define DCHECK_LT(val1, val2) \
-  while (false) \
-    CHECK_LT(val1, val2)
-
-#define DCHECK_GE(val1, val2) \
-  while (false) \
-    CHECK_GE(val1, val2)
-
-#define DCHECK_GT(val1, val2) \
-  while (false) \
-    CHECK_GT(val1, val2)
-
-#define DCHECK_STREQ(str1, str2) \
-  while (false) \
-    CHECK_STREQ(str1, str2)
-
-#define DCHECK_STRNE(str1, str2) \
-  while (false) \
-    CHECK_STRNE(str1, str2)
-
-#define DCHECK_CONSTEXPR(x, out, dummy) \
-  (false && (x)) ? (dummy) :
-
+// DCHECKs are debug variants of CHECKs that are only enabled in debug builds. Generally CHECK
+// should be used unless profiling identifies a CHECK as being in performance-critical code.
+#if defined(NDEBUG)
+static constexpr bool kEnableDChecks = false;
+#else
+static constexpr bool kEnableDChecks = true;
 #endif
 
-#define LOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, -1).stream()
-#define PLOG(severity) ::art::LogMessage(__FILE__, __LINE__, severity, errno).stream()
+#define DCHECK(x) if (::art::kEnableDChecks) CHECK(x)
+#define DCHECK_EQ(x, y) if (::art::kEnableDChecks) CHECK_EQ(x, y)
+#define DCHECK_NE(x, y) if (::art::kEnableDChecks) CHECK_NE(x, y)
+#define DCHECK_LE(x, y) if (::art::kEnableDChecks) CHECK_LE(x, y)
+#define DCHECK_LT(x, y) if (::art::kEnableDChecks) CHECK_LT(x, y)
+#define DCHECK_GE(x, y) if (::art::kEnableDChecks) CHECK_GE(x, y)
+#define DCHECK_GT(x, y) if (::art::kEnableDChecks) CHECK_GT(x, y)
+#define DCHECK_STREQ(s1, s2) if (::art::kEnableDChecks) CHECK_STREQ(s1, s2)
+#define DCHECK_STRNE(s1, s2) if (::art::kEnableDChecks) CHECK_STRNE(s1, s2)
+#if defined(NDEBUG)
+#define DCHECK_CONSTEXPR(x, out, dummy)
+#else
+#define DCHECK_CONSTEXPR(x, out, dummy) CHECK_CONSTEXPR(x, out, dummy)
+#endif
 
-#define LG LOG(INFO)
-
-#define UNIMPLEMENTED(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
-
-#define VLOG_IS_ON(module) UNLIKELY(::art::gLogVerbosity.module)
-#define VLOG(module) if (VLOG_IS_ON(module)) ::art::LogMessage(__FILE__, __LINE__, INFO, -1).stream()
-#define VLOG_STREAM(module) ::art::LogMessage(__FILE__, __LINE__, INFO, -1).stream()
-
-//
-// Implementation details beyond this point.
-//
-
-namespace art {
-
+// Temporary class created to evaluate the LHS and RHS, used with MakeEagerEvaluator to infer the
+// types of LHS and RHS.
 template <typename LHS, typename RHS>
 struct EagerEvaluator {
   EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) { }
@@ -156,10 +196,14 @@
   RHS rhs;
 };
 
-// We want char*s to be treated as pointers, not strings. If you want them treated like strings,
-// you'd need to use CHECK_STREQ and CHECK_STRNE anyway to compare the characters rather than their
-// addresses. We could express this more succinctly with std::remove_const, but this is quick and
-// easy to understand, and works before we have C++0x. We rely on signed/unsigned warnings to
+// Helper function for CHECK_xx.
+template <typename LHS, typename RHS>
+static inline EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
+  return EagerEvaluator<LHS, RHS>(lhs, rhs);
+}
+
+// Explicitly instantiate EagerEvaluator for pointers so that char*s aren't treated as strings. To
+// compare strings use CHECK_STREQ and CHECK_STRNE. We rely on signed/unsigned warnings to
 // protect you against combinations not explicitly listed below.
 #define EAGER_PTR_EVALUATOR(T1, T2) \
   template <> struct EagerEvaluator<T1, T2> { \
@@ -182,153 +226,30 @@
 EAGER_PTR_EVALUATOR(signed char*, const signed char*);
 EAGER_PTR_EVALUATOR(signed char*, signed char*);
 
-template <typename LHS, typename RHS>
-EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
-  return EagerEvaluator<LHS, RHS>(lhs, rhs);
-}
+// Data for the log message, not stored in LogMessage to avoid increasing the stack size.
+class LogMessageData;
 
-// This indirection greatly reduces the stack impact of having
-// lots of checks/logging in a function.
-struct LogMessageData {
- public:
-  LogMessageData(const char* file, int line, LogSeverity severity, int error);
-  std::ostringstream buffer;
-  const char* const file;
-  const int line_number;
-  const LogSeverity severity;
-  const int error;
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(LogMessageData);
-};
-
+// A LogMessage is a temporary, scoped object used by LOG and by the unlikely part of a CHECK. Its
+// destructor will abort if the severity is FATAL.
 class LogMessage {
  public:
-  LogMessage(const char* file, int line, LogSeverity severity, int error)
-    : data_(new LogMessageData(file, line, severity, error)) {
-  }
+  LogMessage(const char* file, unsigned int line, LogSeverity severity, int error);
 
   ~LogMessage();  // TODO: enable LOCKS_EXCLUDED(Locks::logging_lock_).
 
-  std::ostream& stream() {
-    return data_->buffer;
-  }
+  // Returns the stream associated with the message; the LogMessage performs output when it goes
+  // out of scope.
+  std::ostream& stream();
+
+  // The routine that performs the actual logging.
+  static void LogLine(const char* file, unsigned int line, LogSeverity severity, const char* msg);
 
  private:
-  static void LogLine(const LogMessageData& data, const char*);
-
   const std::unique_ptr<LogMessageData> data_;
 
-  friend void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context);
-  friend class Mutex;
   DISALLOW_COPY_AND_ASSIGN(LogMessage);
 };
 
-// A convenience to allow any class with a "Dump(std::ostream& os)" member function
-// but without an operator<< to be used as if it had an operator<<. Use like this:
-//
-//   os << Dumpable<MyType>(my_type_instance);
-//
-template<typename T>
-class Dumpable {
- public:
-  explicit Dumpable(T& value) : value_(value) {
-  }
-
-  void Dump(std::ostream& os) const {
-    value_.Dump(os);
-  }
-
- private:
-  T& value_;
-
-  DISALLOW_COPY_AND_ASSIGN(Dumpable);
-};
-
-template<typename T>
-std::ostream& operator<<(std::ostream& os, const Dumpable<T>& rhs) {
-  rhs.Dump(os);
-  return os;
-}
-
-template<typename T>
-class ConstDumpable {
- public:
-  explicit ConstDumpable(const T& value) : value_(value) {
-  }
-
-  void Dump(std::ostream& os) const {
-    value_.Dump(os);
-  }
-
- private:
-  const T& value_;
-
-  DISALLOW_COPY_AND_ASSIGN(ConstDumpable);
-};
-
-template<typename T>
-std::ostream& operator<<(std::ostream& os, const ConstDumpable<T>& rhs) {
-  rhs.Dump(os);
-  return os;
-}
-
-// Helps you use operator<< in a const char*-like context such as our various 'F' methods with
-// format strings.
-template<typename T>
-class ToStr {
- public:
-  explicit ToStr(const T& value) {
-    std::ostringstream os;
-    os << value;
-    s_ = os.str();
-  }
-
-  const char* c_str() const {
-    return s_.c_str();
-  }
-
-  const std::string& str() const {
-    return s_;
-  }
-
- private:
-  std::string s_;
-  DISALLOW_COPY_AND_ASSIGN(ToStr);
-};
-
-// The members of this struct are the valid arguments to VLOG and VLOG_IS_ON in code,
-// and the "-verbose:" command line argument.
-struct LogVerbosity {
-  bool class_linker;  // Enabled with "-verbose:class".
-  bool compiler;
-  bool gc;
-  bool heap;
-  bool jdwp;
-  bool jni;
-  bool monitor;
-  bool profiler;
-  bool signals;
-  bool startup;
-  bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
-  bool threads;
-  bool verifier;
-};
-
-extern LogVerbosity gLogVerbosity;
-
-extern std::vector<std::string> gVerboseMethods;
-
-// Used on fatal exit. Prevents recursive aborts. Allows us to disable
-// some error checking to ensure fatal shutdown makes forward progress.
-extern unsigned int gAborting;
-
-extern void InitLogging(char* argv[]);
-
-extern const char* GetCmdLine();
-extern const char* ProgramInvocationName();
-extern const char* ProgramInvocationShortName();
-
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_LOGGING_H_
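
For reference, the eager-evaluation trick behind CHECK_OP can be demonstrated in isolation. The sketch below substitutes printf/abort for the LogMessage stream, so it illustrates only the single-evaluation guarantee, not the real macro:

#include <cstdio>
#include <cstdlib>

template <typename LHS, typename RHS>
struct EagerEvaluator {
  EagerEvaluator(LHS l, RHS r) : lhs(l), rhs(r) {}
  LHS lhs;
  RHS rhs;
};

template <typename LHS, typename RHS>
static EagerEvaluator<LHS, RHS> MakeEagerEvaluator(LHS lhs, RHS rhs) {
  return EagerEvaluator<LHS, RHS>(lhs, rhs);
}

// Simplified stand-in for CHECK_EQ: prints and aborts on failure instead of
// streaming into a LogMessage whose destructor aborts.
#define SKETCH_CHECK_EQ(x, y)                       \
  for (auto _values = MakeEagerEvaluator(x, y);     \
       !(_values.lhs == _values.rhs); /* empty */)  \
    printf("Check failed: %s == %s\n", #x, #y), abort()

static int g_calls = 0;
static int Bump() { return ++g_calls; }

int main() {
  SKETCH_CHECK_EQ(Bump(), 1);         // Bump() runs exactly once; the check passes.
  printf("g_calls = %d\n", g_calls);  // Prints: g_calls = 1.
  return 0;
}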
diff --git a/runtime/base/logging_android.cc b/runtime/base/logging_android.cc
deleted file mode 100644
index 9b1ac58..0000000
--- a/runtime/base/logging_android.cc
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "logging.h"
-
-#include <unistd.h>
-
-#include <iostream>
-
-#include "base/stringprintf.h"
-#include "cutils/log.h"
-
-namespace art {
-
-static const int kLogSeverityToAndroidLogPriority[] = {
-  ANDROID_LOG_VERBOSE, ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, ANDROID_LOG_WARN,
-  ANDROID_LOG_ERROR, ANDROID_LOG_FATAL, ANDROID_LOG_FATAL
-};
-
-void LogMessage::LogLine(const LogMessageData& data, const char* message) {
-  const char* tag = ProgramInvocationShortName();
-  int priority = kLogSeverityToAndroidLogPriority[data.severity];
-  if (priority == ANDROID_LOG_FATAL) {
-    LOG_PRI(priority, tag, "%s:%d] %s", data.file, data.line_number, message);
-  } else {
-    LOG_PRI(priority, tag, "%s", message);
-  }
-}
-
-}  // namespace art
diff --git a/runtime/base/logging_linux.cc b/runtime/base/logging_linux.cc
deleted file mode 100644
index 0399128..0000000
--- a/runtime/base/logging_linux.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "logging.h"
-
-#include <sys/types.h>
-#include <unistd.h>
-
-#include <cstdio>
-#include <cstring>
-#include <iostream>
-
-#include "base/stringprintf.h"
-#include "utils.h"
-
-namespace art {
-
-void LogMessage::LogLine(const LogMessageData& data, const char* message) {
-  char severity = "VDIWEFF"[data.severity];
-  fprintf(stderr, "%s %c %5d %5d %s:%d] %s\n",
-          ProgramInvocationShortName(), severity, getpid(), ::art::GetTid(),
-          data.file, data.line_number, message);
-}
-
-}  // namespace art
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index c80d35e..febea61 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -68,22 +68,28 @@
 #define ART_FRIEND_TEST(test_set_name, individual_test)\
 friend class test_set_name##_##individual_test##_Test
 
-// DISALLOW_COPY_AND_ASSIGN disallows the copy and operator= functions.
-// It goes in the private: declarations in a class.
+// DISALLOW_COPY_AND_ASSIGN disallows the copy and operator= functions. It goes in the private:
+// declarations in a class.
 #define DISALLOW_COPY_AND_ASSIGN(TypeName) \
-  TypeName(const TypeName&);               \
-  void operator=(const TypeName&)
+  TypeName(const TypeName&) = delete;  \
+  void operator=(const TypeName&) = delete
 
-// A macro to disallow all the implicit constructors, namely the
-// default constructor, copy constructor and operator= functions.
+// A macro to disallow all the implicit constructors, namely the default constructor, copy
+// constructor and operator= functions.
 //
-// This should be used in the private: declarations for a class
-// that wants to prevent anyone from instantiating it. This is
-// especially useful for classes containing only static methods.
+// This should be used in the private: declarations for a class that wants to prevent anyone from
+// instantiating it. This is especially useful for classes containing only static methods.
 #define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
-  TypeName();                                    \
+  TypeName() = delete;  \
   DISALLOW_COPY_AND_ASSIGN(TypeName)
 
+// A macro to disallow new and delete operators for a class. It goes in the private: declarations.
+#define DISALLOW_ALLOCATION() \
+  public: \
+    ALWAYS_INLINE void operator delete(void*, size_t) { UNREACHABLE(); } \
+  private: \
+    void* operator new(size_t) = delete
+
 // The arraysize(arr) macro returns the # of elements in an array arr.
 // The expression is a compile-time constant, and therefore can be
 // used in defining new arrays, for example.  If you use arraysize on
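
The = delete rewrite moves the misuse diagnostic from link time to the call site. A hedged illustration of what the macro expands to; Widget is a hypothetical class:

class Widget {
 public:
  Widget() {}
 private:
  Widget(const Widget&) = delete;          // DISALLOW_COPY_AND_ASSIGN(Widget)
  void operator=(const Widget&) = delete;  // expands to these two lines.
};

int main() {
  Widget a;
  // Widget b(a);  // Error: use of deleted copy constructor, diagnosed here
  // a = a;        // rather than surfacing as an undefined-symbol link error.
  (void)a;
  return 0;
}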
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index f70db35..e066787 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -21,11 +21,8 @@
 
 #include "mutex.h"
 
-#define ATRACE_TAG ATRACE_TAG_DALVIK
-
-#include "cutils/trace.h"
-
 #include "base/stringprintf.h"
+#include "base/value_object.h"
 #include "runtime.h"
 #include "thread.h"
 
@@ -44,35 +41,6 @@
 }
 #endif  // ART_USE_FUTEXES
 
-class ScopedContentionRecorder {
- public:
-  ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid)
-      : mutex_(kLogLockContentions ? mutex : NULL),
-        blocked_tid_(kLogLockContentions ? blocked_tid : 0),
-        owner_tid_(kLogLockContentions ? owner_tid : 0),
-        start_nano_time_(kLogLockContentions ? NanoTime() : 0) {
-    if (ATRACE_ENABLED()) {
-      std::string msg = StringPrintf("Lock contention on %s (owner tid: %" PRIu64 ")",
-                                     mutex->GetName(), owner_tid);
-      ATRACE_BEGIN(msg.c_str());
-    }
-  }
-
-  ~ScopedContentionRecorder() {
-    ATRACE_END();
-    if (kLogLockContentions) {
-      uint64_t end_nano_time = NanoTime();
-      mutex_->RecordContention(blocked_tid_, owner_tid_, end_nano_time - start_nano_time_);
-    }
-  }
-
- private:
-  BaseMutex* const mutex_;
-  const uint64_t blocked_tid_;
-  const uint64_t owner_tid_;
-  const uint64_t start_nano_time_;
-};
-
 static inline uint64_t SafeGetTid(const Thread* self) {
   if (self != NULL) {
     return static_cast<uint64_t>(self->GetTid());
@@ -158,15 +126,7 @@
       // Add as an extra reader.
       done = state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
     } else {
-      // Owner holds it exclusively, hang up.
-      ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
-      ++num_pending_readers_;
-      if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
-        if (errno != EAGAIN) {
-          PLOG(FATAL) << "futex wait failed for " << name_;
-        }
-      }
-      --num_pending_readers_;
+      HandleSharedLockContention(self, cur_state);
     }
   } while (!done);
 #else
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 70b6f7e..423ea77 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -19,8 +19,12 @@
 #include <errno.h>
 #include <sys/time.h>
 
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include "cutils/trace.h"
+
 #include "atomic.h"
 #include "base/logging.h"
+#include "base/value_object.h"
 #include "mutex-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change.h"
@@ -106,6 +110,36 @@
   const BaseMutex* const mutex_;
 };
 
+// Scoped class that generates events at the beginning and end of lock contention.
+class ScopedContentionRecorder FINAL : public ValueObject {
+ public:
+  ScopedContentionRecorder(BaseMutex* mutex, uint64_t blocked_tid, uint64_t owner_tid)
+      : mutex_(kLogLockContentions ? mutex : NULL),
+        blocked_tid_(kLogLockContentions ? blocked_tid : 0),
+        owner_tid_(kLogLockContentions ? owner_tid : 0),
+        start_nano_time_(kLogLockContentions ? NanoTime() : 0) {
+    if (ATRACE_ENABLED()) {
+      std::string msg = StringPrintf("Lock contention on %s (owner tid: %" PRIu64 ")",
+                                     mutex->GetName(), owner_tid);
+      ATRACE_BEGIN(msg.c_str());
+    }
+  }
+
+  ~ScopedContentionRecorder() {
+    ATRACE_END();
+    if (kLogLockContentions) {
+      uint64_t end_nano_time = NanoTime();
+      mutex_->RecordContention(blocked_tid_, owner_tid_, end_nano_time - start_nano_time_);
+    }
+  }
+
+ private:
+  BaseMutex* const mutex_;
+  const uint64_t blocked_tid_;
+  const uint64_t owner_tid_;
+  const uint64_t start_nano_time_;
+};
+
 BaseMutex::BaseMutex(const char* name, LockLevel level) : level_(level), name_(name) {
   if (kLogLockContentions) {
     ScopedAllMutexesLock mu(this);
@@ -428,9 +462,9 @@
         if (this != Locks::logging_lock_) {
           LOG(FATAL) << "Unexpected state_ in unlock " << cur_state << " for " << name_;
         } else {
-          LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
-          LogMessage::LogLine(data, StringPrintf("Unexpected state_ %d in unlock for %s",
-                                                 cur_state, name_).c_str());
+          LogMessage::LogLine(__FILE__, __LINE__, INTERNAL_FATAL,
+                              StringPrintf("Unexpected state_ %d in unlock for %s",
+                                           cur_state, name_).c_str());
           _exit(1);
         }
       }
@@ -612,6 +646,20 @@
 }
 #endif
 
+#if ART_USE_FUTEXES
+void ReaderWriterMutex::HandleSharedLockContention(Thread* self, int32_t cur_state) {
+  // Owner holds it exclusively, hang up.
+  ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
+  ++num_pending_readers_;
+  if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+    if (errno != EAGAIN) {
+      PLOG(FATAL) << "futex wait failed for " << name_;
+    }
+  }
+  --num_pending_readers_;
+}
+#endif
+
 bool ReaderWriterMutex::SharedTryLock(Thread* self) {
   DCHECK(self == NULL || self == Thread::Current());
 #if ART_USE_FUTEXES
@@ -680,7 +728,7 @@
   CHECK_MUTEX_CALL(pthread_condattr_init, (&cond_attrs));
 #if !defined(__APPLE__)
   // Apple doesn't have CLOCK_MONOTONIC or pthread_condattr_setclock.
-  CHECK_MUTEX_CALL(pthread_condattr_setclock(&cond_attrs, CLOCK_MONOTONIC));
+  CHECK_MUTEX_CALL(pthread_condattr_setclock, (&cond_attrs, CLOCK_MONOTONIC));
 #endif
   CHECK_MUTEX_CALL(pthread_cond_init, (&cond_, &cond_attrs));
 #endif
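
ScopedContentionRecorder, now moved out of the inline header, follows the usual RAII timing pattern: capture a start time on construction, record the elapsed contention on destruction. A minimal standalone sketch, where NanoTime and the printed report stand in for art's helpers and RecordContention:

#include <chrono>
#include <cstdint>
#include <cstdio>

static uint64_t NanoTime() {  // Stand-in for art's NanoTime().
  return static_cast<uint64_t>(std::chrono::duration_cast<std::chrono::nanoseconds>(
      std::chrono::steady_clock::now().time_since_epoch()).count());
}

class ScopedContentionSketch {
 public:
  ScopedContentionSketch() : start_nano_time_(NanoTime()) {}
  ~ScopedContentionSketch() {  // Report on scope exit, as the recorder does.
    printf("blocked for %llu ns\n",
           static_cast<unsigned long long>(NanoTime() - start_nano_time_));
  }
 private:
  const uint64_t start_nano_time_;
};

int main() {
  ScopedContentionSketch recorder;  // Reported when main returns.
  return 0;
}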
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 516fa07..628231a 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -361,6 +361,9 @@
 
  private:
 #if ART_USE_FUTEXES
+  // Out-of-inline path for handling contention for a SharedLock.
+  void HandleSharedLockContention(Thread* self, int32_t cur_state);
+
   // -1 implies held exclusive, +ve shared held by state_ many owners.
   AtomicInteger state_;
   // Exclusive owner. Modification guarded by this mutex.
diff --git a/runtime/base/stringpiece.cc b/runtime/base/stringpiece.cc
index 824ee48..2570bad 100644
--- a/runtime/base/stringpiece.cc
+++ b/runtime/base/stringpiece.cc
@@ -16,7 +16,7 @@
 
 #include "stringpiece.h"
 
-#include <iostream>
+#include <ostream>
 #include <utility>
 
 #include "logging.h"
diff --git a/runtime/base/to_str.h b/runtime/base/to_str.h
new file mode 100644
index 0000000..6b1c84c
--- /dev/null
+++ b/runtime/base/to_str.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_TO_STR_H_
+#define ART_RUNTIME_BASE_TO_STR_H_
+
+#include <sstream>
+
+namespace art {
+
+// Helps you use operator<< in a const char*-like context such as our various 'F' methods with
+// format strings.
+template<typename T>
+class ToStr {
+ public:
+  explicit ToStr(const T& value) {
+    std::ostringstream os;
+    os << value;
+    s_ = os.str();
+  }
+
+  const char* c_str() const {
+    return s_.c_str();
+  }
+
+  const std::string& str() const {
+    return s_;
+  }
+
+ private:
+  std::string s_;
+  DISALLOW_COPY_AND_ASSIGN(ToStr);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_TO_STR_H_
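
How ToStr is typically used: adapting operator<< output to printf-style '%s' consumers. A self-contained sketch, with the class body repeated and Point as a hypothetical type:

#include <cstdio>
#include <ostream>
#include <sstream>
#include <string>

template<typename T>
class ToStr {
 public:
  explicit ToStr(const T& value) {
    std::ostringstream os;
    os << value;
    s_ = os.str();
  }
  const char* c_str() const { return s_.c_str(); }
 private:
  std::string s_;
};

struct Point { int x, y; };
std::ostream& operator<<(std::ostream& os, const Point& p) {
  return os << "(" << p.x << ", " << p.y << ")";
}

int main() {
  Point p{1, 2};
  printf("point = %s\n", ToStr<Point>(p).c_str());  // point = (1, 2)
  return 0;
}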
diff --git a/runtime/base/unix_file/mapped_file.cc b/runtime/base/unix_file/mapped_file.cc
index 63927b1..77f4d02 100644
--- a/runtime/base/unix_file/mapped_file.cc
+++ b/runtime/base/unix_file/mapped_file.cc
@@ -42,7 +42,7 @@
   struct stat st;
   int result = TEMP_FAILURE_RETRY(fstat(Fd(), &st));
   if (result == -1) {
-    PLOG(WARNING) << "Failed to stat file '" << GetPath() << "'";
+    PLOG(::art::WARNING) << "Failed to stat file '" << GetPath() << "'";
     return false;
   }
   file_size_ = st.st_size;
@@ -50,8 +50,8 @@
     mapped_file_ = mmap(NULL, file_size_, PROT_READ, MAP_PRIVATE, Fd(), 0);
   } while (mapped_file_ == MAP_FAILED && errno == EINTR);
   if (mapped_file_ == MAP_FAILED) {
-    PLOG(WARNING) << "Failed to mmap file '" << GetPath() << "' of size "
-                  << file_size_ << " bytes to memory";
+    PLOG(::art::WARNING) << "Failed to mmap file '" << GetPath() << "' of size "
+        << file_size_ << " bytes to memory";
     return false;
   }
   map_mode_ = kMapReadOnly;
@@ -67,8 +67,7 @@
   int result = TEMP_FAILURE_RETRY(ftruncate(Fd(), file_size));
 #endif
   if (result == -1) {
-    PLOG(ERROR) << "Failed to truncate file '" << GetPath()
-                << "' to size " << file_size;
+    PLOG(::art::ERROR) << "Failed to truncate file '" << GetPath() << "' to size " << file_size;
     return false;
   }
   file_size_ = file_size;
@@ -77,7 +76,7 @@
         mmap(NULL, file_size_, PROT_READ | PROT_WRITE, MAP_SHARED, Fd(), 0);
   } while (mapped_file_ == MAP_FAILED && errno == EINTR);
   if (mapped_file_ == MAP_FAILED) {
-    PLOG(WARNING) << "Failed to mmap file '" << GetPath() << "' of size "
+    PLOG(::art::WARNING) << "Failed to mmap file '" << GetPath() << "' of size "
                   << file_size_ << " bytes to memory";
     return false;
   }
@@ -89,8 +88,7 @@
   CHECK(IsMapped());
   int result = TEMP_FAILURE_RETRY(munmap(mapped_file_, file_size_));
   if (result == -1) {
-    PLOG(WARNING) << "Failed unmap file '" << GetPath() << "' of size "
-                  << file_size_;
+    PLOG(::art::WARNING) << "Failed unmap file '" << GetPath() << "' of size " << file_size_;
     return false;
   } else {
     mapped_file_ = NULL;
diff --git a/runtime/base/value_object.h b/runtime/base/value_object.h
index ee0e2a0..8c752a9 100644
--- a/runtime/base/value_object.h
+++ b/runtime/base/value_object.h
@@ -17,19 +17,13 @@
 #ifndef ART_RUNTIME_BASE_VALUE_OBJECT_H_
 #define ART_RUNTIME_BASE_VALUE_OBJECT_H_
 
-#include "base/logging.h"
+#include "base/macros.h"
 
 namespace art {
 
 class ValueObject {
- public:
-  void* operator new(size_t size) {
-    LOG(FATAL) << "UNREACHABLE";
-    abort();
-  }
-  void operator delete(void*, size_t) {
-    LOG(FATAL) << "UNREACHABLE";
-  }
+ private:
+  DISALLOW_ALLOCATION();
 };
 
 }  // namespace art
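
A sketch of what DISALLOW_ALLOCATION buys ValueObject subclasses: heap allocation is rejected at compile time while stack use is unaffected. Simplified here, omitting the ALWAYS_INLINE/UNREACHABLE details of the real macro:

#include <cstddef>

class StackOnly {
 public:
  StackOnly() {}
  void operator delete(void*, std::size_t) {}  // Never reached in practice.
 private:
  void* operator new(std::size_t) = delete;    // Heap allocation is forbidden.
};

int main() {
  StackOnly on_stack;               // Fine: automatic storage.
  // StackOnly* p = new StackOnly;  // Error: attempt to use deleted operator new.
  (void)on_stack;
  return 0;
}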
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index fec1824..b2df091 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -20,6 +20,7 @@
 #include <zlib.h>
 
 #include "base/logging.h"
+#include "base/to_str.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 8c2293f..9d2d59c 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -37,7 +37,7 @@
       CHECK_EQ(GetDexPc(), DexFile::kDexNoIndex);
     }
 
-    if (!m || m->IsNative() || m->IsRuntimeMethod() || IsShadowFrame()) {
+    if (m == nullptr || m->IsNative() || m->IsRuntimeMethod() || IsShadowFrame()) {
       return true;
     }
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index bbbb9e0..854effd 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -17,6 +17,7 @@
 #include "class_linker.h"
 
 #include <deque>
+#include <iostream>
 #include <memory>
 #include <queue>
 #include <string>
@@ -5110,7 +5111,7 @@
     bool seen_non_ref = false;
     for (size_t i = 0; i < num_fields; i++) {
       mirror::ArtField* field = fields->Get(i);
-      if (false) {  // enable to debug field layout
+      if ((false)) {  // enable to debug field layout
         LOG(INFO) << "LinkFields: " << (is_static ? "static" : "instance")
                     << " class=" << PrettyClass(klass.Get())
                     << " field=" << PrettyField(field)
@@ -5283,6 +5284,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << type;
+      UNREACHABLE();
   }
   if (resolved == nullptr) {
     // Search by name, which works across dex files.
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index ea3da64..6e3ebc2 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -44,7 +44,7 @@
 
 int main(int argc, char **argv) {
   art::InitLogging(argv);
-  LOG(INFO) << "Running main() from common_runtime_test.cc...";
+  LOG(::art::INFO) << "Running main() from common_runtime_test.cc...";
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index b676c62..d9061c8 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -2017,7 +2017,7 @@
   } else if (error == JDWP::ERR_NONE) {
     mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_Thread);
     CHECK(c != nullptr);
-    mirror::ArtField* f = c->FindInstanceField("group", "Ljava/lang/ThreadGroup;");
+    mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
     CHECK(f != nullptr);
     mirror::Object* group = f->GetObject(thread_object);
     CHECK(group != nullptr);
@@ -2058,8 +2058,7 @@
     return error;
   }
   ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroupName");
-  mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ThreadGroup);
-  mirror::ArtField* f = c->FindInstanceField("name", "Ljava/lang/String;");
+  mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
   CHECK(f != nullptr);
   mirror::String* s = reinterpret_cast<mirror::String*>(f->GetObject(thread_group));
 
@@ -2078,9 +2077,7 @@
   mirror::Object* parent;
   {
     ScopedAssertNoThreadSuspension ants(soa.Self(), "Debugger: GetThreadGroupParent");
-    mirror::Class* c = soa.Decode<mirror::Class*>(WellKnownClasses::java_lang_ThreadGroup);
-    CHECK(c != nullptr);
-    mirror::ArtField* f = c->FindInstanceField("parent", "Ljava/lang/ThreadGroup;");
+    mirror::ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_parent);
     CHECK(f != nullptr);
     parent = f->GetObject(thread_group);
   }
@@ -2095,12 +2092,20 @@
   CHECK(thread_group != nullptr);
 
   // Get the ArrayList<ThreadGroup> "groups" out of this thread group...
-  mirror::ArtField* groups_field = thread_group->GetClass()->FindInstanceField("groups", "Ljava/util/List;");
+  mirror::ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
   mirror::Object* groups_array_list = groups_field->GetObject(thread_group);
+  {
+    // The "groups" field is declared as a java.util.List: check it really is
+    // an instance of java.util.ArrayList.
+    CHECK(groups_array_list != nullptr);
+    mirror::Class* java_util_ArrayList_class =
+        soa.Decode<mirror::Class*>(WellKnownClasses::java_util_ArrayList);
+    CHECK(groups_array_list->InstanceOf(java_util_ArrayList_class));
+  }
 
   // Get the array and size out of the ArrayList<ThreadGroup>...
-  mirror::ArtField* array_field = groups_array_list->GetClass()->FindInstanceField("array", "[Ljava/lang/Object;");
-  mirror::ArtField* size_field = groups_array_list->GetClass()->FindInstanceField("size", "I");
+  mirror::ArtField* array_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_array);
+  mirror::ArtField* size_field = soa.DecodeField(WellKnownClasses::java_util_ArrayList_size);
   mirror::ObjectArray<mirror::Object>* groups_array =
       array_field->GetObject(groups_array_list)->AsObjectArray<mirror::Object>();
   const int32_t size = size_field->GetInt(groups_array_list);
@@ -2386,7 +2391,7 @@
 }
 
 void Dbg::ResumeVM() {
-  Runtime::Current()->GetThreadList()->UndoDebuggerSuspensions();
+  Runtime::Current()->GetThreadList()->ResumeAllForDebugger();
 }
 
 JDWP::JdwpError Dbg::SuspendThread(JDWP::ObjectId thread_id, bool request_suspension) {
@@ -4116,7 +4121,6 @@
   HeapChunkContext(bool merge, bool native)
       : buf_(16384 - 16),
         type_(0),
-        merge_(merge),
         chunk_overhead_(0) {
     Reset();
     if (native) {
@@ -4327,7 +4331,6 @@
   void* startOfNextMemoryChunk_;
   size_t totalAllocationUnits_;
   uint32_t type_;
-  bool merge_;
   bool needHeader_;
   size_t chunk_overhead_;
 
@@ -4678,7 +4681,7 @@
  * between the contents of these tables.
  */
 jbyteArray Dbg::GetRecentAllocations() {
-  if (false) {
+  if ((false)) {
     DumpRecentAllocations();
   }
 
diff --git a/runtime/debugger.h b/runtime/debugger.h
index cb7adae..48e457f 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -246,7 +246,9 @@
    */
   static int64_t LastDebuggerActivity();
 
-  static void UndoDebuggerSuspensions();
+  static void UndoDebuggerSuspensions()
+    LOCKS_EXCLUDED(Locks::thread_list_lock_,
+                   Locks::thread_suspend_count_lock_);
 
   /*
    * Class, Object, Array
@@ -459,7 +461,9 @@
   static void SuspendVM()
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
-  static void ResumeVM();
+  static void ResumeVM()
+      LOCKS_EXCLUDED(Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
   static JDWP::JdwpError SuspendThread(JDWP::ObjectId thread_id, bool request_suspension = true)
       LOCKS_EXCLUDED(Locks::mutator_lock_,
                      Locks::thread_list_lock_,
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index f408386..761441e 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -23,7 +23,9 @@
 #include <string.h>
 #include <sys/file.h>
 #include <sys/stat.h>
+
 #include <memory>
+#include <sstream>
 
 #include "base/logging.h"
 #include "base/stringprintf.h"
@@ -1180,13 +1182,14 @@
   case kArray:
   case kAnnotation:
     UNIMPLEMENTED(FATAL) << ": type " << type_;
-    break;
+    UNREACHABLE();
   case kNull:
     jval_.l = NULL;
     width = 0;
     break;
   default:
     LOG(FATAL) << "Unreached";
+    UNREACHABLE();
   }
   ptr_ += width;
 }
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 7e775f4..a802759 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -19,6 +19,7 @@
 #include <inttypes.h>
 
 #include <iomanip>
+#include <sstream>
 
 #include "base/stringprintf.h"
 #include "dex_file-inl.h"
@@ -112,7 +113,7 @@
         return 1;  // NOP.
       } else {
         LOG(FATAL) << "Unreachable: " << DumpString(nullptr);
-        return 0;
+        UNREACHABLE();
       }
   }
 }
diff --git a/runtime/dex_instruction_visitor_test.cc b/runtime/dex_instruction_visitor_test.cc
index c5e63eb..5273084 100644
--- a/runtime/dex_instruction_visitor_test.cc
+++ b/runtime/dex_instruction_visitor_test.cc
@@ -16,7 +16,6 @@
 
 #include "dex_instruction_visitor.h"
 
-#include <iostream>
 #include <memory>
 
 #include "gtest/gtest.h"
diff --git a/runtime/dex_method_iterator_test.cc b/runtime/dex_method_iterator_test.cc
index b8f180b..c6f333f 100644
--- a/runtime/dex_method_iterator_test.cc
+++ b/runtime/dex_method_iterator_test.cc
@@ -38,8 +38,8 @@
     const DexFile& dex_file = it.GetDexFile();
     InvokeType invoke_type = it.GetInvokeType();
     uint32_t method_idx = it.GetMemberIndex();
-    if (false) {
-      LG << invoke_type << " " << PrettyMethod(method_idx, dex_file);
+    if ((false)) {
+      LOG(INFO) << invoke_type << " " << PrettyMethod(method_idx, dex_file);
     }
     it.Next();
   }
diff --git a/runtime/dwarf.h b/runtime/dwarf.h
index 370ad95..7daa5f1 100644
--- a/runtime/dwarf.h
+++ b/runtime/dwarf.h
@@ -364,38 +364,38 @@
   DW_OP_reg29 = 0x6d,
   DW_OP_reg30 = 0x6e,
   DW_OP_reg31 = 0x6f,
-  DW_OP_breg0 = 0x50,
-  DW_OP_breg1 = 0x51,
-  DW_OP_breg2 = 0x52,
-  DW_OP_breg3 = 0x53,
-  DW_OP_breg4 = 0x54,
-  DW_OP_breg5 = 0x55,
-  DW_OP_breg6 = 0x56,
-  DW_OP_breg7 = 0x57,
-  DW_OP_breg8 = 0x58,
-  DW_OP_breg9 = 0x59,
-  DW_OP_breg10 = 0x5a,
-  DW_OP_breg11 = 0x5b,
-  DW_OP_breg12 = 0x5c,
-  DW_OP_breg13 = 0x5d,
-  DW_OP_breg14 = 0x5e,
-  DW_OP_breg15 = 0x5f,
-  DW_OP_breg16 = 0x60,
-  DW_OP_breg17 = 0x61,
-  DW_OP_breg18 = 0x62,
-  DW_OP_breg19 = 0x63,
-  DW_OP_breg20 = 0x64,
-  DW_OP_breg21 = 0x65,
-  DW_OP_breg22 = 0x66,
-  DW_OP_breg23 = 0x67,
-  DW_OP_breg24 = 0x68,
-  DW_OP_breg25 = 0x69,
-  DW_OP_breg26 = 0x6a,
-  DW_OP_breg27 = 0x6b,
-  DW_OP_breg28 = 0x6c,
-  DW_OP_breg29 = 0x6d,
-  DW_OP_breg30 = 0x6e,
-  DW_OP_breg31 = 0x6f,
+  DW_OP_breg0 = 0x70,
+  DW_OP_breg1 = 0x71,
+  DW_OP_breg2 = 0x72,
+  DW_OP_breg3 = 0x73,
+  DW_OP_breg4 = 0x74,
+  DW_OP_breg5 = 0x75,
+  DW_OP_breg6 = 0x76,
+  DW_OP_breg7 = 0x77,
+  DW_OP_breg8 = 0x78,
+  DW_OP_breg9 = 0x79,
+  DW_OP_breg10 = 0x7a,
+  DW_OP_breg11 = 0x7b,
+  DW_OP_breg12 = 0x7c,
+  DW_OP_breg13 = 0x7d,
+  DW_OP_breg14 = 0x7e,
+  DW_OP_breg15 = 0x7f,
+  DW_OP_breg16 = 0x80,
+  DW_OP_breg17 = 0x81,
+  DW_OP_breg18 = 0x82,
+  DW_OP_breg19 = 0x83,
+  DW_OP_breg20 = 0x84,
+  DW_OP_breg21 = 0x85,
+  DW_OP_breg22 = 0x86,
+  DW_OP_breg23 = 0x87,
+  DW_OP_breg24 = 0x88,
+  DW_OP_breg25 = 0x89,
+  DW_OP_breg26 = 0x8a,
+  DW_OP_breg27 = 0x8b,
+  DW_OP_breg28 = 0x8c,
+  DW_OP_breg29 = 0x8d,
+  DW_OP_breg30 = 0x8e,
+  DW_OP_breg31 = 0x8f,
   DW_OP_regx = 0x90,
   DW_OP_fbreg = 0x91,
   DW_OP_bregx = 0x92,
diff --git a/runtime/elf_utils.h b/runtime/elf_utils.h
index 676cd52..7b00bad 100644
--- a/runtime/elf_utils.h
+++ b/runtime/elf_utils.h
@@ -24,6 +24,8 @@
 
 #include "base/logging.h"
 
+namespace art {
+
 // Architecture dependent flags for the ELF header.
 #define EF_ARM_EABI_VER5 0x05000000
 #define EF_MIPS_ABI_O32 0x00001000
@@ -163,4 +165,6 @@
   }
 }
 
+}  // namespace art
+
 #endif  // ART_RUNTIME_ELF_UTILS_H_
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 9fb9a3b..ccc5d83 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -499,11 +499,8 @@
     case StaticPrimitiveRead:    is_primitive = true;  is_set = false; is_static = true;  break;
     case StaticPrimitiveWrite:   is_primitive = true;  is_set = true;  is_static = true;  break;
     default:
-      LOG(FATAL) << "UNREACHABLE";  // Assignment below to avoid GCC warnings.
-      is_primitive = true;
-      is_set = true;
-      is_static = true;
-      break;
+      LOG(FATAL) << "UNREACHABLE";
+      UNREACHABLE();
   }
   if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
     // Incompatible class change.
diff --git a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
index c3664bf..61d66ba 100644
--- a/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/portable/portable_trampoline_entrypoints.cc
@@ -172,7 +172,7 @@
         break;
       case Primitive::kPrimVoid:
         LOG(FATAL) << "UNREACHABLE";
-        break;
+        UNREACHABLE();
     }
     ++cur_reg_;
   }
@@ -261,8 +261,7 @@
         break;
       case Primitive::kPrimVoid:
         LOG(FATAL) << "UNREACHABLE";
-        val.j = 0;
-        break;
+        UNREACHABLE();
     }
     args_.push_back(val);
   }
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index a2869ec..7dbfdd5 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -248,7 +248,8 @@
     }
 #endif
     default: {
-      LOG(FATAL) << "Unimplemented";
+      UNIMPLEMENTED(FATAL);
+      UNREACHABLE();
     }
   }
 }
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 6b3e9dc..f78273f 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -52,7 +52,7 @@
                                                               uint64_t fpr_result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   // Compute address of return PC and sanity check that it currently holds 0.
-  uint32_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kRefsOnly);
   uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
                                                       return_pc_offset);
   CHECK_EQ(*return_pc, 0U);
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index c239535..c1276b5 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "entrypoints/entrypoint_utils-inl.h"
+#include "mirror/art_method-inl.h"
 #include "mirror/object-inl.h"
 #include "thread-inl.h"
 #include "verify_object-inl.h"
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index af341bb..e0aab75 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -50,15 +50,19 @@
   // | arg1 spill |  |
   // | Method*    | ---
   // | LR         |
-  // | ...        |    callee saves
-  // | R3         |    arg3
-  // | R2         |    arg2
-  // | R1         |    arg1
-  // | R0         |    padding
+  // | ...        |    4x6 bytes callee saves
+  // | R3         |
+  // | R2         |
+  // | R1         |
+  // | S15        |
+  // | :          |
+  // | S0         |
+  // |            |    4x2 bytes padding
   // | Method*    |  <- sp
-  static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
-  static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
-  static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
+  static constexpr bool kQuickSoftFloatAbi = kArm32QuickCodeUseSoftFloat;
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = !kArm32QuickCodeUseSoftFloat;
+  static constexpr size_t kNumQuickGprArgs = 3;
+  static constexpr size_t kNumQuickFprArgs = kArm32QuickCodeUseSoftFloat ? 0 : 16;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
       arm::ArmCalleeSaveFpr1Offset(Runtime::kRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
@@ -90,6 +94,7 @@
   // |            |    padding
   // | Method*    |  <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
@@ -117,6 +122,7 @@
   // | A1         |    arg1
   // | A0/Method* |  <- sp
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
@@ -141,6 +147,7 @@
   // | ECX         |    arg1
   // | EAX/Method* |  <- sp
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 3;  // 3 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 0;  // 0 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0;  // Offset of first FPR arg.
@@ -178,6 +185,7 @@
   // | Padding         |
   // | RDI/Method*     |  <- sp
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
+  static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
   static constexpr size_t kNumQuickGprArgs = 5;  // 5 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16;  // Offset of first FPR arg.
@@ -222,8 +230,16 @@
           fpr_args_(reinterpret_cast<uint8_t*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset),
           stack_args_(reinterpret_cast<uint8_t*>(sp) + kQuickCalleeSaveFrame_RefAndArgs_FrameSize
                       + StackArgumentStartFromShorty(is_static, shorty, shorty_len)),
-          gpr_index_(0), fpr_index_(0), stack_index_(0), cur_type_(Primitive::kPrimVoid),
-          is_split_long_or_double_(false) {}
+          gpr_index_(0), fpr_index_(0), fpr_double_index_(0), stack_index_(0),
+          cur_type_(Primitive::kPrimVoid), is_split_long_or_double_(false) {
+    COMPILE_ASSERT(kQuickSoftFloatAbi == (kNumQuickFprArgs == 0), knum_of_quick_fpr_arg_unexpected);
+    COMPILE_ASSERT(!(kQuickSoftFloatAbi && kQuickDoubleRegAlignedFloatBackFilled),
+        kdouble_align_unexpected);
+    // For register alignment, we want to assume that the counter (fpr_double_index_) is even iff
+    // the next register is even.
+    COMPILE_ASSERT(!kQuickDoubleRegAlignedFloatBackFilled || kNumQuickFprArgs % 2 == 0,
+        knum_quick_fpr_args_not_even);
+  }
 
   virtual ~QuickArgumentVisitor() {}
 
@@ -237,7 +253,11 @@
     if (!kQuickSoftFloatAbi) {
       Primitive::Type type = GetParamPrimitiveType();
       if (UNLIKELY((type == Primitive::kPrimDouble) || (type == Primitive::kPrimFloat))) {
-        if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
+        if (type == Primitive::kPrimDouble && kQuickDoubleRegAlignedFloatBackFilled) {
+          if (fpr_double_index_ + 2 < kNumQuickFprArgs + 1) {
+            return fpr_args_ + (fpr_double_index_ * GetBytesPerFprSpillLocation(kRuntimeISA));
+          }
+        } else if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
           return fpr_args_ + (fpr_index_ * GetBytesPerFprSpillLocation(kRuntimeISA));
         }
         return stack_args_ + (stack_index_ * kBytesStackArgLocation);
@@ -268,28 +288,30 @@
 
   uint64_t ReadSplitLongParam() const {
     DCHECK(IsSplitLongOrDouble());
+    // Read low half from register.
     uint64_t low_half = *reinterpret_cast<uint32_t*>(GetParamAddress());
-    uint64_t high_half = *reinterpret_cast<uint32_t*>(stack_args_);
+    // Read the high half from the stack. Since stack_index_ currently indexes the argument
+    // itself, the high half lives at (stack_index_ + 1).
+    uint64_t high_half = *reinterpret_cast<uint32_t*>(stack_args_
+        + (stack_index_ + 1) * kBytesStackArgLocation);
     return (low_half & 0xffffffffULL) | (high_half << 32);
   }
 
   void VisitArguments() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    // This implementation doesn't support reg-spill area for hard float
-    // ABI targets such as x86_64 and aarch64. So, for those targets whose
-    // 'kQuickSoftFloatAbi' is 'false':
-    //     (a) 'stack_args_' should point to the first method's argument
-    //     (b) whatever the argument type it is, the 'stack_index_' should
-    //         be moved forward along with every visiting.
+    // (a) 'stack_args_' should point to the method's first argument.
+    // (b) whatever the argument type is, 'stack_index_' should be
+    //     advanced with every visit.
     gpr_index_ = 0;
     fpr_index_ = 0;
+    if (kQuickDoubleRegAlignedFloatBackFilled) {
+      fpr_double_index_ = 0;
+    }
     stack_index_ = 0;
     if (!is_static_) {  // Handle this.
       cur_type_ = Primitive::kPrimNot;
       is_split_long_or_double_ = false;
       Visit();
-      if (!kQuickSoftFloatAbi || kNumQuickGprArgs == 0) {
-        stack_index_++;
-      }
+      stack_index_++;
       if (kNumQuickGprArgs > 0) {
         gpr_index_++;
       }
@@ -305,9 +327,7 @@
         case Primitive::kPrimInt:
           is_split_long_or_double_ = false;
           Visit();
-          if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) {
-            stack_index_++;
-          }
+          stack_index_++;
           if (gpr_index_ < kNumQuickGprArgs) {
             gpr_index_++;
           }
@@ -315,17 +335,24 @@
         case Primitive::kPrimFloat:
           is_split_long_or_double_ = false;
           Visit();
+          stack_index_++;
           if (kQuickSoftFloatAbi) {
             if (gpr_index_ < kNumQuickGprArgs) {
               gpr_index_++;
-            } else {
-              stack_index_++;
             }
           } else {
-            if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
+            if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
               fpr_index_++;
+              if (kQuickDoubleRegAlignedFloatBackFilled) {
+                // Double should not overlap with float.
+                // For example, if fpr_index_ = 3, fpr_double_index_ should be at least 4.
+                fpr_double_index_ = std::max(fpr_double_index_, RoundUp(fpr_index_, 2));
+                // Float should not overlap with double.
+                if (fpr_index_ % 2 == 0) {
+                  fpr_index_ = std::max(fpr_double_index_, fpr_index_);
+                }
+              }
             }
-            stack_index_++;
           }
           break;
         case Primitive::kPrimDouble:
@@ -334,42 +361,46 @@
             is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
                 ((gpr_index_ + 1) == kNumQuickGprArgs);
             Visit();
-            if (!kQuickSoftFloatAbi || kNumQuickGprArgs == gpr_index_) {
-              if (kBytesStackArgLocation == 4) {
-                stack_index_+= 2;
-              } else {
-                CHECK_EQ(kBytesStackArgLocation, 8U);
-                stack_index_++;
-              }
+            if (kBytesStackArgLocation == 4) {
+              stack_index_+= 2;
+            } else {
+              CHECK_EQ(kBytesStackArgLocation, 8U);
+              stack_index_++;
             }
             if (gpr_index_ < kNumQuickGprArgs) {
               gpr_index_++;
               if (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) {
                 if (gpr_index_ < kNumQuickGprArgs) {
                   gpr_index_++;
-                } else if (kQuickSoftFloatAbi) {
-                  stack_index_++;
                 }
               }
             }
           } else {
             is_split_long_or_double_ = (GetBytesPerFprSpillLocation(kRuntimeISA) == 4) &&
-                ((fpr_index_ + 1) == kNumQuickFprArgs);
+                ((fpr_index_ + 1) == kNumQuickFprArgs) && !kQuickDoubleRegAlignedFloatBackFilled;
             Visit();
-            if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
-              fpr_index_++;
-              if (GetBytesPerFprSpillLocation(kRuntimeISA) == 4) {
-                if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) {
-                  fpr_index_++;
-                }
-              }
-            }
             if (kBytesStackArgLocation == 4) {
               stack_index_+= 2;
             } else {
               CHECK_EQ(kBytesStackArgLocation, 8U);
               stack_index_++;
             }
+            if (kQuickDoubleRegAlignedFloatBackFilled) {
+              if (fpr_double_index_ + 2 < kNumQuickFprArgs + 1) {
+                fpr_double_index_ += 2;
+                // Float should not overlap with double.
+                if (fpr_index_ % 2 == 0) {
+                  fpr_index_ = std::max(fpr_double_index_, fpr_index_);
+                }
+              }
+            } else if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
+              fpr_index_++;
+              if (GetBytesPerFprSpillLocation(kRuntimeISA) == 4) {
+                if (fpr_index_ + 1 < kNumQuickFprArgs + 1) {
+                  fpr_index_++;
+                }
+              }
+            }
           }
           break;
         default:
@@ -381,16 +412,8 @@
  private:
   static size_t StackArgumentStartFromShorty(bool is_static, const char* shorty,
                                              uint32_t shorty_len) {
-    if (kQuickSoftFloatAbi) {
-      CHECK_EQ(kNumQuickFprArgs, 0U);
-      return (kNumQuickGprArgs * GetBytesPerGprSpillLocation(kRuntimeISA))
-          + sizeof(StackReference<mirror::ArtMethod>) /* StackReference<ArtMethod> */;
-    } else {
-      // For now, there is no reg-spill area for the targets with
-      // hard float ABI. So, the offset pointing to the first method's
-      // parameter ('this' for non-static methods) should be returned.
-      return sizeof(StackReference<mirror::ArtMethod>);  // Skip StackReference<ArtMethod>.
-    }
+    // 'stack_args_' points to the method's first argument.
+    return sizeof(StackReference<mirror::ArtMethod>);  // Skip StackReference<ArtMethod>.
   }
 
  protected:
@@ -403,7 +426,14 @@
   uint8_t* const fpr_args_;  // Address of FPR arguments in callee save frame.
   uint8_t* const stack_args_;  // Address of stack arguments in caller's frame.
   uint32_t gpr_index_;  // Index into spilled GPRs.
-  uint32_t fpr_index_;  // Index into spilled FPRs.
+  // Index into spilled FPRs.
+  // When kQuickDoubleRegAlignedFloatBackFilled is set, it may index a hole while
+  // fpr_double_index_ holds a higher register number.
+  uint32_t fpr_index_;
+  // Index into spilled FPRs for aligned doubles.
+  // Only used when kQuickDoubleRegAlignedFloatBackFilled: the next available double register,
+  // indexed in terms of singles, which may run ahead of fpr_index_.
+  uint32_t fpr_double_index_;
   uint32_t stack_index_;  // Index into arguments on the stack.
   // The current type of argument during VisitArguments.
   Primitive::Type cur_type_;
@@ -456,7 +486,7 @@
       break;
     case Primitive::kPrimVoid:
       LOG(FATAL) << "UNREACHABLE";
-      break;
+      UNREACHABLE();
   }
   ++cur_reg_;
 }
@@ -564,8 +594,7 @@
       break;
     case Primitive::kPrimVoid:
       LOG(FATAL) << "UNREACHABLE";
-      val.j = 0;
-      break;
+      UNREACHABLE();
   }
   args_->push_back(val);
 }
@@ -943,8 +972,8 @@
         delegate_(delegate) {
     // For register alignment, we want to assume that counters (gpr_index_, fpr_index_) are even iff
     // the next register is even; counting down is just to make the compiler happy...
-    CHECK_EQ(kNumNativeGprArgs % 2, 0U);
-    CHECK_EQ(kNumNativeFprArgs % 2, 0U);
+    COMPILE_ASSERT(kNumNativeGprArgs % 2 == 0U, knum_native_gpr_args_not_even);
+    COMPILE_ASSERT(kNumNativeFprArgs % 2 == 0U, knum_native_fpr_args_not_even);
   }
 
   virtual ~BuildNativeCallFrameStateMachine() {}
@@ -1557,7 +1586,7 @@
       break;
     case Primitive::kPrimVoid:
       LOG(FATAL) << "UNREACHABLE";
-      break;
+      UNREACHABLE();
   }
 }
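A self-contained sketch (not ART code) of the S-register back-filling that kQuickDoubleRegAlignedFloatBackFilled models for the ARM hard-float quick ABI: a double consumes the next even-aligned pair of single-precision slots, and a later float may back-fill the hole that alignment skipped. The parity precondition (an even kNumQuickFprArgs when back-filling) is now enforced at compile time via COMPILE_ASSERT instead of a runtime CHECK.

    #include <algorithm>
    #include <cstdint>

    constexpr uint32_t kNumFprSlots = 16;  // S0..S15, mirroring kNumQuickFprArgs.

    struct FprBackFill {
      uint32_t single = 0;  // fpr_index_: next single slot, may point at a hole.
      uint32_t pair = 0;    // fpr_double_index_: next aligned pair, in singles.

      int AllocFloat() {    // Returns the slot index, or -1 for a stack arg.
        if (single + 1 >= kNumFprSlots + 1) return -1;
        int slot = static_cast<int>(single++);
        pair = std::max(pair, (single + 1u) & ~1u);  // RoundUp(single, 2).
        if (single % 2 == 0) single = std::max(pair, single);
        return slot;
      }

      int AllocDouble() {   // Returns the first slot of the pair, or -1.
        if (pair + 2 >= kNumFprSlots + 1) return -1;
        int slot = static_cast<int>(pair);
        pair += 2;
        if (single % 2 == 0) single = std::max(pair, single);
        return slot;
      }
    };

For the sequence float, double, float, the floats land in S0 and S1 (S1 is back-filled) and the double in the aligned pair S2/S3, matching what the visitor above computes.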
 
diff --git a/runtime/gc/allocator/dlmalloc.cc b/runtime/gc/allocator/dlmalloc.cc
index a6a3ee7..fbeba7f 100644
--- a/runtime/gc/allocator/dlmalloc.cc
+++ b/runtime/gc/allocator/dlmalloc.cc
@@ -39,11 +39,11 @@
 
 
 static void art_heap_corruption(const char* function) {
-  LOG(FATAL) << "Corrupt heap detected in: " << function;
+  LOG(::art::FATAL) << "Corrupt heap detected in: " << function;
 }
 
 static void art_heap_usage_error(const char* function, void* p) {
-  LOG(FATAL) << "Incorrect use of function '" << function << "' argument " << p << " not expected";
+  LOG(::art::FATAL) << "Incorrect use of function '" << function << "' argument " << p << " not expected";
 }
 
 #include "globals.h"
@@ -63,7 +63,7 @@
     int rc = madvise(start, length, MADV_DONTNEED);
     if (UNLIKELY(rc != 0)) {
       errno = rc;
-      PLOG(FATAL) << "madvise failed during heap trimming";
+      PLOG(::art::FATAL) << "madvise failed during heap trimming";
     }
     size_t* reclaimed = reinterpret_cast<size_t*>(arg);
     *reclaimed += length;
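These callbacks are defined outside namespace art (they are wired into the included dlmalloc C code), so the severity now has to be fully qualified; a plausible reading (an assumption here) is that the logging refactor moved the severity constants from the deleted global-scope runtime/log_severity.h into the art namespace:

    // Outside namespace art, an unqualified FATAL no longer resolves:
    static void art_heap_corruption(const char* function) {
      LOG(::art::FATAL) << "Corrupt heap detected in: " << function;
    }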
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index a3da532..fa531a7 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -24,6 +24,7 @@
 
 #include <map>
 #include <list>
+#include <sstream>
 #include <vector>
 
 namespace art {
@@ -1143,7 +1144,7 @@
 
 size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
   size_t freed_bytes = 0;
-  if (false) {
+  if ((false)) {
     // Used only to test Free() as GC uses only BulkFree().
     for (size_t i = 0; i < num_ptrs; ++i) {
       freed_bytes += FreeInternal(self, ptrs[i]);
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 07b61e6..9e6a800 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -18,6 +18,10 @@
 
 #include "garbage_collector.h"
 
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include "cutils/trace.h"
+
+#include "base/dumpable.h"
 #include "base/histogram-inl.h"
 #include "base/logging.h"
 #include "base/mutex-inl.h"
@@ -188,7 +192,7 @@
   if (iterations == 0) {
     return;
   }
-  os << ConstDumpable<CumulativeLogger>(logger);
+  os << Dumpable<CumulativeLogger>(logger);
   const uint64_t total_ns = logger.GetTotalNs();
   double seconds = NsToMs(logger.GetTotalNs()) / 1000.0;
   const uint64_t freed_bytes = GetTotalFreedBytes();
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index 942b556..ad3bb11 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -21,6 +21,9 @@
 #include <climits>
 #include <vector>
 
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include "cutils/trace.h"
+
 #include "base/bounded_fifo.h"
 #include "base/logging.h"
 #include "base/macros.h"
@@ -810,6 +813,7 @@
           break;
         default:
           LOG(FATAL) << "Unreachable";
+          UNREACHABLE();
         }
         TimingLogger::ScopedTiming t(name, GetTimings());
         ScanObjectVisitor visitor(this);
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 9459a3b..e141b6f 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -16,9 +16,10 @@
 
 #include "semi_space-inl.h"
 
+#include <climits>
 #include <functional>
 #include <numeric>
-#include <climits>
+#include <sstream>
 #include <vector>
 
 #include "base/logging.h"
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index f0e1512..6be683d 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -35,8 +35,8 @@
     case kGcCauseTrim: return "HeapTrim";
     default:
       LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
   }
-  return "";
 }
 
 std::ostream& operator<<(std::ostream& os, const GcCause& gc_cause) {
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b9d69d5..c0008aa 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -24,6 +24,7 @@
 #include <vector>
 
 #include "base/allocator.h"
+#include "base/dumpable.h"
 #include "base/histogram-inl.h"
 #include "base/stl_util.h"
 #include "common_throws.h"
@@ -436,7 +437,7 @@
 MemMap* Heap::MapAnonymousPreferredAddress(const char* name, uint8_t* request_begin, size_t capacity,
                                            int prot_flags, std::string* out_error_str) {
   while (true) {
-    MemMap* map = MemMap::MapAnonymous(kMemMapSpaceName[0], request_begin, capacity,
+    MemMap* map = MemMap::MapAnonymous(name, request_begin, capacity,
                                        PROT_READ | PROT_WRITE, true, out_error_str);
     if (map != nullptr || request_begin == nullptr) {
       return map;
@@ -1324,8 +1325,9 @@
               // Throw OOM by default.
               break;
             default: {
-              LOG(FATAL) << "Unimplemented homogeneous space compaction result "
-                         << static_cast<size_t>(result);
+              UNIMPLEMENTED(FATAL) << "homogeneous space compaction result: "
+                  << static_cast<size_t>(result);
+              UNREACHABLE();
             }
           }
           // Always print that we ran homogeneous space compaction since this can cause jank.
@@ -1760,7 +1762,8 @@
         break;
       }
       default: {
-        LOG(FATAL) << "Unimplemented";
+        UNIMPLEMENTED(FATAL);
+        UNREACHABLE();
       }
     }
     if (IsGcConcurrent()) {
@@ -2137,6 +2140,13 @@
   } else {
     LOG(FATAL) << "Invalid current allocator " << current_allocator_;
   }
+  if (IsGcConcurrent()) {
+    // Disable concurrent GC check so that we don't have spammy JNI requests.
+    // This gets recalculated in GrowForUtilization. It is important that it is disabled /
+    // calculated in the same thread so that there aren't any races that can cause it to become
+    // permanently disabled. b/17942071
+    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
+  }
   CHECK(collector != nullptr)
       << "Could not find garbage collector with collector_type="
       << static_cast<size_t>(collector_type_) << " and gc_type=" << gc_type;
@@ -2178,7 +2188,7 @@
               << percent_free << "% free, " << PrettySize(current_heap_size) << "/"
               << PrettySize(total_memory) << ", " << "paused " << pause_string.str()
               << " total " << PrettyDuration((duration / 1000) * 1000);
-    VLOG(heap) << ConstDumpable<TimingLogger>(*current_gc_iteration_.GetTimings());
+    VLOG(heap) << Dumpable<TimingLogger>(*current_gc_iteration_.GetTimings());
   }
   FinishGC(self, gc_type);
   // Inform DDMS that a GC completed.
@@ -2955,9 +2965,6 @@
       self->IsHandlingStackOverflow()) {
     return;
   }
-  // We already have a request pending, no reason to start more until we update
-  // concurrent_start_bytes_.
-  concurrent_start_bytes_ = std::numeric_limits<size_t>::max();
   JNIEnv* env = self->GetJniEnv();
   DCHECK(WellKnownClasses::java_lang_Daemons != nullptr);
   DCHECK(WellKnownClasses::java_lang_Daemons_requestGC != nullptr);
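An illustrative sketch of the sentinel (names as in heap.cc/heap-inl.h): with concurrent_start_bytes_ pinned at the maximum, the allocation fast path's trigger cannot fire until GrowForUtilization() recomputes the real threshold, and since both writes now happen on the thread running the collection, disabling can no longer race with the recalculation (b/17942071):

    concurrent_start_bytes_ = std::numeric_limits<size_t>::max();  // disable
    // ... later, on the allocation fast path:
    if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
      RequestConcurrentGC(self);  // cannot fire while the sentinel is set
    }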
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index ba85c55..ff1e38b 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -601,9 +601,6 @@
   void RemoveRememberedSet(space::Space* space);
 
   bool IsCompilingBoot() const;
-  bool RunningOnValgrind() const {
-    return running_on_valgrind_;
-  }
   bool HasImageSpace() const;
 
   ReferenceProcessor* GetReferenceProcessor() {
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 8f42642..0a55b52 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -201,8 +201,8 @@
 }
 
 accounting::ContinuousSpaceBitmap::SweepCallback* BumpPointerSpace::GetSweepCallback() {
-  LOG(FATAL) << "Unimplemented";
-  return nullptr;
+  UNIMPLEMENTED(FATAL);
+  UNREACHABLE();
 }
 
 uint64_t BumpPointerSpace::GetBytesAllocated() {
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 39d82cc..d479038 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -21,6 +21,7 @@
 
 #include <random>
 
+#include "base/macros.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "base/scoped_flock.h"
diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h
index 7230116..cfde460 100644
--- a/runtime/gc/space/malloc_space.h
+++ b/runtime/gc/space/malloc_space.h
@@ -19,7 +19,7 @@
 
 #include "space.h"
 
-#include <iostream>
+#include <ostream>
 #include <valgrind.h>
 #include <memcheck/memcheck.h>
 
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index d25694a..161eba9 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -17,6 +17,9 @@
 
 #include "rosalloc_space-inl.h"
 
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include "cutils/trace.h"
+
 #include "gc/accounting/card_table.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
@@ -73,8 +76,9 @@
   uint8_t* begin = mem_map->Begin();
   // TODO: Fix RosAllocSpace to support valgrind. There are currently some issues with
   // AllocationSize caused by redzones. b/12944686
-  if (false && Runtime::Current()->GetHeap()->RunningOnValgrind()) {
-    LOG(FATAL) << "Unimplemented";
+  if (Runtime::Current()->RunningOnValgrind()) {
+    UNIMPLEMENTED(FATAL);
+    UNREACHABLE();
   } else {
     return new RosAllocSpace(name, mem_map, rosalloc, begin, end, begin + capacity, growth_limit,
                              can_move_objects, starting_size, initial_size, low_memory_mode);
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index bff28f6..b233805 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -39,33 +39,33 @@
 }
 
 DlMallocSpace* Space::AsDlMallocSpace() {
-  LOG(FATAL) << "Unreachable";
-  return nullptr;
+  UNIMPLEMENTED(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 RosAllocSpace* Space::AsRosAllocSpace() {
-  LOG(FATAL) << "Unreachable";
-  return nullptr;
+  UNIMPLEMENTED(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 ZygoteSpace* Space::AsZygoteSpace() {
-  LOG(FATAL) << "Unreachable";
-  return nullptr;
+  UNIMPLEMENTED(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 BumpPointerSpace* Space::AsBumpPointerSpace() {
-  LOG(FATAL) << "Unreachable";
-  return nullptr;
+  UNIMPLEMENTED(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 AllocSpace* Space::AsAllocSpace() {
-  LOG(FATAL) << "Unimplemented";
-  return nullptr;
+  UNIMPLEMENTED(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 ContinuousMemMapAllocSpace* Space::AsContinuousMemMapAllocSpace() {
-  LOG(FATAL) << "Unimplemented";
-  return nullptr;
+  UNIMPLEMENTED(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 DiscontinuousSpace::DiscontinuousSpace(const std::string& name,
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index 51d84f5..9de0548 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -58,7 +58,8 @@
 }
 
 void ZygoteSpace::Clear() {
-  LOG(FATAL) << "Unimplemented";
+  UNIMPLEMENTED(FATAL);
+  UNREACHABLE();
 }
 
 ZygoteSpace::ZygoteSpace(const std::string& name, MemMap* mem_map, size_t objects_allocated)
diff --git a/runtime/globals.h b/runtime/globals.h
index b7bd44d..4d33196 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -112,6 +112,8 @@
 
 static constexpr bool kDefaultMustRelocate = true;
 
+static constexpr bool kArm32QuickCodeUseSoftFloat = false;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index c1455fd..4d177a3 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -126,7 +126,7 @@
   }
   table_[index].Add(obj);
   result = ToIndirectRef(index);
-  if (false) {
+  if ((false)) {
     LOG(INFO) << "+++ added at " << ExtractIndex(result) << " top=" << segment_state_.parts.topIndex
               << " holes=" << segment_state_.parts.numHoles;
   }
@@ -193,7 +193,7 @@
     int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles;
     if (numHoles != 0) {
       while (--topIndex > bottomIndex && numHoles != 0) {
-        if (false) {
+        if ((false)) {
           LOG(INFO) << "+++ checking for hole at " << topIndex - 1
                     << " (cookie=" << cookie << ") val="
                     << table_[topIndex - 1].GetReference()->Read<kWithoutReadBarrier>();
@@ -201,7 +201,7 @@
         if (!table_[topIndex - 1].GetReference()->IsNull()) {
           break;
         }
-        if (false) {
+        if ((false)) {
           LOG(INFO) << "+++ ate hole at " << (topIndex - 1);
         }
         numHoles--;
@@ -210,7 +210,7 @@
       segment_state_.parts.topIndex = topIndex;
     } else {
       segment_state_.parts.topIndex = topIndex-1;
-      if (false) {
+      if ((false)) {
         LOG(INFO) << "+++ ate last entry " << topIndex - 1;
       }
     }
@@ -228,7 +228,7 @@
 
     *table_[idx].GetReference() = GcRoot<mirror::Object>(nullptr);
     segment_state_.parts.numHoles++;
-    if (false) {
+    if ((false)) {
       LOG(INFO) << "+++ left hole at " << idx << ", holes=" << segment_state_.parts.numHoles;
     }
   }
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index adbece0..fc3da36 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -18,12 +18,15 @@
 
 #include <sys/uio.h>
 
+#include <sstream>
+
 #include "arch/context.h"
 #include "atomic.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
+#include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc_root-inl.h"
@@ -34,9 +37,6 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "nth_caller_visitor.h"
-#if !defined(ART_USE_PORTABLE_COMPILER)
-#include "entrypoints/quick/quick_entrypoints.h"
-#endif
 #include "os.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -633,7 +633,6 @@
     SetEntrypointsInstrumented(true);
   }
   ++quick_alloc_entry_points_instrumentation_counter_;
-  LOG(INFO) << "Counter: " << quick_alloc_entry_points_instrumentation_counter_;
 }
 
 void Instrumentation::UninstrumentQuickAllocEntryPointsLocked() {
@@ -643,7 +642,6 @@
   if (quick_alloc_entry_points_instrumentation_counter_ == 0) {
     SetEntrypointsInstrumented(false);
   }
-  LOG(INFO) << "Counter: " << quick_alloc_entry_points_instrumentation_counter_;
 }
 
 void Instrumentation::ResetQuickAllocEntryPoints() {
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index dfb03cd..9de12f2 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -325,7 +325,7 @@
 JValue ExecuteGotoImpl(Thread* self, MethodHelper& mh, const DexFile::CodeItem* code_item,
                        ShadowFrame& shadow_frame, JValue result_register) {
   LOG(FATAL) << "UNREACHABLE";
-  exit(0);
+  UNREACHABLE();
 }
 // Explicit definitions of ExecuteGotoImpl.
 template<> SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 3ccdd03..c887a88 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -80,6 +80,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
   }
   return true;
 }
@@ -153,6 +154,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
   }
   return true;
 }
@@ -195,7 +197,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
-      break;
+      UNREACHABLE();
   }
   return field_value;
 }
@@ -285,6 +287,7 @@
     }
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
   }
   return true;
 }
@@ -369,6 +372,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
   }
   return true;
 }
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index a8345ad..fa03fc7 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -21,6 +21,9 @@
 
 #include <math.h>
 
+#include <iostream>
+#include <sstream>
+
 #include "base/logging.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index fed8bf0..19e03d8 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -18,6 +18,7 @@
 
 #include <dlfcn.h>
 
+#include "base/dumpable.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
 #include "check_jni.h"
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index 35aaf0a..4f34896 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -16,6 +16,7 @@
 
 #include "object_registry.h"
 
+#include "handle_scope-inl.h"
 #include "mirror/class.h"
 #include "scoped_thread_state_change.h"
 
@@ -46,12 +47,17 @@
     return 0;
   }
 
+  Thread* const self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> obj_h(hs.NewHandle(o));
+
   // Call IdentityHashCode here to avoid a lock level violation between lock_ and monitor_lock.
-  int32_t identity_hash_code = o->IdentityHashCode();
-  ScopedObjectAccessUnchecked soa(Thread::Current());
+  int32_t identity_hash_code = obj_h->IdentityHashCode();
+
+  ScopedObjectAccessUnchecked soa(self);
   MutexLock mu(soa.Self(), lock_);
   ObjectRegistryEntry* entry = nullptr;
-  if (ContainsLocked(soa.Self(), o, identity_hash_code, &entry)) {
+  if (ContainsLocked(soa.Self(), obj_h.Get(), identity_hash_code, &entry)) {
     // This object was already in our map.
     ++entry->reference_count;
   } else {
@@ -66,7 +72,7 @@
     // This object isn't in the registry yet, so add it.
     JNIEnv* env = soa.Env();
 
-    jobject local_reference = soa.AddLocalReference<jobject>(o);
+    jobject local_reference = soa.AddLocalReference<jobject>(obj_h.Get());
 
     entry->jni_reference_type = JNIWeakGlobalRefType;
     entry->jni_reference = env->NewWeakGlobalRef(local_reference);
@@ -80,17 +86,6 @@
   return entry->id;
 }
 
-bool ObjectRegistry::Contains(mirror::Object* o, ObjectRegistryEntry** out_entry) {
-  if (o == nullptr) {
-    return false;
-  }
-  // Call IdentityHashCode here to avoid a lock level violation between lock_ and monitor_lock.
-  int32_t identity_hash_code = o->IdentityHashCode();
-  Thread* self = Thread::Current();
-  MutexLock mu(self, lock_);
-  return ContainsLocked(self, o, identity_hash_code, out_entry);
-}
-
 bool ObjectRegistry::ContainsLocked(Thread* self, mirror::Object* o, int32_t identity_hash_code,
                                     ObjectRegistryEntry** out_entry) {
   DCHECK(o != nullptr);
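A minimal sketch of the handle pattern the registry adopts, assuming a moving collector: IdentityHashCode() may allocate (to inflate the monitor) and suspend the thread, so a raw mirror::Object* could be moved underneath the caller, whereas a Handle<> is a GC root the collector updates in place:

    StackHandleScope<1> hs(self);
    Handle<mirror::Object> obj_h(hs.NewHandle(o));
    int32_t hash = obj_h->IdentityHashCode();  // may suspend; the handle survives
    UseObject(obj_h.Get());                    // reloaded through the root
                                               // (UseObject is hypothetical)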
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index faddff1..0693f33 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -75,10 +75,6 @@
     return down_cast<T>(InternalGet(id, error));
   }
 
-  bool Contains(mirror::Object* o) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-    return Contains(o, nullptr);
-  }
-
   void Clear() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void DisableCollection(JDWP::ObjectId id)
@@ -114,9 +110,6 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
-  bool Contains(mirror::Object* o, ObjectRegistryEntry** out_entry)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(lock_);
-
   bool ContainsLocked(Thread* self, mirror::Object* o, int32_t identity_hash_code,
                       ObjectRegistryEntry** out_entry)
       EXCLUSIVE_LOCKS_REQUIRED(lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 70754f2..ad06b85 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -2705,7 +2705,7 @@
     os << "JNIWeakGlobalRefType";
     return os;
   default:
-    LOG(FATAL) << "jobjectRefType[" << static_cast<int>(rhs) << "]";
-    return os;
+    LOG(::art::FATAL) << "jobjectRefType[" << static_cast<int>(rhs) << "]";
+    UNREACHABLE();
   }
 }
diff --git a/runtime/log_severity.h b/runtime/log_severity.h
deleted file mode 100644
index 31682df..0000000
--- a/runtime/log_severity.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2011 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_LOG_SEVERITY_H_
-#define ART_RUNTIME_LOG_SEVERITY_H_
-
-typedef int LogSeverity;
-
-const int VERBOSE = 0, DEBUG = 1, INFO = 2, WARNING = 3, ERROR = 4, FATAL = 5;
-const int INTERNAL_FATAL = 6;  // For Runtime::Abort.
-
-#endif  // ART_RUNTIME_LOG_SEVERITY_H_
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 3144ce1..c118471 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -15,11 +15,12 @@
  */
 
 #include "mem_map.h"
-#include "thread-inl.h"
 
-#include <inttypes.h>
 #include <backtrace/BacktraceMap.h>
+#include <inttypes.h>
+
 #include <memory>
+#include <sstream>
 
 // See CreateStartPos below.
 #ifdef __BIONIC__
@@ -28,6 +29,7 @@
 
 #include "base/stringprintf.h"
 #include "ScopedFd.h"
+#include "thread-inl.h"
 #include "utils.h"
 
 #define USE_ASHMEM 1
diff --git a/runtime/mirror/art_method-inl.h b/runtime/mirror/art_method-inl.h
index 664a412..d262fd5 100644
--- a/runtime/mirror/art_method-inl.h
+++ b/runtime/mirror/art_method-inl.h
@@ -171,7 +171,7 @@
     }
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << type;
-      return true;
+      UNREACHABLE();
   }
 }
 
@@ -223,9 +223,7 @@
 }
 
 inline const uint8_t* ArtMethod::GetVmapTable(const void* code_pointer) {
-  if (IsOptimized()) {
-    LOG(FATAL) << "Unimplemented vmap table for optimized compiler";
-  }
+  CHECK(!IsOptimized()) << "Unimplemented vmap table for optimized compiler";
   DCHECK(code_pointer != nullptr);
   DCHECK(code_pointer == GetQuickOatCodePointer());
   uint32_t offset =
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index 9584d15..b219004 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -43,7 +43,7 @@
 extern "C" void art_portable_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*, char);
 extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                       const char*);
-#ifdef __LP64__
+#if defined(__LP64__) || defined(__arm__)
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 #endif
@@ -396,7 +396,7 @@
       }
 
       if (!IsPortableCompiled()) {
-#ifdef __LP64__
+#if defined(__LP64__) || defined(__arm__)
         if (!IsStatic()) {
           (*art_quick_invoke_stub)(this, args, args_size, self, result, shorty);
         } else {
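Extending the guard to 32-bit ARM lines up with the hard-float quick ABI above: static calls have no implicit 'this' occupying the first argument register, so their marshalling diverges from instance calls, as it already does on the 64-bit targets (this rationale is an inference, not stated in the source):

    #if defined(__LP64__) || defined(__arm__)
    // Dedicated entry point: register assignment for the remaining arguments
    // shifts when there is no receiver to place first.
    extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t,
                                                 Thread*, JValue*, const char*);
    #endif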
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index b89da9d..c9e60bc 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -121,7 +121,7 @@
       OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
 #else
   LOG(FATAL) << "Unreachable";
-  return nullptr;
+  UNREACHABLE();
 #endif
 }
 
@@ -134,6 +134,7 @@
       OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr);
 #else
   LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
 #endif
 }
 
@@ -156,7 +157,7 @@
   return true;
 #else
   LOG(FATAL) << "Unreachable";
-  return false;
+  UNREACHABLE();
 #endif
 }
 
@@ -166,13 +167,12 @@
     DCHECK(obj->GetReadBarrierPointer() == nullptr)
         << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
         << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  } else if (kUseBrooksReadBarrier) {
+  } else {
+    CHECK(kUseBrooksReadBarrier);
     Object* obj = const_cast<Object*>(this);
     DCHECK_EQ(obj, obj->GetReadBarrierPointer())
         << "Bad Brooks pointer: obj=" << reinterpret_cast<void*>(obj)
         << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  } else {
-    LOG(FATAL) << "Unreachable";
   }
 }
 
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 1bbcf8e..b2b2420 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -478,6 +478,7 @@
   friend struct art::ObjectOffsets;  // for verifying offset information
   friend class CopyObjectVisitor;  // for CopyObject().
   friend class CopyClassVisitor;   // for CopyObject().
+  DISALLOW_ALLOCATION();
   DISALLOW_IMPLICIT_CONSTRUCTORS(Object);
 };
 
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index 6123934..5020ced 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -877,7 +877,7 @@
     }
     default: {
       LOG(FATAL) << "Unreachable";
-      return ThreadList::kInvalidThreadId;
+      UNREACHABLE();
     }
   }
 }
@@ -1032,7 +1032,7 @@
       return true;
     default:
       LOG(FATAL) << "Unreachable";
-      return false;
+      UNREACHABLE();
   }
 }
 
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index ec7d82d..c35bb30 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -326,6 +326,7 @@
       break;
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << invoke_type;
+      UNREACHABLE();
   }
   if (method == NULL) {
     return;
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index ee99e78..43681a7 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -93,7 +93,7 @@
     switch (dstComponentPrimitiveType) {
       case Primitive::kPrimVoid:
         LOG(FATAL) << "Unreachable, cannot have arrays of type void";
-        return;
+        UNREACHABLE();
       case Primitive::kPrimBoolean:
       case Primitive::kPrimByte:
         DCHECK_EQ(Primitive::ComponentSize(dstComponentPrimitiveType), 1U);
@@ -122,7 +122,7 @@
       }
       default:
         LOG(FATAL) << "Unknown array type: " << PrettyTypeOf(srcArray);
-        return;
+        UNREACHABLE();
     }
   }
   // If one of the arrays holds a primitive type the other array must hold the exact same type.
diff --git a/runtime/native/scoped_fast_native_object_access.h b/runtime/native/scoped_fast_native_object_access.h
index 606d62d..dfabff5 100644
--- a/runtime/native/scoped_fast_native_object_access.h
+++ b/runtime/native/scoped_fast_native_object_access.h
@@ -17,7 +17,7 @@
 #ifndef ART_RUNTIME_NATIVE_SCOPED_FAST_NATIVE_OBJECT_ACCESS_H_
 #define ART_RUNTIME_NATIVE_SCOPED_FAST_NATIVE_OBJECT_ACCESS_H_
 
-#include "mirror/art_method.h"
+#include "mirror/art_method-inl.h"
 #include "scoped_thread_state_change.h"
 
 namespace art {
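Switching to the -inl.h header follows the usual convention for inline members (the motivation is an assumption here): a header that calls methods defined inline in art_method-inl.h must see those definitions, not just the declarations from art_method.h:

    #include "mirror/art_method-inl.h"  // inline bodies visible to this header's callers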
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index bc191b4..b0d8e87 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -107,10 +107,11 @@
   GetMethodShorty, GetNativeMethodCount, GetNativeMethods
 };
 
-void LoadNativeBridge(std::string& native_bridge_library_filename) {
-  android::LoadNativeBridge(native_bridge_library_filename.c_str(), &native_bridge_art_callbacks_);
+bool LoadNativeBridge(std::string& native_bridge_library_filename) {
   VLOG(startup) << "Runtime::Setup native bridge library: "
       << (native_bridge_library_filename.empty() ? "(empty)" : native_bridge_library_filename);
+  return android::LoadNativeBridge(native_bridge_library_filename.c_str(),
+                                   &native_bridge_art_callbacks_);
 }
 
 void PreInitializeNativeBridge(std::string dir) {
diff --git a/runtime/native_bridge_art_interface.h b/runtime/native_bridge_art_interface.h
index 026cd82..090cddb 100644
--- a/runtime/native_bridge_art_interface.h
+++ b/runtime/native_bridge_art_interface.h
@@ -26,7 +26,7 @@
 // Mirror libnativebridge interface. Done to have the ART callbacks out of line, and not require
 // the system/core header file in other files.
 
-void LoadNativeBridge(std::string& native_bridge_library_filename);
+bool LoadNativeBridge(std::string& native_bridge_library_filename);
 
 // This is mostly for testing purposes, as in a full system this is called by Zygote code.
 void PreInitializeNativeBridge(std::string dir);
diff --git a/runtime/offsets.cc b/runtime/offsets.cc
index 3691401..f59ed88 100644
--- a/runtime/offsets.cc
+++ b/runtime/offsets.cc
@@ -16,7 +16,7 @@
 
 #include "offsets.h"
 
-#include <iostream>  // NOLINT
+#include <ostream>
 
 namespace art {
 
diff --git a/runtime/offsets.h b/runtime/offsets.h
index 72a6b0f..9d5063f 100644
--- a/runtime/offsets.h
+++ b/runtime/offsets.h
@@ -17,7 +17,8 @@
 #ifndef ART_RUNTIME_OFFSETS_H_
 #define ART_RUNTIME_OFFSETS_H_
 
-#include <iostream>  // NOLINT
+#include <ostream>
+
 #include "globals.h"
 
 namespace art {
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index dcca9d3..6b64c25 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -16,6 +16,8 @@
 
 #include "parsed_options.h"
 
+#include <sstream>
+
 #ifdef HAVE_ANDROID_OS
 #include "cutils/properties.h"
 #endif
@@ -534,9 +536,6 @@
           return false;
         }
       }
-    } else if (StartsWith(option, "-verbose-methods:")) {
-      gLogVerbosity.compiler = false;
-      Split(option.substr(strlen("-verbose-methods:")), ',', &gVerboseMethods);
     } else if (StartsWith(option, "-Xlockprofthreshold:")) {
       if (!ParseUnsignedInteger(option, ':', &lock_profiling_threshold_)) {
         return false;
diff --git a/runtime/profiler.cc b/runtime/profiler.cc
index 1d06d35..e399195 100644
--- a/runtime/profiler.cc
+++ b/runtime/profiler.cc
@@ -97,7 +97,7 @@
   switch (profile_options.GetProfileType()) {
     case kProfilerMethod: {
       mirror::ArtMethod* method = thread->GetCurrentMethod(nullptr);
-      if (false && method == nullptr) {
+      if ((false) && method == nullptr) {
         LOG(INFO) << "No current method available";
         std::ostringstream os;
         thread->Dump(os);
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 8e57837..c58735a 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -300,7 +300,7 @@
   InstrumentationStackVisitor(Thread* self, bool is_deoptimization, size_t frame_depth)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       : StackVisitor(self, nullptr),
-        self_(self), frame_depth_(frame_depth),
+        frame_depth_(frame_depth),
         instrumentation_frames_to_pop_(0) {
     CHECK_NE(frame_depth_, kInvalidFrameDepth);
   }
@@ -324,7 +324,6 @@
   }
 
  private:
-  Thread* const self_;
   const size_t frame_depth_;
   size_t instrumentation_frames_to_pop_;
 
diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h
index b93769c..cf1ecbf 100644
--- a/runtime/quick_exception_handler.h
+++ b/runtime/quick_exception_handler.h
@@ -40,6 +40,7 @@
 
   ~QuickExceptionHandler() {
     LOG(FATAL) << "UNREACHABLE";  // Expected to take long jump.
+    UNREACHABLE();
   }
 
   void FindCatch(const ThrowLocation& throw_location, mirror::Throwable* exception,
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index c16e9ed..8ba098f 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -45,6 +45,7 @@
 #include "arch/x86_64/registers_x86_64.h"
 #include "asm_support.h"
 #include "atomic.h"
+#include "base/dumpable.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "debugger.h"
@@ -146,11 +147,15 @@
       target_sdk_version_(0),
       implicit_null_checks_(false),
       implicit_so_checks_(false),
-      implicit_suspend_checks_(false) {
+      implicit_suspend_checks_(false),
+      is_native_bridge_loaded_(false) {
   CheckAsmSupportOffsetsAndSizes();
 }
 
 Runtime::~Runtime() {
+  if (is_native_bridge_loaded_) {
+    UnloadNativeBridge();
+  }
   if (dump_gc_performance_on_shutdown_) {
     // This can't be called from the Heap destructor below because it
     // could call RosAlloc::InspectAll() which needs the thread_list
@@ -204,7 +209,7 @@
 }
 
 struct AbortState {
-  void Dump(std::ostream& os) {
+  void Dump(std::ostream& os) const {
     if (gAborting > 1) {
       os << "Runtime aborting --- recursively, so no thread-specific detail!\n";
       return;
@@ -235,7 +240,7 @@
   }
 
   // No thread-safety analysis as we do explicitly test for holding the mutator lock.
-  void DumpThread(std::ostream& os, Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+  void DumpThread(std::ostream& os, Thread* self) const NO_THREAD_SAFETY_ANALYSIS {
     DCHECK(Locks::mutator_lock_->IsExclusiveHeld(self) || Locks::mutator_lock_->IsSharedHeld(self));
     self->Dump(os);
     if (self->IsExceptionPending()) {
@@ -247,7 +252,7 @@
     }
   }
 
-  void DumpAllThreads(std::ostream& os, Thread* self) {
+  void DumpAllThreads(std::ostream& os, Thread* self) const {
     Runtime* runtime = Runtime::Current();
     if (runtime != nullptr) {
       ThreadList* thread_list = runtime->GetThreadList();
@@ -430,12 +435,11 @@
       return false;
     }
   } else {
-    bool have_native_bridge = !native_bridge_library_filename_.empty();
-    if (have_native_bridge) {
+    if (is_native_bridge_loaded_) {
       PreInitializeNativeBridge(".");
     }
-    DidForkFromZygote(self->GetJniEnv(), have_native_bridge ? NativeBridgeAction::kInitialize :
-        NativeBridgeAction::kUnload, GetInstructionSetString(kRuntimeISA));
+    DidForkFromZygote(self->GetJniEnv(), NativeBridgeAction::kInitialize,
+                      GetInstructionSetString(kRuntimeISA));
   }
 
   StartDaemonThreads();
@@ -514,14 +518,17 @@
 void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const char* isa) {
   is_zygote_ = false;
 
-  switch (action) {
-    case NativeBridgeAction::kUnload:
-      UnloadNativeBridge();
-      break;
+  if (is_native_bridge_loaded_) {
+    switch (action) {
+      case NativeBridgeAction::kUnload:
+        UnloadNativeBridge();
+        is_native_bridge_loaded_ = false;
+        break;
 
-    case NativeBridgeAction::kInitialize:
-      InitializeNativeBridge(env, isa);
-      break;
+      case NativeBridgeAction::kInitialize:
+        InitializeNativeBridge(env, isa);
+        break;
+    }
   }
 
   // Create the thread pool.
@@ -889,8 +896,7 @@
   // Runtime::Start():
   //   DidForkFromZygote(kInitialize) -> try to initialize any native bridge given.
   //   No-op wrt native bridge.
-  native_bridge_library_filename_ = options->native_bridge_library_filename_;
-  LoadNativeBridge(native_bridge_library_filename_);
+  is_native_bridge_loaded_ = LoadNativeBridge(options->native_bridge_library_filename_);
 
   VLOG(startup) << "Runtime::Init exiting";
   return true;
diff --git a/runtime/runtime.h b/runtime/runtime.h
index f3bea17..bfa7d72 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -635,14 +635,16 @@
   bool implicit_so_checks_;         // StackOverflow checks are implicit.
   bool implicit_suspend_checks_;    // Thread suspension checks are implicit.
 
-  // The filename to the native bridge library. If this is not empty the native bridge will be
-  // initialized and loaded from the given file (initialized and available). An empty value means
-  // that there's no native bridge (initialized but not available).
+  // Whether or not a native bridge has been loaded.
   //
   // The native bridge allows running native code compiled for a foreign ISA. The way it works is,
   // if standard dlopen fails to load native library associated with native activity, it calls to
   // the native bridge to load it and then gets the trampoline for the entry to native activity.
-  std::string native_bridge_library_filename_;
+  //
+  // The option 'native_bridge_library_filename' specifies the name of the native bridge.
+  // When non-empty, the native bridge will be loaded from the given file. An empty value means
+  // that there's no native bridge.
+  bool is_native_bridge_loaded_;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
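A condensed view of the native bridge lifecycle after these hunks (illustrative comments, not ART code):

    // Runtime::Init()     : is_native_bridge_loaded_ = LoadNativeBridge(filename);
    // Runtime::Start()    : if loaded, PreInitializeNativeBridge(".") and
    //                       DidForkFromZygote(kInitialize, ...).
    // DidForkFromZygote() : acts on kInitialize/kUnload only while
    //                       is_native_bridge_loaded_ is true; kUnload clears it.
    // Runtime::~Runtime() : UnloadNativeBridge() if still loaded.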
diff --git a/runtime/runtime_android.cc b/runtime/runtime_android.cc
index 079d7e5..33600dd 100644
--- a/runtime/runtime_android.cc
+++ b/runtime/runtime_android.cc
@@ -32,13 +32,12 @@
 
 struct sigaction old_action;
 void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context) {
-  static bool handlingUnexpectedSignal = false;
-  if (handlingUnexpectedSignal) {
-    LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
-    LogMessage::LogLine(data, "HandleUnexpectedSignal reentered\n");
+  static bool handling_unexpected_signal = false;
+  if (handling_unexpected_signal) {
+    LogMessage::LogLine(__FILE__, __LINE__, INTERNAL_FATAL, "HandleUnexpectedSignal reentered\n");
     _exit(1);
   }
-  handlingUnexpectedSignal = true;
+  handling_unexpected_signal = true;
   gAborting++;  // set before taking any locks
   MutexLock mu(Thread::Current(), *Locks::unexpected_signal_lock_);
 
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index 46ee274..1de035c 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -21,6 +21,9 @@
 #include <sys/utsname.h>
 #include <inttypes.h>
 
+#include <sstream>
+
+#include "base/dumpable.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "base/stringprintf.h"
@@ -32,13 +35,13 @@
 static constexpr bool kDumpHeapObjectOnSigsevg = false;
 
 struct Backtrace {
-  void Dump(std::ostream& os) {
+  void Dump(std::ostream& os) const {
     DumpNativeStack(os, GetTid(), "\t");
   }
 };
 
 struct OsInfo {
-  void Dump(std::ostream& os) {
+  void Dump(std::ostream& os) const {
     utsname info;
     uname(&info);
     // Linux 2.6.38.8-gg784 (x86_64)
@@ -132,9 +135,11 @@
 }
 
 struct UContext {
-  explicit UContext(void* raw_context) : context(reinterpret_cast<ucontext_t*>(raw_context)->uc_mcontext) {}
+  explicit UContext(void* raw_context) :
+      context(reinterpret_cast<ucontext_t*>(raw_context)->uc_mcontext) {
+  }
 
-  void Dump(std::ostream& os) {
+  void Dump(std::ostream& os) const {
     // TODO: support non-x86 hosts (not urgent because this code doesn't run on targets).
 #if defined(__APPLE__) && defined(__i386__)
     DumpRegister32(os, "eax", context->__ss.__eax);
@@ -228,15 +233,15 @@
 #endif
   }
 
-  void DumpRegister32(std::ostream& os, const char* name, uint32_t value) {
+  void DumpRegister32(std::ostream& os, const char* name, uint32_t value) const {
     os << StringPrintf(" %6s: 0x%08x", name, value);
   }
 
-  void DumpRegister64(std::ostream& os, const char* name, uint64_t value) {
+  void DumpRegister64(std::ostream& os, const char* name, uint64_t value) const {
     os << StringPrintf(" %6s: 0x%016" PRIx64, name, value);
   }
 
-  void DumpX86Flags(std::ostream& os, uint32_t flags) {
+  void DumpX86Flags(std::ostream& os, uint32_t flags) const {
     os << " [";
     if ((flags & (1 << 0)) != 0) {
       os << " CF";
@@ -274,8 +279,7 @@
 void HandleUnexpectedSignal(int signal_number, siginfo_t* info, void* raw_context) {
   static bool handlingUnexpectedSignal = false;
   if (handlingUnexpectedSignal) {
-    LogMessageData data(__FILE__, __LINE__, INTERNAL_FATAL, -1);
-    LogMessage::LogLine(data, "HandleUnexpectedSignal reentered\n");
+    LogMessage::LogLine(__FILE__, __LINE__, INTERNAL_FATAL, "HandleUnexpectedSignal reentered\n");
     _exit(1);
   }
   handlingUnexpectedSignal = true;
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 336340e..d4ec803 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -25,6 +25,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include <sstream>
+
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "gc/heap.h"
@@ -131,7 +133,7 @@
 
   runtime->DumpForSigQuit(os);
 
-  if (false) {
+  if ((false)) {
     std::string maps;
     if (ReadFileToString("/proc/self/maps", &maps)) {
       os << "/proc/self/maps:\n" << maps;
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 0cdc984..0adf031 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -127,7 +127,8 @@
     }
   } else if (m->IsOptimized()) {
     // TODO: Implement, currently only used for exceptions when jdwp is enabled.
-    LOG(WARNING) << "StackVisitor::GetThisObject is unimplemented with the optimizing compiler";
+    UNIMPLEMENTED(WARNING)
+        << "StackVisitor::GetThisObject is unimplemented with the optimizing compiler";
     return nullptr;
   } else {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index b1c46a9..a58ecab 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -286,7 +286,7 @@
       }
     }
     LOG(FATAL) << "Unreachable";
-    return StackMap(MemoryRegion());
+    UNREACHABLE();
   }
 
   StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset) {
@@ -298,7 +298,7 @@
       }
     }
     LOG(FATAL) << "Unreachable";
-    return StackMap(MemoryRegion());
+    UNREACHABLE();
   }
 
  private:
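
Replacing the dummy return of StackMap(MemoryRegion()) with UNREACHABLE() relies on the macro marking the tail of the function as unreachable; a sketch of why that satisfies the compiler, with abort() and __builtin_unreachable() standing in for LOG(FATAL) and ART's UNREACHABLE macro:

#include <cstdlib>

int FindOrDie(const int* data, int len, int value) {
  for (int i = 0; i < len; ++i) {
    if (data[i] == value) {
      return i;
    }
  }
  abort();                  // stands in for LOG(FATAL) << "Unreachable"
  __builtin_unreachable();  // stands in for UNREACHABLE(); no dummy return needed
}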
diff --git a/runtime/thread.cc b/runtime/thread.cc
index f93b15b..da82c76 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -29,9 +29,11 @@
 #include <cerrno>
 #include <iostream>
 #include <list>
+#include <sstream>
 
 #include "arch/context.h"
 #include "base/mutex.h"
+#include "base/to_str.h"
 #include "class_linker-inl.h"
 #include "class_linker.h"
 #include "debugger.h"
@@ -1969,6 +1971,7 @@
   exception_handler.UpdateInstrumentationStack();
   exception_handler.DoLongJump();
   LOG(FATAL) << "UNREACHABLE";
+  UNREACHABLE();
 }
 
 Context* Thread::GetLongJumpContext() {
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 1254056..0284364 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -16,6 +16,8 @@
 
 #include "thread.h"
 
+#include <signal.h>
+
 namespace art {
 
 void Thread::SetNativePriority(int) {
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 646830a..f8c8fdb 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -25,6 +25,8 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include <sstream>
+
 #include "base/mutex.h"
 #include "base/mutex-inl.h"
 #include "base/timing_logger.h"
@@ -480,17 +482,18 @@
   VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") complete";
 }
 
-static void ThreadSuspendByPeerWarning(Thread* self, int level, const char* message, jobject peer) {
+static void ThreadSuspendByPeerWarning(Thread* self, LogSeverity severity, const char* message,
+                                       jobject peer) {
   JNIEnvExt* env = self->GetJniEnv();
   ScopedLocalRef<jstring>
       scoped_name_string(env, (jstring)env->GetObjectField(peer,
                                                           WellKnownClasses::java_lang_Thread_name));
   ScopedUtfChars scoped_name_chars(env, scoped_name_string.get());
   if (scoped_name_chars.c_str() == NULL) {
-      LOG(level) << message << ": " << peer;
+      LOG(severity) << message << ": " << peer;
       env->ExceptionClear();
   } else {
-      LOG(level) << message << ": " << peer << ":" << scoped_name_chars.c_str();
+      LOG(severity) << message << ": " << peer << ":" << scoped_name_chars.c_str();
   }
 }
 
@@ -562,8 +565,9 @@
   }
 }
 
-static void ThreadSuspendByThreadIdWarning(int level, const char* message, uint32_t thread_id) {
-  LOG(level) << StringPrintf("%s: %d", message, thread_id);
+static void ThreadSuspendByThreadIdWarning(LogSeverity severity, const char* message,
+                                           uint32_t thread_id) {
+  LOG(severity) << StringPrintf("%s: %d", message, thread_id);
 }
 
 Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id, bool debug_suspension,
@@ -662,6 +666,7 @@
     {
       MutexLock mu(self, *Locks::thread_suspend_count_lock_);
       // Update global suspend all state for attaching threads.
+      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
       ++suspend_all_count_;
       ++debug_suspend_all_count_;
       // Increment everybody's suspend count (except our own).
@@ -753,6 +758,55 @@
   VLOG(threads) << *self << " self-reviving (debugger)";
 }
 
+void ThreadList::ResumeAllForDebugger() {
+  Thread* self = Thread::Current();
+  Thread* debug_thread = Dbg::GetDebugThread();
+  bool needs_resume = false;
+
+  VLOG(threads) << *self << " ResumeAllForDebugger starting...";
+
+  // Threads can't resume if we exclusively hold the mutator lock.
+  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
+
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    {
+      MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+      // Update global suspend all state for attaching threads.
+      DCHECK_GE(suspend_all_count_, debug_suspend_all_count_);
+      needs_resume = (debug_suspend_all_count_ > 0);
+      if (needs_resume) {
+        --suspend_all_count_;
+        --debug_suspend_all_count_;
+        // Decrement everybody's suspend count (except our own).
+        for (const auto& thread : list_) {
+          if (thread == self || thread == debug_thread) {
+            continue;
+          }
+          if (thread->GetDebugSuspendCount() == 0) {
+            // This thread may have been individually resumed with ThreadReference.Resume.
+            continue;
+          }
+          VLOG(threads) << "requesting thread resume: " << *thread;
+          thread->ModifySuspendCount(self, -1, true);
+        }
+      } else {
+        // We've been asked to resume all threads, but there was no matching
+        // suspend-all request; warn instead of underflowing the counts.
+        LOG(WARNING) << "Debugger attempted to resume all threads without "
+                     << "having suspended them first.";
+      }
+    }
+  }
+
+  if (needs_resume) {
+    MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+    Thread::resume_cond_->Broadcast(self);
+  }
+
+  VLOG(threads) << *self << " ResumeAllForDebugger complete";
+}
+
 void ThreadList::UndoDebuggerSuspensions() {
   Thread* self = Thread::Current();
 
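
The ThreadSuspendByPeerWarning/ThreadSuspendByThreadIdWarning signature change from int to LogSeverity buys compile-time checking: with an enum parameter, a stray integer no longer converts silently. Sketch with an illustrative enum (ART's real LogSeverity is declared in base/logging.h):

enum LogSeverity { INFO, WARNING, ERROR, FATAL };

void WarnAtSeverity(LogSeverity severity, const char* message);

void Caller() {
  WarnAtSeverity(WARNING, "thread suspension timed out");  // OK
  // WarnAtSeverity(1, "oops");  // error: int does not convert to LogSeverity
}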
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 9f47f9f..a7f2c53 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -105,6 +105,11 @@
   void SuspendSelfForDebugger()
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_);
 
+  // Resume all threads previously suspended for the debugger.
+  void ResumeAllForDebugger()
+      LOCKS_EXCLUDED(Locks::thread_list_lock_,
+                     Locks::thread_suspend_count_lock_);
+
   void UndoDebuggerSuspensions()
       LOCKS_EXCLUDED(Locks::thread_list_lock_,
                      Locks::thread_suspend_count_lock_);
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 91a37fd..b3158a4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -18,6 +18,9 @@
 
 #include <sys/uio.h>
 
+#define ATRACE_TAG ATRACE_TAG_DALVIK
+#include "cutils/trace.h"
+
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
diff --git a/runtime/verifier/register_line.h b/runtime/verifier/register_line.h
index c7fd369..8f7823a 100644
--- a/runtime/verifier/register_line.h
+++ b/runtime/verifier/register_line.h
@@ -118,9 +118,7 @@
 
   void FillWithGarbage() {
     memset(&line_, 0xf1, num_regs_ * sizeof(uint16_t));
-    while (!monitors_.empty()) {
-      monitors_.pop_back();
-    }
+    monitors_.clear();
     reg_to_lock_depths_.clear();
   }
 
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 4a3c3ec..16338c4 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -18,6 +18,8 @@
 
 #include <stdlib.h>
 
+#include <sstream>
+
 #include "base/logging.h"
 #include "mirror/class.h"
 #include "ScopedLocalRef.h"
@@ -52,6 +54,7 @@
 jclass WellKnownClasses::java_lang_ThreadGroup;
 jclass WellKnownClasses::java_lang_Throwable;
 jclass WellKnownClasses::java_nio_DirectByteBuffer;
+jclass WellKnownClasses::java_util_ArrayList;
 jclass WellKnownClasses::java_util_Collections;
 jclass WellKnownClasses::libcore_util_EmptyArray;
 jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_Chunk;
@@ -95,8 +98,10 @@
 jfieldID WellKnownClasses::java_lang_Thread_priority;
 jfieldID WellKnownClasses::java_lang_Thread_uncaughtHandler;
 jfieldID WellKnownClasses::java_lang_Thread_nativePeer;
+jfieldID WellKnownClasses::java_lang_ThreadGroup_groups;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_mainThreadGroup;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_name;
+jfieldID WellKnownClasses::java_lang_ThreadGroup_parent;
 jfieldID WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup;
 jfieldID WellKnownClasses::java_lang_Throwable_cause;
 jfieldID WellKnownClasses::java_lang_Throwable_detailMessage;
@@ -108,6 +113,8 @@
 jfieldID WellKnownClasses::java_lang_reflect_Proxy_h;
 jfieldID WellKnownClasses::java_nio_DirectByteBuffer_capacity;
 jfieldID WellKnownClasses::java_nio_DirectByteBuffer_effectiveDirectAddress;
+jfieldID WellKnownClasses::java_util_ArrayList_array;
+jfieldID WellKnownClasses::java_util_ArrayList_size;
 jfieldID WellKnownClasses::java_util_Collections_EMPTY_LIST;
 jfieldID WellKnownClasses::libcore_util_EmptyArray_STACK_TRACE_ELEMENT;
 jfieldID WellKnownClasses::org_apache_harmony_dalvik_ddmc_Chunk_data;
@@ -187,6 +194,7 @@
   java_lang_ThreadGroup = CacheClass(env, "java/lang/ThreadGroup");
   java_lang_Throwable = CacheClass(env, "java/lang/Throwable");
   java_nio_DirectByteBuffer = CacheClass(env, "java/nio/DirectByteBuffer");
+  java_util_ArrayList = CacheClass(env, "java/util/ArrayList");
   java_util_Collections = CacheClass(env, "java/util/Collections");
   libcore_util_EmptyArray = CacheClass(env, "libcore/util/EmptyArray");
   org_apache_harmony_dalvik_ddmc_Chunk = CacheClass(env, "org/apache/harmony/dalvik/ddmc/Chunk");
@@ -225,8 +233,10 @@
   java_lang_Thread_priority = CacheField(env, java_lang_Thread, false, "priority", "I");
   java_lang_Thread_uncaughtHandler = CacheField(env, java_lang_Thread, false, "uncaughtHandler", "Ljava/lang/Thread$UncaughtExceptionHandler;");
   java_lang_Thread_nativePeer = CacheField(env, java_lang_Thread, false, "nativePeer", "J");
+  java_lang_ThreadGroup_groups = CacheField(env, java_lang_ThreadGroup, false, "groups", "Ljava/util/List;");
   java_lang_ThreadGroup_mainThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "mainThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_name = CacheField(env, java_lang_ThreadGroup, false, "name", "Ljava/lang/String;");
+  java_lang_ThreadGroup_parent = CacheField(env, java_lang_ThreadGroup, false, "parent", "Ljava/lang/ThreadGroup;");
   java_lang_ThreadGroup_systemThreadGroup = CacheField(env, java_lang_ThreadGroup, true, "systemThreadGroup", "Ljava/lang/ThreadGroup;");
   java_lang_Throwable_cause = CacheField(env, java_lang_Throwable, false, "cause", "Ljava/lang/Throwable;");
   java_lang_Throwable_detailMessage = CacheField(env, java_lang_Throwable, false, "detailMessage", "Ljava/lang/String;");
@@ -238,6 +248,8 @@
   java_lang_reflect_Proxy_h = CacheField(env, java_lang_reflect_Proxy, false, "h", "Ljava/lang/reflect/InvocationHandler;");
   java_nio_DirectByteBuffer_capacity = CacheField(env, java_nio_DirectByteBuffer, false, "capacity", "I");
   java_nio_DirectByteBuffer_effectiveDirectAddress = CacheField(env, java_nio_DirectByteBuffer, false, "effectiveDirectAddress", "J");
+  java_util_ArrayList_array = CacheField(env, java_util_ArrayList, false, "array", "[Ljava/lang/Object;");
+  java_util_ArrayList_size = CacheField(env, java_util_ArrayList, false, "size", "I");
   java_util_Collections_EMPTY_LIST = CacheField(env, java_util_Collections, true, "EMPTY_LIST", "Ljava/util/List;");
   libcore_util_EmptyArray_STACK_TRACE_ELEMENT = CacheField(env, libcore_util_EmptyArray, true, "STACK_TRACE_ELEMENT", "[Ljava/lang/StackTraceElement;");
   org_apache_harmony_dalvik_ddmc_Chunk_data = CacheField(env, org_apache_harmony_dalvik_ddmc_Chunk, false, "data", "[B");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 790d7f7..d651b90 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -64,6 +64,7 @@
   static jclass java_lang_ThreadGroup;
   static jclass java_lang_Thread__UncaughtExceptionHandler;
   static jclass java_lang_Throwable;
+  static jclass java_util_ArrayList;
   static jclass java_util_Collections;
   static jclass java_nio_DirectByteBuffer;
   static jclass libcore_util_EmptyArray;
@@ -111,8 +112,10 @@
   static jfieldID java_lang_Thread_priority;
   static jfieldID java_lang_Thread_uncaughtHandler;
   static jfieldID java_lang_Thread_nativePeer;
+  static jfieldID java_lang_ThreadGroup_groups;
   static jfieldID java_lang_ThreadGroup_mainThreadGroup;
   static jfieldID java_lang_ThreadGroup_name;
+  static jfieldID java_lang_ThreadGroup_parent;
   static jfieldID java_lang_ThreadGroup_systemThreadGroup;
   static jfieldID java_lang_Throwable_cause;
   static jfieldID java_lang_Throwable_detailMessage;
@@ -121,6 +124,8 @@
   static jfieldID java_lang_Throwable_suppressedExceptions;
   static jfieldID java_nio_DirectByteBuffer_capacity;
   static jfieldID java_nio_DirectByteBuffer_effectiveDirectAddress;
+  static jfieldID java_util_ArrayList_array;
+  static jfieldID java_util_ArrayList_size;
   static jfieldID java_util_Collections_EMPTY_LIST;
   static jfieldID libcore_util_EmptyArray_STACK_TRACE_ELEMENT;
   static jfieldID org_apache_harmony_dalvik_ddmc_Chunk_data;
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index fbc8c3f..17bfe8f 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+#include <stdio.h>
+#include <stdlib.h>
+
 #ifdef HAVE_ANDROID_OS
 #include <android/log.h>
 #else
@@ -21,8 +24,6 @@
 #include <iostream>
 #endif
 
-#include <stdlib.h>
-
 #include "sigchain.h"
 
 static void log(const char* format, ...) {
diff --git a/test/115-native-bridge/run b/test/115-native-bridge/run
index e475cd6..32a9975 100644
--- a/test/115-native-bridge/run
+++ b/test/115-native-bridge/run
@@ -18,9 +18,9 @@
 
 # Use libnativebridgetest as a native bridge, start NativeBridgeMain (Main is JniTest main file).
 LIBPATH=$(echo ${ARGS} | sed -r 's/.*Djava.library.path=([^ ]*) .*/\1/')
-cp ${LIBPATH}/libnativebridgetest.so .
+ln -s ${LIBPATH}/libnativebridgetest.so .
 touch libarttest.so
-cp ${LIBPATH}/libarttest.so libarttest2.so
+ln -s ${LIBPATH}/libarttest.so libarttest2.so
 
 # pwd likely has /, so it's a pain to put that into a sed rule.
 LEFT=$(echo ${ARGS} | sed -r 's/-Djava.library.path.*//')
diff --git a/test/411-optimizing-arith/src/Main.java b/test/411-optimizing-arith/src/Main.java
index 4de2271..a22c516 100644
--- a/test/411-optimizing-arith/src/Main.java
+++ b/test/411-optimizing-arith/src/Main.java
@@ -74,7 +74,6 @@
 
   public static void main(String[] args) {
     mul();
-    neg();
   }
 
   public static void mul() {
@@ -164,34 +163,6 @@
     expectEquals(Double.POSITIVE_INFINITY, $opt$Mul(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
   }
 
-  public static void neg() {
-    expectEquals(-1, $opt$Neg(1));
-    expectEquals(1, $opt$Neg(-1));
-    expectEquals(0, $opt$Neg(0));
-    expectEquals(51, $opt$Neg(-51));
-    expectEquals(-51, $opt$Neg(51));
-    expectEquals(2147483647, $opt$Neg(-2147483647));  // (2^31 - 1)
-    expectEquals(-2147483647, $opt$Neg(2147483647));  // -(2^31 - 1)
-    // From the Java 7 SE Edition specification:
-    // http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.15.4
-    //
-    //   For integer values, negation is the same as subtraction from
-    //   zero.  The Java programming language uses two's-complement
-    //   representation for integers, and the range of two's-complement
-    //   values is not symmetric, so negation of the maximum negative
-    //   int or long results in that same maximum negative number.
-    //   Overflow occurs in this case, but no exception is thrown.
-    //   For all integer values x, -x equals (~x)+1.''
-    expectEquals(-2147483648, $opt$Neg(-2147483648)); // -(2^31)
-
-    $opt$InplaceNegOne(1);
-  }
-
-  public static void $opt$InplaceNegOne(int a) {
-    a = -a;
-    expectEquals(-1, a);
-  }
-
   static int $opt$Mul(int a, int b) {
     return a * b;
   }
@@ -207,9 +178,4 @@
   static double $opt$Mul(double a, double b) {
     return a * b;
   }
-
-  static int $opt$Neg(int a){
-    return -a;
-  }
-
 }
diff --git a/test/412-new-array/src/Main.java b/test/412-new-array/src/Main.java
index 3c74275..168420c 100644
--- a/test/412-new-array/src/Main.java
+++ b/test/412-new-array/src/Main.java
@@ -24,6 +24,8 @@
   public static void main(String[] args) throws Exception {
     $opt$TestAllocations();
     $opt$TestWithInitializations();
+    $opt$TestNegativeValueNewByteArray();
+    $opt$TestNegativeValueNewCharArray();
     testSmaliFilledNewArray();
     testSmaliFillArrayData();
     testSmaliVerifyError();
@@ -109,6 +111,24 @@
     assertEquals(obj2, i[1]);
   }
 
+  static void $opt$TestNegativeValueNewByteArray() {
+    // Use an array initializer to hint the use of filled-new-array.
+    byte[] a = { (byte)0xa0, (byte)0xa1, (byte)0xa2, (byte)0xa3,
+                 (byte)0xa4, (byte)0xa5, (byte)0xa6, (byte)0xa7 };
+    for (int i = 0; i < a.length; i++) {
+      assertEquals((byte)0xa0 + i, a[i]);
+    }
+  }
+
+  static void $opt$TestNegativeValueNewCharArray() {
+    // Use an array initializer to hint at the use of filled-new-array.
+    char[] a = { (char)0xa000, (char)0xa001, (char)0xa002, (char)0xa003,
+                 (char)0xa004, (char)0xa005, (char)0xa006, (char)0xa007 };
+    for (int i = 0; i < a.length; i++) {
+      assertEquals((char)0xa000 + i, a[i]);
+    }
+  }
+
   public static void testSmaliFilledNewArray() throws Exception {
     Class<?> c = Class.forName("FilledNewArray");
 
diff --git a/test/414-optimizing-arith-sub/expected.txt b/test/414-optimizing-arith-sub/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/414-optimizing-arith-sub/expected.txt
diff --git a/test/414-optimizing-arith-sub/info.txt b/test/414-optimizing-arith-sub/info.txt
new file mode 100644
index 0000000..1eaa148
--- /dev/null
+++ b/test/414-optimizing-arith-sub/info.txt
@@ -0,0 +1 @@
+Subtraction tests.
diff --git a/test/414-optimizing-arith-sub/src/Main.java b/test/414-optimizing-arith-sub/src/Main.java
new file mode 100644
index 0000000..30e8436
--- /dev/null
+++ b/test/414-optimizing-arith-sub/src/Main.java
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it does compile the method.
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(double expected, double result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectApproxEquals(float a, float b) {
+    float maxDelta = 0.0001F;
+    boolean approxEquals = (a > b) ? ((a - b) < maxDelta) : ((b - a) < maxDelta);
+    if (!approxEquals) {
+      throw new Error("Expected: " + a + ", found: " + b + ", with delta: " + maxDelta + " " + (a - b));
+    }
+  }
+
+  public static void expectApproxEquals(double a, double b) {
+    double maxDelta = 0.00001D;
+    boolean approxEquals = (a > b) ? ((a - b) < maxDelta) : ((b - a) < maxDelta);
+    if (!approxEquals) {
+      throw new Error("Expected: " + a + ", found: " + b + ", with delta: " + maxDelta + " " + (a - b));
+    }
+  }
+
+  public static void expectNaN(float a) {
+    if (a == a) {
+      throw new Error("Expected NaN: " + a);
+    }
+  }
+
+  public static void expectNaN(double a) {
+    if (a == a) {
+      throw new Error("Expected NaN: " + a);
+    }
+  }
+
+  public static void main(String[] args) {
+    subInt();
+    subLong();
+    subFloat();
+    subDouble();
+  }
+
+  private static void subInt() {
+    expectEquals(2, $opt$Sub(5, 3));
+    expectEquals(0, $opt$Sub(0, 0));
+    expectEquals(-3, $opt$Sub(0, 3));
+    expectEquals(3, $opt$Sub(3, 0));
+    expectEquals(4, $opt$Sub(1, -3));
+    expectEquals(-9, $opt$Sub(-12, -3));
+    expectEquals(134217724, $opt$Sub(134217729, 5)); // (2^27 + 1) - 5
+  }
+
+  private static void subLong() {
+    expectEquals(2L, $opt$Sub(5L, 3L));
+    expectEquals(0L, $opt$Sub(0L, 0L));
+    expectEquals(-3L, $opt$Sub(0L, 3L));
+    expectEquals(3L, $opt$Sub(3L, 0L));
+    expectEquals(4L, $opt$Sub(1L, -3L));
+    expectEquals(-9L, $opt$Sub(-12L, -3L));
+    expectEquals(134217724L, $opt$Sub(134217729L, 5L)); // (2^27 + 1) - 5
+    expectEquals(34359738362L, $opt$Sub(34359738369L, 7L)); // (2^35 + 1) - 7
+  }
+
+  private static void subFloat() {
+    expectApproxEquals(2F, $opt$Sub(5F, 3F));
+    expectApproxEquals(0F, $opt$Sub(0F, 0F));
+    expectApproxEquals(-3F, $opt$Sub(0F, 3F));
+    expectApproxEquals(3F, $opt$Sub(3F, 0F));
+    expectApproxEquals(4F, $opt$Sub(1F, -3F));
+    expectApproxEquals(-9F, $opt$Sub(-12F, -3F));
+    expectApproxEquals(34359738362F, $opt$Sub(34359738369F, 7F)); // (2^35 + 1) - 7
+    expectApproxEquals(-0.1F, $opt$Sub(0.1F, 0.2F));
+    expectApproxEquals(0.2F, $opt$Sub(-0.5F, -0.7F));
+
+    expectNaN($opt$Sub(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY));
+    expectNaN($opt$Sub(Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY));
+    expectNaN($opt$Sub(Float.NaN, 11F));
+    expectNaN($opt$Sub(Float.NaN, -11F));
+    expectNaN($opt$Sub(Float.NaN, Float.NEGATIVE_INFINITY));
+    expectNaN($opt$Sub(Float.NaN, Float.POSITIVE_INFINITY));
+
+    expectEquals(Float.NEGATIVE_INFINITY, $opt$Sub(-Float.MAX_VALUE, Float.MAX_VALUE));
+    expectEquals(Float.NEGATIVE_INFINITY, $opt$Sub(2F, Float.POSITIVE_INFINITY));
+    expectEquals(Float.POSITIVE_INFINITY, $opt$Sub(Float.MAX_VALUE, -Float.MAX_VALUE));
+    expectEquals(Float.POSITIVE_INFINITY, $opt$Sub(2F, Float.NEGATIVE_INFINITY));
+    expectEquals(Float.POSITIVE_INFINITY, $opt$Sub(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY));
+    expectEquals(Float.NEGATIVE_INFINITY, $opt$Sub(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
+  }
+
+  private static void subDouble() {
+    expectApproxEquals(2D, $opt$Sub(5D, 3D));
+    expectApproxEquals(0D, $opt$Sub(0D, 0D));
+    expectApproxEquals(-3D, $opt$Sub(0D, 3D));
+    expectApproxEquals(3D, $opt$Sub(3D, 0D));
+    expectApproxEquals(4D, $opt$Sub(1D, -3D));
+    expectApproxEquals(-9D, $opt$Sub(-12D, -3D));
+    expectApproxEquals(134217724D, $opt$Sub(134217729D, 5D)); // (2^27 + 1) - 5
+    expectApproxEquals(34359738362D, $opt$Sub(34359738369D, 7D)); // (2^35 + 1) - 7
+    expectApproxEquals(-0.1D, $opt$Sub(0.1D, 0.2D));
+    expectApproxEquals(0.2D, $opt$Sub(-0.5D, -0.7D));
+
+    expectNaN($opt$Sub(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
+    expectNaN($opt$Sub(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
+    expectNaN($opt$Sub(Double.NaN, 11D));
+    expectNaN($opt$Sub(Double.NaN, -11D));
+    expectNaN($opt$Sub(Double.NaN, Double.NEGATIVE_INFINITY));
+    expectNaN($opt$Sub(Double.NaN, Double.POSITIVE_INFINITY));
+
+    expectEquals(Double.NEGATIVE_INFINITY, $opt$Sub(-Double.MAX_VALUE, Double.MAX_VALUE));
+    expectEquals(Double.NEGATIVE_INFINITY, $opt$Sub(2D, Double.POSITIVE_INFINITY));
+    expectEquals(Double.POSITIVE_INFINITY, $opt$Sub(Double.MAX_VALUE, -Double.MAX_VALUE));
+    expectEquals(Double.POSITIVE_INFINITY, $opt$Sub(2D, Double.NEGATIVE_INFINITY));
+    expectEquals(Double.POSITIVE_INFINITY, $opt$Sub(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
+    expectEquals(Double.NEGATIVE_INFINITY, $opt$Sub(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
+  }
+
+  static int $opt$Sub(int a, int b) {
+    return a - b;
+  }
+
+  static long $opt$Sub(long a, long b) {
+    return a - b;
+  }
+
+  static float $opt$Sub(float a, float b) {
+    return a - b;
+  }
+
+  static double $opt$Sub(double a, double b) {
+    return a - b;
+  }
+
+}
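
The expectNaN helpers above use the fact that NaN is the only value that compares unequal to itself; the same check works in C++ under IEEE-754 semantics (i.e. without -ffast-math):

#include <cassert>
#include <limits>

int main() {
  double nan = std::numeric_limits<double>::quiet_NaN();
  assert(nan != nan);     // only NaN fails self-equality
  assert(!(nan == nan));  // the form the Java test uses: if (a == a) -> not NaN
  return 0;
}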
diff --git a/test/415-optimizing-arith-neg/expected.txt b/test/415-optimizing-arith-neg/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/415-optimizing-arith-neg/expected.txt
diff --git a/test/415-optimizing-arith-neg/info.txt b/test/415-optimizing-arith-neg/info.txt
new file mode 100644
index 0000000..8494aad
--- /dev/null
+++ b/test/415-optimizing-arith-neg/info.txt
@@ -0,0 +1 @@
+Tests for arithmetic negation operations.
diff --git a/test/415-optimizing-arith-neg/src/Main.java b/test/415-optimizing-arith-neg/src/Main.java
new file mode 100644
index 0000000..b21b998
--- /dev/null
+++ b/test/415-optimizing-arith-neg/src/Main.java
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it does compile the method.
+public class Main {
+
+  public static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void expectEquals(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static void main(String[] args) {
+    negInt();
+    $opt$InplaceNegOneInt(1);
+
+    negLong();
+    $opt$InplaceNegOneLong(1L);
+  }
+
+  private static void negInt() {
+    expectEquals(-1, $opt$NegInt(1));
+    expectEquals(1, $opt$NegInt(-1));
+    expectEquals(0, $opt$NegInt(0));
+    expectEquals(51, $opt$NegInt(-51));
+    expectEquals(-51, $opt$NegInt(51));
+    expectEquals(2147483647, $opt$NegInt(-2147483647));  // (2^31 - 1)
+    expectEquals(-2147483647, $opt$NegInt(2147483647));  // -(2^31 - 1)
+    // From the Java SE 7 Edition of the Java Language Specification:
+    // http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.15.4
+    //
+    //   For integer values, negation is the same as subtraction from
+    //   zero.  The Java programming language uses two's-complement
+    //   representation for integers, and the range of two's-complement
+    //   values is not symmetric, so negation of the maximum negative
+    //   int or long results in that same maximum negative number.
+    //   Overflow occurs in this case, but no exception is thrown.
+    //   For all integer values x, -x equals (~x)+1.
+    expectEquals(-2147483648, $opt$NegInt(-2147483648)); // -(2^31)
+  }
+
+  private static void $opt$InplaceNegOneInt(int a) {
+    a = -a;
+    expectEquals(-1, a);
+  }
+
+  private static void negLong() {
+    expectEquals(-1L, $opt$NegLong(1L));
+    expectEquals(1L, $opt$NegLong(-1L));
+    expectEquals(0L, $opt$NegLong(0L));
+    expectEquals(51L, $opt$NegLong(-51L));
+    expectEquals(-51L, $opt$NegLong(51L));
+
+    expectEquals(2147483647L, $opt$NegLong(-2147483647L));  // (2^31 - 1)
+    expectEquals(-2147483647L, $opt$NegLong(2147483647L));  // -(2^31 - 1)
+    expectEquals(2147483648L, $opt$NegLong(-2147483648L));  // 2^31
+    expectEquals(-2147483648L, $opt$NegLong(2147483648L));  // -(2^31)
+
+    expectEquals(9223372036854775807L, $opt$NegLong(-9223372036854775807L));  // (2^63 - 1)
+    expectEquals(-9223372036854775807L, $opt$NegLong(9223372036854775807L));  // -(2^63 - 1)
+    // See the remark in negInt() about negating the most negative
+    // int value; the same overflow behavior applies to long.
+    expectEquals(-9223372036854775808L, $opt$NegLong(-9223372036854775808L)); // -(2^63)
+  }
+
+  private static void $opt$InplaceNegOneLong(long a) {
+    a = -a;
+    expectEquals(-1L, a);
+  }
+
+  static int $opt$NegInt(int a) {
+    return -a;
+  }
+
+  static long $opt$NegLong(long a) {
+    return -a;
+  }
+}
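
The JLS passage quoted in negInt() reduces to two's-complement identities. A sketch using unsigned arithmetic, which C++ defines even where signed overflow would not be (the constants mirror the Java test's values):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 51;
  assert(~x + 1u == static_cast<uint32_t>(-51));  // -x == (~x) + 1
  // The most negative int is its own negation: ~0x80000000 + 1 wraps back
  // to 0x80000000, matching $opt$NegInt(-2147483648) == -2147483648.
  uint32_t min = 0x80000000u;
  assert(~min + 1u == min);
  return 0;
}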
diff --git a/test/800-smali/build b/test/800-smali/build
deleted file mode 100644
index 1b5a4e3..0000000
--- a/test/800-smali/build
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Copyright (C) 2014 The Android Open Source Project
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Stop if something fails.
-set -e
-
-# Compile Java classes
-mkdir classes
-${JAVAC} -d classes `find src -name '*.java'`
-${DX} -JXmx256m --debug --dex --output=java_classes.dex classes
-
-# Compile Smali classes
-${SMALI} -JXmx256m --output smali_classes.dex `find src -name '*.smali'`
-
-# Combine files.
-${DXMERGER} classes.dex java_classes.dex smali_classes.dex
-
-# Zip up output.
-zip $TEST_NAME.jar classes.dex
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 468e7a6..4002fbf 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -1,2 +1,3 @@
 b/17790197
+FloatBadArgReg
 Done!
diff --git a/test/800-smali/info.txt b/test/800-smali/info.txt
index cfcc230..3022962 100644
--- a/test/800-smali/info.txt
+++ b/test/800-smali/info.txt
@@ -1,4 +1,4 @@
 Smali-based tests.
-Will compile and run all the smali files in src/ and run the test cases mentioned in src/Main.java.
+Will compile all the smali files in smali/ and run the test cases listed in src/Main.java.
 
 Obviously needs to run under Dalvik or ART.
diff --git a/test/800-smali/smali/FloatBadArgReg.smali b/test/800-smali/smali/FloatBadArgReg.smali
new file mode 100644
index 0000000..719ba09
--- /dev/null
+++ b/test/800-smali/smali/FloatBadArgReg.smali
@@ -0,0 +1,16 @@
+.class public LFloatBadArgReg;
+
+.super Ljava/lang/Object;
+
+.method public static getInt(I)I
+    .registers 2
+    const/4 v0, 0x0
+    if-ne v0, v0, :after
+    float-to-int v0, v0
+    :exit
+    add-int/2addr v0, v1
+    return v0
+    :after
+    move v1, v0
+    goto :exit
+.end method
diff --git a/test/800-smali/src/b_17790197.smali b/test/800-smali/smali/b_17790197.smali
similarity index 100%
rename from test/800-smali/src/b_17790197.smali
rename to test/800-smali/smali/b_17790197.smali
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index 0ef3a9d..c86470c 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -49,6 +49,8 @@
         testCases = new LinkedList<TestCase>();
 
         testCases.add(new TestCase("b/17790197", "B17790197", "getInt", null, null, 100));
+        testCases.add(new TestCase("FloatBadArgReg", "FloatBadArgReg", "getInt",
+            new Object[]{100}, null, 100));
     }
 
     public void runTests() {
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index fd95038..55de1f3 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -58,8 +58,7 @@
   ifeq ($$(art_target_or_host),target)
     $(call set-target-local-clang-vars)
     $(call set-target-local-cflags-vars,debug)
-    LOCAL_SHARED_LIBRARIES += libdl libcutils
-    LOCAL_STATIC_LIBRARIES := libgtest
+    LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_MULTILIB := both
     LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
     LOCAL_MODULE_PATH_64 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_64)
@@ -68,11 +67,7 @@
   else # host
     LOCAL_CLANG := $(ART_HOST_CLANG)
     LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS)
-    LOCAL_STATIC_LIBRARIES := libcutils
     LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread
-    ifeq ($(HOST_OS),linux)
-      LOCAL_LDLIBS += -lrt
-    endif
     LOCAL_IS_HOST_MODULE := true
     LOCAL_MULTILIB := both
     include $(BUILD_HOST_SHARED_LIBRARY)
diff --git a/test/Android.libnativebridgetest.mk b/test/Android.libnativebridgetest.mk
index 5e2493c..1b20e69 100644
--- a/test/Android.libnativebridgetest.mk
+++ b/test/Android.libnativebridgetest.mk
@@ -51,7 +51,7 @@
   ifeq ($$(art_target_or_host),target)
     $(call set-target-local-clang-vars)
     $(call set-target-local-cflags-vars,debug)
-    LOCAL_SHARED_LIBRARIES += libdl libcutils
+    LOCAL_SHARED_LIBRARIES += libdl
     LOCAL_STATIC_LIBRARIES := libgtest
     LOCAL_MULTILIB := both
     LOCAL_MODULE_PATH_32 := $(ART_TARGET_TEST_OUT)/$(ART_TARGET_ARCH_32)
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 427e0b1..2de4d5c 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -67,7 +67,10 @@
 # General rules to build and run a run-test.
 
 TARGET_TYPES := host target
-PREBUILD_TYPES := prebuild
+PREBUILD_TYPES :=
+ifeq ($(ART_TEST_RUN_TEST_PREBUILD),true)
+  PREBUILD_TYPES += prebuild
+endif
 ifeq ($(ART_TEST_RUN_TEST_NO_PREBUILD),true)
   PREBUILD_TYPES += no-prebuild
 endif
@@ -117,8 +120,12 @@
 ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true)
   RUN_TYPES += ndebug
 endif
-ADDRESS_SIZES_TARGET := $(ART_PHONY_TEST_TARGET_SUFFIX) $(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
-ADDRESS_SIZES_HOST := $(ART_PHONY_TEST_HOST_SUFFIX) $(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+ADDRESS_SIZES_TARGET := $(ART_PHONY_TEST_TARGET_SUFFIX)
+ADDRESS_SIZES_HOST := $(ART_PHONY_TEST_HOST_SUFFIX)
+ifeq ($(ART_TEST_RUN_TEST_2ND_ARCH),true)
+  ADDRESS_SIZES_TARGET += $(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
+  ADDRESS_SIZES_HOST += $(2ND_ART_PHONY_TEST_HOST_SUFFIX)
+endif
 ALL_ADDRESS_SIZES := 64 32
 
 # List all run test names with number arguments agreeing with the comment above.
@@ -290,6 +297,46 @@
 
 TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
 
+# Known broken tests for the arm64 optimizing compiler backend.
+TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS := \
+  003-omnibus-opcodes \
+  006-args \
+  011-array-copy \
+  018-stack-overflow \
+  036-finalizer \
+  044-proxy \
+  070-nio-buffer \
+  072-precise-gc \
+  082-inline-execute \
+  083-compiler-regressions \
+  093-serialization \
+  096-array-copy-concurrent-gc \
+  100-reflect2 \
+  106-exceptions2 \
+  107-int-math2 \
+  121-modifiers \
+  122-npe \
+  123-compiler-regressions-mt \
+  405-optimizing-long-allocator \
+  407-arrays \
+  410-floats \
+  411-optimizing-arith \
+  412-new-array \
+  413-regalloc-regression \
+  414-optimizing-arith-sub \
+  415-optimizing-arith-neg \
+  700-LoadArgRegs \
+  800-smali
+
+ifneq (,$(filter optimizing,$(COMPILER_TYPES)))
+  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+      optimizing,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+      $(IMAGE_TYPES),$(TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS),64)
+endif
+
+TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=
+
+
 # Clear variables ahead of appending to them when defining tests.
 $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
 $(foreach target, $(TARGET_TYPES), \
diff --git a/test/etc/default-build b/test/etc/default-build
index 009736b..3369dc6 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -20,7 +20,7 @@
 mkdir classes
 ${JAVAC} -d classes `find src -name '*.java'`
 
-if [ -r src2 ]; then
+if [ -d src2 ]; then
   ${JAVAC} -d classes `find src2 -name '*.java'`
 fi
 
@@ -28,13 +28,13 @@
   ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
 fi
 
-if [ -r smali ]; then
+if [ -d smali ]; then
   # Compile Smali classes
   ${SMALI} -JXmx256m --output smali_classes.dex `find smali -name '*.smali'`
   ${DXMERGER} classes.dex classes.dex smali_classes.dex
 fi
 
-if [ -r src-ex ]; then
+if [ -d src-ex ]; then
   mkdir classes-ex
   ${JAVAC} -d classes-ex -cp classes `find src-ex -name '*.java'`
   if [ ${NEED_DEX} = "true" ]; then
diff --git a/test/run-test b/test/run-test
index 73ffc31..2ef3ab1 100755
--- a/test/run-test
+++ b/test/run-test
@@ -469,9 +469,6 @@
   file_size_limit=5120
 elif echo "$test_dir" | grep 083; then
   file_size_limit=5120
-elif echo "$test_dir" | grep 115; then
-# Native bridge test copies libarttest.so into its directory, which needs 2MB already.
-  file_size_limit=5120
 fi
 if ! ulimit -S "$file_size_limit"; then
    echo "ulimit file size setting failed"