Merge "Fix test-art-host-oat flakiness" into dalvik-dev
diff --git a/src/compiler/codegen/arm/arm_lir.h b/src/compiler/codegen/arm/arm_lir.h
index 3fc8792..c41f53b 100644
--- a/src/compiler/codegen/arm/arm_lir.h
+++ b/src/compiler/codegen/arm/arm_lir.h
@@ -371,7 +371,7 @@
   kThumb2StrbRRI12,  // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
   kThumb2Pop,        // pop   [1110100010111101] list[15-0]*/
   kThumb2Push,       // push  [1110100100101101] list[15-0]*/
-  kThumb2CmpRI8,     // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+  kThumb2CmpRI12,    // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
   kThumb2AdcRRR,     // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2AndRRR,     // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2BicRRR,     // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
@@ -445,6 +445,9 @@
   kThumb2Pop1,       // t3 encoding of pop.
   kThumb2RsubRRR,    // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2Smull,      // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
+  kThumb2LdrdPcRel8, // ldrd rt, rt2, [pc, #+/-imm8 << 2] (offset range +/-1020).
+  kThumb2LdrdI8,     // ldrd rt, rt2, [rn, #+/-imm8 << 2] (offset range +/-1020).
+  kThumb2StrdI8,     // strd rt, rt2, [rn, #+/-imm8 << 2] (offset range +/-1020).
   kArmLast,
 };
 
diff --git a/src/compiler/codegen/arm/assemble_arm.cc b/src/compiler/codegen/arm/assemble_arm.cc
index 91f25d6..455ea67 100644
--- a/src/compiler/codegen/arm/assemble_arm.cc
+++ b/src/compiler/codegen/arm/assemble_arm.cc
@@ -646,7 +646,7 @@
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
                  | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4),
-    ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00,
+    ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_BINARY_OP | REG_USE0 | SETS_CCODES,
@@ -917,8 +917,8 @@
                  "b", "!0t", 4),
     ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
-                 "movh", "!0C, #!1M", 4),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
+                 "movt", "!0C, #!1M", 4),
     ENCODING_MAP(kThumb2AddPCR,      0x4487,
                  kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -936,8 +936,8 @@
                  "mov", "!0C, #!1M", 4),
     ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
-                 "movh", "!0C, #!1M", 4),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
+                 "movt", "!0C, #!1M", 4),
     ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -972,7 +972,21 @@
                  kFmtBitBlt, 3, 0,
                  IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
                  "smull", "!0C, !1C, !2C, !3C", 4),
-
+    ENCODING_MAP(kThumb2LdrdPcRel8,  0xe9df0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+                 "ldrd", "!0C, !1C, [pc, #!2E]", 4),
+    ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
+                 "ldrd", "!0C, !1C, [!2C, #!3E]", 4),
+    ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0,
+                 IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
+                 "strd", "!0C, !1C, [!2C, #!3E]", 4),
 };
 
 /*
@@ -1023,13 +1037,14 @@
       if (lir->opcode == kThumbLdrPcRel ||
           lir->opcode == kThumb2LdrPcRel12 ||
           lir->opcode == kThumbAddPcRel ||
+          lir->opcode == kThumb2LdrdPcRel8 ||
           ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
           ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
         /*
          * PC-relative loads are mostly used to load immediates
          * that are too large to materialize directly in one shot.
          * However, if the load displacement exceeds the limit,
-         * we revert to a 2-instruction materialization sequence.
+         * we revert to a multiple-instruction materialization sequence.
          */
         LIR *lir_target = lir->target;
         uintptr_t pc = (lir->offset + 4) & ~3;
@@ -1044,8 +1059,9 @@
           // Shouldn't happen in current codegen.
           LOG(FATAL) << "Unexpected pc-rel offset " << delta;
         }
-        // Now, check for the two difficult cases
+        // Now, check for the difficult cases
         if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+            ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
             ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
             ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
           /*
@@ -1053,26 +1069,34 @@
            * vldrs/vldrd we include REG_DEF_LR in the resource
            * masks for these instructions.
            */
-          int base_reg = (lir->opcode == kThumb2LdrPcRel12) ?
-            lir->operands[0] : rARM_LR;
+          int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12))
+              ?  lir->operands[0] : rARM_LR;
 
-          // Add new Adr to generate the address
+          // Add new Adr to generate the address.
           LIR* new_adr = RawLIR(cu, lir->dalvik_offset, kThumb2Adr,
                      base_reg, 0, 0, 0, 0, lir->target);
           InsertLIRBefore(lir, new_adr);
 
-          // Convert to normal load
+          // Convert to normal load.
           if (lir->opcode == kThumb2LdrPcRel12) {
             lir->opcode = kThumb2LdrRRI12;
+          } else if (lir->opcode == kThumb2LdrdPcRel8) {
+            lir->opcode = kThumb2LdrdI8;
           }
-          // Change the load to be relative to the new Adr base
-          lir->operands[1] = base_reg;
-          lir->operands[2] = 0;
+          // Change the load to be relative to the new Adr base.
+          if (lir->opcode == kThumb2LdrdI8) {
+            lir->operands[3] = 0;
+            lir->operands[2] = base_reg;
+          } else {
+            lir->operands[2] = 0;
+            lir->operands[1] = base_reg;
+          }
           SetupResourceMasks(cu, lir);
           res = kRetryAll;
         } else {
           if ((lir->opcode == kThumb2Vldrs) ||
-              (lir->opcode == kThumb2Vldrd)) {
+              (lir->opcode == kThumb2Vldrd) ||
+              (lir->opcode == kThumb2LdrdPcRel8)) {
             lir->operands[2] = delta >> 2;
           } else {
             lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?  delta :
diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h
index ea34ff2..4dadd6c 100644
--- a/src/compiler/codegen/arm/codegen_arm.h
+++ b/src/compiler/codegen/arm/codegen_arm.h
@@ -37,8 +37,7 @@
                                      int displacement, int r_dest, int r_dest_hi, OpSize size,
                                      int s_reg);
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
-    virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi);
+    virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -89,12 +88,18 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2);
     virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
                                 RegLocation rl_index, RegLocation rl_src, int scale);
     virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale);
     virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+    virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -197,7 +202,14 @@
     static int EncodeShift(int code, int amount);
     static int ModifiedImmediate(uint32_t value);
     static ArmConditionCode ArmConditionEncoding(ConditionCode code);
-    bool InexpensiveConstant(int reg, int value);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
+
+  private:
+    void GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1,
+                                  int64_t val, ConditionCode ccode);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index fcf74f1..5a9786c 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -121,16 +121,81 @@
   branch3->target = branch1->target;
 }
 
-void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+void ArmCodegen::GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1,
+                                          int64_t val, ConditionCode ccode)
 {
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  DCHECK(ModifiedImmediate(val_lo) >= 0);
+  DCHECK(ModifiedImmediate(val_hi) >= 0);
   LIR* label_list = cu->block_label_list;
   LIR* taken = &label_list[bb->taken->id];
   LIR* not_taken = &label_list[bb->fall_through->id];
+  rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+  int32_t low_reg = rl_src1.low_reg;
+  int32_t high_reg = rl_src1.high_reg;
+
+  switch(ccode) {
+    case kCondEq:
+      OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, not_taken);
+      break;
+    case kCondNe:
+      OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, taken);
+      break;
+    case kCondLt:
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken);
+      ccode = kCondCc;
+      break;
+    case kCondLe:
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken);
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken);
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken);
+      OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken);
+      ccode = kCondCs;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << ccode;
+  }
+  OpCmpImmBranch(cu, ccode, low_reg, val_lo, taken);
+}
+
+
+void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+{
   RegLocation rl_src1 = GetSrcWide(cu, mir, 0);
   RegLocation rl_src2 = GetSrcWide(cu, mir, 2);
+  // Normalize such that if either operand is constant, src2 will be constant.
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  if (rl_src1.is_const) {
+    RegLocation rl_temp = rl_src1;
+    rl_src1 = rl_src2;
+    rl_src2 = rl_temp;
+    ccode = FlipComparisonOrder(ccode);
+  }
+  if (rl_src2.is_const) {
+    RegLocation rl_temp = UpdateLocWide(cu, rl_src2);
+    // Do special compare/branch against simple const operand if not already in registers.
+    int64_t val = ConstantValueWide(cu, rl_src2);
+    if ((rl_temp.location != kLocPhysReg) &&
+        ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
+      GenFusedLongCmpImmBranch(cu, bb, rl_src1, val, ccode);
+      return;
+    }
+  }
+  LIR* label_list = cu->block_label_list;
+  LIR* taken = &label_list[bb->taken->id];
+  LIR* not_taken = &label_list[bb->fall_through->id];
   rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
   OpRegReg(cu, kOpCmp, rl_src1.high_reg, rl_src2.high_reg);
   switch(ccode) {
     case kCondEq:
@@ -185,7 +250,7 @@
     if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) {
       NewLIR2(cu, kThumbCmpRI8, reg, check_value);
     } else if (mod_imm >= 0) {
-      NewLIR2(cu, kThumb2CmpRI8, reg, mod_imm);
+      NewLIR2(cu, kThumb2CmpRI12, reg, mod_imm);
     } else {
       int t_reg = AllocTemp(cu);
       LoadConstant(cu, t_reg, check_value);
@@ -523,6 +588,93 @@
   return false;
 }
 
+
+ /*
+  * Check to see if a result pair has a misaligned overlap with an operand pair.  This
+  * is not usual for dx to generate, but it is legal (for now).  In a future rev of
+  * dex, we'll want to make this case illegal.
+  */
+static bool BadOverlap(CompilationUnit* cu, RegLocation rl_src, RegLocation rl_dest)
+{
+  DCHECK(rl_src.wide);
+  DCHECK(rl_dest.wide);
+  return (abs(SRegToVReg(cu, rl_src.s_reg_low) - SRegToVReg(cu, rl_dest.s_reg_low)) == 1);
+}
+
+void ArmCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2)
+{
+    /*
+     * To pull off inline multiply, we have a worst-case requirement of 8 temporary
+     * registers.  Normally for Arm, we get 5.  We can get to 6 by including
+     * lr in the temp set.  The only problematic case is all operands and result are
+     * distinct, and none have been promoted.  In that case, we can succeed by aggressively
+     * freeing operand temp registers after they are no longer needed.  All other cases
+     * can proceed normally.  We'll just punt on the case of the result having a misaligned
+     * overlap with either operand and send that case to a runtime handler.
+     */
+    RegLocation rl_result;
+    if (BadOverlap(cu, rl_src1, rl_dest) || (BadOverlap(cu, rl_src2, rl_dest))) {
+      int func_offset = ENTRYPOINT_OFFSET(pLmul);
+      FlushAllRegs(cu);
+      CallRuntimeHelperRegLocationRegLocation(cu, func_offset, rl_src1, rl_src2, false);
+      rl_result = GetReturnWide(cu, false);
+      StoreValueWide(cu, rl_dest, rl_result);
+      return;
+    }
+    // Temporarily add LR to the temp pool, and assign it to tmp1
+    MarkTemp(cu, rARM_LR);
+    FreeTemp(cu, rARM_LR);
+    int tmp1 = rARM_LR;
+    LockTemp(cu, rARM_LR);
+
+    rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+    rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg);
+
+    bool special_case = true;
+    // If operands are the same, or any pair has been promoted we're not the special case.
+    if ((rl_src1.s_reg_low == rl_src2.s_reg_low) ||
+        (!IsTemp(cu, rl_src1.low_reg) && !IsTemp(cu, rl_src1.high_reg)) ||
+        (!IsTemp(cu, rl_src2.low_reg) && !IsTemp(cu, rl_src2.high_reg))) {
+      special_case = false;
+    }
+    // Tuning: if rl_dest has been promoted and is *not* either operand, could use directly.
+    int res_lo = AllocTemp(cu);
+    int res_hi;
+    if (rl_src1.low_reg == rl_src2.low_reg) {
+      res_hi = AllocTemp(cu);
+      NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src1.low_reg, rl_src1.high_reg);
+      NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src1.low_reg, rl_src1.low_reg);
+      OpRegRegRegShift(cu, kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
+    } else {
+      // In the special case, all temps are now allocated
+      NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src2.low_reg, rl_src1.high_reg);
+      if (special_case) {
+        DCHECK_NE(rl_src1.low_reg, rl_src2.low_reg);
+        DCHECK_NE(rl_src1.high_reg, rl_src2.high_reg);
+        FreeTemp(cu, rl_src1.high_reg);
+      }
+      res_hi = AllocTemp(cu);
+
+      NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src2.low_reg, rl_src1.low_reg);
+      NewLIR4(cu, kThumb2Mla, tmp1, rl_src1.low_reg, rl_src2.high_reg, tmp1);
+      NewLIR4(cu, kThumb2AddRRR, res_hi, tmp1, res_hi, 0);
+      if (special_case) {
+        FreeTemp(cu, rl_src1.low_reg);
+        Clobber(cu, rl_src1.low_reg);
+        Clobber(cu, rl_src1.high_reg);
+      }
+    }
+    FreeTemp(cu, tmp1);
+    rl_result = GetReturnWide(cu, false); // Just using as a template.
+    rl_result.low_reg = res_lo;
+    rl_result.high_reg = res_hi;
+    StoreValueWide(cu, rl_dest, rl_result);
+    // Now, restore lr to its non-temp status.
+    Clobber(cu, rARM_LR);
+    UnmarkTemp(cu, rARM_LR);
+}
+
 bool ArmCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2)
 {
@@ -568,8 +720,11 @@
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
   RegLocation rl_result;
+  bool constant_index = rl_index.is_const;
   rl_array = LoadValue(cu, rl_array, kCoreReg);
-  rl_index = LoadValue(cu, rl_index, kCoreReg);
+  if (!constant_index) {
+    rl_index = LoadValue(cu, rl_index, kCoreReg);
+  }
 
   if (rl_dest.wide) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
@@ -577,6 +732,11 @@
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
+  // If index is constant, just fold it into the data offset
+  if (constant_index) {
+    data_offset += ConstantValue(cu, rl_index) << scale;
+  }
+
   /* null object? */
   GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
 
@@ -587,27 +747,38 @@
     /* Get len */
     LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
   }
-  if (rl_dest.wide || rl_dest.fp) {
-    // No special indexed operation, lea + load w/ displacement
-    int reg_ptr = AllocTemp(cu);
-    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
-                     EncodeShift(kArmLsl, scale));
-    FreeTemp(cu, rl_index.low_reg);
+  if (rl_dest.wide || rl_dest.fp || constant_index) {
+    int reg_ptr;
+    if (constant_index) {
+      reg_ptr = rl_array.low_reg;  // NOTE: must not alter reg_ptr in constant case.
+    } else {
+      // No special indexed operation, lea + load w/ displacement
+      reg_ptr = AllocTemp(cu);
+      OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                       EncodeShift(kArmLsl, scale));
+      FreeTemp(cu, rl_index.low_reg);
+    }
     rl_result = EvalLoc(cu, rl_dest, reg_class, true);
 
     if (needs_range_check) {
-      // TODO: change kCondCS to a more meaningful name, is the sense of
-      // carry-set/clear flipped?
-      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      if (constant_index) {
+        GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds);
+      } else {
+        GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
+      }
       FreeTemp(cu, reg_len);
     }
     if (rl_dest.wide) {
       LoadBaseDispWide(cu, reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
-      FreeTemp(cu, reg_ptr);
+      if (!constant_index) {
+        FreeTemp(cu, reg_ptr);
+      }
       StoreValueWide(cu, rl_dest, rl_result);
     } else {
       LoadBaseDisp(cu, reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG);
-      FreeTemp(cu, reg_ptr);
+      if (!constant_index) {
+        FreeTemp(cu, reg_ptr);
+      }
       StoreValue(cu, rl_dest, rl_result);
     }
   } else {
@@ -639,17 +810,28 @@
   RegisterClass reg_class = oat_reg_class_by_size(size);
   int len_offset = mirror::Array::LengthOffset().Int32Value();
   int data_offset;
+  bool constant_index = rl_index.is_const;
 
-  if (size == kLong || size == kDouble) {
+  if (rl_src.wide) {
     data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
   } else {
     data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
   }
 
+  // If index is constant, just fold it into the data offset.
+  if (constant_index) {
+    data_offset += ConstantValue(cu, rl_index) << scale;
+  }
+
   rl_array = LoadValue(cu, rl_array, kCoreReg);
-  rl_index = LoadValue(cu, rl_index, kCoreReg);
-  int reg_ptr = INVALID_REG;
-  if (IsTemp(cu, rl_array.low_reg)) {
+  if (!constant_index) {
+    rl_index = LoadValue(cu, rl_index, kCoreReg);
+  }
+
+  int reg_ptr;
+  if (constant_index) {
+    reg_ptr = rl_array.low_reg;
+  } else if (IsTemp(cu, rl_array.low_reg)) {
     Clobber(cu, rl_array.low_reg);
     reg_ptr = rl_array.low_reg;
   } else {
@@ -668,18 +850,25 @@
     LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
   }
   /* at this point, reg_ptr points to array, 2 live temps */
-  if (rl_src.wide || rl_src.fp) {
+  if (rl_src.wide || rl_src.fp || constant_index) {
     if (rl_src.wide) {
       rl_src = LoadValueWide(cu, rl_src, reg_class);
     } else {
       rl_src = LoadValue(cu, rl_src, reg_class);
     }
-    OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
-                     EncodeShift(kArmLsl, scale));
+    if (!constant_index) {
+      OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+                       EncodeShift(kArmLsl, scale));
+    }
     if (needs_range_check) {
-      GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+      if (constant_index) {
+        GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds);
+      } else {
+        GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
+      }
       FreeTemp(cu, reg_len);
     }
+
     if (rl_src.wide) {
       StoreBaseDispWide(cu, reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg);
     } else {
@@ -696,7 +885,9 @@
     StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg,
                      scale, size);
   }
-  FreeTemp(cu, reg_ptr);
+  if (!constant_index) {
+    FreeTemp(cu, reg_ptr);
+  }
 }
 
 /*
@@ -758,4 +949,163 @@
   MarkGCCard(cu, r_value, r_array);
 }
 
+bool ArmCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift)
+{
+  rl_src = LoadValueWide(cu, rl_src, kCoreReg);
+  // Per spec, we only care about low 6 bits of shift amount.
+  int shift_amount = ConstantValue(cu, rl_shift) & 0x3f;
+  if (shift_amount == 0) {
+    StoreValueWide(cu, rl_dest, rl_src);
+    return false; // TODO: remove useless bool return result.
+  }
+  if (BadOverlap(cu, rl_src, rl_dest)) {
+    return GenShiftOpLong(cu, opcode, rl_dest, rl_src, rl_shift);
+  }
+  RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+  switch(opcode) {
+    case Instruction::SHL_LONG:
+    case Instruction::SHL_LONG_2ADDR:
+      if (shift_amount == 1) {
+        OpRegRegReg(cu, kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg);
+        OpRegRegReg(cu, kOpAdc, rl_result.high_reg, rl_src.high_reg, rl_src.high_reg);
+      } else if (shift_amount == 32) {
+        OpRegCopy(cu, rl_result.high_reg, rl_src.low_reg);
+        LoadConstant(cu, rl_result.low_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.low_reg, shift_amount - 32);
+        LoadConstant(cu, rl_result.low_reg, 0);
+      } else {
+        OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.high_reg, shift_amount);
+        OpRegRegRegShift(cu, kOpOr, rl_result.high_reg, rl_result.high_reg, rl_src.low_reg,
+                         EncodeShift(kArmLsr, 32 - shift_amount));
+        OpRegRegImm(cu, kOpLsl, rl_result.low_reg, rl_src.low_reg, shift_amount);
+      }
+      break;
+    case Instruction::SHR_LONG:
+    case Instruction::SHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg);
+        OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(cu, kOpAsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
+        OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
+      } else {
+        int t_reg = AllocTemp(cu);
+        OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount);
+        OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
+                         EncodeShift(kArmLsl, 32 - shift_amount));
+        FreeTemp(cu, t_reg);
+        OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
+      }
+      break;
+    case Instruction::USHR_LONG:
+    case Instruction::USHR_LONG_2ADDR:
+      if (shift_amount == 32) {
+        OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg);
+        LoadConstant(cu, rl_result.high_reg, 0);
+      } else if (shift_amount > 31) {
+        OpRegRegImm(cu, kOpLsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
+        LoadConstant(cu, rl_result.high_reg, 0);
+      } else {
+        int t_reg = AllocTemp(cu);
+        OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount);
+        OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
+                         EncodeShift(kArmLsl, 32 - shift_amount));
+        FreeTemp(cu, t_reg);
+        OpRegRegImm(cu, kOpLsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected case";
+      return true;
+  }
+  StoreValueWide(cu, rl_dest, rl_result);
+  return false;
+}
+
+bool ArmCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+  if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
+    if (!rl_src2.is_const) {
+      // Don't bother with special handling for subtract from immediate.
+      return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+    }
+  } else {
+    // Normalize
+    if (!rl_src2.is_const) {
+      DCHECK(rl_src1.is_const);
+      RegLocation rl_temp = rl_src1;
+      rl_src1 = rl_src2;
+      rl_src2 = rl_temp;
+    }
+  }
+  if (BadOverlap(cu, rl_src1, rl_dest)) {
+    return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+  }
+  DCHECK(rl_src2.is_const);
+  int64_t val = ConstantValueWide(cu, rl_src2);
+  uint32_t val_lo = Low32Bits(val);
+  uint32_t val_hi = High32Bits(val);
+  int32_t mod_imm_lo = ModifiedImmediate(val_lo);
+  int32_t mod_imm_hi = ModifiedImmediate(val_hi);
+
+  // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
+  switch(opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
+        return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+      }
+      break;
+    default:
+      break;
+  }
+  rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+  RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+  // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      NewLIR3(cu, kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(cu, kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      break;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if ((val_lo != 0) || (rl_result.low_reg != rl_src1.low_reg)) {
+        OpRegRegImm(cu, kOpOr, rl_result.low_reg, rl_src1.low_reg, val_lo);
+      }
+      if ((val_hi != 0) || (rl_result.high_reg != rl_src1.high_reg)) {
+        OpRegRegImm(cu, kOpOr, rl_result.high_reg, rl_src1.high_reg, val_hi);
+      }
+      break;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      OpRegRegImm(cu, kOpXor, rl_result.low_reg, rl_src1.low_reg, val_lo);
+      OpRegRegImm(cu, kOpXor, rl_result.high_reg, rl_src1.high_reg, val_hi);
+      break;
+    case Instruction::AND_LONG:
+    case Instruction::AND_LONG_2ADDR:
+      if ((val_lo != 0xffffffff) || (rl_result.low_reg != rl_src1.low_reg)) {
+        OpRegRegImm(cu, kOpAnd, rl_result.low_reg, rl_src1.low_reg, val_lo);
+      }
+      if ((val_hi != 0xffffffff) || (rl_result.high_reg != rl_src1.high_reg)) {
+        OpRegRegImm(cu, kOpAnd, rl_result.high_reg, rl_src1.high_reg, val_hi);
+      }
+      break;
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::SUB_LONG:
+      NewLIR3(cu, kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+      NewLIR3(cu, kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected opcode " << opcode;
+  }
+  StoreValueWide(cu, rl_dest, rl_result);
+  return false;  // TODO: remove bool return value from all of these Gen routines.
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index 433111c..a670199 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -45,6 +45,32 @@
   return res;
 }
 
+/*
+ * Determine whether value can be encoded as a Thumb2 floating point
+ * immediate.  If not, return -1.  If so return encoded 8-bit value.
+ */
+static int EncodeImmDouble(int64_t value)
+{
+  int res;
+  int bit_a = (value & 0x8000000000000000ll) >> 63;
+  int not_bit_b = (value & 0x4000000000000000ll) >> 62;
+  int bit_b = (value & 0x2000000000000000ll) >> 61;
+  int b_smear = (value & 0x3fc0000000000000ll) >> 54;
+  int slice =  (value & 0x003f000000000000ll) >> 48;
+  uint64_t zeroes = (value & 0x0000ffffffffffffll);
+  if (zeroes != 0)
+    return -1;
+  if (bit_b) {
+    if ((not_bit_b != 0) || (b_smear != 0xff))
+      return -1;
+  } else {
+    if ((not_bit_b != 1) || (b_smear != 0x0))
+      return -1;
+  }
+  res = (bit_a << 7) | (bit_b << 6) | slice;
+  return res;
+}
+
 static LIR* LoadFPConstantValue(CompilationUnit* cu, int r_dest, int value)
 {
   DCHECK(ARM_SINGLEREG(r_dest));
@@ -126,19 +152,24 @@
    return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
 }
 
-bool ArmCodegen::InexpensiveConstant(int reg, int value)
+bool ArmCodegen::InexpensiveConstantInt(int32_t value)
 {
-  bool res = false;
-  if (ARM_FPREG(reg)) {
-    res = (EncodeImmSingle(value) >= 0);
-  } else {
-    if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) {
-      res = true;
-    } else {
-      res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
-    }
-  }
-  return res;
+  return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool ArmCodegen::InexpensiveConstantFloat(int32_t value)
+{
+  return EncodeImmSingle(value) >= 0;
+}
+
+bool ArmCodegen::InexpensiveConstantLong(int64_t value)
+{
+  return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool ArmCodegen::InexpensiveConstantDouble(int64_t value)
+{
+  return EncodeImmDouble(value) >= 0;
 }
 
 /*
@@ -178,25 +209,9 @@
     res = NewLIR2(cu, kThumb2MovImm16, r_dest, value);
     return res;
   }
-  /* No shortcut - go ahead and use literal pool */
-  LIR* data_target = ScanLiteralPool(cu->literal_list, value, 0);
-  if (data_target == NULL) {
-    data_target = AddWordData(cu, &cu->literal_list, value);
-  }
-  LIR* load_pc_rel = RawLIR(cu, cu->current_dalvik_offset,
-                          kThumb2LdrPcRel12, r_dest, 0, 0, 0, 0, data_target);
-  SetMemRefType(cu, load_pc_rel, true, kLiteral);
-  load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
-  res = load_pc_rel;
-  AppendLIR(cu, load_pc_rel);
-
-  /*
-   * To save space in the constant pool, we use the ADD_RRI8 instruction to
-   * add up to 255 to an existing constant value.
-   */
-  if (data_target->operands[0] != value) {
-    OpRegImm(cu, kOpAdd, r_dest, value - data_target->operands[0]);
-  }
+  /* Do a low/high pair */
+  res = NewLIR2(cu, kThumb2MovImm16, r_dest, Low16Bits(value));
+  NewLIR2(cu, kThumb2MovImm16H, r_dest, High16Bits(value));
   return res;
 }
 
@@ -514,7 +529,7 @@
       int mod_imm = ModifiedImmediate(value);
       LIR* res;
       if (mod_imm >= 0) {
-        res = NewLIR2(cu, kThumb2CmpRI8, r_src1, mod_imm);
+        res = NewLIR2(cu, kThumb2CmpRI12, r_src1, mod_imm);
       } else {
         int r_tmp = AllocTemp(cu);
         res = LoadConstant(cu, r_tmp, value);
@@ -587,44 +602,11 @@
   }
 }
 
-/*
- * Determine whether value can be encoded as a Thumb2 floating point
- * immediate.  If not, return -1.  If so return encoded 8-bit value.
- */
-static int EncodeImmDoubleHigh(int value)
+LIR* ArmCodegen::LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value)
 {
-  int res;
-  int bit_a =  (value & 0x80000000) >> 31;
-  int not_bit_b = (value & 0x40000000) >> 30;
-  int bit_b =  (value & 0x20000000) >> 29;
-  int b_smear =  (value & 0x3fc00000) >> 22;
-  int slice =   (value & 0x003f0000) >> 16;
-  int zeroes =  (value & 0x0000ffff);
-  if (zeroes != 0)
-    return -1;
-  if (bit_b) {
-    if ((not_bit_b != 0) || (b_smear != 0xff))
-      return -1;
-  } else {
-    if ((not_bit_b != 1) || (b_smear != 0x0))
-      return -1;
-  }
-  res = (bit_a << 7) | (bit_b << 6) | slice;
-  return res;
-}
-
-static int EncodeImmDouble(int val_lo, int val_hi)
-{
-  int res = -1;
-  if (val_lo == 0)
-    res = EncodeImmDoubleHigh(val_hi);
-  return res;
-}
-
-LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi)
-{
-  LIR* res;
+  LIR* res = NULL;
+  int32_t val_lo = Low32Bits(value);
+  int32_t val_hi = High32Bits(value);
   int target_reg = S2d(r_dest_lo, r_dest_hi);
   if (ARM_FPREG(r_dest_lo)) {
     if ((val_lo == 0) && (val_hi == 0)) {
@@ -635,26 +617,33 @@
       // +0.0 = +2.0 - +2.0
       res = NewLIR3(cu, kThumb2Vsubd, target_reg, target_reg, target_reg);
     } else {
-      int encoded_imm = EncodeImmDouble(val_lo, val_hi);
+      int encoded_imm = EncodeImmDouble(value);
       if (encoded_imm >= 0) {
         res = NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, encoded_imm);
-      } else {
-        LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
-        if (data_target == NULL) {
-          data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
-        }
-        LIR* load_pc_rel =
-            RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
-                   target_reg, r15pc, 0, 0, 0, data_target);
-        SetMemRefType(cu, load_pc_rel, true, kLiteral);
-        load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
-        AppendLIR(cu, load_pc_rel);
-        res = load_pc_rel;
       }
     }
   } else {
-    res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
-    LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+    if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
+      res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
+      LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+    }
+  }
+  if (res == NULL) {
+    // No short form - load from the literal pool.
+    LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
+    if (data_target == NULL) {
+      data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
+    }
+    if (ARM_FPREG(r_dest_lo)) {
+      res = RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
+                   target_reg, r15pc, 0, 0, 0, data_target);
+    } else {
+      res = RawLIR(cu, cu->current_dalvik_offset, kThumb2LdrdPcRel8,
+                   r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target);
+    }
+    SetMemRefType(cu, res, true, kLiteral);
+    res->alias_info = reinterpret_cast<uintptr_t>(data_target);
+    AppendLIR(cu, res);
   }
   return res;
 }
@@ -732,7 +721,7 @@
                                   int scale, OpSize size)
 {
   bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_src);
-  LIR* store;
+  LIR* store = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool thumb_form = (all_low_regs && (scale == 0));
   int reg_ptr;
@@ -798,14 +787,14 @@
                                   int r_dest_hi, OpSize size, int s_reg)
 {
   Codegen* cg = cu->cg.get();
-  LIR* res;
-  LIR* load;
+  LIR* load = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
   bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_dest));
   int encoded_disp = displacement;
   bool is64bit = false;
+  bool already_generated = false;
   switch (size) {
     case kDouble:
     case kLong:
@@ -822,11 +811,15 @@
         }
         break;
       } else {
-        res = LoadBaseDispBody(cu, rBase, displacement, r_dest,
-                               -1, kWord, s_reg);
-        LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
-                         -1, kWord, INVALID_SREG);
-        return res;
+        if (displacement <= 1020) {
+          load = NewLIR4(cu, kThumb2LdrdI8, r_dest, r_dest_hi, rBase, displacement >> 2);
+        } else {
+          load = LoadBaseDispBody(cu, rBase, displacement, r_dest,
+                                 -1, kWord, s_reg);
+          LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
+                           -1, kWord, INVALID_SREG);
+        }
+        already_generated = true;
       }
     case kSingle:
     case kWord:
@@ -894,13 +887,15 @@
       LOG(FATAL) << "Bad size: " << size;
   }
 
-  if (short_form) {
-    load = res = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
-  } else {
-    int reg_offset = AllocTemp(cu);
-    res = cg->LoadConstant(cu, reg_offset, encoded_disp);
-    load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
-    FreeTemp(cu, reg_offset);
+  if (!already_generated) {
+    if (short_form) {
+      load = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
+    } else {
+      int reg_offset = AllocTemp(cu);
+      cg->LoadConstant(cu, reg_offset, encoded_disp);
+      load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
+      FreeTemp(cu, reg_offset);
+    }
   }
 
   // TODO: in future may need to differentiate Dalvik accesses w/ spills
@@ -926,30 +921,36 @@
 LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacement,
                                    int r_src, int r_src_hi, OpSize size) {
   Codegen* cg = cu->cg.get();
-  LIR* res, *store;
+  LIR* store = NULL;
   ArmOpcode opcode = kThumbBkpt;
   bool short_form = false;
   bool thumb2Form = (displacement < 4092 && displacement >= 0);
   bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_src));
   int encoded_disp = displacement;
   bool is64bit = false;
+  bool already_generated = false;
   switch (size) {
     case kLong:
     case kDouble:
       is64bit = true;
       if (!ARM_FPREG(r_src)) {
-        res = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
-        StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
-        return res;
-      }
-      if (ARM_SINGLEREG(r_src)) {
-        DCHECK(ARM_FPREG(r_src_hi));
-        r_src = cg->S2d(r_src, r_src_hi);
-      }
-      opcode = kThumb2Vstrd;
-      if (displacement <= 1020) {
-        short_form = true;
-        encoded_disp >>= 2;
+        if (displacement <= 1020) {
+          store = NewLIR4(cu, kThumb2StrdI8, r_src, r_src_hi, rBase, displacement >> 2);
+        } else {
+          store = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
+          StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
+        }
+        already_generated = true;
+      } else {
+        if (ARM_SINGLEREG(r_src)) {
+          DCHECK(ARM_FPREG(r_src_hi));
+          r_src = cg->S2d(r_src, r_src_hi);
+        }
+        opcode = kThumb2Vstrd;
+        if (displacement <= 1020) {
+          short_form = true;
+          encoded_disp >>= 2;
+        }
       }
       break;
     case kSingle:
@@ -998,20 +999,22 @@
     default:
       LOG(FATAL) << "Bad size: " << size;
   }
-  if (short_form) {
-    store = res = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
-  } else {
-    int r_scratch = AllocTemp(cu);
-    res = cg->LoadConstant(cu, r_scratch, encoded_disp);
-    store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
-    FreeTemp(cu, r_scratch);
+  if (!already_generated) {
+    if (short_form) {
+      store = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
+    } else {
+      int r_scratch = AllocTemp(cu);
+      cg->LoadConstant(cu, r_scratch, encoded_disp);
+      store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
+      FreeTemp(cu, r_scratch);
+    }
   }
 
   // TODO: In future, may need to differentiate Dalvik & spill accesses
   if (rBase == rARM_SP) {
     AnnotateDalvikRegAccess(cu, store, displacement >> 2, false /* is_load */, is64bit);
   }
-  return res;
+  return store;
 }
 
 LIR* ArmCodegen::StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
diff --git a/src/compiler/codegen/codegen.h b/src/compiler/codegen/codegen.h
index 03ecb43..901e5da 100644
--- a/src/compiler/codegen/codegen.h
+++ b/src/compiler/codegen/codegen.h
@@ -236,8 +236,8 @@
                                      int displacement, int r_dest, int r_dest_hi, OpSize size,
                                      int s_reg) = 0;
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value) = 0;
-    virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi) = 0;
+    virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
+                                  int64_t value) = 0;
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size) = 0;
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -288,6 +288,10 @@
     virtual bool IsUnconditionalBranch(LIR* lir) = 0;
 
     // Required for target - Dalvik-level generators.
+    virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2) = 0;
+    virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2) = 0;
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -349,6 +353,9 @@
                              RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
     virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                      RegLocation rl_index, RegLocation rl_src, int scale) = 0;
+    virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1,
+                                   RegLocation rl_shift) = 0;
 
     // Required for target - single operation generators.
     virtual LIR* OpUnconditionalBranch(CompilationUnit* cu, LIR* target) = 0;
@@ -381,7 +388,10 @@
     virtual void OpRegCopyWide(CompilationUnit* cu, int dest_lo, int dest_hi, int src_lo,
                                int src_hi) = 0;
     virtual void OpTlsCmp(CompilationUnit* cu, int offset, int val) = 0;
-    virtual bool InexpensiveConstant(int reg, int value) = 0;
+    virtual bool InexpensiveConstantInt(int32_t value) = 0;
+    virtual bool InexpensiveConstantFloat(int32_t value) = 0;
+    virtual bool InexpensiveConstantLong(int64_t value) = 0;
+    virtual bool InexpensiveConstantDouble(int64_t value) = 0;
 
     // Temp workaround
     void Workaround7250540(CompilationUnit* cu, RegLocation rl_dest, int value);
diff --git a/src/compiler/codegen/codegen_util.cc b/src/compiler/codegen/codegen_util.cc
index ad05b93..57d932f 100644
--- a/src/compiler/codegen/codegen_util.cc
+++ b/src/compiler/codegen/codegen_util.cc
@@ -23,6 +23,27 @@
 
 namespace art {
 
+bool IsInexpensiveConstant(CompilationUnit* cu, RegLocation rl_src)
+{
+  bool res = false;
+  if (rl_src.is_const) {
+    if (rl_src.wide) {
+      if (rl_src.fp) {
+         res = cu->cg->InexpensiveConstantDouble(ConstantValueWide(cu, rl_src));
+      } else {
+         res = cu->cg->InexpensiveConstantLong(ConstantValueWide(cu, rl_src));
+      }
+    } else {
+      if (rl_src.fp) {
+         res = cu->cg->InexpensiveConstantFloat(ConstantValue(cu, rl_src));
+      } else {
+         res = cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src));
+      }
+    }
+  }
+  return res;
+}
+
 void MarkSafepointPC(CompilationUnit* cu, LIR* inst)
 {
   inst->def_mask = ENCODE_ALL;
@@ -202,6 +223,9 @@
       LOG(INFO) << "-------- entry offset: 0x" << std::hex << dest;
       break;
     case kPseudoDalvikByteCodeBoundary:
+      if (lir->operands[0] == 0) {
+         lir->operands[0] = reinterpret_cast<uintptr_t>("No instruction string");
+      }
       LOG(INFO) << "-------- dalvik offset: 0x" << std::hex
                 << lir->dalvik_offset << " @ " << reinterpret_cast<char*>(lir->operands[0]);
       break;
@@ -471,6 +495,8 @@
   LIR* lo_target = NULL;
   while (data_target) {
     if (lo_match && (data_target->operands[0] == val_hi)) {
+      // Record high word in case we need to expand this later.
+      lo_target->operands[1] = val_hi;
       return lo_target;
     }
     lo_match = false;
@@ -488,7 +514,7 @@
  * instruction streams.
  */
 
-/* Add a 32-bit constant either in the constant pool */
+/* Add a 32-bit constant to the constant pool */
 LIR* AddWordData(CompilationUnit* cu, LIR* *constant_list_p, int value)
 {
   /* Add the constant to the literal pool */
@@ -1097,4 +1123,21 @@
   return is_taken;
 }
 
+// Convert relation of src1/src2 to src2/src1
+ConditionCode FlipComparisonOrder(ConditionCode before) {
+  ConditionCode res;
+  switch (before) {
+    case kCondEq: res = kCondEq; break;
+    case kCondNe: res = kCondNe; break;
+    case kCondLt: res = kCondGt; break;
+    case kCondGt: res = kCondLt; break;
+    case kCondLe: res = kCondGe; break;
+    case kCondGe: res = kCondLe; break;
+    default:
+      res = static_cast<ConditionCode>(0);
+      LOG(FATAL) << "Unexpected ccode " << before;
+  }
+  return res;
+}
+
 } // namespace art
diff --git a/src/compiler/codegen/codegen_util.h b/src/compiler/codegen/codegen_util.h
index 4f14656..9b9bece 100644
--- a/src/compiler/codegen/codegen_util.h
+++ b/src/compiler/codegen/codegen_util.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "compiler/compiler_enums.h"
+#include "compiler/compiler_ir.h"
 
 namespace art {
 
@@ -59,6 +60,8 @@
 LIR* MarkBoundary(CompilationUnit* cu, int offset, const char* inst_str);
 void NopLIR(LIR* lir);
 bool EvaluateBranch(Instruction::Code opcode, int src1, int src2);
+bool IsInexpensiveConstant(CompilationUnit* cu, RegLocation rl_src);
+ConditionCode FlipComparisonOrder(ConditionCode before);
 
 }  // namespace art
 
diff --git a/src/compiler/codegen/gen_common.cc b/src/compiler/codegen/gen_common.cc
index 1d64a71..a4c8d0c 100644
--- a/src/compiler/codegen/gen_common.cc
+++ b/src/compiler/codegen/gen_common.cc
@@ -55,7 +55,7 @@
                             ThrowKind kind)
 {
   LIR* tgt = RawLIR(cu, 0, kPseudoThrowTarget, kind,
-                    cu->current_dalvik_offset);
+                    cu->current_dalvik_offset, reg, imm_val);
   LIR* branch;
   if (c_code == kCondAl) {
     branch = OpUnconditionalBranch(cu, tgt);
@@ -89,23 +89,6 @@
   return branch;
 }
 
-// Convert relation of src1/src2 to src2/src1
-ConditionCode FlipComparisonOrder(ConditionCode before) {
-  ConditionCode res;
-  switch (before) {
-    case kCondEq: res = kCondEq; break;
-    case kCondNe: res = kCondNe; break;
-    case kCondLt: res = kCondGt; break;
-    case kCondGt: res = kCondLt; break;
-    case kCondLe: res = kCondGe; break;
-    case kCondGe: res = kCondLe; break;
-    default:
-      res = static_cast<ConditionCode>(0);
-      LOG(FATAL) << "Unexpected ccode " << before;
-  }
-  return res;
-}
-
 void Codegen::GenCompareAndBranch(CompilationUnit* cu, Instruction::Code opcode,
                                   RegLocation rl_src1, RegLocation rl_src2, LIR* taken,
                                   LIR* fall_through)
@@ -146,12 +129,12 @@
   rl_src1 = LoadValue(cu, rl_src1, kCoreReg);
   // Is this really an immediate comparison?
   if (rl_src2.is_const) {
-    int immval = cu->constant_values[rl_src2.orig_sreg];
     // If it's already live in a register or not easily materialized, just keep going
     RegLocation rl_temp = UpdateLoc(cu, rl_src2);
-    if ((rl_temp.location == kLocDalvikFrame) && InexpensiveConstant(rl_src1.low_reg, immval)) {
+    if ((rl_temp.location == kLocDalvikFrame) &&
+        InexpensiveConstantInt(ConstantValue(cu, rl_src2))) {
       // OK - convert this to a compare immediate and branch
-      OpCmpImmBranch(cu, cond, rl_src1.low_reg, immval, taken);
+      OpCmpImmBranch(cu, cond, rl_src1.low_reg, ConstantValue(cu, rl_src2), taken);
       OpUnconditionalBranch(cu, fall_through);
       return;
     }
@@ -614,6 +597,18 @@
       case kThrowNullPointer:
         func_offset = ENTRYPOINT_OFFSET(pThrowNullPointerFromCode);
         break;
+      case kThrowConstantArrayBounds: // v1 is length reg (for Arm/Mips), v2 constant index
+        // v2 holds the constant array index.  Mips/Arm pass the length in v1; x86 reloads it from the array object in v1.
+        if (target_x86) {
+          OpRegMem(cu, kOpMov, TargetReg(kArg1), v1, mirror::Array::LengthOffset().Int32Value());
+        } else {
+          OpRegCopy(cu, TargetReg(kArg1), v1);
+        }
+        // Make sure the following LoadConstant doesn't mess with kArg1.
+        LockTemp(cu, TargetReg(kArg1));
+        LoadConstant(cu, TargetReg(kArg0), v2);
+        func_offset = ENTRYPOINT_OFFSET(pThrowArrayBoundsFromCode);
+        break;
       case kThrowArrayBounds:
         // Move v1 (array index) to kArg0 and v2 (array length) to kArg1
         if (v2 != TargetReg(kArg0)) {
@@ -1602,9 +1597,14 @@
       break;
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
-      call_out = true;
-      ret_reg = TargetReg(kRet0);
-      func_offset = ENTRYPOINT_OFFSET(pLmul);
+      if (cu->instruction_set == kThumb2) {
+        GenMulLong(cu, rl_dest, rl_src1, rl_src2);
+        return false;
+      } else {
+        call_out = true;
+        ret_reg = TargetReg(kRet0);
+        func_offset = ENTRYPOINT_OFFSET(pLmul);
+      }
       break;
     case Instruction::DIV_LONG:
     case Instruction::DIV_LONG_2ADDR:
diff --git a/src/compiler/codegen/gen_loadstore.cc b/src/compiler/codegen/gen_loadstore.cc
index b183f9e..c8f9c51 100644
--- a/src/compiler/codegen/gen_loadstore.cc
+++ b/src/compiler/codegen/gen_loadstore.cc
@@ -59,12 +59,20 @@
           return;
         }
       }
+      int temp_reg = zero_reg;
+      if (temp_reg == INVALID_REG) {
+        temp_reg = AllocTemp(cu);
+        cu->cg->LoadConstant(cu, temp_reg, 0);
+      }
       if (cu->promotion_map[pmap_index].core_location == kLocPhysReg) {
         // Promoted - just copy in a zero
-        OpRegCopy(cu, cu->promotion_map[pmap_index].core_reg, zero_reg);
+        OpRegCopy(cu, cu->promotion_map[pmap_index].core_reg, temp_reg);
       } else {
         // Lives in the frame, need to store.
-        StoreBaseDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_dest.s_reg_low), zero_reg, kWord);
+        StoreBaseDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_dest.s_reg_low), temp_reg, kWord);
+      }
+      if (zero_reg == INVALID_REG) {
+        FreeTemp(cu, temp_reg);
       }
     }
   }
@@ -92,14 +100,12 @@
   rl_src = UpdateLoc(cu, rl_src);
   if (rl_src.location == kLocPhysReg) {
     OpRegCopy(cu, r_dest, rl_src.low_reg);
+  } else if (IsInexpensiveConstant(cu, rl_src)) {
+    LoadConstantNoClobber(cu, r_dest, ConstantValue(cu, rl_src));
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
-    if (rl_src.is_const && InexpensiveConstant(r_dest, cu->constant_values[rl_src.orig_sreg])) {
-      LoadConstantNoClobber(cu, r_dest, cu->constant_values[rl_src.orig_sreg]);
-    } else {
-      LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest);
-    }
+    LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest);
   }
 }
 
@@ -126,6 +132,8 @@
   rl_src = UpdateLocWide(cu, rl_src);
   if (rl_src.location == kLocPhysReg) {
     OpRegCopyWide(cu, reg_lo, reg_hi, rl_src.low_reg, rl_src.high_reg);
+  } else if (IsInexpensiveConstant(cu, rl_src)) {
+    LoadConstantWide(cu, reg_lo, reg_hi, ConstantValueWide(cu, rl_src));
   } else {
     DCHECK((rl_src.location == kLocDalvikFrame) ||
            (rl_src.location == kLocCompilerTemp));
@@ -152,9 +160,7 @@
 RegLocation Codegen::LoadValue(CompilationUnit* cu, RegLocation rl_src, RegisterClass op_kind)
 {
   rl_src = EvalLoc(cu, rl_src, op_kind, false);
-  if (rl_src.location != kLocPhysReg) {
-    DCHECK((rl_src.location == kLocDalvikFrame) ||
-           (rl_src.location == kLocCompilerTemp));
+  if (IsInexpensiveConstant(cu, rl_src) || rl_src.location != kLocPhysReg) {
     LoadValueDirect(cu, rl_src, rl_src.low_reg);
     rl_src.location = kLocPhysReg;
     MarkLive(cu, rl_src.low_reg, rl_src.s_reg_low);
@@ -222,14 +228,11 @@
 {
   DCHECK(rl_src.wide);
   rl_src = EvalLoc(cu, rl_src, op_kind, false);
-  if (rl_src.location != kLocPhysReg) {
-    DCHECK((rl_src.location == kLocDalvikFrame) ||
-        (rl_src.location == kLocCompilerTemp));
+  if (IsInexpensiveConstant(cu, rl_src) || rl_src.location != kLocPhysReg) {
     LoadValueDirectWide(cu, rl_src, rl_src.low_reg, rl_src.high_reg);
     rl_src.location = kLocPhysReg;
     MarkLive(cu, rl_src.low_reg, rl_src.s_reg_low);
-    MarkLive(cu, rl_src.high_reg,
-                GetSRegHi(rl_src.s_reg_low));
+    MarkLive(cu, rl_src.high_reg, GetSRegHi(rl_src.s_reg_low));
   }
   return rl_src;
 }
diff --git a/src/compiler/codegen/local_optimizations.cc b/src/compiler/codegen/local_optimizations.cc
index b6981ca..2b86421 100644
--- a/src/compiler/codegen/local_optimizations.cc
+++ b/src/compiler/codegen/local_optimizations.cc
@@ -81,13 +81,20 @@
   if (head_lir == tail_lir) return;
 
   for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) {
+
+    if (is_pseudo_opcode(this_lir->opcode)) continue;
+
     int sink_distance = 0;
 
+    uint64_t target_flags = cg->GetTargetInstFlags(this_lir->opcode);
+
     /* Skip non-interesting instructions */
     if ((this_lir->flags.is_nop == true) ||
-        is_pseudo_opcode(this_lir->opcode) ||
-        (cg->GetTargetInstFlags(this_lir->opcode) & IS_BRANCH) ||
-        !(cg->GetTargetInstFlags(this_lir->opcode) & (IS_LOAD | IS_STORE))) {
+        (target_flags & IS_BRANCH) ||
+        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||  // Skip wide loads.
+        ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
+         (REG_USE0 | REG_USE1 | REG_USE2)) ||  // Skip wide stores.
+        !(target_flags & (IS_LOAD | IS_STORE))) {
       continue;
     }
 
@@ -130,7 +137,7 @@
        * Skip already dead instructions (whose dataflow information is
        * outdated and misleading).
        */
-      if (check_lir->flags.is_nop) continue;
+      if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) continue;
 
       uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM;
       uint64_t alias_condition = this_mem_mask & check_mem_mask;
@@ -139,14 +146,18 @@
       /*
        * Potential aliases seen - check the alias relations
        */
-      if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
-        bool is_check_lir_load = cg->GetTargetInstFlags(check_lir->opcode) & IS_LOAD;
+      uint64_t check_flags = cg->GetTargetInstFlags(check_lir->opcode);
+      // TUNING: Support instructions with multiple register targets.
+      if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) {
+        stop_here = true;
+      } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
+        bool is_check_lir_load = check_flags & IS_LOAD;
         if  (alias_condition == ENCODE_LITERAL) {
           /*
            * Should only see literal loads in the instruction
            * stream.
            */
-          DCHECK(!(cg->GetTargetInstFlags(check_lir->opcode) & IS_STORE));
+          DCHECK(!(check_flags & IS_STORE));
           /* Same value && same register type */
           if (check_lir->alias_info == this_lir->alias_info &&
               cg->SameRegType(check_lir->operands[0], native_reg_id)) {
@@ -276,10 +287,13 @@
   /* Start from the second instruction */
   for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
 
+    if (is_pseudo_opcode(this_lir->opcode)) continue;
+
+    uint64_t target_flags = cg->GetTargetInstFlags(this_lir->opcode);
     /* Skip non-interesting instructions */
     if ((this_lir->flags.is_nop == true) ||
-        is_pseudo_opcode(this_lir->opcode) ||
-        !(cg->GetTargetInstFlags(this_lir->opcode) & IS_LOAD)) {
+        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
+        !(target_flags & IS_LOAD)) {
       continue;
     }
 
diff --git a/src/compiler/codegen/mips/codegen_mips.h b/src/compiler/codegen/mips/codegen_mips.h
index 705ecfa..a4d44d5 100644
--- a/src/compiler/codegen/mips/codegen_mips.h
+++ b/src/compiler/codegen/mips/codegen_mips.h
@@ -38,8 +38,7 @@
                                      int displacement, int r_dest, int r_dest_hi, OpSize size,
                                      int s_reg);
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
-    virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi);
+    virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,12 +89,18 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2);
     virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
                                 RegLocation rl_index, RegLocation rl_src, int scale);
     virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale);
     virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+    virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -191,7 +196,10 @@
     void SpillCoreRegs(CompilationUnit* cu);
     void UnSpillCoreRegs(CompilationUnit* cu);
     static const MipsEncodingMap EncodingMap[kMipsLast];
-    bool InexpensiveConstant(int reg, int value);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/mips/int_mips.cc b/src/compiler/codegen/mips/int_mips.cc
index 7da4cf6..675cf8d 100644
--- a/src/compiler/codegen/mips/int_mips.cc
+++ b/src/compiler/codegen/mips/int_mips.cc
@@ -341,6 +341,13 @@
   return NULL;
 }
 
+void MipsCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                             RegLocation rl_src2)
+{
+  LOG(FATAL) << "Unexpected use of GenMulLong for Mips";
+  return;
+}
+
 bool MipsCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2)
 {
@@ -635,4 +642,18 @@
   MarkGCCard(cu, r_value, r_array);
 }
 
+bool MipsCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                    RegLocation rl_src1, RegLocation rl_shift)
+{
+  // Default implementation is just to ignore the constant case.
+  return GenShiftOpLong(cu, opcode, rl_dest, rl_src1, rl_shift);
+}
+
+bool MipsCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+  // Default - bail to non-const handler.
+  return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/mips/utility_mips.cc b/src/compiler/codegen/mips/utility_mips.cc
index 1e217fb..12d054c 100644
--- a/src/compiler/codegen/mips/utility_mips.cc
+++ b/src/compiler/codegen/mips/utility_mips.cc
@@ -52,17 +52,24 @@
   return res;
 }
 
-bool MipsCodegen::InexpensiveConstant(int reg, int value)
+bool MipsCodegen::InexpensiveConstantInt(int32_t value)
 {
-  bool res = false;
-  if (value == 0) {
-    res = true;
-  } else if (IsUint(16, value)) {
-    res = true;
-  } else if ((value < 0) && (value >= -32768)) {
-    res = true;
-  }
-  return res;
+  return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768)));
+}
+
+bool MipsCodegen::InexpensiveConstantFloat(int32_t value)
+{
+  return false;  // TUNING
+}
+
+bool MipsCodegen::InexpensiveConstantLong(int64_t value)
+{
+  return false;  // TUNING
+}
+
+bool MipsCodegen::InexpensiveConstantDouble(int64_t value)
+{
+  return false; // TUNING
 }
 
 /*
@@ -336,12 +343,11 @@
   return NewLIR2(cu, opcode, r_dest_src1, r_src2);
 }
 
-LIR* MipsCodegen::LoadConstantValueWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi,
-                                        int val_lo, int val_hi)
+LIR* MipsCodegen::LoadConstantWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, int64_t value)
 {
   LIR *res;
-  res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
-  LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+  res = LoadConstantNoClobber(cu, r_dest_lo, Low32Bits(value));
+  LoadConstantNoClobber(cu, r_dest_hi, High32Bits(value));
   return res;
 }
 
diff --git a/src/compiler/codegen/mir_to_gbc.cc b/src/compiler/codegen/mir_to_gbc.cc
index f67f760..ba90269 100644
--- a/src/compiler/codegen/mir_to_gbc.cc
+++ b/src/compiler/codegen/mir_to_gbc.cc
@@ -1018,7 +1018,7 @@
         }
         EmitPopShadowFrame(cu);
         cu->irb->CreateRet(GetLLVMValue(cu, rl_src[0].orig_sreg));
-        bb->has_return = true;
+        DCHECK(bb->has_return);
       }
       break;
 
@@ -1028,7 +1028,7 @@
         }
         EmitPopShadowFrame(cu);
         cu->irb->CreateRetVoid();
-        bb->has_return = true;
+        DCHECK(bb->has_return);
       }
       break;
 
@@ -2572,8 +2572,7 @@
   RegLocation rl_dest = GetLoc(cu, call_inst);
   RegLocation rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
   if (rl_dest.wide) {
-    cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg,
-                          (immval) & 0xffffffff, (immval >> 32) & 0xffffffff);
+    cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, immval);
     cg->StoreValueWide(cu, rl_dest, rl_result);
   } else {
     int immediate = immval & 0xffffffff;
diff --git a/src/compiler/codegen/mir_to_lir.cc b/src/compiler/codegen/mir_to_lir.cc
index bd26f2d..96de65e 100644
--- a/src/compiler/codegen/mir_to_lir.cc
+++ b/src/compiler/codegen/mir_to_lir.cc
@@ -164,23 +164,21 @@
     case Instruction::CONST_WIDE_16:
     case Instruction::CONST_WIDE_32:
       rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
-      cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg, vB,
-                            (vB & 0x80000000) ? -1 : 0);
+      cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg,
+                           static_cast<int64_t>(static_cast<int32_t>(vB)));
       cg->StoreValueWide(cu, rl_dest, rl_result);
       break;
 
     case Instruction::CONST_WIDE:
       rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
-      cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg,
-                            mir->dalvikInsn.vB_wide & 0xffffffff,
-                            (mir->dalvikInsn.vB_wide >> 32) & 0xffffffff);
+      cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide);
       cg->StoreValueWide(cu, rl_dest, rl_result);
       break;
 
     case Instruction::CONST_WIDE_HIGH16:
       rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
-      cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg,
-                            0, vB << 16);
+      cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg,
+                           static_cast<int64_t>(vB) << 48);
       cg->StoreValueWide(cu, rl_dest, rl_result);
       break;
 
@@ -543,11 +541,11 @@
     case Instruction::XOR_INT:
     case Instruction::XOR_INT_2ADDR:
       if (rl_src[0].is_const &&
-          cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[0].orig_sreg])) {
+          cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[0]))) {
         cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[1],
                              cu->constant_values[rl_src[0].orig_sreg]);
       } else if (rl_src[1].is_const &&
-          cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) {
+          cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[1]))) {
         cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0],
                              cu->constant_values[rl_src[1].orig_sreg]);
       } else {
@@ -568,9 +566,8 @@
     case Instruction::USHR_INT:
     case Instruction::USHR_INT_2ADDR:
       if (rl_src[1].is_const &&
-          cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) {
-        cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0],
-                             cu->constant_values[rl_src[1].orig_sreg]);
+          cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[1]))) {
+        cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0], ConstantValue(cu, rl_src[1]));
       } else {
         cg->GenArithOpInt(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
       }
@@ -578,20 +575,26 @@
 
     case Instruction::ADD_LONG:
     case Instruction::SUB_LONG:
-    case Instruction::MUL_LONG:
-    case Instruction::DIV_LONG:
-    case Instruction::REM_LONG:
     case Instruction::AND_LONG:
     case Instruction::OR_LONG:
     case Instruction::XOR_LONG:
     case Instruction::ADD_LONG_2ADDR:
     case Instruction::SUB_LONG_2ADDR:
-    case Instruction::MUL_LONG_2ADDR:
-    case Instruction::DIV_LONG_2ADDR:
-    case Instruction::REM_LONG_2ADDR:
     case Instruction::AND_LONG_2ADDR:
     case Instruction::OR_LONG_2ADDR:
     case Instruction::XOR_LONG_2ADDR:
+      if (rl_src[0].is_const || rl_src[1].is_const) {
+        cg->GenArithImmOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+        break;
+      }
+      // Note: intentional fallthrough.
+
+    case Instruction::MUL_LONG:
+    case Instruction::DIV_LONG:
+    case Instruction::REM_LONG:
+    case Instruction::MUL_LONG_2ADDR:
+    case Instruction::DIV_LONG_2ADDR:
+    case Instruction::REM_LONG_2ADDR:
       cg->GenArithOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
       break;
 
@@ -601,7 +604,11 @@
     case Instruction::SHL_LONG_2ADDR:
     case Instruction::SHR_LONG_2ADDR:
     case Instruction::USHR_LONG_2ADDR:
-      cg->GenShiftOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+      if (rl_src[1].is_const) {
+        cg->GenShiftImmOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+      } else {
+        cg->GenShiftOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+      }
       break;
 
     case Instruction::ADD_FLOAT:
diff --git a/src/compiler/codegen/ralloc_util.cc b/src/compiler/codegen/ralloc_util.cc
index afd4976..1d5f3ac 100644
--- a/src/compiler/codegen/ralloc_util.cc
+++ b/src/compiler/codegen/ralloc_util.cc
@@ -64,7 +64,7 @@
   }
 }
 
-static void DumpRegPool(RegisterInfo* p, int num_regs)
+void DumpRegPool(RegisterInfo* p, int num_regs)
 {
   LOG(INFO) << "================================================";
   for (int i = 0; i < num_regs; i++) {
@@ -1091,21 +1091,14 @@
     RegLocation loc = cu->reg_location[i];
     RefCounts* counts = loc.fp ? fp_counts : core_counts;
     int p_map_idx = SRegToPMap(cu, loc.s_reg_low);
-    int sample_reg = loc.fp ? cu->reg_pool->FPRegs[0].reg : cu->reg_pool->core_regs[0].reg;
-    bool simple_immediate = loc.is_const &&
-        !cu->cg->InexpensiveConstant(sample_reg, cu->constant_values[loc.orig_sreg]);
-    if (loc.defined) {
-      // Don't count easily regenerated immediates
-      if (!simple_immediate) {
-        counts[p_map_idx].count += cu->use_counts.elem_list[i];
-      }
+    // Don't count easily regenerated immediates
+    if (loc.fp || loc.wide || !IsInexpensiveConstant(cu, loc)) {
+      counts[p_map_idx].count += cu->use_counts.elem_list[i];
     }
     if (loc.wide) {
-      if (loc.defined) {
-        if (loc.fp && !simple_immediate) {
-          counts[p_map_idx].double_start = true;
-          counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1];
-        }
+      if (loc.fp) {
+        counts[p_map_idx].double_start = true;
+        counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1];
       }
       i += 2;
     } else {
diff --git a/src/compiler/codegen/ralloc_util.h b/src/compiler/codegen/ralloc_util.h
index a5ed999..67c22b5 100644
--- a/src/compiler/codegen/ralloc_util.h
+++ b/src/compiler/codegen/ralloc_util.h
@@ -157,6 +157,7 @@
 void RecordFpPromotion(CompilationUnit* cu, int reg, int s_reg);
 int ComputeFrameSize(CompilationUnit* cu);
 int SRegToPMap(CompilationUnit* cu, int s_reg);
+void DumpRegPool(RegisterInfo* p, int num_regs);
 
 }  // namespace art
 
diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h
index 141638c..9cc17f1 100644
--- a/src/compiler/codegen/x86/codegen_x86.h
+++ b/src/compiler/codegen/x86/codegen_x86.h
@@ -38,8 +38,7 @@
                                      int displacement, int r_dest, int r_dest_hi, OpSize size,
                                      int s_reg);
     virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
-    virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
-                                       int val_lo, int val_hi);
+    virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
     virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
                                OpSize size);
     virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,12 +89,18 @@
     virtual bool IsUnconditionalBranch(LIR* lir);
 
     // Required for target - Dalvik-level generators.
+    virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_src2);
     virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
                                 RegLocation rl_index, RegLocation rl_src, int scale);
     virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_dest, int scale);
     virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
                              RegLocation rl_index, RegLocation rl_src, int scale);
+    virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+    virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2);
     virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2);
     virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -188,7 +193,10 @@
     void SpillCoreRegs(CompilationUnit* cu);
     void UnSpillCoreRegs(CompilationUnit* cu);
     static const X86EncodingMap EncodingMap[kX86Last];
-    bool InexpensiveConstant(int reg, int value);
+    bool InexpensiveConstantInt(int32_t value);
+    bool InexpensiveConstantFloat(int32_t value);
+    bool InexpensiveConstantLong(int64_t value);
+    bool InexpensiveConstantDouble(int64_t value);
 };
 
 }  // namespace art
diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc
index 0ae51e0..d4a34f7 100644
--- a/src/compiler/codegen/x86/int_x86.cc
+++ b/src/compiler/codegen/x86/int_x86.cc
@@ -322,6 +322,13 @@
   LOG(FATAL) << "Unexpected use of OpIT in x86";
   return NULL;
 }
+
+void X86Codegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+                            RegLocation rl_src2)
+{
+  LOG(FATAL) << "Unexpected use of GenMulLong for x86";
+  return;
+}
 bool X86Codegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
                          RegLocation rl_src2)
 {
@@ -583,4 +590,18 @@
   MarkGCCard(cu, r_value, r_array);
 }
 
+bool X86Codegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+                                   RegLocation rl_src1, RegLocation rl_shift)
+{
+  // Default implementation is just to ignore the constant case.
+  return GenShiftOpLong(cu, opcode, rl_dest, rl_src1, rl_shift);
+}
+
+bool X86Codegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+  // Default - bail to non-const handler.
+  return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/x86/utility_x86.cc b/src/compiler/codegen/x86/utility_x86.cc
index 4f9e28b..4cc2c18 100644
--- a/src/compiler/codegen/x86/utility_x86.cc
+++ b/src/compiler/codegen/x86/utility_x86.cc
@@ -50,11 +50,26 @@
   return res;
 }
 
-bool X86Codegen::InexpensiveConstant(int reg, int value)
+bool X86Codegen::InexpensiveConstantInt(int32_t value)
 {
   return true;
 }
 
+bool X86Codegen::InexpensiveConstantFloat(int32_t value)
+{
+  return false;
+}
+
+bool X86Codegen::InexpensiveConstantLong(int64_t value)
+{
+  return true;
+}
+
+bool X86Codegen::InexpensiveConstantDouble(int64_t value)
+{
+  return false; // TUNING
+}
+
 /*
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.  If target is
@@ -316,13 +331,14 @@
   return NewLIR2(cu, opcode, rBase, disp);
 }
 
-LIR* X86Codegen::LoadConstantValueWide(CompilationUnit *cu, int r_dest_lo,
-                                       int r_dest_hi, int val_lo, int val_hi)
+LIR* X86Codegen::LoadConstantWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, int64_t value)
 {
+    int32_t val_lo = Low32Bits(value);
+    int32_t val_hi = High32Bits(value);
     LIR *res;
     if (X86_FPREG(r_dest_lo)) {
       DCHECK(X86_FPREG(r_dest_hi));  // ignore r_dest_hi
-      if (val_lo == 0 && val_hi == 0) {
+      if (value == 0) {
         return NewLIR2(cu, kX86XorpsRR, r_dest_lo, r_dest_lo);
       } else {
         if (val_lo == 0) {
diff --git a/src/compiler/compiler_enums.h b/src/compiler/compiler_enums.h
index bdf7a8b..ae305c0 100644
--- a/src/compiler/compiler_enums.h
+++ b/src/compiler/compiler_enums.h
@@ -291,6 +291,7 @@
   kThrowNullPointer,
   kThrowDivZero,
   kThrowArrayBounds,
+  kThrowConstantArrayBounds,
   kThrowNoSuchMethod,
   kThrowStackOverflow,
 };
diff --git a/src/compiler/compiler_ir.h b/src/compiler/compiler_ir.h
index aca32d5..056c308 100644
--- a/src/compiler/compiler_ir.h
+++ b/src/compiler/compiler_ir.h
@@ -230,7 +230,8 @@
   bool catch_entry;
   bool explicit_throw;
   bool conditional_branch;
-  bool has_return;
+  bool has_return;                  // Contains a return.
+  bool dominates_return;            // Is a member of an extended basic block containing a return.
   uint16_t start_offset;
   uint16_t nesting_depth;
   BBType block_type;
@@ -306,6 +307,7 @@
       vreg_to_ssa_map(NULL),
       ssa_last_defs(NULL),
       is_constant_v(NULL),
+      must_flush_constant_v(NULL),
       constant_values(NULL),
       reg_location(NULL),
       promotion_map(NULL),
@@ -418,6 +420,7 @@
   int* vreg_to_ssa_map;            // length == method->registers_size
   int* ssa_last_defs;              // length == method->registers_size
   ArenaBitVector* is_constant_v;   // length == num_ssa_reg
+  ArenaBitVector* must_flush_constant_v;   // length == num_ssa_reg
   int* constant_values;            // length == num_ssa_reg
 
   // Use counts of ssa names.
@@ -579,6 +582,35 @@
   {{Instruction::RETURN_WIDE}, kIdentity},
 };
 
+static inline bool IsConst(const CompilationUnit* cu, int32_t s_reg)
+{
+  return (IsBitSet(cu->is_constant_v, s_reg));
+}
+
+static inline bool IsConst(const CompilationUnit* cu, RegLocation loc)
+{
+  return (IsConst(cu, loc.orig_sreg));
+}
+
+static inline int32_t ConstantValue(const CompilationUnit* cu, RegLocation loc)
+{
+  DCHECK(IsConst(cu, loc));
+  return cu->constant_values[loc.orig_sreg];
+}
+
+static inline int64_t ConstantValueWide(const CompilationUnit* cu, RegLocation loc)
+{
+  DCHECK(IsConst(cu, loc));
+  return (static_cast<int64_t>(cu->constant_values[loc.orig_sreg + 1]) << 32) |
+      Low32Bits(static_cast<int64_t>(cu->constant_values[loc.orig_sreg]));
+}
+
+static inline bool MustFlushConstant(const CompilationUnit* cu, RegLocation loc)
+{
+  DCHECK(IsConst(cu, loc));
+  return IsBitSet(cu->must_flush_constant_v, loc.orig_sreg);
+}
+
 }  // namespace art
 
 #endif // ART_SRC_COMPILER_COMPILER_IR_H_
diff --git a/src/compiler/dataflow.cc b/src/compiler/dataflow.cc
index 16065ab..1e20cbd 100644
--- a/src/compiler/dataflow.cc
+++ b/src/compiler/dataflow.cc
@@ -867,16 +867,15 @@
     // Pre-SSA - just use the standard name
     return GetSSAName(cu, ssa_reg);
   }
-  if (cu->reg_location[ssa_reg].is_const) {
+  if (IsConst(cu, cu->reg_location[ssa_reg])) {
     if (!singles_only && cu->reg_location[ssa_reg].wide) {
-      int64_t immval = cu->constant_values[ssa_reg + 1];
-      immval = (immval << 32) | cu->constant_values[ssa_reg];
       return StringPrintf("v%d_%d#0x%llx", SRegToVReg(cu, ssa_reg),
-                          SRegToSubscript(cu, ssa_reg), immval);
+                          SRegToSubscript(cu, ssa_reg),
+                          ConstantValueWide(cu, cu->reg_location[ssa_reg]));
     } else {
-      int32_t immval = cu->constant_values[ssa_reg];
       return StringPrintf("v%d_%d#0x%x", SRegToVReg(cu, ssa_reg),
-                          SRegToSubscript(cu, ssa_reg), immval);
+                          SRegToSubscript(cu, ssa_reg),
+                          ConstantValue(cu, cu->reg_location[ssa_reg]));
     }
   } else {
     return StringPrintf("v%d_%d", SRegToVReg(cu, ssa_reg), SRegToSubscript(cu, ssa_reg));
@@ -1300,12 +1299,19 @@
 }
 
 /* Setup a constant value for opcodes thare have the DF_SETS_CONST attribute */
-static void SetConstant(CompilationUnit* cu, int ssa_reg, int value)
+static void SetConstant(CompilationUnit* cu, int32_t ssa_reg, int value)
 {
   SetBit(cu, cu->is_constant_v, ssa_reg);
   cu->constant_values[ssa_reg] = value;
 }
 
+static void SetConstantWide(CompilationUnit* cu, int ssa_reg, int64_t value)
+{
+  SetBit(cu, cu->is_constant_v, ssa_reg);
+  cu->constant_values[ssa_reg] = Low32Bits(value);
+  cu->constant_values[ssa_reg + 1] = High32Bits(value);
+}
+
 bool DoConstantPropogation(CompilationUnit* cu, BasicBlock* bb)
 {
   MIR* mir;
@@ -1321,27 +1327,25 @@
     /* Handle instructions that set up constants directly */
     if (df_attributes & DF_SETS_CONST) {
       if (df_attributes & DF_DA) {
+        int32_t vB = static_cast<int32_t>(d_insn->vB);
         switch (d_insn->opcode) {
           case Instruction::CONST_4:
           case Instruction::CONST_16:
           case Instruction::CONST:
-            SetConstant(cu, mir->ssa_rep->defs[0], d_insn->vB);
+            SetConstant(cu, mir->ssa_rep->defs[0], vB);
             break;
           case Instruction::CONST_HIGH16:
-            SetConstant(cu, mir->ssa_rep->defs[0], d_insn->vB << 16);
+            SetConstant(cu, mir->ssa_rep->defs[0], vB << 16);
             break;
           case Instruction::CONST_WIDE_16:
           case Instruction::CONST_WIDE_32:
-            SetConstant(cu, mir->ssa_rep->defs[0], d_insn->vB);
-            SetConstant(cu, mir->ssa_rep->defs[1], 0);
+            SetConstantWide(cu, mir->ssa_rep->defs[0], static_cast<int64_t>(vB));
             break;
           case Instruction::CONST_WIDE:
-            SetConstant(cu, mir->ssa_rep->defs[0], static_cast<int>(d_insn->vB_wide));
-            SetConstant(cu, mir->ssa_rep->defs[1], static_cast<int>(d_insn->vB_wide >> 32));
+            SetConstantWide(cu, mir->ssa_rep->defs[0], d_insn->vB_wide);
             break;
           case Instruction::CONST_WIDE_HIGH16:
-            SetConstant(cu, mir->ssa_rep->defs[0], 0);
-            SetConstant(cu, mir->ssa_rep->defs[1], d_insn->vB << 16);
+            SetConstantWide(cu, mir->ssa_rep->defs[0], static_cast<int64_t>(vB) << 48);
             break;
           default:
             break;
@@ -1363,6 +1367,18 @@
                       cu->constant_values[mir->ssa_rep->uses[1]]);
         }
       }
+    } else if (df_attributes & DF_NULL_TRANSFER_N) {
+      /*
+       * Mark const sregs that appear in merges.  Need to flush those to home location.
+       * TUNING: instead of flushing on def, we could insert a flush on the appropriate
+       * edge[s].
+       */
+      DCHECK_EQ(static_cast<int32_t>(d_insn->opcode), kMirOpPhi);
+      for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
+        if (IsConst(cu, mir->ssa_rep->uses[i])) {
+          SetBit(cu, cu->must_flush_constant_v, mir->ssa_rep->uses[i]);
+        }
+      }
     }
   }
   /* TODO: implement code to handle arithmetic operations */
@@ -1708,6 +1724,28 @@
             }
           }
           break;
+        case Instruction::GOTO:
+        case Instruction::GOTO_16:
+        case Instruction::GOTO_32:
+        case Instruction::IF_EQ:
+        case Instruction::IF_NE:
+        case Instruction::IF_LT:
+        case Instruction::IF_GE:
+        case Instruction::IF_GT:
+        case Instruction::IF_LE:
+        case Instruction::IF_EQZ:
+        case Instruction::IF_NEZ:
+        case Instruction::IF_LTZ:
+        case Instruction::IF_GEZ:
+        case Instruction::IF_GTZ:
+        case Instruction::IF_LEZ:
+          if (bb->taken->dominates_return) {
+            mir->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
+            if (cu->verbose) {
+              LOG(INFO) << "Suppressed suspend check at 0x" << std::hex << mir->offset;
+            }
+          }
+          break;
         default:
           break;
       }
@@ -2056,15 +2094,26 @@
   if (cu->verbose) {
     LOG(INFO) << "Extended bb head " << bb->id;
   }
+  BasicBlock* start_bb = bb;
   cu->extended_basic_blocks.push_back(bb);
+  bool has_return = false;
   // Visit blocks strictly dominated by this head.
   while (bb != NULL) {
     bb->visited = true;
+    has_return |= bb->has_return;
     bb = NextDominatedBlock(cu, bb);
     if (cu->verbose && (bb != NULL)) {
       LOG(INFO) << "...added bb " << bb->id;
     }
   }
+  if (has_return) {
+    // This extended basic block contains a return, so mark all members.
+    bb = start_bb;
+    while (bb != NULL) {
+      bb->dominates_return = true;
+      bb = NextDominatedBlock(cu, bb);
+    }
+  }
   return false; // Not iterative - return value will be ignored
 }
 
diff --git a/src/compiler/frontend.cc b/src/compiler/frontend.cc
index 6ccbc07..6eb117a 100644
--- a/src/compiler/frontend.cc
+++ b/src/compiler/frontend.cc
@@ -66,13 +66,13 @@
 
 /* Default optimizer/debug setting for the compiler. */
 static uint32_t kCompilerOptimizerDisableFlags = 0 | // Disable specific optimizations
-  //(1 << kLoadStoreElimination) |
+  (1 << kLoadStoreElimination) |
   //(1 << kLoadHoisting) |
   //(1 << kSuppressLoads) |
   //(1 << kNullCheckElimination) |
   //(1 << kPromoteRegs) |
   //(1 << kTrackLiveTemps) |
-  //(1 << kSkipLargeMethodOptimization) |
+  (1 << kSkipLargeMethodOptimization) |
   //(1 << kSafeOptimizations) |
   //(1 << kBBOpt) |
   //(1 << kMatch) |
@@ -972,6 +972,7 @@
       cur_block = ProcessCanBranch(cu.get(), cur_block, insn, cur_offset,
                                   width, flags, code_ptr, code_end);
     } else if (flags & Instruction::kReturn) {
+      cur_block->has_return = true;
       cur_block->fall_through = exit_block;
       InsertGrowableList(cu.get(), exit_block->predecessors,
                             reinterpret_cast<uintptr_t>(cur_block));
@@ -1078,10 +1079,9 @@
   }
 
   /* Do constant propagation */
-  // TODO: Probably need to make these expandable to support new ssa names
-  // introducted during MIR optimization passes
-  cu->is_constant_v = AllocBitVector(cu.get(), cu->num_ssa_regs,
-                                         false  /* not expandable */);
+  cu->is_constant_v = AllocBitVector(cu.get(), cu->num_ssa_regs, false  /* not expandable */);
+  cu->must_flush_constant_v = AllocBitVector(cu.get(), cu->num_ssa_regs,
+                                             false  /* not expandable */);
   cu->constant_values =
       static_cast<int*>(NewMem(cu.get(), sizeof(int) * cu->num_ssa_regs, true, kAllocDFInfo));
   DataFlowAnalysisDispatcher(cu.get(), DoConstantPropogation,
diff --git a/src/compiler/ralloc.cc b/src/compiler/ralloc.cc
index 2038e19..3514200 100644
--- a/src/compiler/ralloc.cc
+++ b/src/compiler/ralloc.cc
@@ -479,6 +479,39 @@
     }
   }
 
+  /*
+   * Now that everything is typed and constants propagated, identify those constants
+   * that can be cheaply materialized and don't need to be flushed to a home location.
+   * The default is to not flush, and some have already been marked as must flush.
+   */
+  for (i = 0; i < cu->num_ssa_regs; i++) {
+    if (IsBitSet(cu->is_constant_v, i)) {
+      bool flush = false;
+      RegLocation loc = cu->reg_location[i];
+      if (loc.wide) {
+        int64_t value = ConstantValueWide(cu, loc);
+        if (loc.fp) {
+          flush = !cu->cg->InexpensiveConstantDouble(value);
+        } else {
+          flush = !cu->cg->InexpensiveConstantLong(value);
+        }
+      } else {
+        int32_t value = ConstantValue(cu, loc);
+        if (loc.fp) {
+          flush = !cu->cg->InexpensiveConstantFloat(value);
+        } else {
+          flush = !cu->cg->InexpensiveConstantInt(value);
+        }
+      }
+      if (flush) {
+        SetBit(cu, cu->must_flush_constant_v, i);
+      }
+      if (loc.wide) {
+        i++;  // Skip the high word
+      }
+    }
+  }
+
   cu->core_spill_mask = 0;
   cu->fp_spill_mask = 0;
   cu->num_core_spills = 0;
diff --git a/src/utils.h b/src/utils.h
index f3c9b7a..d808fc3 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -91,20 +91,20 @@
   return IsUint(N, value);
 }
 
-static inline int32_t Low16Bits(int32_t value) {
-  return static_cast<int32_t>(value & 0xffff);
+static inline uint16_t Low16Bits(uint32_t value) {
+  return static_cast<uint16_t>(value);
 }
 
-static inline int32_t High16Bits(int32_t value) {
-  return static_cast<int32_t>(value >> 16);
+static inline uint16_t High16Bits(uint32_t value) {
+  return static_cast<uint16_t>(value >> 16);
 }
 
-static inline int32_t Low32Bits(int64_t value) {
-  return static_cast<int32_t>(value);
+static inline uint32_t Low32Bits(uint64_t value) {
+  return static_cast<uint32_t>(value);
 }
 
-static inline int32_t High32Bits(int64_t value) {
-  return static_cast<int32_t>(value >> 32);
+static inline uint32_t High32Bits(uint64_t value) {
+  return static_cast<uint32_t>(value >> 32);
 }
 
 template<typename T>