Merge "Fix test-art-host-oat flakiness" into dalvik-dev
diff --git a/src/compiler/codegen/arm/arm_lir.h b/src/compiler/codegen/arm/arm_lir.h
index 3fc8792..c41f53b 100644
--- a/src/compiler/codegen/arm/arm_lir.h
+++ b/src/compiler/codegen/arm/arm_lir.h
@@ -371,7 +371,7 @@
kThumb2StrbRRI12, // strb rt,[rn,#imm12] [111110001000] rt[15..12] rn[19..16] imm12[11..0].
kThumb2Pop, // pop [1110100010111101] list[15-0].
kThumb2Push, // push [1110100100101101] list[15-0].
- kThumb2CmpRI8, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
+ kThumb2CmpRI12, // cmp rn, #<const> [11110] i [011011] rn[19-16] [0] imm3 [1111] imm8[7..0].
kThumb2AdcRRR, // adc [111010110101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
kThumb2AndRRR, // and [111010100000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
kThumb2BicRRR, // bic [111010100010] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
@@ -445,6 +445,9 @@
kThumb2Pop1, // t3 encoding of pop.
kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
kThumb2Smull, // smull [111110111000] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0].
+ kThumb2LdrdPcRel8, // ldrd rt, rt2, [pc, #+/-(imm8<<2)] (reach 0..1020 bytes).
+ kThumb2LdrdI8, // ldrd rt, rt2, [rn, #+/-(imm8<<2)].
+ kThumb2StrdI8, // strd rt, rt2, [rn, #+/-(imm8<<2)].
kArmLast,
};
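Note on the three new opcodes: the Thumb-2 ldrd/strd immediate forms carry an 8-bit word-scaled offset, so the reachable displacement is 0..1020 bytes (with an add/subtract bit for direction). A minimal sketch of that scaling, under the assumption of a hypothetical helper name (EncodeLdrdOffset is not part of this patch; the call sites below simply pass "displacement >> 2"):

    // Hypothetical illustration of the imm8 field the ldrd/strd opcodes use.
    static int EncodeLdrdOffset(int displacement) {
      if (displacement < 0 || displacement > 1020 || (displacement & 3) != 0) {
        return -1;  // Out of range or unaligned - caller must fall back.
      }
      return displacement >> 2;  // imm8 counts words, not bytes.
    }
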
diff --git a/src/compiler/codegen/arm/assemble_arm.cc b/src/compiler/codegen/arm/assemble_arm.cc
index 91f25d6..455ea67 100644
--- a/src/compiler/codegen/arm/assemble_arm.cc
+++ b/src/compiler/codegen/arm/assemble_arm.cc
@@ -646,7 +646,7 @@
kFmtUnused, -1, -1,
IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
| IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 4),
- ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00,
+ ENCODING_MAP(kThumb2CmpRI12, 0xf1b00f00,
kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
IS_BINARY_OP | REG_USE0 | SETS_CCODES,
@@ -917,8 +917,8 @@
"b", "!0t", 4),
ENCODING_MAP(kThumb2MovImm16H, 0xf2c00000,
kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
- "movh", "!0C, #!1M", 4),
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0,
+ "movt", "!0C, #!1M", 4),
ENCODING_MAP(kThumb2AddPCR, 0x4487,
kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
@@ -936,8 +936,8 @@
"mov", "!0C, #!1M", 4),
ENCODING_MAP(kThumb2MovImm16HST, 0xf2c00000,
kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
- kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
- "movh", "!0C, #!1M", 4),
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | REG_USE0 | NEEDS_FIXUP,
+ "movt", "!0C, #!1M", 4),
ENCODING_MAP(kThumb2LdmiaWB, 0xe8b00000,
kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
@@ -972,7 +972,21 @@
kFmtBitBlt, 3, 0,
IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
"smull", "!0C, !1C, !2C, !3C", 4),
-
+ ENCODING_MAP(kThumb2LdrdPcRel8, 0xe9df0000,
+ kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 7, 0,
+ kFmtUnused, -1, -1,
+ IS_TERTIARY_OP | REG_DEF0 | REG_DEF1 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
+ "ldrd", "!0C, !1C, [pc, #!2E]", 4),
+ ENCODING_MAP(kThumb2LdrdI8, 0xe9d00000,
+ kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+ kFmtBitBlt, 7, 0,
+ IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | IS_LOAD,
+ "ldrd", "!0C, !1C, [!2C, #!3E]", 4),
+ ENCODING_MAP(kThumb2StrdI8, 0xe9c00000,
+ kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+ kFmtBitBlt, 7, 0,
+ IS_QUAD_OP | REG_USE0 | REG_USE1 | REG_USE2 | IS_STORE,
+ "strd", "!0C, !1C, [!2C, #!3E]", 4),
};
/*
@@ -1023,13 +1037,14 @@
if (lir->opcode == kThumbLdrPcRel ||
lir->opcode == kThumb2LdrPcRel12 ||
lir->opcode == kThumbAddPcRel ||
+ lir->opcode == kThumb2LdrdPcRel8 ||
((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
/*
* PC-relative loads are mostly used to load immediates
* that are too large to materialize directly in one shot.
* However, if the load displacement exceeds the limit,
- * we revert to a 2-instruction materialization sequence.
+ * we revert to a multiple-instruction materialization sequence.
*/
LIR *lir_target = lir->target;
uintptr_t pc = (lir->offset + 4) & ~3;
@@ -1044,8 +1059,9 @@
// Shouldn't happen in current codegen.
LOG(FATAL) << "Unexpected pc-rel offset " << delta;
}
- // Now, check for the two difficult cases
+ // Now, check for the difficult cases
if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+ ((lir->opcode == kThumb2LdrdPcRel8) && (delta > 1020)) ||
((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
/*
@@ -1053,26 +1069,34 @@
* vldrs/vldrd we include REG_DEF_LR in the resource
* masks for these instructions.
*/
- int base_reg = (lir->opcode == kThumb2LdrPcRel12) ?
- lir->operands[0] : rARM_LR;
+ int base_reg = ((lir->opcode == kThumb2LdrdPcRel8) || (lir->opcode == kThumb2LdrPcRel12))
+ ? lir->operands[0] : rARM_LR;
- // Add new Adr to generate the address
+ // Add new Adr to generate the address.
LIR* new_adr = RawLIR(cu, lir->dalvik_offset, kThumb2Adr,
base_reg, 0, 0, 0, 0, lir->target);
InsertLIRBefore(lir, new_adr);
- // Convert to normal load
+ // Convert to normal load.
if (lir->opcode == kThumb2LdrPcRel12) {
lir->opcode = kThumb2LdrRRI12;
+ } else if (lir->opcode == kThumb2LdrdPcRel8) {
+ lir->opcode = kThumb2LdrdI8;
}
- // Change the load to be relative to the new Adr base
- lir->operands[1] = base_reg;
- lir->operands[2] = 0;
+ // Change the load to be relative to the new Adr base.
+ if (lir->opcode == kThumb2LdrdI8) {
+ lir->operands[3] = 0;
+ lir->operands[2] = base_reg;
+ } else {
+ lir->operands[2] = 0;
+ lir->operands[1] = base_reg;
+ }
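The retry logic here converts an out-of-range pc-relative ldrd into an explicit address materialization. Roughly, a sketch of the before/after LIR (register numbers hypothetical):

    // Before fixup (delta to the literal exceeds the 1020-byte reach):
    //   ldrd r2, r3, [pc, #delta]   // kThumb2LdrdPcRel8 - delta cannot encode
    // After fixup (kRetryAll re-runs assembly over the rewritten stream):
    //   adr  r2, <literal>          // kThumb2Adr, reusing the low dest as base
    //   ldrd r2, r3, [r2, #0]       // kThumb2LdrdI8
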
SetupResourceMasks(cu, lir);
res = kRetryAll;
} else {
if ((lir->opcode == kThumb2Vldrs) ||
- (lir->opcode == kThumb2Vldrd)) {
+ (lir->opcode == kThumb2Vldrd) ||
+ (lir->opcode == kThumb2LdrdPcRel8)) {
lir->operands[2] = delta >> 2;
} else {
lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? delta :
diff --git a/src/compiler/codegen/arm/codegen_arm.h b/src/compiler/codegen/arm/codegen_arm.h
index ea34ff2..4dadd6c 100644
--- a/src/compiler/codegen/arm/codegen_arm.h
+++ b/src/compiler/codegen/arm/codegen_arm.h
@@ -37,8 +37,7 @@
int displacement, int r_dest, int r_dest_hi, OpSize size,
int s_reg);
virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
- virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi);
+ virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
OpSize size);
virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -89,12 +88,18 @@
virtual bool IsUnconditionalBranch(LIR* lir);
// Required for target - Dalvik-level generators.
+ virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_src2);
virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale);
virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_dest, int scale);
virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale);
+ virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+ virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2);
virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2);
virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -197,7 +202,14 @@
static int EncodeShift(int code, int amount);
static int ModifiedImmediate(uint32_t value);
static ArmConditionCode ArmConditionEncoding(ConditionCode code);
- bool InexpensiveConstant(int reg, int value);
+ bool InexpensiveConstantInt(int32_t value);
+ bool InexpensiveConstantFloat(int32_t value);
+ bool InexpensiveConstantLong(int64_t value);
+ bool InexpensiveConstantDouble(int64_t value);
+
+ private:
+ void GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1,
+ int64_t val, ConditionCode ccode);
};
} // namespace art
diff --git a/src/compiler/codegen/arm/int_arm.cc b/src/compiler/codegen/arm/int_arm.cc
index fcf74f1..5a9786c 100644
--- a/src/compiler/codegen/arm/int_arm.cc
+++ b/src/compiler/codegen/arm/int_arm.cc
@@ -121,16 +121,81 @@
branch3->target = branch1->target;
}
-void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+void ArmCodegen::GenFusedLongCmpImmBranch(CompilationUnit* cu, BasicBlock* bb, RegLocation rl_src1,
+ int64_t val, ConditionCode ccode)
{
+ int32_t val_lo = Low32Bits(val);
+ int32_t val_hi = High32Bits(val);
+ DCHECK(ModifiedImmediate(val_lo) >= 0);
+ DCHECK(ModifiedImmediate(val_hi) >= 0);
LIR* label_list = cu->block_label_list;
LIR* taken = &label_list[bb->taken->id];
LIR* not_taken = &label_list[bb->fall_through->id];
+ rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+ int32_t low_reg = rl_src1.low_reg;
+ int32_t high_reg = rl_src1.high_reg;
+
+ switch(ccode) {
+ case kCondEq:
+ OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, not_taken);
+ break;
+ case kCondNe:
+ OpCmpImmBranch(cu, kCondNe, high_reg, val_hi, taken);
+ break;
+ case kCondLt:
+ OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
+ OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken);
+ ccode = kCondCc;
+ break;
+ case kCondLe:
+ OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, taken);
+ OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, not_taken);
+ ccode = kCondLs;
+ break;
+ case kCondGt:
+ OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken);
+ OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken);
+ ccode = kCondHi;
+ break;
+ case kCondGe:
+ OpCmpImmBranch(cu, kCondGt, high_reg, val_hi, taken);
+ OpCmpImmBranch(cu, kCondLt, high_reg, val_hi, not_taken);
+ ccode = kCondCs;
+ break;
+ default:
+ LOG(FATAL) << "Unexpected ccode: " << ccode;
+ }
+ OpCmpImmBranch(cu, ccode, low_reg, val_lo, taken);
+}
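A minimal host-side sketch of the predicate this branch tree implements for the kCondLt case; it shows why the final low-word comparison uses an unsigned condition (kCondCc) even though the overall compare is signed. This is reference semantics only, not emitted code:

    #include <cstdint>

    // Signed 64-bit (hi:lo) < (val_hi:val_lo), decomposed by 32-bit words.
    static bool SignedLongLt(int32_t hi, uint32_t lo, int32_t val_hi, uint32_t val_lo) {
      if (hi < val_hi) return true;    // first OpCmpImmBranch -> taken
      if (hi > val_hi) return false;   // second OpCmpImmBranch -> not_taken
      return lo < val_lo;              // high words equal: unsigned low compare (kCondCc)
    }

The other conditions follow the same pattern with their unsigned counterparts (kCondLs, kCondHi, kCondCs) on the low words.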
+
+
+void ArmCodegen::GenFusedLongCmpBranch(CompilationUnit* cu, BasicBlock* bb, MIR* mir)
+{
RegLocation rl_src1 = GetSrcWide(cu, mir, 0);
RegLocation rl_src2 = GetSrcWide(cu, mir, 2);
+ // Normalize such that if either operand is constant, src2 will be constant.
+ ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+ if (rl_src1.is_const) {
+ RegLocation rl_temp = rl_src1;
+ rl_src1 = rl_src2;
+ rl_src2 = rl_temp;
+ ccode = FlipComparisonOrder(ccode);
+ }
+ if (rl_src2.is_const) {
+ RegLocation rl_temp = UpdateLocWide(cu, rl_src2);
+ // Do special compare/branch against simple const operand if not already in registers.
+ int64_t val = ConstantValueWide(cu, rl_src2);
+ if ((rl_temp.location != kLocPhysReg) &&
+ ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))) {
+ GenFusedLongCmpImmBranch(cu, bb, rl_src1, val, ccode);
+ return;
+ }
+ }
+ LIR* label_list = cu->block_label_list;
+ LIR* taken = &label_list[bb->taken->id];
+ LIR* not_taken = &label_list[bb->fall_through->id];
rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg);
- ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
OpRegReg(cu, kOpCmp, rl_src1.high_reg, rl_src2.high_reg);
switch(ccode) {
case kCondEq:
@@ -185,7 +250,7 @@
if (ARM_LOWREG(reg) && ((check_value & 0xff) == check_value)) {
NewLIR2(cu, kThumbCmpRI8, reg, check_value);
} else if (mod_imm >= 0) {
- NewLIR2(cu, kThumb2CmpRI8, reg, mod_imm);
+ NewLIR2(cu, kThumb2CmpRI12, reg, mod_imm);
} else {
int t_reg = AllocTemp(cu);
LoadConstant(cu, t_reg, check_value);
@@ -523,6 +588,93 @@
return false;
}
+
+ /*
+ * Check to see if a result pair has a misaligned overlap with an operand pair. This
+ * is not usual for dx to generate, but it is legal (for now). In a future rev of
+ * dex, we'll want to make this case illegal.
+ */
+static bool BadOverlap(CompilationUnit* cu, RegLocation rl_src, RegLocation rl_dest)
+{
+ DCHECK(rl_src.wide);
+ DCHECK(rl_dest.wide);
+ return (abs(SRegToVReg(cu, rl_src.s_reg_low) - SRegToVReg(cu, rl_dest.s_reg_low)) == 1);
+}
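For example (hypothetical Dalvik registers): a destination pair in {v2, v3} against a source pair in {v1, v2} gives abs(1 - 2) == 1, so the pairs interlock and BadOverlap() returns true, while an exact self-overlap such as {v1, v2} against {v1, v2} gives abs(0) and is handled normally.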
+
+void ArmCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2)
+{
+ /*
+ * To pull off inline multiply, we have a worst-case requirement of 8 temporary
+ * registers. Normally for Arm, we get 5. We can get to 6 by including
+ * lr in the temp set. The only problematic case is all operands and result are
+ * distinct, and none have been promoted. In that case, we can succeed by aggressively
+ * freeing operand temp registers after they are no longer needed. All other cases
+ * can proceed normally. We'll just punt on the case of the result having a misaligned
+ * overlap with either operand and send that case to a runtime handler.
+ */
+ RegLocation rl_result;
+ if (BadOverlap(cu, rl_src1, rl_dest) || (BadOverlap(cu, rl_src2, rl_dest))) {
+ int func_offset = ENTRYPOINT_OFFSET(pLmul);
+ FlushAllRegs(cu);
+ CallRuntimeHelperRegLocationRegLocation(cu, func_offset, rl_src1, rl_src2, false);
+ rl_result = GetReturnWide(cu, false);
+ StoreValueWide(cu, rl_dest, rl_result);
+ return;
+ }
+ // Temporarily add LR to the temp pool, and assign it to tmp1
+ MarkTemp(cu, rARM_LR);
+ FreeTemp(cu, rARM_LR);
+ int tmp1 = rARM_LR;
+ LockTemp(cu, rARM_LR);
+
+ rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+ rl_src2 = LoadValueWide(cu, rl_src2, kCoreReg);
+
+ bool special_case = true;
+ // If operands are the same, or any pair has been promoted, we're not the special case.
+ if ((rl_src1.s_reg_low == rl_src2.s_reg_low) ||
+ (!IsTemp(cu, rl_src1.low_reg) && !IsTemp(cu, rl_src1.high_reg)) ||
+ (!IsTemp(cu, rl_src2.low_reg) && !IsTemp(cu, rl_src2.high_reg))) {
+ special_case = false;
+ }
+ // Tuning: if rl_dest has been promoted and is *not* either operand, could use directly.
+ int res_lo = AllocTemp(cu);
+ int res_hi;
+ if (rl_src1.low_reg == rl_src2.low_reg) {
+ res_hi = AllocTemp(cu);
+ NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src1.low_reg, rl_src1.high_reg);
+ NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src1.low_reg, rl_src1.low_reg);
+ OpRegRegRegShift(cu, kOpAdd, res_hi, res_hi, tmp1, EncodeShift(kArmLsl, 1));
+ } else {
+ // In the special case, all temps are now allocated
+ NewLIR3(cu, kThumb2MulRRR, tmp1, rl_src2.low_reg, rl_src1.high_reg);
+ if (special_case) {
+ DCHECK_NE(rl_src1.low_reg, rl_src2.low_reg);
+ DCHECK_NE(rl_src1.high_reg, rl_src2.high_reg);
+ FreeTemp(cu, rl_src1.high_reg);
+ }
+ res_hi = AllocTemp(cu);
+
+ NewLIR4(cu, kThumb2Umull, res_lo, res_hi, rl_src2.low_reg, rl_src1.low_reg);
+ NewLIR4(cu, kThumb2Mla, tmp1, rl_src1.low_reg, rl_src2.high_reg, tmp1);
+ NewLIR4(cu, kThumb2AddRRR, res_hi, tmp1, res_hi, 0);
+ if (special_case) {
+ FreeTemp(cu, rl_src1.low_reg);
+ Clobber(cu, rl_src1.low_reg);
+ Clobber(cu, rl_src1.high_reg);
+ }
+ }
+ FreeTemp(cu, tmp1);
+ rl_result = GetReturnWide(cu, false); // Just using as a template.
+ rl_result.low_reg = res_lo;
+ rl_result.high_reg = res_hi;
+ StoreValueWide(cu, rl_dest, rl_result);
+ // Now, restore lr to its non-temp status.
+ Clobber(cu, rARM_LR);
+ UnmarkTemp(cu, rARM_LR);
+}
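A reference sketch of the decomposition the emitted mul/umull/mla/add sequence implements; assumption-free 64-bit math, not emitted code:

    #include <cstdint>

    static uint64_t MulLongReference(uint64_t x, uint64_t y) {
      uint32_t x_lo = static_cast<uint32_t>(x), x_hi = static_cast<uint32_t>(x >> 32);
      uint32_t y_lo = static_cast<uint32_t>(y), y_hi = static_cast<uint32_t>(y >> 32);
      uint64_t res = static_cast<uint64_t>(x_lo) * y_lo;   // umull -> res_lo/res_hi
      uint32_t cross = x_lo * y_hi + x_hi * y_lo;          // mul, then mla into tmp1
      return res + (static_cast<uint64_t>(cross) << 32);   // add into the high word
    }

In the squaring special case above the two cross terms are equal, which is why a single multiply followed by a shift-by-one add suffices.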
+
bool ArmCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
{
@@ -568,8 +720,11 @@
int len_offset = mirror::Array::LengthOffset().Int32Value();
int data_offset;
RegLocation rl_result;
+ bool constant_index = rl_index.is_const;
rl_array = LoadValue(cu, rl_array, kCoreReg);
- rl_index = LoadValue(cu, rl_index, kCoreReg);
+ if (!constant_index) {
+ rl_index = LoadValue(cu, rl_index, kCoreReg);
+ }
if (rl_dest.wide) {
data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
@@ -577,6 +732,11 @@
data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
}
+ // If index is constant, just fold it into the data offset
+ if (constant_index) {
+ data_offset += ConstantValue(cu, rl_index) << scale;
+ }
+
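Worked example of the fold (values hypothetical): an int[] get at constant index 5 with scale 2 and a base data offset of 16 becomes data_offset = 16 + (5 << 2) = 36, so the element loads with a single base-plus-displacement access off the array register and no index register or address add is needed.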
/* null object? */
GenNullCheck(cu, rl_array.s_reg_low, rl_array.low_reg, opt_flags);
@@ -587,27 +747,38 @@
/* Get len */
LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
}
- if (rl_dest.wide || rl_dest.fp) {
- // No special indexed operation, lea + load w/ displacement
- int reg_ptr = AllocTemp(cu);
- OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
- EncodeShift(kArmLsl, scale));
- FreeTemp(cu, rl_index.low_reg);
+ if (rl_dest.wide || rl_dest.fp || constant_index) {
+ int reg_ptr;
+ if (constant_index) {
+ reg_ptr = rl_array.low_reg; // NOTE: must not alter reg_ptr in constant case.
+ } else {
+ // No special indexed operation, lea + load w/ displacement
+ reg_ptr = AllocTemp(cu);
+ OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+ EncodeShift(kArmLsl, scale));
+ FreeTemp(cu, rl_index.low_reg);
+ }
rl_result = EvalLoc(cu, rl_dest, reg_class, true);
if (needs_range_check) {
- // TODO: change kCondCS to a more meaningful name, is the sense of
- // carry-set/clear flipped?
- GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+ if (constant_index) {
+ GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds);
+ } else {
+ GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
+ }
FreeTemp(cu, reg_len);
}
if (rl_dest.wide) {
LoadBaseDispWide(cu, reg_ptr, data_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
- FreeTemp(cu, reg_ptr);
+ if (!constant_index) {
+ FreeTemp(cu, reg_ptr);
+ }
StoreValueWide(cu, rl_dest, rl_result);
} else {
LoadBaseDisp(cu, reg_ptr, data_offset, rl_result.low_reg, size, INVALID_SREG);
- FreeTemp(cu, reg_ptr);
+ if (!constant_index) {
+ FreeTemp(cu, reg_ptr);
+ }
StoreValue(cu, rl_dest, rl_result);
}
} else {
@@ -639,17 +810,28 @@
RegisterClass reg_class = oat_reg_class_by_size(size);
int len_offset = mirror::Array::LengthOffset().Int32Value();
int data_offset;
+ bool constant_index = rl_index.is_const;
- if (size == kLong || size == kDouble) {
+ if (rl_src.wide) {
data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
} else {
data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
}
+ // If index is constant, just fold it into the data offset.
+ if (constant_index) {
+ data_offset += ConstantValue(cu, rl_index) << scale;
+ }
+
rl_array = LoadValue(cu, rl_array, kCoreReg);
- rl_index = LoadValue(cu, rl_index, kCoreReg);
- int reg_ptr = INVALID_REG;
- if (IsTemp(cu, rl_array.low_reg)) {
+ if (!constant_index) {
+ rl_index = LoadValue(cu, rl_index, kCoreReg);
+ }
+
+ int reg_ptr;
+ if (constant_index) {
+ reg_ptr = rl_array.low_reg;
+ } else if (IsTemp(cu, rl_array.low_reg)) {
Clobber(cu, rl_array.low_reg);
reg_ptr = rl_array.low_reg;
} else {
@@ -668,18 +850,25 @@
LoadWordDisp(cu, rl_array.low_reg, len_offset, reg_len);
}
/* at this point, reg_ptr points to array, 2 live temps */
- if (rl_src.wide || rl_src.fp) {
+ if (rl_src.wide || rl_src.fp || constant_index) {
if (rl_src.wide) {
rl_src = LoadValueWide(cu, rl_src, reg_class);
} else {
rl_src = LoadValue(cu, rl_src, reg_class);
}
- OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
- EncodeShift(kArmLsl, scale));
+ if (!constant_index) {
+ OpRegRegRegShift(cu, kOpAdd, reg_ptr, rl_array.low_reg, rl_index.low_reg,
+ EncodeShift(kArmLsl, scale));
+ }
if (needs_range_check) {
- GenRegRegCheck(cu, kCondCs, rl_index.low_reg, reg_len, kThrowArrayBounds);
+ if (constant_index) {
+ GenImmedCheck(cu, kCondLs, reg_len, ConstantValue(cu, rl_index), kThrowConstantArrayBounds);
+ } else {
+ GenRegRegCheck(cu, kCondLs, reg_len, rl_index.low_reg, kThrowArrayBounds);
+ }
FreeTemp(cu, reg_len);
}
+
if (rl_src.wide) {
StoreBaseDispWide(cu, reg_ptr, data_offset, rl_src.low_reg, rl_src.high_reg);
} else {
@@ -696,7 +885,9 @@
StoreBaseIndexed(cu, reg_ptr, rl_index.low_reg, rl_src.low_reg,
scale, size);
}
- FreeTemp(cu, reg_ptr);
+ if (!constant_index) {
+ FreeTemp(cu, reg_ptr);
+ }
}
/*
@@ -758,4 +949,163 @@
MarkGCCard(cu, r_value, r_array);
}
+bool ArmCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src, RegLocation rl_shift)
+{
+ rl_src = LoadValueWide(cu, rl_src, kCoreReg);
+ // Per spec, we only care about low 6 bits of shift amount.
+ int shift_amount = ConstantValue(cu, rl_shift) & 0x3f;
+ if (shift_amount == 0) {
+ StoreValueWide(cu, rl_dest, rl_src);
+ return false; // TODO: remove useless bool return result.
+ }
+ if (BadOverlap(cu, rl_src, rl_dest)) {
+ return GenShiftOpLong(cu, opcode, rl_dest, rl_src, rl_shift);
+ }
+ RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+ switch(opcode) {
+ case Instruction::SHL_LONG:
+ case Instruction::SHL_LONG_2ADDR:
+ if (shift_amount == 1) {
+ OpRegRegReg(cu, kOpAdd, rl_result.low_reg, rl_src.low_reg, rl_src.low_reg);
+ OpRegRegReg(cu, kOpAdc, rl_result.high_reg, rl_src.high_reg, rl_src.high_reg);
+ } else if (shift_amount == 32) {
+ OpRegCopy(cu, rl_result.high_reg, rl_src.low_reg);
+ LoadConstant(cu, rl_result.low_reg, 0);
+ } else if (shift_amount > 31) {
+ OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.low_reg, shift_amount - 32);
+ LoadConstant(cu, rl_result.low_reg, 0);
+ } else {
+ OpRegRegImm(cu, kOpLsl, rl_result.high_reg, rl_src.high_reg, shift_amount);
+ OpRegRegRegShift(cu, kOpOr, rl_result.high_reg, rl_result.high_reg, rl_src.low_reg,
+ EncodeShift(kArmLsr, 32 - shift_amount));
+ OpRegRegImm(cu, kOpLsl, rl_result.low_reg, rl_src.low_reg, shift_amount);
+ }
+ break;
+ case Instruction::SHR_LONG:
+ case Instruction::SHR_LONG_2ADDR:
+ if (shift_amount == 32) {
+ OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg);
+ OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
+ } else if (shift_amount > 31) {
+ OpRegRegImm(cu, kOpAsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
+ OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, 31);
+ } else {
+ int t_reg = AllocTemp(cu);
+ OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount);
+ OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
+ EncodeShift(kArmLsl, 32 - shift_amount));
+ FreeTemp(cu, t_reg);
+ OpRegRegImm(cu, kOpAsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
+ }
+ break;
+ case Instruction::USHR_LONG:
+ case Instruction::USHR_LONG_2ADDR:
+ if (shift_amount == 32) {
+ OpRegCopy(cu, rl_result.low_reg, rl_src.high_reg);
+ LoadConstant(cu, rl_result.high_reg, 0);
+ } else if (shift_amount > 31) {
+ OpRegRegImm(cu, kOpLsr, rl_result.low_reg, rl_src.high_reg, shift_amount - 32);
+ LoadConstant(cu, rl_result.high_reg, 0);
+ } else {
+ int t_reg = AllocTemp(cu);
+ OpRegRegImm(cu, kOpLsr, t_reg, rl_src.low_reg, shift_amount);
+ OpRegRegRegShift(cu, kOpOr, rl_result.low_reg, t_reg, rl_src.high_reg,
+ EncodeShift(kArmLsl, 32 - shift_amount));
+ FreeTemp(cu, t_reg);
+ OpRegRegImm(cu, kOpLsr, rl_result.high_reg, rl_src.high_reg, shift_amount);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected case";
+ return true;
+ }
+ StoreValueWide(cu, rl_dest, rl_result);
+ return false;
+}
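A reference sketch of the three SHL regimes handled above; the shift_amount == 1 case is additionally peepholed into add/adc, and SHR/USHR mirror this with asr/lsr and sign/zero fill. Reference semantics only, not emitted code:

    #include <cstdint>

    static uint64_t ShlLongReference(uint64_t x, unsigned n) {  // n in [1, 63], already masked
      uint32_t lo = static_cast<uint32_t>(x), hi = static_cast<uint32_t>(x >> 32);
      if (n >= 32) {  // low word becomes zero; covers the n == 32 register-copy case too
        return static_cast<uint64_t>(lo << (n - 32)) << 32;
      }
      uint32_t new_hi = (hi << n) | (lo >> (32 - n));  // lsl + orr with lsr (32 - n)
      return (static_cast<uint64_t>(new_hi) << 32) | (lo << n);
    }
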
+
+bool ArmCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+ if ((opcode == Instruction::SUB_LONG_2ADDR) || (opcode == Instruction::SUB_LONG)) {
+ if (!rl_src2.is_const) {
+ // Don't bother with special handling for subtract from immediate.
+ return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+ }
+ } else {
+ // Normalize
+ if (!rl_src2.is_const) {
+ DCHECK(rl_src1.is_const);
+ RegLocation rl_temp = rl_src1;
+ rl_src1 = rl_src2;
+ rl_src2 = rl_temp;
+ }
+ }
+ if (BadOverlap(cu, rl_src1, rl_dest)) {
+ return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+ }
+ DCHECK(rl_src2.is_const);
+ int64_t val = ConstantValueWide(cu, rl_src2);
+ uint32_t val_lo = Low32Bits(val);
+ uint32_t val_hi = High32Bits(val);
+ int32_t mod_imm_lo = ModifiedImmediate(val_lo);
+ int32_t mod_imm_hi = ModifiedImmediate(val_hi);
+
+ // Only a subset of add/sub immediate instructions set carry - so bail if we don't fit
+ switch(opcode) {
+ case Instruction::ADD_LONG:
+ case Instruction::ADD_LONG_2ADDR:
+ case Instruction::SUB_LONG:
+ case Instruction::SUB_LONG_2ADDR:
+ if ((mod_imm_lo < 0) || (mod_imm_hi < 0)) {
+ return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+ }
+ break;
+ default:
+ break;
+ }
+ rl_src1 = LoadValueWide(cu, rl_src1, kCoreReg);
+ RegLocation rl_result = EvalLoc(cu, rl_dest, kCoreReg, true);
+ // NOTE: once we've done the EvalLoc on dest, we can no longer bail.
+ switch (opcode) {
+ case Instruction::ADD_LONG:
+ case Instruction::ADD_LONG_2ADDR:
+ NewLIR3(cu, kThumb2AddRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+ NewLIR3(cu, kThumb2AdcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+ break;
+ case Instruction::OR_LONG:
+ case Instruction::OR_LONG_2ADDR:
+ if ((val_lo != 0) || (rl_result.low_reg != rl_src1.low_reg)) {
+ OpRegRegImm(cu, kOpOr, rl_result.low_reg, rl_src1.low_reg, val_lo);
+ }
+ if ((val_hi != 0) || (rl_result.high_reg != rl_src1.high_reg)) {
+ OpRegRegImm(cu, kOpOr, rl_result.high_reg, rl_src1.high_reg, val_hi);
+ }
+ break;
+ case Instruction::XOR_LONG:
+ case Instruction::XOR_LONG_2ADDR:
+ OpRegRegImm(cu, kOpXor, rl_result.low_reg, rl_src1.low_reg, val_lo);
+ OpRegRegImm(cu, kOpXor, rl_result.high_reg, rl_src1.high_reg, val_hi);
+ break;
+ case Instruction::AND_LONG:
+ case Instruction::AND_LONG_2ADDR:
+ if ((val_lo != 0xffffffff) || (rl_result.low_reg != rl_src1.low_reg)) {
+ OpRegRegImm(cu, kOpAnd, rl_result.low_reg, rl_src1.low_reg, val_lo);
+ }
+ if ((val_hi != 0xffffffff) || (rl_result.high_reg != rl_src1.high_reg)) {
+ OpRegRegImm(cu, kOpAnd, rl_result.high_reg, rl_src1.high_reg, val_hi);
+ }
+ break;
+ case Instruction::SUB_LONG_2ADDR:
+ case Instruction::SUB_LONG:
+ NewLIR3(cu, kThumb2SubRRI8, rl_result.low_reg, rl_src1.low_reg, mod_imm_lo);
+ NewLIR3(cu, kThumb2SbcRRI8, rl_result.high_reg, rl_src1.high_reg, mod_imm_hi);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected opcode " << opcode;
+ }
+ StoreValueWide(cu, rl_dest, rl_result);
+ return false; // TODO: remove bool return value from all of these Gen routines.
+}
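The add/sub bail-out above exists because the carry-setting adds/adc and subs/sbc forms used here take only Thumb-2 modified immediates. A brief illustration of which 64-bit constants qualify (the repeat/rotate patterns are per the ARM ARM; the sample values are hypothetical):

    // Encodable 32-bit modified immediates: 0x000000XY, 0x00XY00XY, 0xXY00XY00,
    // 0xXYXYXYXY, or an 8-bit value with its top bit set rotated into position.
    // For val = 0x0000001200000034LL both halves encode, so the adds/adc pair is
    // emitted; for val = 0x0000012300000456LL each half needs a window wider than
    // 8 bits, so control falls back to GenArithOpLong().
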
+
} // namespace art
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index 433111c..a670199 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -45,6 +45,32 @@
return res;
}
+/*
+ * Determine whether value can be encoded as a Thumb2 floating point
+ * immediate. If not, return -1. If so return encoded 8-bit value.
+ */
+static int EncodeImmDouble(int64_t value)
+{
+ int res;
+ int bit_a = (static_cast<uint64_t>(value) >> 63) & 1; // Shift as unsigned so the sign bit cannot smear.
+ int not_bit_b = (value & 0x4000000000000000ll) >> 62;
+ int bit_b = (value & 0x2000000000000000ll) >> 61;
+ int b_smear = (value & 0x3fc0000000000000ll) >> 54;
+ int slice = (value & 0x003f000000000000ll) >> 48;
+ uint64_t zeroes = (value & 0x0000ffffffffffffll);
+ if (zeroes != 0)
+ return -1;
+ if (bit_b) {
+ if ((not_bit_b != 0) || (b_smear != 0xff))
+ return -1;
+ } else {
+ if ((not_bit_b != 1) || (b_smear != 0x0))
+ return -1;
+ }
+ res = (bit_a << 7) | (bit_b << 6) | slice;
+ return res;
+}
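A worked example of the VFPExpandImm pattern this function matches: 1.0 is 0x3ff0000000000000, so bit 62 (not_bit_b) is 0, bits 61..54 are all ones (b_smear == 0xff, hence bit_b == 1), and the slice is 0b110000, giving imm8 = (0 << 7) | (1 << 6) | 0x30 = 0x70. Conversely 2.0 (0x4000000000000000) encodes as imm8 = 0x00, and any value with nonzero low 48 bits (most doubles) returns -1 and is left to the literal pool.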
+
static LIR* LoadFPConstantValue(CompilationUnit* cu, int r_dest, int value)
{
DCHECK(ARM_SINGLEREG(r_dest));
@@ -126,19 +152,24 @@
return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
}
-bool ArmCodegen::InexpensiveConstant(int reg, int value)
+bool ArmCodegen::InexpensiveConstantInt(int32_t value)
{
- bool res = false;
- if (ARM_FPREG(reg)) {
- res = (EncodeImmSingle(value) >= 0);
- } else {
- if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) {
- res = true;
- } else {
- res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
- }
- }
- return res;
+ return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool ArmCodegen::InexpensiveConstantFloat(int32_t value)
+{
+ return EncodeImmSingle(value) >= 0;
+}
+
+bool ArmCodegen::InexpensiveConstantLong(int64_t value)
+{
+ return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool ArmCodegen::InexpensiveConstantDouble(int64_t value)
+{
+ return EncodeImmDouble(value) >= 0;
}
/*
@@ -178,25 +209,9 @@
res = NewLIR2(cu, kThumb2MovImm16, r_dest, value);
return res;
}
- /* No shortcut - go ahead and use literal pool */
- LIR* data_target = ScanLiteralPool(cu->literal_list, value, 0);
- if (data_target == NULL) {
- data_target = AddWordData(cu, &cu->literal_list, value);
- }
- LIR* load_pc_rel = RawLIR(cu, cu->current_dalvik_offset,
- kThumb2LdrPcRel12, r_dest, 0, 0, 0, 0, data_target);
- SetMemRefType(cu, load_pc_rel, true, kLiteral);
- load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
- res = load_pc_rel;
- AppendLIR(cu, load_pc_rel);
-
- /*
- * To save space in the constant pool, we use the ADD_RRI8 instruction to
- * add up to 255 to an existing constant value.
- */
- if (data_target->operands[0] != value) {
- OpRegImm(cu, kOpAdd, r_dest, value - data_target->operands[0]);
- }
+ /* Do a low/high pair */
+ res = NewLIR2(cu, kThumb2MovImm16, r_dest, Low16Bits(value));
+ NewLIR2(cu, kThumb2MovImm16H, r_dest, High16Bits(value));
return res;
}
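The low/high pair replaces the old literal-pool fallback for arbitrary 32-bit constants, and it explains the REG_USE0 added to kThumb2MovImm16H earlier in this patch: movt writes only bits [31:16] and therefore reads the register's existing low half. Sketch of what a call now expands to (register and value hypothetical):

    //   LoadConstantNoClobber(cu, r0, 0x12345678);
    // emits:
    //   movw r0, #0x5678   // kThumb2MovImm16  - writes all 32 bits, zero-extended
    //   movt r0, #0x1234   // kThumb2MovImm16H - merges into [31:16], hence REG_USE0
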
@@ -514,7 +529,7 @@
int mod_imm = ModifiedImmediate(value);
LIR* res;
if (mod_imm >= 0) {
- res = NewLIR2(cu, kThumb2CmpRI8, r_src1, mod_imm);
+ res = NewLIR2(cu, kThumb2CmpRI12, r_src1, mod_imm);
} else {
int r_tmp = AllocTemp(cu);
res = LoadConstant(cu, r_tmp, value);
@@ -587,44 +602,11 @@
}
}
-/*
- * Determine whether value can be encoded as a Thumb2 floating point
- * immediate. If not, return -1. If so return encoded 8-bit value.
- */
-static int EncodeImmDoubleHigh(int value)
+LIR* ArmCodegen::LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value)
{
- int res;
- int bit_a = (value & 0x80000000) >> 31;
- int not_bit_b = (value & 0x40000000) >> 30;
- int bit_b = (value & 0x20000000) >> 29;
- int b_smear = (value & 0x3fc00000) >> 22;
- int slice = (value & 0x003f0000) >> 16;
- int zeroes = (value & 0x0000ffff);
- if (zeroes != 0)
- return -1;
- if (bit_b) {
- if ((not_bit_b != 0) || (b_smear != 0xff))
- return -1;
- } else {
- if ((not_bit_b != 1) || (b_smear != 0x0))
- return -1;
- }
- res = (bit_a << 7) | (bit_b << 6) | slice;
- return res;
-}
-
-static int EncodeImmDouble(int val_lo, int val_hi)
-{
- int res = -1;
- if (val_lo == 0)
- res = EncodeImmDoubleHigh(val_hi);
- return res;
-}
-
-LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi)
-{
- LIR* res;
+ LIR* res = NULL;
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
int target_reg = S2d(r_dest_lo, r_dest_hi);
if (ARM_FPREG(r_dest_lo)) {
if ((val_lo == 0) && (val_hi == 0)) {
@@ -635,26 +617,33 @@
// +0.0 = +2.0 - +2.0
res = NewLIR3(cu, kThumb2Vsubd, target_reg, target_reg, target_reg);
} else {
- int encoded_imm = EncodeImmDouble(val_lo, val_hi);
+ int encoded_imm = EncodeImmDouble(value);
if (encoded_imm >= 0) {
res = NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, encoded_imm);
- } else {
- LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
- if (data_target == NULL) {
- data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
- }
- LIR* load_pc_rel =
- RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
- target_reg, r15pc, 0, 0, 0, data_target);
- SetMemRefType(cu, load_pc_rel, true, kLiteral);
- load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
- AppendLIR(cu, load_pc_rel);
- res = load_pc_rel;
}
}
} else {
- res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
- LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+ if (InexpensiveConstantInt(val_lo) && InexpensiveConstantInt(val_hi)) {
+ res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
+ LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+ }
+ }
+ if (res == NULL) {
+ // No short form - load from the literal pool.
+ LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
+ if (data_target == NULL) {
+ data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
+ }
+ if (ARM_FPREG(r_dest_lo)) {
+ res = RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
+ target_reg, r15pc, 0, 0, 0, data_target);
+ } else {
+ res = RawLIR(cu, cu->current_dalvik_offset, kThumb2LdrdPcRel8,
+ r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target);
+ }
+ SetMemRefType(cu, res, true, kLiteral);
+ res->alias_info = reinterpret_cast<uintptr_t>(data_target);
+ AppendLIR(cu, res);
}
return res;
}
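Net effect of the restructured fallback (values hypothetical): an "expensive" core-register pair constant now costs one 8-byte literal plus a single pc-relative load, where it previously took two independent 32-bit materializations:

    //   LoadConstantWide(cu, r2, r3, 0x123456789abcdef0LL);
    // emits, against a wide literal-pool entry:
    //   ldrd r2, r3, [pc, #<disp>]   // kThumb2LdrdPcRel8, rewritten at assembly
    //                                // time if <disp> outgrows the 1020-byte reach
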
@@ -732,7 +721,7 @@
int scale, OpSize size)
{
bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_src);
- LIR* store;
+ LIR* store = NULL;
ArmOpcode opcode = kThumbBkpt;
bool thumb_form = (all_low_regs && (scale == 0));
int reg_ptr;
@@ -798,14 +787,14 @@
int r_dest_hi, OpSize size, int s_reg)
{
Codegen* cg = cu->cg.get();
- LIR* res;
- LIR* load;
+ LIR* load = NULL;
ArmOpcode opcode = kThumbBkpt;
bool short_form = false;
bool thumb2Form = (displacement < 4092 && displacement >= 0);
bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_dest));
int encoded_disp = displacement;
bool is64bit = false;
+ bool already_generated = false;
switch (size) {
case kDouble:
case kLong:
@@ -822,11 +811,15 @@
}
break;
} else {
- res = LoadBaseDispBody(cu, rBase, displacement, r_dest,
- -1, kWord, s_reg);
- LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
- -1, kWord, INVALID_SREG);
- return res;
+ if (displacement <= 1020) {
+ load = NewLIR4(cu, kThumb2LdrdI8, r_dest, r_dest_hi, rBase, displacement >> 2);
+ } else {
+ load = LoadBaseDispBody(cu, rBase, displacement, r_dest,
+ -1, kWord, s_reg);
+ LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
+ -1, kWord, INVALID_SREG);
+ }
+ already_generated = true;
+ }
+ break;
case kSingle:
case kWord:
@@ -894,13 +887,15 @@
LOG(FATAL) << "Bad size: " << size;
}
- if (short_form) {
- load = res = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
- } else {
- int reg_offset = AllocTemp(cu);
- res = cg->LoadConstant(cu, reg_offset, encoded_disp);
- load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
- FreeTemp(cu, reg_offset);
+ if (!already_generated) {
+ if (short_form) {
+ load = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
+ } else {
+ int reg_offset = AllocTemp(cu);
+ cg->LoadConstant(cu, reg_offset, encoded_disp);
+ load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
+ FreeTemp(cu, reg_offset);
+ }
}
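For example (hypothetical frame slot): loading a long from SP+8 satisfies displacement <= 1020, so a single "ldrd r_dest, r_dest_hi, [sp, #8]" (encoded imm8 = 8 >> 2 = 2) replaces the previous pair of word loads; only displacements past 1020 still split into two accesses.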
// TODO: in future may need to differentiate Dalvik accesses w/ spills
@@ -926,30 +921,36 @@
LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacement,
int r_src, int r_src_hi, OpSize size) {
Codegen* cg = cu->cg.get();
- LIR* res, *store;
+ LIR* store = NULL;
ArmOpcode opcode = kThumbBkpt;
bool short_form = false;
bool thumb2Form = (displacement < 4092 && displacement >= 0);
bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_src));
int encoded_disp = displacement;
bool is64bit = false;
+ bool already_generated = false;
switch (size) {
case kLong:
case kDouble:
is64bit = true;
if (!ARM_FPREG(r_src)) {
- res = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
- StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
- return res;
- }
- if (ARM_SINGLEREG(r_src)) {
- DCHECK(ARM_FPREG(r_src_hi));
- r_src = cg->S2d(r_src, r_src_hi);
- }
- opcode = kThumb2Vstrd;
- if (displacement <= 1020) {
- short_form = true;
- encoded_disp >>= 2;
+ if (displacement <= 1020) {
+ store = NewLIR4(cu, kThumb2StrdI8, r_src, r_src_hi, rBase, displacement >> 2);
+ } else {
+ store = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
+ StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
+ }
+ already_generated = true;
+ } else {
+ if (ARM_SINGLEREG(r_src)) {
+ DCHECK(ARM_FPREG(r_src_hi));
+ r_src = cg->S2d(r_src, r_src_hi);
+ }
+ opcode = kThumb2Vstrd;
+ if (displacement <= 1020) {
+ short_form = true;
+ encoded_disp >>= 2;
+ }
}
break;
case kSingle:
@@ -998,20 +999,22 @@
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (short_form) {
- store = res = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
- } else {
- int r_scratch = AllocTemp(cu);
- res = cg->LoadConstant(cu, r_scratch, encoded_disp);
- store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
- FreeTemp(cu, r_scratch);
+ if (!already_generated) {
+ if (short_form) {
+ store = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
+ } else {
+ int r_scratch = AllocTemp(cu);
+ cg->LoadConstant(cu, r_scratch, encoded_disp);
+ store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
+ FreeTemp(cu, r_scratch);
+ }
}
// TODO: In future, may need to differentiate Dalvik & spill accesses
if (rBase == rARM_SP) {
AnnotateDalvikRegAccess(cu, store, displacement >> 2, false /* is_load */, is64bit);
}
- return res;
+ return store;
}
LIR* ArmCodegen::StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
diff --git a/src/compiler/codegen/codegen.h b/src/compiler/codegen/codegen.h
index 03ecb43..901e5da 100644
--- a/src/compiler/codegen/codegen.h
+++ b/src/compiler/codegen/codegen.h
@@ -236,8 +236,8 @@
int displacement, int r_dest, int r_dest_hi, OpSize size,
int s_reg) = 0;
virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value) = 0;
- virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi) = 0;
+ virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
+ int64_t value) = 0;
virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
OpSize size) = 0;
virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -288,6 +288,10 @@
virtual bool IsUnconditionalBranch(LIR* lir) = 0;
// Required for target - Dalvik-level generators.
+ virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_src2) = 0;
+ virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2) = 0;
virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2) = 0;
virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -349,6 +353,9 @@
RegLocation rl_index, RegLocation rl_dest, int scale) = 0;
virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale) = 0;
+ virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_shift) = 0;
// Required for target - single operation generators.
virtual LIR* OpUnconditionalBranch(CompilationUnit* cu, LIR* target) = 0;
@@ -381,7 +388,10 @@
virtual void OpRegCopyWide(CompilationUnit* cu, int dest_lo, int dest_hi, int src_lo,
int src_hi) = 0;
virtual void OpTlsCmp(CompilationUnit* cu, int offset, int val) = 0;
- virtual bool InexpensiveConstant(int reg, int value) = 0;
+ virtual bool InexpensiveConstantInt(int32_t value) = 0;
+ virtual bool InexpensiveConstantFloat(int32_t value) = 0;
+ virtual bool InexpensiveConstantLong(int64_t value) = 0;
+ virtual bool InexpensiveConstantDouble(int64_t value) = 0;
// Temp workaround
void Workaround7250540(CompilationUnit* cu, RegLocation rl_dest, int value);
diff --git a/src/compiler/codegen/codegen_util.cc b/src/compiler/codegen/codegen_util.cc
index ad05b93..57d932f 100644
--- a/src/compiler/codegen/codegen_util.cc
+++ b/src/compiler/codegen/codegen_util.cc
@@ -23,6 +23,27 @@
namespace art {
+bool IsInexpensiveConstant(CompilationUnit* cu, RegLocation rl_src)
+{
+ bool res = false;
+ if (rl_src.is_const) {
+ if (rl_src.wide) {
+ if (rl_src.fp) {
+ res = cu->cg->InexpensiveConstantDouble(ConstantValueWide(cu, rl_src));
+ } else {
+ res = cu->cg->InexpensiveConstantLong(ConstantValueWide(cu, rl_src));
+ }
+ } else {
+ if (rl_src.fp) {
+ res = cu->cg->InexpensiveConstantFloat(ConstantValue(cu, rl_src));
+ } else {
+ res = cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src));
+ }
+ }
+ }
+ return res;
+}
+
void MarkSafepointPC(CompilationUnit* cu, LIR* inst)
{
inst->def_mask = ENCODE_ALL;
@@ -202,6 +223,9 @@
LOG(INFO) << "-------- entry offset: 0x" << std::hex << dest;
break;
case kPseudoDalvikByteCodeBoundary:
+ if (lir->operands[0] == 0) {
+ lir->operands[0] = reinterpret_cast<uintptr_t>("No instruction string");
+ }
LOG(INFO) << "-------- dalvik offset: 0x" << std::hex
<< lir->dalvik_offset << " @ " << reinterpret_cast<char*>(lir->operands[0]);
break;
@@ -471,6 +495,8 @@
LIR* lo_target = NULL;
while (data_target) {
if (lo_match && (data_target->operands[0] == val_hi)) {
+ // Record high word in case we need to expand this later.
+ lo_target->operands[1] = val_hi;
return lo_target;
}
lo_match = false;
@@ -488,7 +514,7 @@
* instruction streams.
*/
-/* Add a 32-bit constant either in the constant pool */
+/* Add a 32-bit constant to the constant pool */
LIR* AddWordData(CompilationUnit* cu, LIR* *constant_list_p, int value)
{
/* Add the constant to the literal pool */
@@ -1097,4 +1123,21 @@
return is_taken;
}
+// Convert relation of src1/src2 to src2/src1
+ConditionCode FlipComparisonOrder(ConditionCode before) {
+ ConditionCode res;
+ switch (before) {
+ case kCondEq: res = kCondEq; break;
+ case kCondNe: res = kCondNe; break;
+ case kCondLt: res = kCondGt; break;
+ case kCondGt: res = kCondLt; break;
+ case kCondLe: res = kCondGe; break;
+ case kCondGe: res = kCondLe; break;
+ default:
+ res = static_cast<ConditionCode>(0);
+ LOG(FATAL) << "Unexpected ccode " << before;
+ }
+ return res;
+}
+
} // namespace art
diff --git a/src/compiler/codegen/codegen_util.h b/src/compiler/codegen/codegen_util.h
index 4f14656..9b9bece 100644
--- a/src/compiler/codegen/codegen_util.h
+++ b/src/compiler/codegen/codegen_util.h
@@ -20,6 +20,7 @@
#include <stdint.h>
#include "compiler/compiler_enums.h"
+#include "compiler/compiler_ir.h"
namespace art {
@@ -59,6 +60,8 @@
LIR* MarkBoundary(CompilationUnit* cu, int offset, const char* inst_str);
void NopLIR(LIR* lir);
bool EvaluateBranch(Instruction::Code opcode, int src1, int src2);
+bool IsInexpensiveConstant(CompilationUnit* cu, RegLocation rl_src);
+ConditionCode FlipComparisonOrder(ConditionCode before);
} // namespace art
diff --git a/src/compiler/codegen/gen_common.cc b/src/compiler/codegen/gen_common.cc
index 1d64a71..a4c8d0c 100644
--- a/src/compiler/codegen/gen_common.cc
+++ b/src/compiler/codegen/gen_common.cc
@@ -55,7 +55,7 @@
ThrowKind kind)
{
LIR* tgt = RawLIR(cu, 0, kPseudoThrowTarget, kind,
- cu->current_dalvik_offset);
+ cu->current_dalvik_offset, reg, imm_val);
LIR* branch;
if (c_code == kCondAl) {
branch = OpUnconditionalBranch(cu, tgt);
@@ -89,23 +89,6 @@
return branch;
}
-// Convert relation of src1/src2 to src2/src1
-ConditionCode FlipComparisonOrder(ConditionCode before) {
- ConditionCode res;
- switch (before) {
- case kCondEq: res = kCondEq; break;
- case kCondNe: res = kCondNe; break;
- case kCondLt: res = kCondGt; break;
- case kCondGt: res = kCondLt; break;
- case kCondLe: res = kCondGe; break;
- case kCondGe: res = kCondLe; break;
- default:
- res = static_cast<ConditionCode>(0);
- LOG(FATAL) << "Unexpected ccode " << before;
- }
- return res;
-}
-
void Codegen::GenCompareAndBranch(CompilationUnit* cu, Instruction::Code opcode,
RegLocation rl_src1, RegLocation rl_src2, LIR* taken,
LIR* fall_through)
@@ -146,12 +129,12 @@
rl_src1 = LoadValue(cu, rl_src1, kCoreReg);
// Is this really an immediate comparison?
if (rl_src2.is_const) {
- int immval = cu->constant_values[rl_src2.orig_sreg];
// If it's already live in a register or not easily materialized, just keep going
RegLocation rl_temp = UpdateLoc(cu, rl_src2);
- if ((rl_temp.location == kLocDalvikFrame) && InexpensiveConstant(rl_src1.low_reg, immval)) {
+ if ((rl_temp.location == kLocDalvikFrame) &&
+ InexpensiveConstantInt(ConstantValue(cu, rl_src2))) {
// OK - convert this to a compare immediate and branch
- OpCmpImmBranch(cu, cond, rl_src1.low_reg, immval, taken);
+ OpCmpImmBranch(cu, cond, rl_src1.low_reg, ConstantValue(cu, rl_src2), taken);
OpUnconditionalBranch(cu, fall_through);
return;
}
@@ -614,6 +597,18 @@
case kThrowNullPointer:
func_offset = ENTRYPOINT_OFFSET(pThrowNullPointerFromCode);
break;
+ case kThrowConstantArrayBounds: // v1 holds the length (the array base on x86); v2 is the constant index.
+ // Arm/Mips pass the length register directly in v1; x86 reloads the length from the array object.
+ if (target_x86) {
+ OpRegMem(cu, kOpMov, TargetReg(kArg1), v1, mirror::Array::LengthOffset().Int32Value());
+ } else {
+ OpRegCopy(cu, TargetReg(kArg1), v1);
+ }
+ // Make sure the following LoadConstant doesn't mess with kArg1.
+ LockTemp(cu, TargetReg(kArg1));
+ LoadConstant(cu, TargetReg(kArg0), v2);
+ func_offset = ENTRYPOINT_OFFSET(pThrowArrayBoundsFromCode);
+ break;
case kThrowArrayBounds:
// Move v1 (array index) to kArg0 and v2 (array length) to kArg1
if (v2 != TargetReg(kArg0)) {
@@ -1602,9 +1597,14 @@
break;
case Instruction::MUL_LONG:
case Instruction::MUL_LONG_2ADDR:
- call_out = true;
- ret_reg = TargetReg(kRet0);
- func_offset = ENTRYPOINT_OFFSET(pLmul);
+ if (cu->instruction_set == kThumb2) {
+ GenMulLong(cu, rl_dest, rl_src1, rl_src2);
+ return false;
+ } else {
+ call_out = true;
+ ret_reg = TargetReg(kRet0);
+ func_offset = ENTRYPOINT_OFFSET(pLmul);
+ }
break;
case Instruction::DIV_LONG:
case Instruction::DIV_LONG_2ADDR:
diff --git a/src/compiler/codegen/gen_loadstore.cc b/src/compiler/codegen/gen_loadstore.cc
index b183f9e..c8f9c51 100644
--- a/src/compiler/codegen/gen_loadstore.cc
+++ b/src/compiler/codegen/gen_loadstore.cc
@@ -59,12 +59,20 @@
return;
}
}
+ int temp_reg = zero_reg;
+ if (temp_reg == INVALID_REG) {
+ temp_reg = AllocTemp(cu);
+ cu->cg->LoadConstant(cu, temp_reg, 0);
+ }
if (cu->promotion_map[pmap_index].core_location == kLocPhysReg) {
// Promoted - just copy in a zero
- OpRegCopy(cu, cu->promotion_map[pmap_index].core_reg, zero_reg);
+ OpRegCopy(cu, cu->promotion_map[pmap_index].core_reg, temp_reg);
} else {
// Lives in the frame, need to store.
- StoreBaseDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_dest.s_reg_low), zero_reg, kWord);
+ StoreBaseDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_dest.s_reg_low), temp_reg, kWord);
+ }
+ if (zero_reg == INVALID_REG) {
+ FreeTemp(cu, temp_reg);
}
}
}
@@ -92,14 +100,12 @@
rl_src = UpdateLoc(cu, rl_src);
if (rl_src.location == kLocPhysReg) {
OpRegCopy(cu, r_dest, rl_src.low_reg);
+ } else if (IsInexpensiveConstant(cu, rl_src)) {
+ LoadConstantNoClobber(cu, r_dest, ConstantValue(cu, rl_src));
} else {
DCHECK((rl_src.location == kLocDalvikFrame) ||
(rl_src.location == kLocCompilerTemp));
- if (rl_src.is_const && InexpensiveConstant(r_dest, cu->constant_values[rl_src.orig_sreg])) {
- LoadConstantNoClobber(cu, r_dest, cu->constant_values[rl_src.orig_sreg]);
- } else {
- LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest);
- }
+ LoadWordDisp(cu, TargetReg(kSp), SRegOffset(cu, rl_src.s_reg_low), r_dest);
}
}
@@ -126,6 +132,8 @@
rl_src = UpdateLocWide(cu, rl_src);
if (rl_src.location == kLocPhysReg) {
OpRegCopyWide(cu, reg_lo, reg_hi, rl_src.low_reg, rl_src.high_reg);
+ } else if (IsInexpensiveConstant(cu, rl_src)) {
+ LoadConstantWide(cu, reg_lo, reg_hi, ConstantValueWide(cu, rl_src));
} else {
DCHECK((rl_src.location == kLocDalvikFrame) ||
(rl_src.location == kLocCompilerTemp));
@@ -152,9 +160,7 @@
RegLocation Codegen::LoadValue(CompilationUnit* cu, RegLocation rl_src, RegisterClass op_kind)
{
rl_src = EvalLoc(cu, rl_src, op_kind, false);
- if (rl_src.location != kLocPhysReg) {
- DCHECK((rl_src.location == kLocDalvikFrame) ||
- (rl_src.location == kLocCompilerTemp));
+ if (IsInexpensiveConstant(cu, rl_src) || rl_src.location != kLocPhysReg) {
LoadValueDirect(cu, rl_src, rl_src.low_reg);
rl_src.location = kLocPhysReg;
MarkLive(cu, rl_src.low_reg, rl_src.s_reg_low);
@@ -222,14 +228,11 @@
{
DCHECK(rl_src.wide);
rl_src = EvalLoc(cu, rl_src, op_kind, false);
- if (rl_src.location != kLocPhysReg) {
- DCHECK((rl_src.location == kLocDalvikFrame) ||
- (rl_src.location == kLocCompilerTemp));
+ if (IsInexpensiveConstant(cu, rl_src) || rl_src.location != kLocPhysReg) {
LoadValueDirectWide(cu, rl_src, rl_src.low_reg, rl_src.high_reg);
rl_src.location = kLocPhysReg;
MarkLive(cu, rl_src.low_reg, rl_src.s_reg_low);
- MarkLive(cu, rl_src.high_reg,
- GetSRegHi(rl_src.s_reg_low));
+ MarkLive(cu, rl_src.high_reg, GetSRegHi(rl_src.s_reg_low));
}
return rl_src;
}
diff --git a/src/compiler/codegen/local_optimizations.cc b/src/compiler/codegen/local_optimizations.cc
index b6981ca..2b86421 100644
--- a/src/compiler/codegen/local_optimizations.cc
+++ b/src/compiler/codegen/local_optimizations.cc
@@ -81,13 +81,20 @@
if (head_lir == tail_lir) return;
for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) {
+
+ if (is_pseudo_opcode(this_lir->opcode)) continue;
+
int sink_distance = 0;
+ uint64_t target_flags = cg->GetTargetInstFlags(this_lir->opcode);
+
/* Skip non-interesting instructions */
if ((this_lir->flags.is_nop == true) ||
- is_pseudo_opcode(this_lir->opcode) ||
- (cg->GetTargetInstFlags(this_lir->opcode) & IS_BRANCH) ||
- !(cg->GetTargetInstFlags(this_lir->opcode) & (IS_LOAD | IS_STORE))) {
+ (target_flags & IS_BRANCH) ||
+ ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || // Skip wide loads.
+ ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
+ (REG_USE0 | REG_USE1 | REG_USE2)) || // Skip wide stores.
+ !(target_flags & (IS_LOAD | IS_STORE))) {
continue;
}
@@ -130,7 +137,7 @@
* Skip already dead instructions (whose dataflow information is
* outdated and misleading).
*/
- if (check_lir->flags.is_nop) continue;
+ if (check_lir->flags.is_nop || is_pseudo_opcode(check_lir->opcode)) continue;
uint64_t check_mem_mask = (check_lir->use_mask | check_lir->def_mask) & ENCODE_MEM;
uint64_t alias_condition = this_mem_mask & check_mem_mask;
@@ -139,14 +146,18 @@
/*
* Potential aliases seen - check the alias relations
*/
- if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
- bool is_check_lir_load = cg->GetTargetInstFlags(check_lir->opcode) & IS_LOAD;
+ uint64_t check_flags = cg->GetTargetInstFlags(check_lir->opcode);
+ // TUNING: Support instructions with multiple register targets.
+ if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) {
+ stop_here = true;
+ } else if (check_mem_mask != ENCODE_MEM && alias_condition != 0) {
+ bool is_check_lir_load = check_flags & IS_LOAD;
if (alias_condition == ENCODE_LITERAL) {
/*
* Should only see literal loads in the instruction
* stream.
*/
- DCHECK(!(cg->GetTargetInstFlags(check_lir->opcode) & IS_STORE));
+ DCHECK(!(check_flags & IS_STORE));
/* Same value && same register type */
if (check_lir->alias_info == this_lir->alias_info &&
cg->SameRegType(check_lir->operands[0], native_reg_id)) {
@@ -276,10 +287,13 @@
/* Start from the second instruction */
for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) {
+ if (is_pseudo_opcode(this_lir->opcode)) continue;
+
+ uint64_t target_flags = cg->GetTargetInstFlags(this_lir->opcode);
/* Skip non-interesting instructions */
if ((this_lir->flags.is_nop == true) ||
- is_pseudo_opcode(this_lir->opcode) ||
- !(cg->GetTargetInstFlags(this_lir->opcode) & IS_LOAD)) {
+ ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
+ !(target_flags & IS_LOAD)) {
continue;
}
diff --git a/src/compiler/codegen/mips/codegen_mips.h b/src/compiler/codegen/mips/codegen_mips.h
index 705ecfa..a4d44d5 100644
--- a/src/compiler/codegen/mips/codegen_mips.h
+++ b/src/compiler/codegen/mips/codegen_mips.h
@@ -38,8 +38,7 @@
int displacement, int r_dest, int r_dest_hi, OpSize size,
int s_reg);
virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
- virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi);
+ virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
OpSize size);
virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,12 +89,18 @@
virtual bool IsUnconditionalBranch(LIR* lir);
// Required for target - Dalvik-level generators.
+ virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_src2);
virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale);
virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_dest, int scale);
virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale);
+ virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+ virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2);
virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2);
virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -191,7 +196,10 @@
void SpillCoreRegs(CompilationUnit* cu);
void UnSpillCoreRegs(CompilationUnit* cu);
static const MipsEncodingMap EncodingMap[kMipsLast];
- bool InexpensiveConstant(int reg, int value);
+ bool InexpensiveConstantInt(int32_t value);
+ bool InexpensiveConstantFloat(int32_t value);
+ bool InexpensiveConstantLong(int64_t value);
+ bool InexpensiveConstantDouble(int64_t value);
};
} // namespace art
diff --git a/src/compiler/codegen/mips/int_mips.cc b/src/compiler/codegen/mips/int_mips.cc
index 7da4cf6..675cf8d 100644
--- a/src/compiler/codegen/mips/int_mips.cc
+++ b/src/compiler/codegen/mips/int_mips.cc
@@ -341,6 +341,13 @@
return NULL;
}
+void MipsCodegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2)
+{
+ LOG(FATAL) << "Unexpected use of GenMulLong for Mips";
+ return;
+}
+
bool MipsCodegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
{
@@ -635,4 +642,18 @@
MarkGCCard(cu, r_value, r_array);
}
+bool MipsCodegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_shift)
+{
+ // Default implementation is just to ignore the constant case.
+ return GenShiftOpLong(cu, opcode, rl_dest, rl_src1, rl_shift);
+}
+
+bool MipsCodegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+ // Default - bail to non-const handler.
+ return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+}
+
} // namespace art
diff --git a/src/compiler/codegen/mips/utility_mips.cc b/src/compiler/codegen/mips/utility_mips.cc
index 1e217fb..12d054c 100644
--- a/src/compiler/codegen/mips/utility_mips.cc
+++ b/src/compiler/codegen/mips/utility_mips.cc
@@ -52,17 +52,24 @@
return res;
}
-bool MipsCodegen::InexpensiveConstant(int reg, int value)
+bool MipsCodegen::InexpensiveConstantInt(int32_t value)
{
- bool res = false;
- if (value == 0) {
- res = true;
- } else if (IsUint(16, value)) {
- res = true;
- } else if ((value < 0) && (value >= -32768)) {
- res = true;
- }
- return res;
+ return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768)));
+}
+
+bool MipsCodegen::InexpensiveConstantFloat(int32_t value)
+{
+ return false; // TUNING
+}
+
+bool MipsCodegen::InexpensiveConstantLong(int64_t value)
+{
+ return false; // TUNING
+}
+
+bool MipsCodegen::InexpensiveConstantDouble(int64_t value)
+{
+ return false; // TUNING
}
/*
@@ -336,12 +343,11 @@
return NewLIR2(cu, opcode, r_dest_src1, r_src2);
}
-LIR* MipsCodegen::LoadConstantValueWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi)
+LIR* MipsCodegen::LoadConstantWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, int64_t value)
{
LIR *res;
- res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
- LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+ res = LoadConstantNoClobber(cu, r_dest_lo, Low32Bits(value));
+ LoadConstantNoClobber(cu, r_dest_hi, High32Bits(value));
return res;
}
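The collapsed signature moves the low/high split from every caller into the callee, using the Low32Bits/High32Bits helpers whose new unsigned forms appear in the src/utils.h hunk at the end of this change. A minimal check of the split itself:

    #include <cassert>
    #include <cstdint>

    static inline uint32_t Low32Bits(uint64_t value) { return static_cast<uint32_t>(value); }
    static inline uint32_t High32Bits(uint64_t value) { return static_cast<uint32_t>(value >> 32); }

    int main() {
      int64_t value = INT64_C(-2);                // 0xfffffffffffffffe
      assert(Low32Bits(value) == 0xfffffffeu);    // what lands in r_dest_lo
      assert(High32Bits(value) == 0xffffffffu);   // what lands in r_dest_hi
      return 0;
    }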
diff --git a/src/compiler/codegen/mir_to_gbc.cc b/src/compiler/codegen/mir_to_gbc.cc
index f67f760..ba90269 100644
--- a/src/compiler/codegen/mir_to_gbc.cc
+++ b/src/compiler/codegen/mir_to_gbc.cc
@@ -1018,7 +1018,7 @@
}
EmitPopShadowFrame(cu);
cu->irb->CreateRet(GetLLVMValue(cu, rl_src[0].orig_sreg));
- bb->has_return = true;
+ DCHECK(bb->has_return);
}
break;
@@ -1028,7 +1028,7 @@
}
EmitPopShadowFrame(cu);
cu->irb->CreateRetVoid();
- bb->has_return = true;
+ DCHECK(bb->has_return);
}
break;
@@ -2572,8 +2572,7 @@
RegLocation rl_dest = GetLoc(cu, call_inst);
RegLocation rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
if (rl_dest.wide) {
- cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg,
- (immval) & 0xffffffff, (immval >> 32) & 0xffffffff);
+ cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, immval);
cg->StoreValueWide(cu, rl_dest, rl_result);
} else {
int immediate = immval & 0xffffffff;
diff --git a/src/compiler/codegen/mir_to_lir.cc b/src/compiler/codegen/mir_to_lir.cc
index bd26f2d..96de65e 100644
--- a/src/compiler/codegen/mir_to_lir.cc
+++ b/src/compiler/codegen/mir_to_lir.cc
@@ -164,23 +164,21 @@
case Instruction::CONST_WIDE_16:
case Instruction::CONST_WIDE_32:
rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
- cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg, vB,
- (vB & 0x80000000) ? -1 : 0);
+ cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg,
+ static_cast<int64_t>(static_cast<int32_t>(vB)));
cg->StoreValueWide(cu, rl_dest, rl_result);
break;
case Instruction::CONST_WIDE:
rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
- cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg,
- mir->dalvikInsn.vB_wide & 0xffffffff,
- (mir->dalvikInsn.vB_wide >> 32) & 0xffffffff);
+ cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide);
cg->StoreValueWide(cu, rl_dest, rl_result);
break;
case Instruction::CONST_WIDE_HIGH16:
rl_result = EvalLoc(cu, rl_dest, kAnyReg, true);
- cg->LoadConstantValueWide(cu, rl_result.low_reg, rl_result.high_reg,
- 0, vB << 16);
+ cg->LoadConstantWide(cu, rl_result.low_reg, rl_result.high_reg,
+ static_cast<int64_t>(vB) << 48);
cg->StoreValueWide(cu, rl_dest, rl_result);
break;
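The casts here carry the sign handling that the old explicit (vB & 0x80000000) test and the old (0, vB << 16) pair encoded by hand. A standalone illustration of both conversions, with vB standing in for the raw 32-bit operand:

    #include <cassert>
    #include <cstdint>

    int main() {
      // CONST_WIDE_16/32: cast to int32_t first so the widening sign-extends.
      uint32_t vB = 0xfffffff0u;  // 32-bit encoding of -16
      int64_t wide = static_cast<int64_t>(static_cast<int32_t>(vB));
      assert(wide == -16);
      // CONST_WIDE_HIGH16: the 16-bit payload lands in bits [63..48].
      int64_t high = static_cast<int64_t>(UINT32_C(0x3ff0)) << 48;
      assert(high == INT64_C(0x3ff0000000000000));  // bit pattern of 1.0 as a double
      return 0;
    }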
@@ -543,11 +541,11 @@
case Instruction::XOR_INT:
case Instruction::XOR_INT_2ADDR:
if (rl_src[0].is_const &&
- cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[0].orig_sreg])) {
+ cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[0]))) {
cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[1],
cu->constant_values[rl_src[0].orig_sreg]);
} else if (rl_src[1].is_const &&
- cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) {
+ cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[1]))) {
cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0],
cu->constant_values[rl_src[1].orig_sreg]);
} else {
@@ -568,9 +566,8 @@
case Instruction::USHR_INT:
case Instruction::USHR_INT_2ADDR:
if (rl_src[1].is_const &&
- cu->cg->InexpensiveConstant(0, cu->constant_values[rl_src[1].orig_sreg])) {
- cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0],
- cu->constant_values[rl_src[1].orig_sreg]);
+ cu->cg->InexpensiveConstantInt(ConstantValue(cu, rl_src[1]))) {
+ cg->GenArithOpIntLit(cu, opcode, rl_dest, rl_src[0], ConstantValue(cu, rl_src[1]));
} else {
cg->GenArithOpInt(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
}
@@ -578,20 +575,26 @@
case Instruction::ADD_LONG:
case Instruction::SUB_LONG:
- case Instruction::MUL_LONG:
- case Instruction::DIV_LONG:
- case Instruction::REM_LONG:
case Instruction::AND_LONG:
case Instruction::OR_LONG:
case Instruction::XOR_LONG:
case Instruction::ADD_LONG_2ADDR:
case Instruction::SUB_LONG_2ADDR:
- case Instruction::MUL_LONG_2ADDR:
- case Instruction::DIV_LONG_2ADDR:
- case Instruction::REM_LONG_2ADDR:
case Instruction::AND_LONG_2ADDR:
case Instruction::OR_LONG_2ADDR:
case Instruction::XOR_LONG_2ADDR:
+ if (rl_src[0].is_const || rl_src[1].is_const) {
+ cg->GenArithImmOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+ break;
+ }
+ // Note: intentional fallthrough.
+
+ case Instruction::MUL_LONG:
+ case Instruction::DIV_LONG:
+ case Instruction::REM_LONG:
+ case Instruction::MUL_LONG_2ADDR:
+ case Instruction::DIV_LONG_2ADDR:
+ case Instruction::REM_LONG_2ADDR:
cg->GenArithOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
break;
@@ -601,7 +604,11 @@
case Instruction::SHL_LONG_2ADDR:
case Instruction::SHR_LONG_2ADDR:
case Instruction::USHR_LONG_2ADDR:
- cg->GenShiftOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+ if (rl_src[1].is_const) {
+ cg->GenShiftImmOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+ } else {
+ cg->GenShiftOpLong(cu, opcode, rl_dest, rl_src[0], rl_src[1]);
+ }
break;
case Instruction::ADD_FLOAT:
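The new dispatch only pays off on targets that override GenShiftImmOpLong, but the win is easy to see at the C level: with the shift amount a compile-time constant, only one arm of the sub-32/over-32 split has to be emitted. A sketch of that split for a left shift over a 32-bit register pair (illustrative only, not the actual ARM lowering):

    #include <cstdint>

    // Shift the 64-bit value held as {lo, hi} left by a known amount (0..63).
    // When 'shift' is constant, exactly one branch survives; a runtime shift
    // amount needs the whole cascade.
    static void ShlLongImm(uint32_t* lo, uint32_t* hi, int shift) {
      if (shift == 0) {
        // Nothing to do.
      } else if (shift < 32) {
        *hi = (*hi << shift) | (*lo >> (32 - shift));
        *lo <<= shift;
      } else {  // 32 <= shift < 64
        *hi = *lo << (shift - 32);
        *lo = 0;
      }
    }

    int main() {
      uint32_t lo = 0x80000001u, hi = 0;
      ShlLongImm(&lo, &hi, 1);
      return (lo == 2u && hi == 1u) ? 0 : 1;
    }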
diff --git a/src/compiler/codegen/ralloc_util.cc b/src/compiler/codegen/ralloc_util.cc
index afd4976..1d5f3ac 100644
--- a/src/compiler/codegen/ralloc_util.cc
+++ b/src/compiler/codegen/ralloc_util.cc
@@ -64,7 +64,7 @@
}
}
-static void DumpRegPool(RegisterInfo* p, int num_regs)
+void DumpRegPool(RegisterInfo* p, int num_regs)
{
LOG(INFO) << "================================================";
for (int i = 0; i < num_regs; i++) {
@@ -1091,21 +1091,14 @@
RegLocation loc = cu->reg_location[i];
RefCounts* counts = loc.fp ? fp_counts : core_counts;
int p_map_idx = SRegToPMap(cu, loc.s_reg_low);
- int sample_reg = loc.fp ? cu->reg_pool->FPRegs[0].reg : cu->reg_pool->core_regs[0].reg;
- bool simple_immediate = loc.is_const &&
- !cu->cg->InexpensiveConstant(sample_reg, cu->constant_values[loc.orig_sreg]);
- if (loc.defined) {
- // Don't count easily regenerated immediates
- if (!simple_immediate) {
- counts[p_map_idx].count += cu->use_counts.elem_list[i];
- }
+ // Don't count easily regenerated immediates
+ if (loc.fp || loc.wide || !IsInexpensiveConstant(cu, loc)) {
+ counts[p_map_idx].count += cu->use_counts.elem_list[i];
}
if (loc.wide) {
- if (loc.defined) {
- if (loc.fp && !simple_immediate) {
- counts[p_map_idx].double_start = true;
- counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1];
- }
+ if (loc.fp) {
+ counts[p_map_idx].double_start = true;
+ counts[p_map_idx+1].count += cu->use_counts.elem_list[i+1];
}
i += 2;
} else {
diff --git a/src/compiler/codegen/ralloc_util.h b/src/compiler/codegen/ralloc_util.h
index a5ed999..67c22b5 100644
--- a/src/compiler/codegen/ralloc_util.h
+++ b/src/compiler/codegen/ralloc_util.h
@@ -157,6 +157,7 @@
void RecordFpPromotion(CompilationUnit* cu, int reg, int s_reg);
int ComputeFrameSize(CompilationUnit* cu);
int SRegToPMap(CompilationUnit* cu, int s_reg);
+void DumpRegPool(RegisterInfo* p, int num_regs);
} // namespace art
diff --git a/src/compiler/codegen/x86/codegen_x86.h b/src/compiler/codegen/x86/codegen_x86.h
index 141638c..9cc17f1 100644
--- a/src/compiler/codegen/x86/codegen_x86.h
+++ b/src/compiler/codegen/x86/codegen_x86.h
@@ -38,8 +38,7 @@
int displacement, int r_dest, int r_dest_hi, OpSize size,
int s_reg);
virtual LIR* LoadConstantNoClobber(CompilationUnit* cu, int r_dest, int value);
- virtual LIR* LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi);
+ virtual LIR* LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value);
virtual LIR* StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,
OpSize size);
virtual LIR* StoreBaseDispWide(CompilationUnit* cu, int rBase, int displacement, int r_src_lo,
@@ -90,12 +89,18 @@
virtual bool IsUnconditionalBranch(LIR* lir);
// Required for target - Dalvik-level generators.
+ virtual bool GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_src2);
virtual void GenArrayObjPut(CompilationUnit* cu, int opt_flags, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale);
virtual void GenArrayGet(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_dest, int scale);
virtual void GenArrayPut(CompilationUnit* cu, int opt_flags, OpSize size, RegLocation rl_array,
RegLocation rl_index, RegLocation rl_src, int scale);
+ virtual bool GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_shift);
+ virtual void GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2);
virtual bool GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2);
virtual bool GenAndLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
@@ -188,7 +193,10 @@
void SpillCoreRegs(CompilationUnit* cu);
void UnSpillCoreRegs(CompilationUnit* cu);
static const X86EncodingMap EncodingMap[kX86Last];
- bool InexpensiveConstant(int reg, int value);
+ bool InexpensiveConstantInt(int32_t value);
+ bool InexpensiveConstantFloat(int32_t value);
+ bool InexpensiveConstantLong(int64_t value);
+ bool InexpensiveConstantDouble(int64_t value);
};
} // namespace art
diff --git a/src/compiler/codegen/x86/int_x86.cc b/src/compiler/codegen/x86/int_x86.cc
index 0ae51e0..d4a34f7 100644
--- a/src/compiler/codegen/x86/int_x86.cc
+++ b/src/compiler/codegen/x86/int_x86.cc
@@ -322,6 +322,13 @@
LOG(FATAL) << "Unexpected use of OpIT in x86";
return NULL;
}
+
+void X86Codegen::GenMulLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2)
+{
+ LOG(FATAL) << "Unexpected use of GenX86Long for x86";
+ return;
+}
bool X86Codegen::GenAddLong(CompilationUnit* cu, RegLocation rl_dest, RegLocation rl_src1,
RegLocation rl_src2)
{
@@ -583,4 +590,18 @@
MarkGCCard(cu, r_value, r_array);
}
+bool X86Codegen::GenShiftImmOpLong(CompilationUnit* cu, Instruction::Code opcode, RegLocation rl_dest,
+ RegLocation rl_src1, RegLocation rl_shift)
+{
+ // Default implementation is just to ignore the constant case.
+ return GenShiftOpLong(cu, opcode, rl_dest, rl_src1, rl_shift);
+}
+
+bool X86Codegen::GenArithImmOpLong(CompilationUnit* cu, Instruction::Code opcode,
+ RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2)
+{
+ // Default - bail to non-const handler.
+ return GenArithOpLong(cu, opcode, rl_dest, rl_src1, rl_src2);
+}
+
} // namespace art
diff --git a/src/compiler/codegen/x86/utility_x86.cc b/src/compiler/codegen/x86/utility_x86.cc
index 4f9e28b..4cc2c18 100644
--- a/src/compiler/codegen/x86/utility_x86.cc
+++ b/src/compiler/codegen/x86/utility_x86.cc
@@ -50,11 +50,26 @@
return res;
}
-bool X86Codegen::InexpensiveConstant(int reg, int value)
+bool X86Codegen::InexpensiveConstantInt(int32_t value)
{
return true;
}
+bool X86Codegen::InexpensiveConstantFloat(int32_t value)
+{
+ return false;
+}
+
+bool X86Codegen::InexpensiveConstantLong(int64_t value)
+{
+ return true;
+}
+
+bool X86Codegen::InexpensiveConstantDouble(int64_t value)
+{
+ return false; // TUNING
+}
+
/*
* Load an immediate using a shortcut if possible; otherwise
* grab from the per-translation literal pool. If target is
@@ -316,13 +331,14 @@
return NewLIR2(cu, opcode, rBase, disp);
}
-LIR* X86Codegen::LoadConstantValueWide(CompilationUnit *cu, int r_dest_lo,
- int r_dest_hi, int val_lo, int val_hi)
+LIR* X86Codegen::LoadConstantWide(CompilationUnit *cu, int r_dest_lo, int r_dest_hi, int64_t value)
{
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
LIR *res;
if (X86_FPREG(r_dest_lo)) {
DCHECK(X86_FPREG(r_dest_hi)); // ignore r_dest_hi
- if (val_lo == 0 && val_hi == 0) {
+ if (value == 0) {
return NewLIR2(cu, kX86XorpsRR, r_dest_lo, r_dest_lo);
} else {
if (val_lo == 0) {
diff --git a/src/compiler/compiler_enums.h b/src/compiler/compiler_enums.h
index bdf7a8b..ae305c0 100644
--- a/src/compiler/compiler_enums.h
+++ b/src/compiler/compiler_enums.h
@@ -291,6 +291,7 @@
kThrowNullPointer,
kThrowDivZero,
kThrowArrayBounds,
+ kThrowConstantArrayBounds,
kThrowNoSuchMethod,
kThrowStackOverflow,
};
diff --git a/src/compiler/compiler_ir.h b/src/compiler/compiler_ir.h
index aca32d5..056c308 100644
--- a/src/compiler/compiler_ir.h
+++ b/src/compiler/compiler_ir.h
@@ -230,7 +230,8 @@
bool catch_entry;
bool explicit_throw;
bool conditional_branch;
- bool has_return;
+ bool has_return; // Contains a return.
+ bool dominates_return; // Is a member of a return-containing extended basic block.
uint16_t start_offset;
uint16_t nesting_depth;
BBType block_type;
@@ -306,6 +307,7 @@
vreg_to_ssa_map(NULL),
ssa_last_defs(NULL),
is_constant_v(NULL),
+ must_flush_constant_v(NULL),
constant_values(NULL),
reg_location(NULL),
promotion_map(NULL),
@@ -418,6 +420,7 @@
int* vreg_to_ssa_map; // length == method->registers_size
int* ssa_last_defs; // length == method->registers_size
ArenaBitVector* is_constant_v; // length == num_ssa_reg
+ ArenaBitVector* must_flush_constant_v; // length == num_ssa_reg
int* constant_values; // length == num_ssa_reg
// Use counts of ssa names.
@@ -579,6 +582,35 @@
{{Instruction::RETURN_WIDE}, kIdentity},
};
+static inline bool IsConst(const CompilationUnit* cu, int32_t s_reg)
+{
+ return (IsBitSet(cu->is_constant_v, s_reg));
+}
+
+static inline bool IsConst(const CompilationUnit* cu, RegLocation loc)
+{
+ return (IsConst(cu, loc.orig_sreg));
+}
+
+static inline int32_t ConstantValue(const CompilationUnit* cu, RegLocation loc)
+{
+ DCHECK(IsConst(cu, loc));
+ return cu->constant_values[loc.orig_sreg];
+}
+
+static inline int64_t ConstantValueWide(const CompilationUnit* cu, RegLocation loc)
+{
+ DCHECK(IsConst(cu, loc));
+ return (static_cast<int64_t>(cu->constant_values[loc.orig_sreg + 1]) << 32) |
+ Low32Bits(static_cast<int64_t>(cu->constant_values[loc.orig_sreg]));
+}
+
+static inline bool MustFlushConstant(const CompilationUnit* cu, RegLocation loc)
+{
+ DCHECK(IsConst(cu, loc));
+ return IsBitSet(cu->must_flush_constant_v, loc.orig_sreg);
+}
+
} // namespace art
#endif // ART_SRC_COMPILER_COMPILER_IR_H_
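These accessors treat two adjacent constant_values slots as one 64-bit constant, low word first, matching the SetConstantWide writer added in dataflow.cc below. A round-trip check of that packing (two's-complement narrowing assumed, as the surrounding code already assumes it):

    #include <cassert>
    #include <cstdint>

    static inline uint32_t Low32Bits(uint64_t v) { return static_cast<uint32_t>(v); }
    static inline uint32_t High32Bits(uint64_t v) { return static_cast<uint32_t>(v >> 32); }

    int main() {
      int32_t constant_values[2];
      int64_t value = INT64_C(-0x123456789);
      // Writer side (SetConstantWide): low word in slot s, high word in slot s+1.
      constant_values[0] = static_cast<int32_t>(Low32Bits(value));
      constant_values[1] = static_cast<int32_t>(High32Bits(value));
      // Reader side (ConstantValueWide): widen the high word, mask the low word.
      int64_t back = (static_cast<int64_t>(constant_values[1]) << 32) |
                     Low32Bits(static_cast<int64_t>(constant_values[0]));
      assert(back == value);
      return 0;
    }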
diff --git a/src/compiler/dataflow.cc b/src/compiler/dataflow.cc
index 16065ab..1e20cbd 100644
--- a/src/compiler/dataflow.cc
+++ b/src/compiler/dataflow.cc
@@ -867,16 +867,15 @@
// Pre-SSA - just use the standard name
return GetSSAName(cu, ssa_reg);
}
- if (cu->reg_location[ssa_reg].is_const) {
+ if (IsConst(cu, cu->reg_location[ssa_reg])) {
if (!singles_only && cu->reg_location[ssa_reg].wide) {
- int64_t immval = cu->constant_values[ssa_reg + 1];
- immval = (immval << 32) | cu->constant_values[ssa_reg];
return StringPrintf("v%d_%d#0x%llx", SRegToVReg(cu, ssa_reg),
- SRegToSubscript(cu, ssa_reg), immval);
+ SRegToSubscript(cu, ssa_reg),
+ ConstantValueWide(cu, cu->reg_location[ssa_reg]));
} else {
- int32_t immval = cu->constant_values[ssa_reg];
return StringPrintf("v%d_%d#0x%x", SRegToVReg(cu, ssa_reg),
- SRegToSubscript(cu, ssa_reg), immval);
+ SRegToSubscript(cu, ssa_reg),
+ ConstantValue(cu, cu->reg_location[ssa_reg]));
}
} else {
return StringPrintf("v%d_%d", SRegToVReg(cu, ssa_reg), SRegToSubscript(cu, ssa_reg));
@@ -1300,12 +1299,19 @@
}
/* Set up a constant value for opcodes that have the DF_SETS_CONST attribute */
-static void SetConstant(CompilationUnit* cu, int ssa_reg, int value)
+static void SetConstant(CompilationUnit* cu, int32_t ssa_reg, int value)
{
SetBit(cu, cu->is_constant_v, ssa_reg);
cu->constant_values[ssa_reg] = value;
}
+static void SetConstantWide(CompilationUnit* cu, int ssa_reg, int64_t value)
+{
+ SetBit(cu, cu->is_constant_v, ssa_reg);
+ cu->constant_values[ssa_reg] = Low32Bits(value);
+ cu->constant_values[ssa_reg + 1] = High32Bits(value);
+}
+
bool DoConstantPropogation(CompilationUnit* cu, BasicBlock* bb)
{
MIR* mir;
@@ -1321,27 +1327,25 @@
/* Handle instructions that set up constants directly */
if (df_attributes & DF_SETS_CONST) {
if (df_attributes & DF_DA) {
+ int32_t vB = static_cast<int32_t>(d_insn->vB);
switch (d_insn->opcode) {
case Instruction::CONST_4:
case Instruction::CONST_16:
case Instruction::CONST:
- SetConstant(cu, mir->ssa_rep->defs[0], d_insn->vB);
+ SetConstant(cu, mir->ssa_rep->defs[0], vB);
break;
case Instruction::CONST_HIGH16:
- SetConstant(cu, mir->ssa_rep->defs[0], d_insn->vB << 16);
+ SetConstant(cu, mir->ssa_rep->defs[0], vB << 16);
break;
case Instruction::CONST_WIDE_16:
case Instruction::CONST_WIDE_32:
- SetConstant(cu, mir->ssa_rep->defs[0], d_insn->vB);
- SetConstant(cu, mir->ssa_rep->defs[1], 0);
+ SetConstantWide(cu, mir->ssa_rep->defs[0], static_cast<int64_t>(vB));
break;
case Instruction::CONST_WIDE:
- SetConstant(cu, mir->ssa_rep->defs[0], static_cast<int>(d_insn->vB_wide));
- SetConstant(cu, mir->ssa_rep->defs[1], static_cast<int>(d_insn->vB_wide >> 32));
+ SetConstantWide(cu, mir->ssa_rep->defs[0], d_insn->vB_wide);
break;
case Instruction::CONST_WIDE_HIGH16:
- SetConstant(cu, mir->ssa_rep->defs[0], 0);
- SetConstant(cu, mir->ssa_rep->defs[1], d_insn->vB << 16);
+ SetConstantWide(cu, mir->ssa_rep->defs[0], static_cast<int64_t>(vB) << 48);
break;
default:
break;
@@ -1363,6 +1367,18 @@
cu->constant_values[mir->ssa_rep->uses[1]]);
}
}
+ } else if (df_attributes & DF_NULL_TRANSFER_N) {
+ /*
+ * Mark const sregs that appear in merges. Need to flush those to their home locations.
+ * TUNING: instead of flushing on def, we could insert a flush on the appropriate
+ * edge[s].
+ */
+ DCHECK_EQ(static_cast<int32_t>(d_insn->opcode), kMirOpPhi);
+ for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
+ if (IsConst(cu, mir->ssa_rep->uses[i])) {
+ SetBit(cu, cu->must_flush_constant_v, mir->ssa_rep->uses[i]);
+ }
+ }
}
}
/* TODO: implement code to handle arithmetic operations */
@@ -1708,6 +1724,28 @@
}
}
break;
+ case Instruction::GOTO:
+ case Instruction::GOTO_16:
+ case Instruction::GOTO_32:
+ case Instruction::IF_EQ:
+ case Instruction::IF_NE:
+ case Instruction::IF_LT:
+ case Instruction::IF_GE:
+ case Instruction::IF_GT:
+ case Instruction::IF_LE:
+ case Instruction::IF_EQZ:
+ case Instruction::IF_NEZ:
+ case Instruction::IF_LTZ:
+ case Instruction::IF_GEZ:
+ case Instruction::IF_GTZ:
+ case Instruction::IF_LEZ:
+ if (bb->taken->dominates_return) {
+ mir->optimization_flags |= MIR_IGNORE_SUSPEND_CHECK;
+ if (cu->verbose) {
+ LOG(INFO) << "Suppressed suspend check at 0x" << std::hex << mir->offset;
+ }
+ }
+ break;
default:
break;
}
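The rationale for the suppression, as the surrounding code suggests: a goto or conditional whose taken target sits in a return-dominating extended basic block cannot start an unbounded loop, because control is guaranteed to leave the method, so the suspend-check poll on that edge is dead weight. A hypothetical method shape that benefits:

    #include <cassert>

    // Hypothetical shape that benefits: each if's taken target flows straight
    // to a return, so the branch needs no suspend-check poll.
    static int Clamp(int x) {
      if (x < 0) return 0;
      if (x > 255) return 255;
      return x;
    }

    int main() {
      assert(Clamp(-5) == 0 && Clamp(300) == 255 && Clamp(7) == 7);
      return 0;
    }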
@@ -2056,15 +2094,26 @@
if (cu->verbose) {
LOG(INFO) << "Extended bb head " << bb->id;
}
+ BasicBlock* start_bb = bb;
cu->extended_basic_blocks.push_back(bb);
+ bool has_return = false;
// Visit blocks strictly dominated by this head.
while (bb != NULL) {
bb->visited = true;
+ has_return |= bb->has_return;
bb = NextDominatedBlock(cu, bb);
if (cu->verbose && (bb != NULL)) {
LOG(INFO) << "...added bb " << bb->id;
}
}
+ if (has_return) {
+ // This extended basic block contains a return, so mark all members.
+ bb = start_bb;
+ while (bb != NULL) {
+ bb->dominates_return = true;
+ bb = NextDominatedBlock(cu, bb);
+ }
+ }
return false; // Not iterative - return value will be ignored
}
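The marking is a two-pass walk over the chain of strictly dominated blocks: one pass to notice a return anywhere in the extended basic block, a second to tag every member. In miniature, with next_dominated standing in for NextDominatedBlock():

    #include <cstddef>

    struct BB {
      BB* next_dominated;  // stand-in for NextDominatedBlock(cu, bb)
      bool has_return;
      bool dominates_return;
    };

    static void MarkReturnChain(BB* head) {
      bool has_return = false;
      for (BB* bb = head; bb != NULL; bb = bb->next_dominated)
        has_return |= bb->has_return;
      if (!has_return)
        return;
      for (BB* bb = head; bb != NULL; bb = bb->next_dominated)
        bb->dominates_return = true;  // mark every member of the extended BB
    }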
diff --git a/src/compiler/frontend.cc b/src/compiler/frontend.cc
index 6ccbc07..6eb117a 100644
--- a/src/compiler/frontend.cc
+++ b/src/compiler/frontend.cc
@@ -66,13 +66,13 @@
/* Default optimizer/debug setting for the compiler. */
static uint32_t kCompilerOptimizerDisableFlags = 0 | // Disable specific optimizations
- //(1 << kLoadStoreElimination) |
+ (1 << kLoadStoreElimination) |
//(1 << kLoadHoisting) |
//(1 << kSuppressLoads) |
//(1 << kNullCheckElimination) |
//(1 << kPromoteRegs) |
//(1 << kTrackLiveTemps) |
- //(1 << kSkipLargeMethodOptimization) |
+ (1 << kSkipLargeMethodOptimization) |
//(1 << kSafeOptimizations) |
//(1 << kBBOpt) |
//(1 << kMatch) |
@@ -972,6 +972,7 @@
cur_block = ProcessCanBranch(cu.get(), cur_block, insn, cur_offset,
width, flags, code_ptr, code_end);
} else if (flags & Instruction::kReturn) {
+ cur_block->has_return = true;
cur_block->fall_through = exit_block;
InsertGrowableList(cu.get(), exit_block->predecessors,
reinterpret_cast<uintptr_t>(cur_block));
@@ -1078,10 +1079,9 @@
}
/* Do constant propagation */
- // TODO: Probably need to make these expandable to support new ssa names
- // introducted during MIR optimization passes
- cu->is_constant_v = AllocBitVector(cu.get(), cu->num_ssa_regs,
- false /* not expandable */);
+ cu->is_constant_v = AllocBitVector(cu.get(), cu->num_ssa_regs, false /* not expandable */);
+ cu->must_flush_constant_v = AllocBitVector(cu.get(), cu->num_ssa_regs,
+ false /* not expandable */);
cu->constant_values =
static_cast<int*>(NewMem(cu.get(), sizeof(int) * cu->num_ssa_regs, true, kAllocDFInfo));
DataFlowAnalysisDispatcher(cu.get(), DoConstantPropogation,
diff --git a/src/compiler/ralloc.cc b/src/compiler/ralloc.cc
index 2038e19..3514200 100644
--- a/src/compiler/ralloc.cc
+++ b/src/compiler/ralloc.cc
@@ -479,6 +479,39 @@
}
}
+ /*
+ * Now that everything is typed and constants propagated, identify those constants
+ * that can be cheaply materialized and don't need to be flushed to a home location.
+ * The default is to not flush, and some have already been marked as must flush.
+ */
+ for (i = 0; i < cu->num_ssa_regs; i++) {
+ if (IsBitSet(cu->is_constant_v, i)) {
+ bool flush = false;
+ RegLocation loc = cu->reg_location[i];
+ if (loc.wide) {
+ int64_t value = ConstantValueWide(cu, loc);
+ if (loc.fp) {
+ flush = !cu->cg->InexpensiveConstantDouble(value);
+ } else {
+ flush = !cu->cg->InexpensiveConstantLong(value);
+ }
+ } else {
+ int32_t value = ConstantValue(cu, loc);
+ if (loc.fp) {
+ flush = !cu->cg->InexpensiveConstantFloat(value);
+ } else {
+ flush = !cu->cg->InexpensiveConstantInt(value);
+ }
+ }
+ if (flush) {
+ SetBit(cu, cu->must_flush_constant_v, i);
+ }
+ if (loc.wide) {
+ i++; // Skip the high word
+ }
+ }
+ }
+
cu->core_spill_mask = 0;
cu->fp_spill_mask = 0;
cu->num_core_spills = 0;
diff --git a/src/utils.h b/src/utils.h
index f3c9b7a..d808fc3 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -91,20 +91,20 @@
return IsUint(N, value);
}
-static inline int32_t Low16Bits(int32_t value) {
- return static_cast<int32_t>(value & 0xffff);
+static inline uint16_t Low16Bits(uint32_t value) {
+ return static_cast<uint16_t>(value);
}
-static inline int32_t High16Bits(int32_t value) {
- return static_cast<int32_t>(value >> 16);
+static inline uint16_t High16Bits(uint32_t value) {
+ return static_cast<uint16_t>(value >> 16);
}
-static inline int32_t Low32Bits(int64_t value) {
- return static_cast<int32_t>(value);
+static inline uint32_t Low32Bits(uint64_t value) {
+ return static_cast<uint32_t>(value);
}
-static inline int32_t High32Bits(int64_t value) {
- return static_cast<int32_t>(value >> 32);
+static inline uint32_t High32Bits(uint64_t value) {
+ return static_cast<uint32_t>(value >> 32);
}
template<typename T>
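The switch to unsigned parameters and returns removes sign propagation from the extraction helpers; with the old int32_t High16Bits, value >> 16 was an arithmetic shift, so negative inputs came back sign-filled. A quick check of the new behavior:

    #include <cassert>
    #include <cstdint>

    static inline uint16_t High16Bits(uint32_t value) {
      return static_cast<uint16_t>(value >> 16);
    }

    int main() {
      // As int32_t, 0x80001234 is negative; the old version returned
      // 0xffff8000 here because of the arithmetic shift.
      assert(High16Bits(0x80001234u) == 0x8000u);  // unsigned shift: no sign fill
      return 0;
    }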