Compiler constant handling rework
In preparation for de-optimization, reworked the constant
handling mechanism. Also took advantage of knowledge of
constant operands (particularly for long operations).
Significant performance improvements for Mandelbrot
(~60 seconds to ~34 seconds). Minor improvements in other
benchmarks.
The new constant handling breaks two of the existing
optimization passes: "Skip Large Method" and "Load/Store
Elimination."
I don't intend to update the large method optimization
because it will be superseded by the upcoming interpreter/
fingerprinting mechanism. Leaving the code in place for
now in order to compare compile-time improvements with
fingerprinting/interpreter. All related code will be deleted
when that is complete.
The load/store elimination pass needs some rework to handle
uses of multiple-register loads and stores. It will be
updated & restored in a future CL.
Change-Id: Ia979abaf51b8ae81bbb0428031cbcea854625fac
diff --git a/src/compiler/codegen/arm/utility_arm.cc b/src/compiler/codegen/arm/utility_arm.cc
index 433111c..a670199 100644
--- a/src/compiler/codegen/arm/utility_arm.cc
+++ b/src/compiler/codegen/arm/utility_arm.cc
@@ -45,6 +45,32 @@
return res;
}
+/*
+ * Determine whether value can be encoded as a Thumb2 floating point
+ * immediate. If not, return -1. If so return encoded 8-bit value.
+ */
+static int EncodeImmDouble(int64_t value)
+{
+ int res;
+ int bit_a = (value & 0x8000000000000000ll) >> 63;
+ int not_bit_b = (value & 0x4000000000000000ll) >> 62;
+ int bit_b = (value & 0x2000000000000000ll) >> 61;
+ int b_smear = (value & 0x3fc0000000000000ll) >> 54;
+ int slice = (value & 0x003f000000000000ll) >> 48;
+ uint64_t zeroes = (value & 0x0000ffffffffffffll);
+ if (zeroes != 0)
+ return -1;
+ if (bit_b) {
+ if ((not_bit_b != 0) || (b_smear != 0xff))
+ return -1;
+ } else {
+ if ((not_bit_b != 1) || (b_smear != 0x0))
+ return -1;
+ }
+ res = (bit_a << 7) | (bit_b << 6) | slice;
+ return res;
+}
+
static LIR* LoadFPConstantValue(CompilationUnit* cu, int r_dest, int value)
{
DCHECK(ARM_SINGLEREG(r_dest));
@@ -126,19 +152,24 @@
return value | ((0x8 + z_leading) << 7); /* [01000..11111]:bcdefgh */
}
-bool ArmCodegen::InexpensiveConstant(int reg, int value)
+bool ArmCodegen::InexpensiveConstantInt(int32_t value)
{
- bool res = false;
- if (ARM_FPREG(reg)) {
- res = (EncodeImmSingle(value) >= 0);
- } else {
- if (ARM_LOWREG(reg) && (value >= 0) && (IsUint(8, value))) {
- res = true;
- } else {
- res = (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
- }
- }
- return res;
+ return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+}
+
+bool ArmCodegen::InexpensiveConstantFloat(int32_t value)
+{
+ return EncodeImmSingle(value) >= 0;
+}
+
+bool ArmCodegen::InexpensiveConstantLong(int64_t value)
+{
+ return InexpensiveConstantInt(High32Bits(value)) && InexpensiveConstantInt(Low32Bits(value));
+}
+
+bool ArmCodegen::InexpensiveConstantDouble(int64_t value)
+{
+ return EncodeImmDouble(value) >= 0;
}
/*
@@ -178,25 +209,9 @@
res = NewLIR2(cu, kThumb2MovImm16, r_dest, value);
return res;
}
- /* No shortcut - go ahead and use literal pool */
- LIR* data_target = ScanLiteralPool(cu->literal_list, value, 0);
- if (data_target == NULL) {
- data_target = AddWordData(cu, &cu->literal_list, value);
- }
- LIR* load_pc_rel = RawLIR(cu, cu->current_dalvik_offset,
- kThumb2LdrPcRel12, r_dest, 0, 0, 0, 0, data_target);
- SetMemRefType(cu, load_pc_rel, true, kLiteral);
- load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
- res = load_pc_rel;
- AppendLIR(cu, load_pc_rel);
-
- /*
- * To save space in the constant pool, we use the ADD_RRI8 instruction to
- * add up to 255 to an existing constant value.
- */
- if (data_target->operands[0] != value) {
- OpRegImm(cu, kOpAdd, r_dest, value - data_target->operands[0]);
- }
+ /* Do a low/high pair */
+ res = NewLIR2(cu, kThumb2MovImm16, r_dest, Low16Bits(value));
+ NewLIR2(cu, kThumb2MovImm16H, r_dest, High16Bits(value));
return res;
}
@@ -514,7 +529,7 @@
int mod_imm = ModifiedImmediate(value);
LIR* res;
if (mod_imm >= 0) {
- res = NewLIR2(cu, kThumb2CmpRI8, r_src1, mod_imm);
+ res = NewLIR2(cu, kThumb2CmpRI12, r_src1, mod_imm);
} else {
int r_tmp = AllocTemp(cu);
res = LoadConstant(cu, r_tmp, value);
@@ -587,44 +602,11 @@
}
}
-/*
- * Determine whether value can be encoded as a Thumb2 floating point
- * immediate. If not, return -1. If so return encoded 8-bit value.
- */
-static int EncodeImmDoubleHigh(int value)
+LIR* ArmCodegen::LoadConstantWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi, int64_t value)
{
- int res;
- int bit_a = (value & 0x80000000) >> 31;
- int not_bit_b = (value & 0x40000000) >> 30;
- int bit_b = (value & 0x20000000) >> 29;
- int b_smear = (value & 0x3fc00000) >> 22;
- int slice = (value & 0x003f0000) >> 16;
- int zeroes = (value & 0x0000ffff);
- if (zeroes != 0)
- return -1;
- if (bit_b) {
- if ((not_bit_b != 0) || (b_smear != 0xff))
- return -1;
- } else {
- if ((not_bit_b != 1) || (b_smear != 0x0))
- return -1;
- }
- res = (bit_a << 7) | (bit_b << 6) | slice;
- return res;
-}
-
-static int EncodeImmDouble(int val_lo, int val_hi)
-{
- int res = -1;
- if (val_lo == 0)
- res = EncodeImmDoubleHigh(val_hi);
- return res;
-}
-
-LIR* ArmCodegen::LoadConstantValueWide(CompilationUnit* cu, int r_dest_lo, int r_dest_hi,
- int val_lo, int val_hi)
-{
- LIR* res;
+ LIR* res = NULL;
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
int target_reg = S2d(r_dest_lo, r_dest_hi);
if (ARM_FPREG(r_dest_lo)) {
if ((val_lo == 0) && (val_hi == 0)) {
@@ -635,26 +617,33 @@
// +0.0 = +2.0 - +2.0
res = NewLIR3(cu, kThumb2Vsubd, target_reg, target_reg, target_reg);
} else {
- int encoded_imm = EncodeImmDouble(val_lo, val_hi);
+ int encoded_imm = EncodeImmDouble(value);
if (encoded_imm >= 0) {
res = NewLIR2(cu, kThumb2Vmovd_IMM8, target_reg, encoded_imm);
- } else {
- LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
- if (data_target == NULL) {
- data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
- }
- LIR* load_pc_rel =
- RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
- target_reg, r15pc, 0, 0, 0, data_target);
- SetMemRefType(cu, load_pc_rel, true, kLiteral);
- load_pc_rel->alias_info = reinterpret_cast<uintptr_t>(data_target);
- AppendLIR(cu, load_pc_rel);
- res = load_pc_rel;
}
}
} else {
- res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
- LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+ if ((InexpensiveConstantInt(val_lo) && (InexpensiveConstantInt(val_hi)))) {
+ res = LoadConstantNoClobber(cu, r_dest_lo, val_lo);
+ LoadConstantNoClobber(cu, r_dest_hi, val_hi);
+ }
+ }
+ if (res == NULL) {
+ // No short form - load from the literal pool.
+ LIR* data_target = ScanLiteralPoolWide(cu->literal_list, val_lo, val_hi);
+ if (data_target == NULL) {
+ data_target = AddWideData(cu, &cu->literal_list, val_lo, val_hi);
+ }
+ if (ARM_FPREG(r_dest_lo)) {
+ res = RawLIR(cu, cu->current_dalvik_offset, kThumb2Vldrd,
+ target_reg, r15pc, 0, 0, 0, data_target);
+ } else {
+ res = RawLIR(cu, cu->current_dalvik_offset, kThumb2LdrdPcRel8,
+ r_dest_lo, r_dest_hi, r15pc, 0, 0, data_target);
+ }
+ SetMemRefType(cu, res, true, kLiteral);
+ res->alias_info = reinterpret_cast<uintptr_t>(data_target);
+ AppendLIR(cu, res);
}
return res;
}
@@ -732,7 +721,7 @@
int scale, OpSize size)
{
bool all_low_regs = ARM_LOWREG(rBase) && ARM_LOWREG(r_index) && ARM_LOWREG(r_src);
- LIR* store;
+ LIR* store = NULL;
ArmOpcode opcode = kThumbBkpt;
bool thumb_form = (all_low_regs && (scale == 0));
int reg_ptr;
@@ -798,14 +787,14 @@
int r_dest_hi, OpSize size, int s_reg)
{
Codegen* cg = cu->cg.get();
- LIR* res;
- LIR* load;
+ LIR* load = NULL;
ArmOpcode opcode = kThumbBkpt;
bool short_form = false;
bool thumb2Form = (displacement < 4092 && displacement >= 0);
bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_dest));
int encoded_disp = displacement;
bool is64bit = false;
+ bool already_generated = false;
switch (size) {
case kDouble:
case kLong:
@@ -822,11 +811,15 @@
}
break;
} else {
- res = LoadBaseDispBody(cu, rBase, displacement, r_dest,
- -1, kWord, s_reg);
- LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
- -1, kWord, INVALID_SREG);
- return res;
+ if (displacement <= 1020) {
+ load = NewLIR4(cu, kThumb2LdrdI8, r_dest, r_dest_hi, rBase, displacement >> 2);
+ } else {
+ load = LoadBaseDispBody(cu, rBase, displacement, r_dest,
+ -1, kWord, s_reg);
+ LoadBaseDispBody(cu, rBase, displacement + 4, r_dest_hi,
+ -1, kWord, INVALID_SREG);
+ }
+ already_generated = true;
}
case kSingle:
case kWord:
@@ -894,13 +887,15 @@
LOG(FATAL) << "Bad size: " << size;
}
- if (short_form) {
- load = res = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
- } else {
- int reg_offset = AllocTemp(cu);
- res = cg->LoadConstant(cu, reg_offset, encoded_disp);
- load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
- FreeTemp(cu, reg_offset);
+ if (!already_generated) {
+ if (short_form) {
+ load = NewLIR3(cu, opcode, r_dest, rBase, encoded_disp);
+ } else {
+ int reg_offset = AllocTemp(cu);
+ cg->LoadConstant(cu, reg_offset, encoded_disp);
+ load = cg->LoadBaseIndexed(cu, rBase, reg_offset, r_dest, 0, size);
+ FreeTemp(cu, reg_offset);
+ }
}
// TODO: in future may need to differentiate Dalvik accesses w/ spills
@@ -926,30 +921,36 @@
LIR* ArmCodegen::StoreBaseDispBody(CompilationUnit* cu, int rBase, int displacement,
int r_src, int r_src_hi, OpSize size) {
Codegen* cg = cu->cg.get();
- LIR* res, *store;
+ LIR* store = NULL;
ArmOpcode opcode = kThumbBkpt;
bool short_form = false;
bool thumb2Form = (displacement < 4092 && displacement >= 0);
bool all_low_regs = (ARM_LOWREG(rBase) && ARM_LOWREG(r_src));
int encoded_disp = displacement;
bool is64bit = false;
+ bool already_generated = false;
switch (size) {
case kLong:
case kDouble:
is64bit = true;
if (!ARM_FPREG(r_src)) {
- res = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
- StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
- return res;
- }
- if (ARM_SINGLEREG(r_src)) {
- DCHECK(ARM_FPREG(r_src_hi));
- r_src = cg->S2d(r_src, r_src_hi);
- }
- opcode = kThumb2Vstrd;
- if (displacement <= 1020) {
- short_form = true;
- encoded_disp >>= 2;
+ if (displacement <= 1020) {
+ store = NewLIR4(cu, kThumb2StrdI8, r_src, r_src_hi, rBase, displacement >> 2);
+ } else {
+ store = StoreBaseDispBody(cu, rBase, displacement, r_src, -1, kWord);
+ StoreBaseDispBody(cu, rBase, displacement + 4, r_src_hi, -1, kWord);
+ }
+ already_generated = true;
+ } else {
+ if (ARM_SINGLEREG(r_src)) {
+ DCHECK(ARM_FPREG(r_src_hi));
+ r_src = cg->S2d(r_src, r_src_hi);
+ }
+ opcode = kThumb2Vstrd;
+ if (displacement <= 1020) {
+ short_form = true;
+ encoded_disp >>= 2;
+ }
}
break;
case kSingle:
@@ -998,20 +999,22 @@
default:
LOG(FATAL) << "Bad size: " << size;
}
- if (short_form) {
- store = res = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
- } else {
- int r_scratch = AllocTemp(cu);
- res = cg->LoadConstant(cu, r_scratch, encoded_disp);
- store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
- FreeTemp(cu, r_scratch);
+ if (!already_generated) {
+ if (short_form) {
+ store = NewLIR3(cu, opcode, r_src, rBase, encoded_disp);
+ } else {
+ int r_scratch = AllocTemp(cu);
+ cg->LoadConstant(cu, r_scratch, encoded_disp);
+ store = cg->StoreBaseIndexed(cu, rBase, r_scratch, r_src, 0, size);
+ FreeTemp(cu, r_scratch);
+ }
}
// TODO: In future, may need to differentiate Dalvik & spill accesses
if (rBase == rARM_SP) {
AnnotateDalvikRegAccess(cu, store, displacement >> 2, false /* is_load */, is64bit);
}
- return res;
+ return store;
}
LIR* ArmCodegen::StoreBaseDisp(CompilationUnit* cu, int rBase, int displacement, int r_src,