ARM: Strength reduction for floating-point division
For floating-point division by power of two constants, generate
multiplication by the reciprocal instead.
Change-Id: I39c79eeb26b60cc754ad42045362b79498c755be
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 179ba02..d235199 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -90,6 +90,10 @@
bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
RegLocation rl_dest, int lit);
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+ void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) OVERRIDE;
+ void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 3eb7c83..2b2592d 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -113,6 +113,32 @@
StoreValueWide(rl_dest, rl_result);
}
+void ArmMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) {
+ RegLocation rl_result;
+ RegStorage r_tmp = AllocTempSingle();
+ LoadConstantNoClobber(r_tmp, constant);
+ rl_src1 = LoadValue(rl_src1, kFPReg);
+ rl_result = EvalLoc(rl_dest, kFPReg, true);
+ NewLIR3(kThumb2Vmuls, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+ StoreValue(rl_dest, rl_result);
+}
+
+void ArmMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) {
+ RegLocation rl_result;
+ RegStorage r_tmp = AllocTempDouble();
+ DCHECK(r_tmp.IsDouble());
+ LoadConstantWide(r_tmp, constant);
+ rl_src1 = LoadValueWide(rl_src1, kFPReg);
+ DCHECK(rl_src1.wide);
+ rl_result = EvalLocWide(rl_dest, kFPReg, true);
+ DCHECK(rl_dest.wide);
+ DCHECK(rl_result.wide);
+ NewLIR3(kThumb2Vmuld, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+ StoreValueWide(rl_dest, rl_result);
+}
+
void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
int op = kThumbBkpt;
int src_reg;
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index bd363c4..5182a89 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -71,6 +71,10 @@
bool HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
RegLocation rl_src, RegLocation rl_dest, int64_t lit);
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+ void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) OVERRIDE;
+ void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index db24d12..ff692b7 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -116,6 +116,32 @@
StoreValueWide(rl_dest, rl_result);
}
+void Arm64Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) {
+ RegLocation rl_result;
+ RegStorage r_tmp = AllocTempSingle();
+ LoadConstantNoClobber(r_tmp, constant);
+ rl_src1 = LoadValue(rl_src1, kFPReg);
+ rl_result = EvalLoc(rl_dest, kFPReg, true);
+ NewLIR3(kA64Fmul3fff, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+ StoreValue(rl_dest, rl_result);
+}
+
+void Arm64Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) {
+ RegLocation rl_result;
+ RegStorage r_tmp = AllocTempDouble();
+ DCHECK(r_tmp.IsDouble());
+ LoadConstantWide(r_tmp, constant);
+ rl_src1 = LoadValueWide(rl_src1, kFPReg);
+ DCHECK(rl_src1.wide);
+ rl_result = EvalLocWide(rl_dest, kFPReg, true);
+ DCHECK(rl_dest.wide);
+ DCHECK(rl_result.wide);
+ NewLIR3(WIDE(kA64Fmul3fff), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+ StoreValueWide(rl_dest, rl_result);
+}
+
void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
RegLocation rl_dest, RegLocation rl_src) {
int op = kA64Brk1d;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index c5aa27c..061ee07 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1785,6 +1785,34 @@
return true;
}
+// Returns true if it generates instructions.
+bool Mir2Lir::HandleEasyFloatingPointDiv(RegLocation rl_dest, RegLocation rl_src1,
+ RegLocation rl_src2) {
+ if (!rl_src2.is_const ||
+ ((cu_->instruction_set != kThumb2) && (cu_->instruction_set != kArm64))) {
+ return false;
+ }
+
+ if (!rl_src2.wide) {
+ int32_t divisor = mir_graph_->ConstantValue(rl_src2);
+ if (CanDivideByReciprocalMultiplyFloat(divisor)) {
+ // Generate multiply by reciprocal instead of div.
+ float recip = 1.0f/bit_cast<int32_t, float>(divisor);
+ GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<float, int32_t>(recip));
+ return true;
+ }
+ } else {
+ int64_t divisor = mir_graph_->ConstantValueWide(rl_src2);
+ if (CanDivideByReciprocalMultiplyDouble(divisor)) {
+ // Generate multiply by reciprocal instead of div.
+ double recip = 1.0/bit_cast<double, int64_t>(divisor);
+ GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<double, int64_t>(recip));
+ return true;
+ }
+ }
+ return false;
+}
+
void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src,
int lit) {
RegLocation rl_result;
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index dc6930c..7e9d80d 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -31,6 +31,10 @@
bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
RegLocation rl_dest, int lit);
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+ void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) OVERRIDE;
+ void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 4315915..0a7aa99 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc
@@ -113,6 +113,20 @@
StoreValueWide(rl_dest, rl_result);
}
+void MipsMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) {
+ // TODO: need mips implementation.
+ UNUSED(rl_dest, rl_src1, constant);
+ LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in mips";
+}
+
+void MipsMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) {
+ // TODO: need mips implementation.
+ UNUSED(rl_dest, rl_src1, constant);
+ LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in mips";
+}
+
void MipsMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
RegLocation rl_src) {
int op = kMipsNop;
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 533a677..ccaa167 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1052,28 +1052,36 @@
}
break;
+ case Instruction::DIV_FLOAT:
+ case Instruction::DIV_FLOAT_2ADDR:
+ if (HandleEasyFloatingPointDiv(rl_dest, rl_src[0], rl_src[1])) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
case Instruction::ADD_FLOAT:
case Instruction::SUB_FLOAT:
case Instruction::MUL_FLOAT:
- case Instruction::DIV_FLOAT:
case Instruction::REM_FLOAT:
case Instruction::ADD_FLOAT_2ADDR:
case Instruction::SUB_FLOAT_2ADDR:
case Instruction::MUL_FLOAT_2ADDR:
- case Instruction::DIV_FLOAT_2ADDR:
case Instruction::REM_FLOAT_2ADDR:
GenArithOpFloat(opcode, rl_dest, rl_src[0], rl_src[1]);
break;
+ case Instruction::DIV_DOUBLE:
+ case Instruction::DIV_DOUBLE_2ADDR:
+ if (HandleEasyFloatingPointDiv(rl_dest, rl_src[0], rl_src[1])) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
case Instruction::ADD_DOUBLE:
case Instruction::SUB_DOUBLE:
case Instruction::MUL_DOUBLE:
- case Instruction::DIV_DOUBLE:
case Instruction::REM_DOUBLE:
case Instruction::ADD_DOUBLE_2ADDR:
case Instruction::SUB_DOUBLE_2ADDR:
case Instruction::MUL_DOUBLE_2ADDR:
- case Instruction::DIV_DOUBLE_2ADDR:
case Instruction::REM_DOUBLE_2ADDR:
GenArithOpDouble(opcode, rl_dest, rl_src[0], rl_src[1]);
break;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4623f79..bacc6d2 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -789,6 +789,7 @@
virtual bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
RegLocation rl_src, RegLocation rl_dest, int lit);
bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
+ bool HandleEasyFloatingPointDiv(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
virtual void HandleSlowPaths();
void GenBarrier();
void GenDivZeroException();
@@ -1120,6 +1121,10 @@
virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
virtual bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
+ virtual void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) = 0;
+ virtual void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) = 0;
virtual LIR* CheckSuspendUsingLoad() = 0;
virtual RegStorage LoadHelper(QuickEntrypointEnum trampoline) = 0;
@@ -1439,6 +1444,26 @@
return InexpensiveConstantInt(value);
}
+ /**
+ * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
+ * @param divisor A constant divisor bits of float type.
+ * @return Returns true iff, x/divisor == x*(1.0f/divisor), for every float x.
+ */
+ bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
+ // True, if float value significand bits are 0.
+ return ((divisor & 0x7fffff) == 0);
+ }
+
+ /**
+ * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
+ * @param divisor A constant divisor bits of double type.
+ * @return Returns true iff, x/divisor == x*(1.0/divisor), for every double x.
+ */
+ bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
+ // True, if double value significand bits are 0.
+ return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
+ }
+
// May be optimized by targets.
virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index dec99ae..4412a1e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -78,6 +78,10 @@
bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
RegLocation rl_dest, int lit) OVERRIDE;
bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+ void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) OVERRIDE;
+ void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) OVERRIDE;
LIR* CheckSuspendUsingLoad() OVERRIDE;
RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 254d90f..33bb0ee 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -122,6 +122,20 @@
StoreValueWide(rl_dest, rl_result);
}
+void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+ int32_t constant) {
+ // TODO: need x86 implementation.
+ UNUSED(rl_dest, rl_src1, constant);
+ LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
+}
+
+void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+ int64_t constant) {
+ // TODO: need x86 implementation.
+ UNUSED(rl_dest, rl_src1, constant);
+ LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
+}
+
void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
// Compute offsets to the source and destination VRs on stack
int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);