Extend the r107852 optimization which turns some fp compare to code sequence using only i32 operations. It now optimize some f64 compares when fp compare is exceptionally slow (e.g. cortex-a8). It also catches comparison against 0.0.
llvm-svn: 108258
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index c73e204..59a222c 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -38,6 +38,12 @@
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -90,6 +96,9 @@
def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
[SDNPHasChain]>;
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
@@ -2279,6 +2288,22 @@
def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
(CMNzri GPR:$src, so_imm_neg:$imm)>;
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
+ IIC_Br,
+ "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use