Prefer to expand mask for xor to -1, so we have a chance to turn it into a not.
If it cannot be expanded, it will keep the old behaviour and try to shrink the constant.
Part of enhancement for PR2191.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@49280 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0fcb3c8..a0894dd 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -657,10 +657,25 @@
}
// If the RHS is a constant, see if we can simplify it.
- // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1.
- if (TLO.ShrinkDemandedConstant(Op, NewMask))
- return true;
-
+ // for XOR, we prefer to force bits to 1 if they will make a -1.
+ // if we can't force bits, try to shrink constant
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ APInt Expanded = C->getAPIntValue() | (~NewMask);
+ // if we can expand it to have all bits set, do it
+ if (Expanded.isAllOnesValue()) {
+ if (Expanded != C->getAPIntValue()) {
+ MVT::ValueType VT = Op.getValueType();
+ SDOperand New = TLO.DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0),
+ TLO.DAG.getConstant(Expanded, VT));
+ return TLO.CombineTo(Op, New);
+ }
+ // if it already has all the bits set, nothing to change
+ // but don't shrink either!
+ } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
+ return true;
+ }
+ }
+
KnownZero = KnownZeroOut;
KnownOne = KnownOneOut;
break;
diff --git a/test/CodeGen/X86/xor_not.ll b/test/CodeGen/X86/xor_not.ll
new file mode 100644
index 0000000..52f8e65
--- /dev/null
+++ b/test/CodeGen/X86/xor_not.ll
@@ -0,0 +1,146 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep not[lwb] | count 3
+; RUN: llvm-as < %s | llc -march=x86-64 | grep not[lwb] | count 4
+define i32 @test(i32 %a, i32 %b) nounwind {
+entry:
+ %tmp1not = xor i32 %b, -2
+ %tmp3 = and i32 %tmp1not, %a
+ %tmp4 = lshr i32 %tmp3, 1
+ ret i32 %tmp4
+}
+
+define i32 @sum32(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483647
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+}
+
+define i16 @sum16(i16 %a, i16 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i16 %a_addr.0, %b_addr.0
+ %tmp4not = xor i16 %tmp3, 32767
+ %tmp6 = and i16 %tmp4not, %b_addr.0
+ %tmp8 = shl i16 %tmp6, 1
+ %tmp10 = icmp eq i16 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i16 %tmp3
+}
+
+define i8 @sum8(i8 %a, i8 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i8 %a_addr.0, %b_addr.0
+ %tmp4not = xor i8 %tmp3, 127
+ %tmp6 = and i8 %tmp4not, %b_addr.0
+ %tmp8 = shl i8 %tmp6, 1
+ %tmp10 = icmp eq i8 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i8 %tmp3
+}
+
+define i32 @notransform(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483646
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+}
+; RUN: llvm-as < %s | llc -march=x86 | grep not[lwb] | count 3
+; RUN: llvm-as < %s | llc -march=x86-64 | grep not[lwb] | count 4
+define i32 @test(i32 %a, i32 %b) nounwind {
+entry:
+ %tmp1not = xor i32 %b, -2
+ %tmp3 = and i32 %tmp1not, %a
+ %tmp4 = lshr i32 %tmp3, 1
+ ret i32 %tmp4
+}
+
+define i32 @sum32(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483647
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+}
+
+define i16 @sum16(i16 %a, i16 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i16 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i16 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i16 %a_addr.0, %b_addr.0
+ %tmp4not = xor i16 %tmp3, 32767
+ %tmp6 = and i16 %tmp4not, %b_addr.0
+ %tmp8 = shl i16 %tmp6, 1
+ %tmp10 = icmp eq i16 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i16 %tmp3
+}
+
+define i8 @sum8(i8 %a, i8 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i8 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i8 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i8 %a_addr.0, %b_addr.0
+ %tmp4not = xor i8 %tmp3, 127
+ %tmp6 = and i8 %tmp4not, %b_addr.0
+ %tmp8 = shl i8 %tmp6, 1
+ %tmp10 = icmp eq i8 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i8 %tmp3
+}
+
+define i32 @notransform(i32 %a, i32 %b) nounwind {
+entry:
+ br label %bb
+bb:
+ %b_addr.0 = phi i32 [ %b, %entry ], [ %tmp8, %bb ]
+ %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp3, %bb ]
+ %tmp3 = xor i32 %a_addr.0, %b_addr.0
+ %tmp4not = xor i32 %tmp3, 2147483646
+ %tmp6 = and i32 %tmp4not, %b_addr.0
+ %tmp8 = shl i32 %tmp6, 1
+ %tmp10 = icmp eq i32 %tmp8, 0
+ br i1 %tmp10, label %bb12, label %bb
+bb12:
+ ret i32 %tmp3
+}