Teach DAG combine to handle vector logical operations with vectors of all 1s or all 0s. These cases can show up when vectors are split for legalizing. Fix some tests that were dependent on these cases not being combined. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169684 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 9472b4fbf96fb840eb416acd00bbee173c62e8da [log] [tgz]
author: Craig Topper <craig.topper@gmail.com> Sat Dec 08 22:49:19 2012 +0000
committer: Craig Topper <craig.topper@gmail.com> Sat Dec 08 22:49:19 2012 +0000
tree: 8c060d25d7511409e33632b88955d17f08c1606f
parent: 7065a2bcec3b775da12cf8fbcd6fa972d5f2afeb [diff]
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6cd07c2..4a17dc0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -2427,6 +2427,18 @@
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (and x, 0) -> 0, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+
+    // fold (and x, -1) -> x, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N0;
   }
 
   // fold (and x, undef) -> 0
@@ -3025,6 +3037,18 @@
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (or x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+
+    // fold (or x, -1) -> -1, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N1;
   }
 
   // fold (or x, undef) -> -1
@@ -3334,6 +3358,12 @@
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (xor x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }
 
   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).

diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 0a949eb..f66248b 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll

@@ -11,12 +11,12 @@
 define void @select_func() {
 entry:
   %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
   %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
   %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
-  %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
-  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
-  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25>
+  %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+  %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
   %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
   store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
   ret void

diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9cf4607..6cd2761 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll

@@ -43,21 +43,21 @@
 	%mul171.i = fmul <4 x float> %add167.i, %sub140.i		; <<4 x float>> [#uses=1]
 	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
 	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%andnps178.i = add <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
 	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%andnps192.i = add <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
 	%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
+	%orps203.i = add <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
 	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
 	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
 	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
-	%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
+	%andps.i14 = add <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
 	%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%andnps.i17 = and <4 x i32> zeroinitializer, %not.i16		; <<4 x i32>> [#uses=1]
+	%andnps.i17 = add <4 x i32> zeroinitializer, %not.i16		; <<4 x i32>> [#uses=1]
 	%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14		; <<4 x i32>> [#uses=1]
 	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]

diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317b..67ce1be 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll

@@ -28,33 +28,31 @@
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
-  %A = load <4 x i64>* %v1
-  %B = load <4 x i64>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
-  store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+  %A = load <2 x i64>* %v1
+  %B = load <2 x i64>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
+  store <2 x i64 > %vsel, <2 x i64>* %v1
   ret void
 }
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
-  %A = load <4 x double>* %v1
-  %B = load <4 x double>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
-  store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+  %A = load <2 x double>* %v1
+  %B = load <2 x double>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
+  store <2 x double > %vsel, <2 x double>* %v1
   ret void
 }
commit	9472b4fbf96fb840eb416acd00bbee173c62e8da	[log] [tgz]
author	Craig Topper <craig.topper@gmail.com>	Sat Dec 08 22:49:19 2012 +0000
committer	Craig Topper <craig.topper@gmail.com>	Sat Dec 08 22:49:19 2012 +0000
tree	8c060d25d7511409e33632b88955d17f08c1606f
parent	7065a2bcec3b775da12cf8fbcd6fa972d5f2afeb [diff]