Optimize vector select from all 0s or all 1s. As packed comparisons in AVX/SSE produce all 0s or all 1s in each SIMD lane, a vector select can be simplified to an AND/OR, or removed entirely, if one or both of the values being selected are all 0s or all 1s. llvm-svn: 179267

commit: 55658d42228e8dbfa8d74d1b37e330aaee284c00 [log] [tgz]
author: Michael Liao <michael.liao@intel.com> Thu Apr 11 05:15:54 2013 +0000
committer: Michael Liao <michael.liao@intel.com> Thu Apr 11 05:15:54 2013 +0000
tree: 248bdaeb5819f711d4e863163bd0ca50a34003b2
parent: 95d9440348a8437fb792052a2238d972bf884bda [diff] [blame]
diff --git a/llvm/test/CodeGen/X86/select-with-and-or.ll b/llvm/test/CodeGen/X86/select-with-and-or.ll
new file mode 100644
index 0000000..1ccf30b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/select-with-and-or.ll

@@ -0,0 +1,72 @@
+; RUN: opt < %s -O3 | \
+; RUN:	llc -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @test1(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; select(mask, %c, 0): the checks below expect a packed compare followed directly by an AND
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> zeroinitializer ; %c in lanes where the mask is true, 0 elsewhere
+  ret <4 x i32> %r
+; CHECK: test1
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; select(mask, all 1s, %c): the checks below expect a packed compare followed directly by an OR
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c ; all 1s in lanes where the mask is true, %c elsewhere
+  ret <4 x i32> %r
+; CHECK: test2
+; CHECK: cmpnle
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test3(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; select(mask, 0, %c): the checks below expect the complementary compare (cmple, not cmpnle) followed directly by an AND
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> %c ; 0 in lanes where the mask is true, %c elsewhere
+  ret <4 x i32> %r
+; CHECK: test3
+; CHECK: cmple
+; CHECK-NEXT: andps
+; CHECK: ret
+}
+
+define <4 x i32> @test4(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; select(mask, %c, all 1s): the checks below expect the complementary compare (cmple, not cmpnle) followed directly by an OR
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %r = select <4 x i1> %f, <4 x i32> %c, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> ; %c in lanes where the mask is true, all 1s elsewhere
+  ret <4 x i32> %r
+; CHECK: test4
+; CHECK: cmple
+; CHECK-NEXT: orps
+; CHECK: ret
+}
+
+define <4 x i32> @test5(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; select(mask, all 1s, 0): the checks below expect the compare to be followed immediately by the return; %c is unused
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %r = select <4 x i1> %f, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer ; all 1s in lanes where the mask is true, 0 elsewhere
+  ret <4 x i32> %r
+; CHECK: test5
+; CHECK: cmpnle
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test6(<4 x float> %a, <4 x float> %b, <4 x i32> %c) { ; select(mask, 0, all 1s): the checks below expect the complementary compare (cmple) followed immediately by the return; %c is unused
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %r = select <4 x i1> %f, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> ; 0 in lanes where the mask is true, all 1s elsewhere
+  ret <4 x i32> %r
+; CHECK: test6
+; CHECK: cmple
+; CHECK-NEXT: ret
+}
+
+define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) { ; explicit sign-extended mask ANDed with a vector loaded through %p: the checks below expect a packed compare followed directly by an AND
+  %f = fcmp ult <4 x float> %a, %b ; per-lane "unordered or less than" compare, yielding a <4 x i1> mask
+  %s = sext <4 x i1> %f to <4 x i32> ; widen each i1 lane to i32: true becomes all 1s (-1), false becomes 0
+  %l = load <4 x i32>* %p ; vector operand read from memory through %p
+  %r = and <4 x i32> %l, %s ; keep the loaded lanes where the mask is set, 0 elsewhere
+  ret <4 x i32> %r
+; CHECK: test7
+; CHECK: cmpnle
+; CHECK-NEXT: andps
+; CHECK: ret
+}
commit	55658d42228e8dbfa8d74d1b37e330aaee284c00	[log] [tgz]
author	Michael Liao <michael.liao@intel.com>	Thu Apr 11 05:15:54 2013 +0000
committer	Michael Liao <michael.liao@intel.com>	Thu Apr 11 05:15:54 2013 +0000
tree	248bdaeb5819f711d4e863163bd0ca50a34003b2
parent	95d9440348a8437fb792052a2238d972bf884bda [diff] [blame]