; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX

; First, check the generic pattern for any 2 vector constants. Then, check special cases where
; the constants are all off-by-one. Finally, check the extra special cases where the constants
; include 0 or -1.
; Each minimal select test is repeated with a more typical pattern that includes a compare to
; generate the condition value.

; TODO: If we don't have blendv, this can definitely be improved. There's also a selection of
; chips where it makes sense to transform the general case blendv to 2 bit-ops. That should be
; a uarch-specific transform. At some point (Ryzen?), the implementation should catch up to the
; architecture, so blendv is as fast as a single bit-op.

; Generic case: the two constant vectors have no special relationship to each
; other. The i1 condition lanes arrive with only the low bit defined, so the
; SSE2 sequence sign-extends them (shift left, then arithmetic shift right by
; 31) into full-width masks for and/andn/or, while the AVX sequence only moves
; the bit into the sign position for the variable blend.
define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_C1_or_C2_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_C1_or_C2_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Generic constants again, but the condition is produced by a vector compare.
; The compare result is already an all-ones/all-zeros mask per lane, so the
; expected code has no shift before the and/andn/or (SSE2) or blend (AVX).
define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_C1_or_C2_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_C1_or_C2_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Off-by-one case: every true-side constant is exactly one more than the
; matching false-side constant. The expected code is an and with a
; constant-pool vector followed by an add of a constant-pool vector, with no
; blend (presumably zero-extend the condition bit, then add the false-side
; constants; the pool contents are not pinned by the checks).
define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_Cplus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_Cplus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Off-by-one (C+1 or C) with a compare-generated condition. The compare yields
; -1 in true lanes and 0 in false lanes, so subtracting it from the false-side
; constant vector gives C+1 or C without any blend.
define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_Cplus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_Cplus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %sel
}

; Off-by-one case in the other direction: every true-side constant is exactly
; one less than the matching false-side constant. The condition bit is
; sign-extended to 0/-1 per lane (shift left, then arithmetic shift right by
; 31) and added to a constant-pool vector.
define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_Cminus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_Cminus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %sel
}

; Off-by-one (C-1 or C) with a compare-generated condition. The compare result
; is already 0/-1 per lane, so it is added directly to a constant-pool vector.
define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_Cminus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_Cminus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %sel
}

; All-ones or zero: the select is just a sign-extension of the condition bit,
; so only the shift-left / arithmetic-shift-right pair is expected.
define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_minus1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_minus1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %sel
}

; All-ones or zero with a compare-generated condition: the compare result is
; the answer, so the select should fold away entirely.
define <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_minus1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_minus1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %sel
}

; Zero or all-ones (the inverse of the sign-extension case). The expected code
; masks the condition with a constant-pool vector, materializes all-ones by
; comparing a register with itself, and adds the two.
define <4 x i32> @sel_0_or_minus1_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_0_or_minus1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_0_or_minus1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}

; Zero or all-ones with a compare-generated condition: the compare result is
; inverted by xor-ing it with an all-ones register (built by comparing a
; register with itself).
define <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_0_or_minus1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_0_or_minus1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %sel
}

; One or zero: the select is a zero-extension of the condition bit, expected as
; a single and against a constant-pool vector.
define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %sel
}

; One or zero with a compare-generated condition: the 0/-1 compare result is
; logically shifted right by 31 to leave 0/1 in each lane.
define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %sel
}

; Zero or one (the inverse of the zero-extension case): expected as a single
; and-not of the condition against a constant-pool vector.
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_0_or_1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    andnps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_0_or_1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vandnps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %sel = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sel
}

; Zero or one with a compare-generated condition: the compare result is
; and-not'ed against a constant-pool vector.
define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_0_or_1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_0_or_1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %cond = icmp eq <4 x i32> %x, %y
  %sel = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sel
}
