; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX

; First, check the generic pattern for any 2 vector constants. Then, check special cases where
; the constants are all off-by-one. Finally, check the extra special cases where the constants
; include 0 or -1.
; Each minimal select test is repeated with a more typical pattern that includes a compare to
; generate the condition value.
define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_C1_or_C2_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_C1_or_C2_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
; Generic case: two arbitrary constant vectors. SSE sign-extends the i1 mask
; (pslld/psrad) and blends with and/andn/or; AVX uses a variable blend.
  %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}
31
define <4 x i32> @cmp_sel_C1_or_C2_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_C1_or_C2_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_C1_or_C2_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
; Same generic two-constant blend as above, but the condition comes from a
; compare, so the pcmpeqd result is already an all-ones/all-zeros mask and
; no sign-extension shifts are needed.
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 3000, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}
52
define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_Cplus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_Cplus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
; Off-by-one special case: the true operand is exactly C+1 of the false
; operand (select cond, C+1, C == C + zext(cond)). The checks show codegen
; currently still emits the generic two-constant blend sequence.
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}
73
define <4 x i32> @cmp_sel_Cplus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_Cplus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_Cplus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [42,0,4294967294,4294967295]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
; Off-by-one (C+1 vs. C) with a compare-produced condition; still the
; generic blend sequence per the checks above.
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 42, i32 0, i32 -2, i32 -1>
  ret <4 x i32> %add
}
94
define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_Cminus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_Cminus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [44,2,0,1]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
; Off-by-one the other way: the true operand is C-1 of the false operand
; (select cond, C-1, C == C + sext(cond)). The checks show the generic
; blend sequence is still produced.
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %add
}
115
define <4 x i32> @cmp_sel_Cminus1_or_C_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_Cminus1_or_C_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm1
; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_Cminus1_or_C_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [44,2,0,1]
; AVX-NEXT:    vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
; AVX-NEXT:    retq
; Off-by-one (C-1 vs. C) with a compare-produced condition; still the
; generic blend sequence per the checks above.
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 43, i32 1, i32 -1, i32 0>, <4 x i32> <i32 44, i32 2, i32 0, i32 1>
  ret <4 x i32> %add
}
136
define <4 x i32> @sel_minus1_or_0_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_minus1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_minus1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    retq
; select cond, -1, 0 is just the sign-extension of the i1 condition:
; shift the condition bit to the top and arithmetic-shift it back down.
  %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}
152
define <4 x i32> @cmp_sel_minus1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_minus1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_minus1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
; Best case: the compare mask (all-ones where equal, zero otherwise) IS the
; result, so the select folds away entirely.
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}
167
define <4 x i32> @sel_0_or_minus1_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_0_or_minus1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_0_or_minus1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
; select cond, 0, -1 is NOT(sext(cond)). SSE sign-extends then flips all
; bits (pcmpeqd of a register with itself makes the all-ones constant);
; the AVX path shown still materializes both constants and blends.
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %add
}
187
define <4 x i32> @cmp_sel_0_or_minus1_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_0_or_minus1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_0_or_minus1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
; With a compare-produced mask, select cond, 0, -1 is just the inverted
; compare: xor the mask with all-ones.
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %add
}
206
define <4 x i32> @sel_1_or_0_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
; select cond, 1, 0 is zext(cond): mask the condition down to bit 0 with a
; single 'and' against a constant of ones.
  %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}
220
define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_1_or_0_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    psrld $31, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_1_or_0_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX-NEXT:    retq
; With an all-ones/zero compare mask, select cond, 1, 0 is a logical right
; shift by 31 — no constant load needed.
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %add
}
237
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; SSE-LABEL: sel_0_or_1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sel_0_or_1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovaps {{.*#+}} xmm2 = [1,1,1,1]
; AVX-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
; select cond, 0, 1 is zext(NOT cond). SSE sign-extends and uses pandn
; with a ones constant; the AVX path shown still uses a blend of the two
; constant vectors.
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
}
256
define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: cmp_sel_0_or_1_vec:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE-NEXT:    pandn {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: cmp_sel_0_or_1_vec:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpandn {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
; With a compare-produced mask, select cond, 0, 1 is a single and-not
; against a ones constant (clear bit 0 where the mask is set).
  %cond = icmp eq <4 x i32> %x, %y
  %add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %add
}