; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=x86-64 -mattr=+avx2 < %s | FileCheck %s --check-prefix=AVX2
; RUN: llc -mcpu=x86-64 -mattr=+avx512f,+avx512vl,+avx512bw < %s | FileCheck %s --check-prefix=AVX512

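; Compare two <16 x i16> vectors (icmp sgt) and bitcast the <16 x i1> result to an i16 mask.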
define i16 @v16i16(<16 x i16> %a, <16 x i16> %b) {
; AVX2-LABEL: v16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v16i16:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x = icmp sgt <16 x i16> %a, %b
  %res = bitcast <16 x i1> %x to i16
  ret i16 %res
}

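; Compare two <8 x i32> vectors (icmp sgt) and bitcast the <8 x i1> result to an i8 mask.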
define i8 @v8i32(<8 x i32> %a, <8 x i32> %b) {
; AVX2-LABEL: v8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $7, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $6, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $5, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $4, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $3, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $2, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v8i32:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x = icmp sgt <8 x i32> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

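; Compare two <8 x float> vectors (fcmp ogt) and bitcast the <8 x i1> result to an i8 mask.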
define i8 @v8f32(<8 x float> %a, <8 x float> %b) {
; AVX2-LABEL: v8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrw $7, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $6, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $5, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $4, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $3, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $2, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrw $1, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v8f32:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x = fcmp ogt <8 x float> %a, %b
  %res = bitcast <8 x i1> %x to i8
  ret i8 %res
}

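; Compare two <32 x i8> vectors (icmp sgt) and bitcast the <32 x i1> result to an i32 mask.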
define i32 @v32i8(<32 x i8> %a, <32 x i8> %b) {
; AVX2-LABEL: v32i8:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi0:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: .Lcfi1:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: .Lcfi2:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $32, %rsp
; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrb $15, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $14, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $13, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $12, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $11, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $10, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $9, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $8, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $7, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $6, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $5, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $4, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $3, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $2, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $1, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $0, %xmm1, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $15, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $14, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $13, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $12, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $11, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $10, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $9, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $8, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $7, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $6, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $5, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $4, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $3, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $2, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $1, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: vpextrb $0, %xmm0, %eax
; AVX2-NEXT: andb $1, %al
; AVX2-NEXT: movb %al, (%rsp)
; AVX2-NEXT: movl (%rsp), %eax
; AVX2-NEXT: movq %rbp, %rsp
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v32i8:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x = icmp sgt <32 x i8> %a, %b
  %res = bitcast <32 x i1> %x to i32
  ret i32 %res
}

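; Compare two <4 x i64> vectors (icmp sgt) and bitcast the <4 x i1> result to an i4 mask.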
define i4 @v4i64(<4 x i64> %a, <4 x i64> %b) {
; AVX2-LABEL: v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrd $2, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpcmpgtq %ymm1, %ymm0, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x = icmp sgt <4 x i64> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}

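; Compare two <4 x double> vectors (fcmp ogt) and bitcast the <4 x i1> result to an i4 mask.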
define i4 @v4f64(<4 x double> %a, <4 x double> %b) {
; AVX2-LABEL: v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpextrd $3, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrd $2, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpextrd $1, %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovd %xmm0, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: v4f64:
; AVX512: # BB#0:
; AVX512-NEXT: vcmpltpd %ymm0, %ymm1, %k0
; AVX512-NEXT: kmovd %k0, %eax
; AVX512-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movb -{{[0-9]+}}(%rsp), %al
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %x = fcmp ogt <4 x double> %a, %b
  %res = bitcast <4 x i1> %x to i4
  ret i4 %res
}