blob: d861395f3a9494f9e94d72e4155397dccd4c5c4f [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512
8
;
; vXi64
;
12
; OR-reduce <2 x i64> and compare the scalar result for equality with zero.
; Checks below expect pcmpeqb/pmovmskb on SSE2 and a single (v)ptest on SSE4.1 and AVX targets.
define i1 @test_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: test_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
  %2 = icmp eq i64 %1, 0
  ret i1 %2
}
38
; OR-reduce <4 x i64> and compare the scalar result for inequality with zero.
; Checks below expect the two xmm halves to be OR'd first on SSE targets, and a ymm vptest on AVX targets.
define i1 @test_v4i64(<4 x i64> %a0) {
; SSE2-LABEL: test_v4i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64> %a0)
  %2 = icmp ne i64 %1, 0
  ret i1 %2
}
67
; OR-reduce <8 x i64> and compare the scalar result for equality with zero.
; Checks below expect the input registers to be OR'd together before the final zero test;
; AVX512 targets first extract the upper 256 bits of the zmm input.
define i1 @test_v8i64(<8 x i64> %a0) {
; SSE2-LABEL: test_v8i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64> %a0)
  %2 = icmp eq i64 %1, 0
  ret i1 %2
}
118
; OR-reduce <16 x i64> and compare the scalar result for inequality with zero.
; Checks below expect a chain of OR instructions across all input registers
; followed by one zero test of the combined value.
define i1 @test_v16i64(<16 x i64> %a0) {
; SSE2-LABEL: test_v16i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm7, %xmm3
; SSE2-NEXT:    por %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    por %xmm6, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm7, %xmm3
; SSE41-NEXT:    por %xmm5, %xmm3
; SSE41-NEXT:    por %xmm1, %xmm3
; SSE41-NEXT:    por %xmm6, %xmm2
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    por %xmm3, %xmm2
; SSE41-NEXT:    por %xmm0, %xmm2
; SSE41-NEXT:    ptest %xmm2, %xmm2
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    setne %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64> %a0)
  %2 = icmp ne i64 %1, 0
  ret i1 %2
}
182
;
; vXi32
;
186
; OR-reduce <2 x i32> and compare the scalar result for equality with zero.
; Checks below expect the low 64 bits to be moved to a GPR and tested with testq.
define i1 @test_v2i32(<2 x i32> %a0) {
; SSE-LABEL: test_v2i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    testq %rax, %rax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32> %a0)
  %2 = icmp eq i32 %1, 0
  ret i1 %2
}
205
; OR-reduce <4 x i32> and compare the scalar result for inequality with zero.
; Checks below expect pcmpeqb/pmovmskb on SSE2 and a single (v)ptest on SSE4.1 and AVX targets.
define i1 @test_v4i32(<4 x i32> %a0) {
; SSE2-LABEL: test_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %a0)
  %2 = icmp ne i32 %1, 0
  ret i1 %2
}
231
; OR-reduce <8 x i32> and compare the scalar result for equality with zero.
; Checks below expect the two xmm halves to be OR'd first on SSE targets, and a ymm vptest on AVX targets.
define i1 @test_v8i32(<8 x i32> %a0) {
; SSE2-LABEL: test_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
  %2 = icmp eq i32 %1, 0
  ret i1 %2
}
260
; OR-reduce <16 x i32> and compare the scalar result for inequality with zero.
; Checks below expect the input registers to be OR'd together before the final zero test;
; AVX512 targets first extract the upper 256 bits of the zmm input.
define i1 @test_v16i32(<16 x i32> %a0) {
; SSE2-LABEL: test_v16i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    setne %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32> %a0)
  %2 = icmp ne i32 %1, 0
  ret i1 %2
}
311
; OR-reduce <32 x i32> and compare the scalar result for equality with zero.
; Checks below expect a chain of OR instructions across all input registers
; followed by one zero test of the combined value.
define i1 @test_v32i32(<32 x i32> %a0) {
; SSE2-LABEL: test_v32i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm7, %xmm3
; SSE2-NEXT:    por %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    por %xmm6, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm7, %xmm3
; SSE41-NEXT:    por %xmm5, %xmm3
; SSE41-NEXT:    por %xmm1, %xmm3
; SSE41-NEXT:    por %xmm6, %xmm2
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    por %xmm3, %xmm2
; SSE41-NEXT:    por %xmm0, %xmm2
; SSE41-NEXT:    ptest %xmm2, %xmm2
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32> %a0)
  %2 = icmp eq i32 %1, 0
  ret i1 %2
}
375
;
; vXi16
;
379
; OR-reduce <2 x i16> and compare the scalar result for equality with zero.
; Checks below expect the low 32 bits to be moved to a GPR and tested with testl.
define i1 @test_v2i16(<2 x i16> %a0) {
; SSE-LABEL: test_v2i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16> %a0)
  %2 = icmp eq i16 %1, 0
  ret i1 %2
}
398
; OR-reduce <4 x i16> and compare the scalar result for inequality with zero.
; Checks below expect the low 64 bits to be moved to a GPR and tested with testq.
define i1 @test_v4i16(<4 x i16> %a0) {
; SSE-LABEL: test_v4i16:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    setne %al
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    testq %rax, %rax
; AVX-NEXT:    setne %al
; AVX-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16> %a0)
  %2 = icmp ne i16 %1, 0
  ret i1 %2
}
417
; OR-reduce <8 x i16> and compare the scalar result for equality with zero.
; Checks below expect pcmpeqb/pmovmskb on SSE2 and a single (v)ptest on SSE4.1 and AVX targets.
define i1 @test_v8i16(<8 x i16> %a0) {
; SSE2-LABEL: test_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16> %a0)
  %2 = icmp eq i16 %1, 0
  ret i1 %2
}
443
; OR-reduce <16 x i16> and compare the scalar result for inequality with zero.
; Checks below expect the two xmm halves to be OR'd first on SSE targets, and a ymm vptest on AVX targets.
define i1 @test_v16i16(<16 x i16> %a0) {
; SSE2-LABEL: test_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
  %2 = icmp ne i16 %1, 0
  ret i1 %2
}
472
; OR-reduce <32 x i16> and compare the scalar result for equality with zero.
; Checks below expect the input registers to be OR'd together before the final zero test;
; AVX512 targets first extract the upper 256 bits of the zmm input.
define i1 @test_v32i16(<32 x i16> %a0) {
; SSE2-LABEL: test_v32i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16> %a0)
  %2 = icmp eq i16 %1, 0
  ret i1 %2
}
523
; OR-reduce <64 x i16> and compare the scalar result for inequality with zero.
; Checks below expect a chain of OR instructions across all input registers
; followed by one zero test of the combined value.
define i1 @test_v64i16(<64 x i16> %a0) {
; SSE2-LABEL: test_v64i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm7, %xmm3
; SSE2-NEXT:    por %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    por %xmm6, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm7, %xmm3
; SSE41-NEXT:    por %xmm5, %xmm3
; SSE41-NEXT:    por %xmm1, %xmm3
; SSE41-NEXT:    por %xmm6, %xmm2
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    por %xmm3, %xmm2
; SSE41-NEXT:    por %xmm0, %xmm2
; SSE41-NEXT:    ptest %xmm2, %xmm2
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    setne %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16> %a0)
  %2 = icmp ne i16 %1, 0
  ret i1 %2
}
587
;
; vXi8
;
591
; OR-reduce <2 x i8> and compare the scalar result for equality with zero.
; Checks below expect a movd to a GPR followed by a 16-bit testw.
define i1 @test_v2i8(<2 x i8> %a0) {
; SSE-LABEL: test_v2i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    testw %ax, %ax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8> %a0)
  %2 = icmp eq i8 %1, 0
  ret i1 %2
}
610
; OR-reduce <4 x i8> and compare the scalar result for inequality with zero.
; Checks below expect a movd to a GPR followed by a 32-bit testl.
define i1 @test_v4i8(<4 x i8> %a0) {
; SSE-LABEL: test_v4i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    setne %al
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    setne %al
; AVX-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8> %a0)
  %2 = icmp ne i8 %1, 0
  ret i1 %2
}
629
; OR-reduce <8 x i8> and compare the scalar result for equality with zero.
; Checks below expect the low 64 bits to be moved to a GPR and tested with testq.
define i1 @test_v8i8(<8 x i8> %a0) {
; SSE-LABEL: test_v8i8:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    testq %rax, %rax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    testq %rax, %rax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8> %a0)
  %2 = icmp eq i8 %1, 0
  ret i1 %2
}
648
; OR-reduce <16 x i8> and compare the scalar result for inequality with zero.
; Checks below expect pcmpeqb/pmovmskb on SSE2 and a single (v)ptest on SSE4.1 and AVX targets.
define i1 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %xmm0, %xmm0
; AVX-NEXT:    setne %al
; AVX-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8> %a0)
  %2 = icmp ne i8 %1, 0
  ret i1 %2
}
674
; OR-reduce <32 x i8> and compare the scalar result for equality with zero.
; Checks below expect the two xmm halves to be OR'd first on SSE targets, and a ymm vptest on AVX targets.
define i1 @test_v32i8(<32 x i8> %a0) {
; SSE2-LABEL: test_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
; SSE2-NEXT:    pmovmskb %xmm1, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    ptest %xmm0, %xmm0
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v32i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vptest %ymm0, %ymm0
; AVX-NEXT:    sete %al
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8> %a0)
  %2 = icmp eq i8 %1, 0
  ret i1 %2
}
703
; OR-reduce <64 x i8> and compare the scalar result for inequality with zero.
; Checks below expect the input registers to be OR'd together before the final zero test;
; AVX512 targets first extract the upper 256 bits of the zmm input.
define i1 @test_v64i8(<64 x i8> %a0) {
; SSE2-LABEL: test_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    setne %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    ptest %xmm1, %xmm1
; SSE41-NEXT:    setne %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    setne %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    setne %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    setne %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8> %a0)
  %2 = icmp ne i8 %1, 0
  ret i1 %2
}
754
; OR-reduce <128 x i8> and compare the scalar result for equality with zero.
; Checks below expect a chain of OR instructions across all input registers
; followed by one zero test of the combined value.
define i1 @test_v128i8(<128 x i8> %a0) {
; SSE2-LABEL: test_v128i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm7, %xmm3
; SSE2-NEXT:    por %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    por %xmm6, %xmm2
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm0, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-NEXT:    pmovmskb %xmm0, %eax
; SSE2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v128i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm7, %xmm3
; SSE41-NEXT:    por %xmm5, %xmm3
; SSE41-NEXT:    por %xmm1, %xmm3
; SSE41-NEXT:    por %xmm6, %xmm2
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    por %xmm3, %xmm2
; SSE41-NEXT:    por %xmm0, %xmm2
; SSE41-NEXT:    ptest %xmm2, %xmm2
; SSE41-NEXT:    sete %al
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v128i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vptest %ymm0, %ymm0
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v128i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vptest %ymm0, %ymm0
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v128i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vptest %ymm0, %ymm0
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
  %2 = icmp eq i8 %1, 0
  ret i1 %2
}
818
;
; Compare Truncated/Masked OR Reductions
;
822
; OR-reduce <2 x i64>, truncate the scalar result to i16, and compare it for equality with zero.
; Checks below expect a pshufd/por shuffle reduction followed by a 16-bit testw on the low word.
define i1 @trunc_v2i64(<2 x i64> %a0) {
; SSE-LABEL: trunc_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %eax
; SSE-NEXT:    testw %ax, %ax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64> %a0)
  %2 = trunc i64 %1 to i16
  %3 = icmp eq i16 %2, 0
  ret i1 %3
}
846
; OR-reduce <8 x i32>, mask the scalar result with 0x80000000, and compare it for equality with zero.
; Checks below expect a shuffle/OR reduction down to one element followed by
; a testl against the 0x80000000 immediate.
define i1 @mask_v8i32(<8 x i32> %a0) {
; SSE-LABEL: mask_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    por %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE-NEXT:    por %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %eax
; SSE-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX1-LABEL: mask_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
; AVX1-NEXT:    sete %al
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: mask_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
; AVX2-NEXT:    sete %al
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: mask_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    testl $-2147483648, %eax # imm = 0x80000000
; AVX512-NEXT:    sete %al
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32> %a0)
  %2 = and i32 %1, 2147483648
  %3 = icmp eq i32 %2, 0
  ret i1 %3
}
906
; trunc_v16i16: OR-reduces all sixteen i16 lanes of %a0 to a single i16,
; truncates that scalar to i8, and returns true when the truncated value is
; non-zero (i.e. when any lane has a bit set in its low 8 bits).
; The assertions inside the function are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
907define i1 @trunc_v16i16(<16 x i16> %a0) {
908; SSE-LABEL: trunc_v16i16:
909; SSE: # %bb.0:
910; SSE-NEXT: por %xmm1, %xmm0
911; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
912; SSE-NEXT: por %xmm0, %xmm1
913; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
914; SSE-NEXT: por %xmm1, %xmm0
915; SSE-NEXT: movdqa %xmm0, %xmm1
916; SSE-NEXT: psrld $16, %xmm1
917; SSE-NEXT: por %xmm0, %xmm1
918; SSE-NEXT: movd %xmm1, %eax
919; SSE-NEXT: testb %al, %al
920; SSE-NEXT: setne %al
921; SSE-NEXT: retq
922;
923; AVX1-LABEL: trunc_v16i16:
924; AVX1: # %bb.0:
925; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
926; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
927; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
928; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
929; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
930; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
931; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
932; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
933; AVX1-NEXT: vmovd %xmm0, %eax
934; AVX1-NEXT: testb %al, %al
935; AVX1-NEXT: setne %al
936; AVX1-NEXT: vzeroupper
937; AVX1-NEXT: retq
938;
939; AVX2-LABEL: trunc_v16i16:
940; AVX2: # %bb.0:
941; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
942; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
943; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
944; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
945; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
946; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
947; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
948; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
949; AVX2-NEXT: vmovd %xmm0, %eax
950; AVX2-NEXT: testb %al, %al
951; AVX2-NEXT: setne %al
952; AVX2-NEXT: vzeroupper
953; AVX2-NEXT: retq
954;
955; AVX512-LABEL: trunc_v16i16:
956; AVX512: # %bb.0:
957; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
958; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
959; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
960; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
961; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
962; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
963; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
964; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
965; AVX512-NEXT: vmovd %xmm0, %eax
966; AVX512-NEXT: testb %al, %al
967; AVX512-NEXT: setne %al
968; AVX512-NEXT: vzeroupper
969; AVX512-NEXT: retq
970 %1 = call i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16> %a0)
971 %2 = trunc i16 %1 to i8 ; only the low byte of the reduction is tested
972 %3 = icmp ne i8 %2, 0
973 ret i1 %3
974}
975
; mask_v128i8: OR-reduces all 128 i8 lanes of %a0 to a single i8, masks the
; result with 1, and returns true when the masked value is zero (i.e. when no
; lane has its lowest bit set).
; The assertions inside the function are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
976define i1 @mask_v128i8(<128 x i8> %a0) {
977; SSE-LABEL: mask_v128i8:
978; SSE: # %bb.0:
979; SSE-NEXT: por %xmm6, %xmm2
980; SSE-NEXT: por %xmm7, %xmm3
981; SSE-NEXT: por %xmm5, %xmm3
982; SSE-NEXT: por %xmm1, %xmm3
983; SSE-NEXT: por %xmm4, %xmm2
984; SSE-NEXT: por %xmm3, %xmm2
985; SSE-NEXT: por %xmm0, %xmm2
986; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
987; SSE-NEXT: por %xmm2, %xmm0
988; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
989; SSE-NEXT: por %xmm0, %xmm1
990; SSE-NEXT: movdqa %xmm1, %xmm0
991; SSE-NEXT: psrld $16, %xmm0
992; SSE-NEXT: por %xmm1, %xmm0
993; SSE-NEXT: movdqa %xmm0, %xmm1
994; SSE-NEXT: psrlw $8, %xmm1
995; SSE-NEXT: por %xmm0, %xmm1
996; SSE-NEXT: movd %xmm1, %eax
997; SSE-NEXT: testb $1, %al
998; SSE-NEXT: sete %al
999; SSE-NEXT: retq
1000;
1001; AVX1-LABEL: mask_v128i8:
1002; AVX1: # %bb.0:
1003; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
1004; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
1005; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
1006; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1007; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
1008; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1009; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
1010; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
1011; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
1012; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1013; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1014; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1015; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1016; AVX1-NEXT: vmovd %xmm0, %eax
1017; AVX1-NEXT: testb $1, %al
1018; AVX1-NEXT: sete %al
1019; AVX1-NEXT: vzeroupper
1020; AVX1-NEXT: retq
1021;
1022; AVX2-LABEL: mask_v128i8:
1023; AVX2: # %bb.0:
1024; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
1025; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
1026; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
1027; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1028; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1029; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1030; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1031; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1032; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1033; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
1034; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1035; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1036; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1037; AVX2-NEXT: vmovd %xmm0, %eax
1038; AVX2-NEXT: testb $1, %al
1039; AVX2-NEXT: sete %al
1040; AVX2-NEXT: vzeroupper
1041; AVX2-NEXT: retq
1042;
1043; AVX512-LABEL: mask_v128i8:
1044; AVX512: # %bb.0:
1045; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
1046; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1047; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
1048; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1049; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
1050; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1051; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
1052; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1053; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
1054; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
1055; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
1056; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1057; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
1058; AVX512-NEXT: vmovd %xmm0, %eax
1059; AVX512-NEXT: testb $1, %al
1060; AVX512-NEXT: sete %al
1061; AVX512-NEXT: vzeroupper
1062; AVX512-NEXT: retq
1063 %1 = call i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8> %a0)
1064 %2 = and i8 %1, 1 ; keep only bit 0 of the reduced value
1065 %3 = icmp eq i8 %2, 0
1066 ret i1 %3
1067}
1068
; PR44781: reinterprets the four-i32 %struct.Box pointed to by %0 as a
; <4 x i32>, loads it with 4-byte alignment, OR-reduces the four lanes, masks
; the scalar with 15, and returns true when the masked value is zero (i.e. when
; no field has any of its low four bits set).
; NOTE(review): the name presumably refers to an LLVM bug report number -
; confirm against the bug tracker.
; The assertions inside the function are autogenerated (see the NOTE at the top
; of the file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1069%struct.Box = type { i32, i32, i32, i32 }
1070define zeroext i1 @PR44781(%struct.Box* %0) {
1071; SSE-LABEL: PR44781:
1072; SSE: # %bb.0:
1073; SSE-NEXT: movdqu (%rdi), %xmm0
1074; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1075; SSE-NEXT: por %xmm0, %xmm1
1076; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1077; SSE-NEXT: por %xmm1, %xmm0
1078; SSE-NEXT: movd %xmm0, %eax
1079; SSE-NEXT: testb $15, %al
1080; SSE-NEXT: sete %al
1081; SSE-NEXT: retq
1082;
1083; AVX1-LABEL: PR44781:
1084; AVX1: # %bb.0:
1085; AVX1-NEXT: vmovdqu (%rdi), %xmm0
1086; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1087; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1088; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1089; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
1090; AVX1-NEXT: vmovd %xmm0, %eax
1091; AVX1-NEXT: testb $15, %al
1092; AVX1-NEXT: sete %al
1093; AVX1-NEXT: retq
1094;
1095; AVX2-LABEL: PR44781:
1096; AVX2: # %bb.0:
1097; AVX2-NEXT: vpbroadcastq 8(%rdi), %xmm0
1098; AVX2-NEXT: vpor (%rdi), %xmm0, %xmm0
1099; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1100; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
1101; AVX2-NEXT: vmovd %xmm0, %eax
1102; AVX2-NEXT: testb $15, %al
1103; AVX2-NEXT: sete %al
1104; AVX2-NEXT: retq
1105;
1106; AVX512-LABEL: PR44781:
1107; AVX512: # %bb.0:
1108; AVX512-NEXT: vpbroadcastq 8(%rdi), %xmm0
1109; AVX512-NEXT: vpor (%rdi), %xmm0, %xmm0
1110; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1111; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
1112; AVX512-NEXT: vmovd %xmm0, %eax
1113; AVX512-NEXT: testb $15, %al
1114; AVX512-NEXT: sete %al
1115; AVX512-NEXT: retq
1116 %2 = bitcast %struct.Box* %0 to <4 x i32>*
1117 %3 = load <4 x i32>, <4 x i32>* %2, align 4 ; align 4 only, not the vector's natural 16 bytes
1118 %4 = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> %3)
1119 %5 = and i32 %4, 15 ; keep only the low four bits of the reduced value
1120 %6 = icmp eq i32 %5, 0
1121 ret i1 %6
1122}
1123
; Declarations of the llvm.experimental.vector.reduce.or intrinsic overloads,
; grouped by element type (i64, i32, i16, i8) and listed by increasing vector
; length within each group. Each overload takes one vector operand and returns
; a scalar of the vector's element type.
Simon Pilgrim298377f2020-06-15 10:40:27 +01001124declare i64 @llvm.experimental.vector.reduce.or.v2i64(<2 x i64>)
1125declare i64 @llvm.experimental.vector.reduce.or.v4i64(<4 x i64>)
1126declare i64 @llvm.experimental.vector.reduce.or.v8i64(<8 x i64>)
1127declare i64 @llvm.experimental.vector.reduce.or.v16i64(<16 x i64>)
1128
1129declare i32 @llvm.experimental.vector.reduce.or.v2i32(<2 x i32>)
1130declare i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32>)
1131declare i32 @llvm.experimental.vector.reduce.or.v8i32(<8 x i32>)
1132declare i32 @llvm.experimental.vector.reduce.or.v16i32(<16 x i32>)
1133declare i32 @llvm.experimental.vector.reduce.or.v32i32(<32 x i32>)
1134
1135declare i16 @llvm.experimental.vector.reduce.or.v2i16(<2 x i16>)
1136declare i16 @llvm.experimental.vector.reduce.or.v4i16(<4 x i16>)
1137declare i16 @llvm.experimental.vector.reduce.or.v8i16(<8 x i16>)
1138declare i16 @llvm.experimental.vector.reduce.or.v16i16(<16 x i16>)
1139declare i16 @llvm.experimental.vector.reduce.or.v32i16(<32 x i16>)
1140declare i16 @llvm.experimental.vector.reduce.or.v64i16(<64 x i16>)
1141
1142declare i8 @llvm.experimental.vector.reduce.or.v2i8(<2 x i8>)
1143declare i8 @llvm.experimental.vector.reduce.or.v4i8(<4 x i8>)
1144declare i8 @llvm.experimental.vector.reduce.or.v8i8(<8 x i8>)
1145declare i8 @llvm.experimental.vector.reduce.or.v16i8(<16 x i8>)
1146declare i8 @llvm.experimental.vector.reduce.or.v32i8(<32 x i8>)
1147declare i8 @llvm.experimental.vector.reduce.or.v64i8(<64 x i8>)
1148declare i8 @llvm.experimental.vector.reduce.or.v128i8(<128 x i8>)