blob: ef060acfb67829daa406fc7d3063f3b4307370d2 [file] [log] [blame]
Craig Topper39910892018-12-05 06:29:44 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
3; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
4; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
5; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
6; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
7; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
8
9;
10; vXi64
11;
12
13define i64 @test_v2i64(<2 x i64> %a0) {
14; SSE-LABEL: test_v2i64:
15; SSE: # %bb.0:
16; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
17; SSE-NEXT: por %xmm0, %xmm1
18; SSE-NEXT: movq %xmm1, %rax
19; SSE-NEXT: retq
20;
21; AVX-LABEL: test_v2i64:
22; AVX: # %bb.0:
23; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
24; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
25; AVX-NEXT: vmovq %xmm0, %rax
26; AVX-NEXT: retq
27;
28; AVX512-LABEL: test_v2i64:
29; AVX512: # %bb.0:
30; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
31; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
32; AVX512-NEXT: vmovq %xmm0, %rax
33; AVX512-NEXT: retq
34 %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %a0)
35 ret i64 %1
36}
37
38define i64 @test_v4i64(<4 x i64> %a0) {
39; SSE-LABEL: test_v4i64:
40; SSE: # %bb.0:
41; SSE-NEXT: por %xmm1, %xmm0
42; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
43; SSE-NEXT: por %xmm0, %xmm1
44; SSE-NEXT: movq %xmm1, %rax
45; SSE-NEXT: retq
46;
47; AVX1-LABEL: test_v4i64:
48; AVX1: # %bb.0:
49; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +000050; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
51; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
52; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +000053; AVX1-NEXT: vmovq %xmm0, %rax
54; AVX1-NEXT: vzeroupper
55; AVX1-NEXT: retq
56;
57; AVX2-LABEL: test_v4i64:
58; AVX2: # %bb.0:
59; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +000060; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +000061; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
62; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
63; AVX2-NEXT: vmovq %xmm0, %rax
64; AVX2-NEXT: vzeroupper
65; AVX2-NEXT: retq
66;
67; AVX512-LABEL: test_v4i64:
68; AVX512: # %bb.0:
69; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +000070; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +000071; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
72; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
73; AVX512-NEXT: vmovq %xmm0, %rax
74; AVX512-NEXT: vzeroupper
75; AVX512-NEXT: retq
76 %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64> %a0)
77 ret i64 %1
78}
79
80define i64 @test_v8i64(<8 x i64> %a0) {
81; SSE-LABEL: test_v8i64:
82; SSE: # %bb.0:
83; SSE-NEXT: por %xmm3, %xmm1
84; SSE-NEXT: por %xmm2, %xmm1
85; SSE-NEXT: por %xmm0, %xmm1
86; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
87; SSE-NEXT: por %xmm1, %xmm0
88; SSE-NEXT: movq %xmm0, %rax
89; SSE-NEXT: retq
90;
91; AVX1-LABEL: test_v8i64:
92; AVX1: # %bb.0:
93; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
94; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +000095; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +000096; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
97; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
98; AVX1-NEXT: vmovq %xmm0, %rax
99; AVX1-NEXT: vzeroupper
100; AVX1-NEXT: retq
101;
102; AVX2-LABEL: test_v8i64:
103; AVX2: # %bb.0:
104; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
105; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000106; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000107; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
108; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
109; AVX2-NEXT: vmovq %xmm0, %rax
110; AVX2-NEXT: vzeroupper
111; AVX2-NEXT: retq
112;
113; AVX512-LABEL: test_v8i64:
114; AVX512: # %bb.0:
115; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
116; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
117; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patela61d5862019-01-29 19:13:39 +0000118; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000119; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
120; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
121; AVX512-NEXT: vmovq %xmm0, %rax
122; AVX512-NEXT: vzeroupper
123; AVX512-NEXT: retq
124 %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64> %a0)
125 ret i64 %1
126}
127
128define i64 @test_v16i64(<16 x i64> %a0) {
129; SSE-LABEL: test_v16i64:
130; SSE: # %bb.0:
131; SSE-NEXT: por %xmm6, %xmm2
132; SSE-NEXT: por %xmm7, %xmm3
133; SSE-NEXT: por %xmm5, %xmm3
134; SSE-NEXT: por %xmm1, %xmm3
135; SSE-NEXT: por %xmm4, %xmm2
136; SSE-NEXT: por %xmm3, %xmm2
137; SSE-NEXT: por %xmm0, %xmm2
138; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
139; SSE-NEXT: por %xmm2, %xmm0
140; SSE-NEXT: movq %xmm0, %rax
141; SSE-NEXT: retq
142;
143; AVX1-LABEL: test_v16i64:
144; AVX1: # %bb.0:
145; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
146; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
147; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
148; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000149; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000150; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
151; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
152; AVX1-NEXT: vmovq %xmm0, %rax
153; AVX1-NEXT: vzeroupper
154; AVX1-NEXT: retq
155;
156; AVX2-LABEL: test_v16i64:
157; AVX2: # %bb.0:
158; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
159; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
160; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
161; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000162; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000163; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
164; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
165; AVX2-NEXT: vmovq %xmm0, %rax
166; AVX2-NEXT: vzeroupper
167; AVX2-NEXT: retq
168;
169; AVX512-LABEL: test_v16i64:
170; AVX512: # %bb.0:
171; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
172; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
173; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
174; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patela61d5862019-01-29 19:13:39 +0000175; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000176; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
177; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
178; AVX512-NEXT: vmovq %xmm0, %rax
179; AVX512-NEXT: vzeroupper
180; AVX512-NEXT: retq
181 %1 = call i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64> %a0)
182 ret i64 %1
183}
184
185;
186; vXi32
187;
188
189define i32 @test_v2i32(<2 x i32> %a0) {
190; SSE-LABEL: test_v2i32:
191; SSE: # %bb.0:
192; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
193; SSE-NEXT: por %xmm0, %xmm1
194; SSE-NEXT: movd %xmm1, %eax
195; SSE-NEXT: retq
196;
197; AVX-LABEL: test_v2i32:
198; AVX: # %bb.0:
199; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
200; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
201; AVX-NEXT: vmovd %xmm0, %eax
202; AVX-NEXT: retq
203;
204; AVX512-LABEL: test_v2i32:
205; AVX512: # %bb.0:
206; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
207; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
208; AVX512-NEXT: vmovd %xmm0, %eax
209; AVX512-NEXT: retq
210 %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32> %a0)
211 ret i32 %1
212}
213
214define i32 @test_v4i32(<4 x i32> %a0) {
215; SSE-LABEL: test_v4i32:
216; SSE: # %bb.0:
217; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
218; SSE-NEXT: por %xmm0, %xmm1
219; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
220; SSE-NEXT: por %xmm1, %xmm0
221; SSE-NEXT: movd %xmm0, %eax
222; SSE-NEXT: retq
223;
224; AVX-LABEL: test_v4i32:
225; AVX: # %bb.0:
226; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
227; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
228; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
229; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
230; AVX-NEXT: vmovd %xmm0, %eax
231; AVX-NEXT: retq
232;
233; AVX512-LABEL: test_v4i32:
234; AVX512: # %bb.0:
235; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
236; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
237; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
238; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
239; AVX512-NEXT: vmovd %xmm0, %eax
240; AVX512-NEXT: retq
241 %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32> %a0)
242 ret i32 %1
243}
244
245define i32 @test_v8i32(<8 x i32> %a0) {
246; SSE-LABEL: test_v8i32:
247; SSE: # %bb.0:
248; SSE-NEXT: por %xmm1, %xmm0
249; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
250; SSE-NEXT: por %xmm0, %xmm1
251; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
252; SSE-NEXT: por %xmm1, %xmm0
253; SSE-NEXT: movd %xmm0, %eax
254; SSE-NEXT: retq
255;
256; AVX1-LABEL: test_v8i32:
257; AVX1: # %bb.0:
258; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000259; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
260; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
261; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
262; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
263; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000264; AVX1-NEXT: vmovd %xmm0, %eax
265; AVX1-NEXT: vzeroupper
266; AVX1-NEXT: retq
267;
268; AVX2-LABEL: test_v8i32:
269; AVX2: # %bb.0:
270; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000271; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000272; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000273; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000274; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
275; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
276; AVX2-NEXT: vmovd %xmm0, %eax
277; AVX2-NEXT: vzeroupper
278; AVX2-NEXT: retq
279;
280; AVX512-LABEL: test_v8i32:
281; AVX512: # %bb.0:
282; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000283; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000284; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000285; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000286; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
287; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
288; AVX512-NEXT: vmovd %xmm0, %eax
289; AVX512-NEXT: vzeroupper
290; AVX512-NEXT: retq
291 %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32> %a0)
292 ret i32 %1
293}
294
295define i32 @test_v16i32(<16 x i32> %a0) {
296; SSE-LABEL: test_v16i32:
297; SSE: # %bb.0:
298; SSE-NEXT: por %xmm3, %xmm1
299; SSE-NEXT: por %xmm2, %xmm1
300; SSE-NEXT: por %xmm0, %xmm1
301; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
302; SSE-NEXT: por %xmm1, %xmm0
303; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
304; SSE-NEXT: por %xmm0, %xmm1
305; SSE-NEXT: movd %xmm1, %eax
306; SSE-NEXT: retq
307;
308; AVX1-LABEL: test_v16i32:
309; AVX1: # %bb.0:
310; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
311; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000312; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000313; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000314; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000315; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
316; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
317; AVX1-NEXT: vmovd %xmm0, %eax
318; AVX1-NEXT: vzeroupper
319; AVX1-NEXT: retq
320;
321; AVX2-LABEL: test_v16i32:
322; AVX2: # %bb.0:
323; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
324; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000325; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000326; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000327; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000328; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
329; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
330; AVX2-NEXT: vmovd %xmm0, %eax
331; AVX2-NEXT: vzeroupper
332; AVX2-NEXT: retq
333;
334; AVX512-LABEL: test_v16i32:
335; AVX512: # %bb.0:
336; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
337; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
338; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patela61d5862019-01-29 19:13:39 +0000339; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000340; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patela61d5862019-01-29 19:13:39 +0000341; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000342; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
343; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
344; AVX512-NEXT: vmovd %xmm0, %eax
345; AVX512-NEXT: vzeroupper
346; AVX512-NEXT: retq
347 %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32> %a0)
348 ret i32 %1
349}
350
351define i32 @test_v32i32(<32 x i32> %a0) {
352; SSE-LABEL: test_v32i32:
353; SSE: # %bb.0:
354; SSE-NEXT: por %xmm6, %xmm2
355; SSE-NEXT: por %xmm7, %xmm3
356; SSE-NEXT: por %xmm5, %xmm3
357; SSE-NEXT: por %xmm1, %xmm3
358; SSE-NEXT: por %xmm4, %xmm2
359; SSE-NEXT: por %xmm3, %xmm2
360; SSE-NEXT: por %xmm0, %xmm2
361; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
362; SSE-NEXT: por %xmm2, %xmm0
363; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
364; SSE-NEXT: por %xmm0, %xmm1
365; SSE-NEXT: movd %xmm1, %eax
366; SSE-NEXT: retq
367;
368; AVX1-LABEL: test_v32i32:
369; AVX1: # %bb.0:
370; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
371; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
372; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
373; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000374; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000375; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000376; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000377; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
378; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
379; AVX1-NEXT: vmovd %xmm0, %eax
380; AVX1-NEXT: vzeroupper
381; AVX1-NEXT: retq
382;
383; AVX2-LABEL: test_v32i32:
384; AVX2: # %bb.0:
385; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
386; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
387; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
388; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000389; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000390; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000391; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000392; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
393; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
394; AVX2-NEXT: vmovd %xmm0, %eax
395; AVX2-NEXT: vzeroupper
396; AVX2-NEXT: retq
397;
398; AVX512-LABEL: test_v32i32:
399; AVX512: # %bb.0:
400; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
401; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
402; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
403; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patela61d5862019-01-29 19:13:39 +0000404; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000405; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patela61d5862019-01-29 19:13:39 +0000406; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000407; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
408; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
409; AVX512-NEXT: vmovd %xmm0, %eax
410; AVX512-NEXT: vzeroupper
411; AVX512-NEXT: retq
412 %1 = call i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32> %a0)
413 ret i32 %1
414}
415
416;
417; vXi16
418;
419
420define i16 @test_v2i16(<2 x i16> %a0) {
421; SSE-LABEL: test_v2i16:
422; SSE: # %bb.0:
423; SSE-NEXT: movdqa %xmm0, %xmm1
424; SSE-NEXT: psrld $16, %xmm1
425; SSE-NEXT: por %xmm0, %xmm1
426; SSE-NEXT: movd %xmm1, %eax
427; SSE-NEXT: # kill: def $ax killed $ax killed $eax
428; SSE-NEXT: retq
429;
430; AVX-LABEL: test_v2i16:
431; AVX: # %bb.0:
432; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
433; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
434; AVX-NEXT: vmovd %xmm0, %eax
435; AVX-NEXT: # kill: def $ax killed $ax killed $eax
436; AVX-NEXT: retq
437;
438; AVX512-LABEL: test_v2i16:
439; AVX512: # %bb.0:
440; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
441; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
442; AVX512-NEXT: vmovd %xmm0, %eax
443; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
444; AVX512-NEXT: retq
445 %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16> %a0)
446 ret i16 %1
447}
448
449define i16 @test_v4i16(<4 x i16> %a0) {
450; SSE-LABEL: test_v4i16:
451; SSE: # %bb.0:
452; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
453; SSE-NEXT: por %xmm0, %xmm1
454; SSE-NEXT: movdqa %xmm1, %xmm0
455; SSE-NEXT: psrld $16, %xmm0
456; SSE-NEXT: por %xmm1, %xmm0
457; SSE-NEXT: movd %xmm0, %eax
458; SSE-NEXT: # kill: def $ax killed $ax killed $eax
459; SSE-NEXT: retq
460;
461; AVX-LABEL: test_v4i16:
462; AVX: # %bb.0:
463; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
464; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
465; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
466; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
467; AVX-NEXT: vmovd %xmm0, %eax
468; AVX-NEXT: # kill: def $ax killed $ax killed $eax
469; AVX-NEXT: retq
470;
471; AVX512-LABEL: test_v4i16:
472; AVX512: # %bb.0:
473; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
474; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
475; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
476; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
477; AVX512-NEXT: vmovd %xmm0, %eax
478; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
479; AVX512-NEXT: retq
480 %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16> %a0)
481 ret i16 %1
482}
483
484define i16 @test_v8i16(<8 x i16> %a0) {
485; SSE-LABEL: test_v8i16:
486; SSE: # %bb.0:
487; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
488; SSE-NEXT: por %xmm0, %xmm1
489; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
490; SSE-NEXT: por %xmm1, %xmm0
491; SSE-NEXT: movdqa %xmm0, %xmm1
492; SSE-NEXT: psrld $16, %xmm1
493; SSE-NEXT: por %xmm0, %xmm1
494; SSE-NEXT: movd %xmm1, %eax
495; SSE-NEXT: # kill: def $ax killed $ax killed $eax
496; SSE-NEXT: retq
497;
498; AVX-LABEL: test_v8i16:
499; AVX: # %bb.0:
500; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
501; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
502; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
503; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
504; AVX-NEXT: vpsrld $16, %xmm0, %xmm1
505; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
506; AVX-NEXT: vmovd %xmm0, %eax
507; AVX-NEXT: # kill: def $ax killed $ax killed $eax
508; AVX-NEXT: retq
509;
510; AVX512-LABEL: test_v8i16:
511; AVX512: # %bb.0:
512; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
513; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
514; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
515; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
516; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
517; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
518; AVX512-NEXT: vmovd %xmm0, %eax
519; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
520; AVX512-NEXT: retq
521 %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16> %a0)
522 ret i16 %1
523}
524
525define i16 @test_v16i16(<16 x i16> %a0) {
526; SSE-LABEL: test_v16i16:
527; SSE: # %bb.0:
528; SSE-NEXT: por %xmm1, %xmm0
529; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
530; SSE-NEXT: por %xmm0, %xmm1
531; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
532; SSE-NEXT: por %xmm1, %xmm0
533; SSE-NEXT: movdqa %xmm0, %xmm1
534; SSE-NEXT: psrld $16, %xmm1
535; SSE-NEXT: por %xmm0, %xmm1
536; SSE-NEXT: movd %xmm1, %eax
537; SSE-NEXT: # kill: def $ax killed $ax killed $eax
538; SSE-NEXT: retq
539;
540; AVX1-LABEL: test_v16i16:
541; AVX1: # %bb.0:
542; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000543; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
544; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
545; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
546; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
547; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000548; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
549; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
550; AVX1-NEXT: vmovd %xmm0, %eax
551; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
552; AVX1-NEXT: vzeroupper
553; AVX1-NEXT: retq
554;
555; AVX2-LABEL: test_v16i16:
556; AVX2: # %bb.0:
557; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000558; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000559; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000560; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000561; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000562; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000563; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
564; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
565; AVX2-NEXT: vmovd %xmm0, %eax
566; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
567; AVX2-NEXT: vzeroupper
568; AVX2-NEXT: retq
569;
570; AVX512-LABEL: test_v16i16:
571; AVX512: # %bb.0:
572; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000573; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000574; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000575; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000576; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000577; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000578; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
579; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
580; AVX512-NEXT: vmovd %xmm0, %eax
581; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
582; AVX512-NEXT: vzeroupper
583; AVX512-NEXT: retq
584 %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16> %a0)
585 ret i16 %1
586}
587
588define i16 @test_v32i16(<32 x i16> %a0) {
589; SSE-LABEL: test_v32i16:
590; SSE: # %bb.0:
591; SSE-NEXT: por %xmm3, %xmm1
592; SSE-NEXT: por %xmm2, %xmm1
593; SSE-NEXT: por %xmm0, %xmm1
594; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
595; SSE-NEXT: por %xmm1, %xmm0
596; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
597; SSE-NEXT: por %xmm0, %xmm1
598; SSE-NEXT: movdqa %xmm1, %xmm0
599; SSE-NEXT: psrld $16, %xmm0
600; SSE-NEXT: por %xmm1, %xmm0
601; SSE-NEXT: movd %xmm0, %eax
602; SSE-NEXT: # kill: def $ax killed $ax killed $eax
603; SSE-NEXT: retq
604;
605; AVX1-LABEL: test_v32i16:
606; AVX1: # %bb.0:
607; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
608; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000609; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000610; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000611; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000612; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000613; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000614; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
615; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
616; AVX1-NEXT: vmovd %xmm0, %eax
617; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
618; AVX1-NEXT: vzeroupper
619; AVX1-NEXT: retq
620;
621; AVX2-LABEL: test_v32i16:
622; AVX2: # %bb.0:
623; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
624; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000625; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000626; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000627; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000628; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000629; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000630; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
631; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
632; AVX2-NEXT: vmovd %xmm0, %eax
633; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
634; AVX2-NEXT: vzeroupper
635; AVX2-NEXT: retq
636;
637; AVX512-LABEL: test_v32i16:
638; AVX512: # %bb.0:
639; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
640; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
641; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patela61d5862019-01-29 19:13:39 +0000642; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000643; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patela61d5862019-01-29 19:13:39 +0000644; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000645; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patela61d5862019-01-29 19:13:39 +0000646; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000647; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
648; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
649; AVX512-NEXT: vmovd %xmm0, %eax
650; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
651; AVX512-NEXT: vzeroupper
652; AVX512-NEXT: retq
653 %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16> %a0)
654 ret i16 %1
655}
656
657define i16 @test_v64i16(<64 x i16> %a0) {
658; SSE-LABEL: test_v64i16:
659; SSE: # %bb.0:
660; SSE-NEXT: por %xmm6, %xmm2
661; SSE-NEXT: por %xmm7, %xmm3
662; SSE-NEXT: por %xmm5, %xmm3
663; SSE-NEXT: por %xmm1, %xmm3
664; SSE-NEXT: por %xmm4, %xmm2
665; SSE-NEXT: por %xmm3, %xmm2
666; SSE-NEXT: por %xmm0, %xmm2
667; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
668; SSE-NEXT: por %xmm2, %xmm0
669; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
670; SSE-NEXT: por %xmm0, %xmm1
671; SSE-NEXT: movdqa %xmm1, %xmm0
672; SSE-NEXT: psrld $16, %xmm0
673; SSE-NEXT: por %xmm1, %xmm0
674; SSE-NEXT: movd %xmm0, %eax
675; SSE-NEXT: # kill: def $ax killed $ax killed $eax
676; SSE-NEXT: retq
677;
678; AVX1-LABEL: test_v64i16:
679; AVX1: # %bb.0:
680; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1
681; AVX1-NEXT: vorps %ymm1, %ymm2, %ymm1
682; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
683; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000684; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000685; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000686; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000687; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000688; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000689; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
690; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
691; AVX1-NEXT: vmovd %xmm0, %eax
692; AVX1-NEXT: # kill: def $ax killed $ax killed $eax
693; AVX1-NEXT: vzeroupper
694; AVX1-NEXT: retq
695;
696; AVX2-LABEL: test_v64i16:
697; AVX2: # %bb.0:
698; AVX2-NEXT: vpor %ymm3, %ymm1, %ymm1
699; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
700; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
701; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000702; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000703; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000704; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000705; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patel21aa6dd2019-01-25 15:37:42 +0000706; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000707; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
708; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
709; AVX2-NEXT: vmovd %xmm0, %eax
710; AVX2-NEXT: # kill: def $ax killed $ax killed $eax
711; AVX2-NEXT: vzeroupper
712; AVX2-NEXT: retq
713;
714; AVX512-LABEL: test_v64i16:
715; AVX512: # %bb.0:
716; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
717; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
718; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0
719; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
Sanjay Patela61d5862019-01-29 19:13:39 +0000720; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000721; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
Sanjay Patela61d5862019-01-29 19:13:39 +0000722; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000723; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
Sanjay Patela61d5862019-01-29 19:13:39 +0000724; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
Craig Topper39910892018-12-05 06:29:44 +0000725; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
726; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
727; AVX512-NEXT: vmovd %xmm0, %eax
728; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
729; AVX512-NEXT: vzeroupper
730; AVX512-NEXT: retq
731 %1 = call i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16> %a0)
732 ret i16 %1
733}
734
735;
736; vXi8
737;
738
739define i8 @test_v2i8(<2 x i8> %a0) {
740; SSE2-LABEL: test_v2i8:
741; SSE2: # %bb.0:
742; SSE2-NEXT: movdqa %xmm0, %xmm1
743; SSE2-NEXT: psrlw $8, %xmm1
744; SSE2-NEXT: por %xmm0, %xmm1
745; SSE2-NEXT: movd %xmm1, %eax
746; SSE2-NEXT: # kill: def $al killed $al killed $eax
747; SSE2-NEXT: retq
748;
749; SSE41-LABEL: test_v2i8:
750; SSE41: # %bb.0:
751; SSE41-NEXT: movdqa %xmm0, %xmm1
752; SSE41-NEXT: psrlw $8, %xmm1
753; SSE41-NEXT: por %xmm0, %xmm1
754; SSE41-NEXT: pextrb $0, %xmm1, %eax
755; SSE41-NEXT: # kill: def $al killed $al killed $eax
756; SSE41-NEXT: retq
757;
758; AVX-LABEL: test_v2i8:
759; AVX: # %bb.0:
760; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
761; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
762; AVX-NEXT: vpextrb $0, %xmm0, %eax
763; AVX-NEXT: # kill: def $al killed $al killed $eax
764; AVX-NEXT: retq
765;
766; AVX512-LABEL: test_v2i8:
767; AVX512: # %bb.0:
768; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
769; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0
770; AVX512-NEXT: vpextrb $0, %xmm0, %eax
771; AVX512-NEXT: # kill: def $al killed $al killed $eax
772; AVX512-NEXT: retq
773 %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8> %a0)
774 ret i8 %1
775}
776
define i8 @test_v4i8(<4 x i8> %a0) {
; SSE2-LABEL: test_v4i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8> %a0)
  ret i8 %1
}

define i8 @test_v8i8(<8 x i8> %a0) {
; SSE2-LABEL: test_v8i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pextrb $0, %xmm1, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8> %a0)
  ret i8 %1
}

define i8 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8> %a0)
  ret i8 %1
}

define i8 @test_v32i8(<32 x i8> %a0) {
; SSE2-LABEL: test_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8> %a0)
  ret i8 %1
}

define i8 @test_v64i8(<64 x i8> %a0) {
; SSE2-LABEL: test_v64i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm3, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pextrb $0, %xmm1, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8> %a0)
  ret i8 %1
}

define i8 @test_v128i8(<128 x i8> %a0) {
; SSE2-LABEL: test_v128i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm6, %xmm2
; SSE2-NEXT:    por %xmm7, %xmm3
; SSE2-NEXT:    por %xmm5, %xmm3
; SSE2-NEXT:    por %xmm1, %xmm3
; SSE2-NEXT:    por %xmm4, %xmm2
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    por %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrlw $8, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v128i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    por %xmm6, %xmm2
; SSE41-NEXT:    por %xmm7, %xmm3
; SSE41-NEXT:    por %xmm5, %xmm3
; SSE41-NEXT:    por %xmm1, %xmm3
; SSE41-NEXT:    por %xmm4, %xmm2
; SSE41-NEXT:    por %xmm3, %xmm2
; SSE41-NEXT:    por %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE41-NEXT:    por %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrld $16, %xmm0
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrlw $8, %xmm1
; SSE41-NEXT:    por %xmm0, %xmm1
; SSE41-NEXT:    pextrb $0, %xmm1, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v128i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vorps %ymm3, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm2, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v128i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v128i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8> %a0)
  ret i8 %1
}

declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v4i64(<4 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v8i64(<8 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v16i64(<16 x i64>)

declare i32 @llvm.experimental.vector.reduce.or.i32.v2i32(<2 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v4i32(<4 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v8i32(<8 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v16i32(<16 x i32>)
declare i32 @llvm.experimental.vector.reduce.or.i32.v32i32(<32 x i32>)

declare i16 @llvm.experimental.vector.reduce.or.i16.v2i16(<2 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v4i16(<4 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v8i16(<8 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v16i16(<16 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v32i16(<32 x i16>)
declare i16 @llvm.experimental.vector.reduce.or.i16.v64i16(<64 x i16>)

declare i8 @llvm.experimental.vector.reduce.or.i8.v2i8(<2 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v4i8(<4 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v8i8(<8 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v16i8(<16 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v32i8(<32 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v64i8(<64 x i8>)
declare i8 @llvm.experimental.vector.reduce.or.i8.v128i8(<128 x i8>)