; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

;
; vXi64
;

define i64 @test_v2i64(<2 x i64> %a0) {
; SSE-LABEL: test_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    paddq %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %a0)
  ret i64 %1
}

define i64 @test_v4i64(<4 x i64> %a0) {
; SSE-LABEL: test_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    paddq %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    paddq %xmm0, %xmm1
; SSE-NEXT:    movq %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64> %a0)
  ret i64 %1
}

define i64 @test_v8i64(<8 x i64> %a0) {
; SSE-LABEL: test_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    paddq %xmm3, %xmm1
; SSE-NEXT:    paddq %xmm2, %xmm1
; SSE-NEXT:    paddq %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    paddq %xmm1, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64> %a0)
  ret i64 %1
}

define i64 @test_v16i64(<16 x i64> %a0) {
; SSE-LABEL: test_v16i64:
; SSE:       # %bb.0:
; SSE-NEXT:    paddq %xmm6, %xmm2
; SSE-NEXT:    paddq %xmm7, %xmm3
; SSE-NEXT:    paddq %xmm5, %xmm3
; SSE-NEXT:    paddq %xmm1, %xmm3
; SSE-NEXT:    paddq %xmm4, %xmm2
; SSE-NEXT:    paddq %xmm3, %xmm2
; SSE-NEXT:    paddq %xmm0, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    paddq %xmm2, %xmm0
; SSE-NEXT:    movq %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v16i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpaddq %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddq %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddq %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vmovq %xmm0, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddq %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovq %xmm0, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64> %a0)
  ret i64 %1
}

;
; vXi32
;

define i32 @test_v4i32(<4 x i32> %a0) {
; SSE2-LABEL: test_v4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    phaddd %xmm1, %xmm1
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %a0)
  ret i32 %1
}

define i32 @test_v8i32(<8 x i32> %a0) {
; SSE2-LABEL: test_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    phaddd %xmm1, %xmm1
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> %a0)
  ret i32 %1
}

define i32 @test_v16i32(<16 x i32> %a0) {
; SSE2-LABEL: test_v16i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm3, %xmm1
; SSE2-NEXT:    paddd %xmm2, %xmm1
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    paddd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm3, %xmm1
; SSE41-NEXT:    paddd %xmm2, %xmm1
; SSE41-NEXT:    paddd %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    paddd %xmm1, %xmm0
; SSE41-NEXT:    phaddd %xmm0, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32> %a0)
  ret i32 %1
}

define i32 @test_v32i32(<32 x i32> %a0) {
; SSE2-LABEL: test_v32i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm6, %xmm2
; SSE2-NEXT:    paddd %xmm7, %xmm3
; SSE2-NEXT:    paddd %xmm5, %xmm3
; SSE2-NEXT:    paddd %xmm1, %xmm3
; SSE2-NEXT:    paddd %xmm4, %xmm2
; SSE2-NEXT:    paddd %xmm3, %xmm2
; SSE2-NEXT:    paddd %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    paddd %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    paddd %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm6, %xmm2
; SSE41-NEXT:    paddd %xmm7, %xmm3
; SSE41-NEXT:    paddd %xmm5, %xmm3
; SSE41-NEXT:    paddd %xmm1, %xmm3
; SSE41-NEXT:    paddd %xmm4, %xmm2
; SSE41-NEXT:    paddd %xmm3, %xmm2
; SSE41-NEXT:    paddd %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE41-NEXT:    paddd %xmm2, %xmm0
; SSE41-NEXT:    phaddd %xmm0, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vphaddd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32> %a0)
  ret i32 %1
}

;
; vXi16
;

define i16 @test_v8i16(<8 x i16> %a0) {
; SSE2-LABEL: test_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    paddw %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    phaddw %xmm0, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %a0)
  ret i16 %1
}

define i16 @test_v16i16(<16 x i16> %a0) {
; SSE2-LABEL: test_v16i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    movd %xmm1, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    paddw %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    phaddw %xmm0, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vphaddw %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vphaddw %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16> %a0)
  ret i16 %1
}

define i16 @test_v32i16(<32 x i16> %a0) {
; SSE2-LABEL: test_v32i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddw %xmm3, %xmm1
; SSE2-NEXT:    paddw %xmm2, %xmm1
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddw %xmm3, %xmm1
; SSE41-NEXT:    paddw %xmm2, %xmm1
; SSE41-NEXT:    paddw %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT:    paddw %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    paddw %xmm0, %xmm1
; SSE41-NEXT:    phaddw %xmm1, %xmm1
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vphaddw %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16> %a0)
  ret i16 %1
}

define i16 @test_v64i16(<64 x i16> %a0) {
; SSE2-LABEL: test_v64i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddw %xmm6, %xmm2
; SSE2-NEXT:    paddw %xmm7, %xmm3
; SSE2-NEXT:    paddw %xmm5, %xmm3
; SSE2-NEXT:    paddw %xmm1, %xmm3
; SSE2-NEXT:    paddw %xmm4, %xmm2
; SSE2-NEXT:    paddw %xmm3, %xmm2
; SSE2-NEXT:    paddw %xmm0, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE2-NEXT:    paddw %xmm2, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrld $16, %xmm0
; SSE2-NEXT:    paddw %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v64i16:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddw %xmm6, %xmm2
; SSE41-NEXT:    paddw %xmm7, %xmm3
; SSE41-NEXT:    paddw %xmm5, %xmm3
; SSE41-NEXT:    paddw %xmm1, %xmm3
; SSE41-NEXT:    paddw %xmm4, %xmm2
; SSE41-NEXT:    paddw %xmm3, %xmm2
; SSE41-NEXT:    paddw %xmm0, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE41-NEXT:    paddw %xmm2, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT:    paddw %xmm0, %xmm1
; SSE41-NEXT:    phaddw %xmm1, %xmm1
; SSE41-NEXT:    movd %xmm1, %eax
; SSE41-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v64i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpaddw %xmm1, %xmm3, %xmm1
; AVX1-NEXT:    vpaddw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v64i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vphaddw %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v64i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16> %a0)
  ret i16 %1
}

;
; vXi8
;

define i8 @test_v16i8(<16 x i8> %a0) {
; SSE2-LABEL: test_v16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    paddb %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    paddb %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpextrb $0, %xmm0, %eax
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %a0)
  ret i8 %1
}

define i8 @test_v32i8(<32 x i8> %a0) {
; SSE2-LABEL: test_v32i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrld $16, %xmm1
; SSE2-NEXT:    paddb %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    psrlw $8, %xmm0
; SSE2-NEXT:    paddb %xmm1, %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v32i8:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE41-NEXT:    paddb %xmm0, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrld $16, %xmm1
; SSE41-NEXT:    paddb %xmm0, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    psrlw $8, %xmm0
; SSE41-NEXT:    paddb %xmm1, %xmm0
; SSE41-NEXT:    pextrb $0, %xmm0, %eax
; SSE41-NEXT:    # kill: def $al killed $al killed $eax
; SSE41-NEXT:    retq
;
; AVX1-LABEL: test_v32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX1-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpextrb $0, %xmm0, %eax
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpextrb $0, %xmm0, %eax
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
; AVX512-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpextrb $0, %xmm0, %eax
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8> %a0)
  ret i8 %1
}

; test_v64i8: horizontal add-reduction of <64 x i8> down to a scalar i8 via
; @llvm.experimental.vector.reduce.add. The CHECK lines below were
; autogenerated by utils/update_llc_test_checks.py (see file header);
; regenerate them with that script rather than editing by hand.
; The SSE paths pairwise-add the four 128-bit input halves, then reduce
; within one xmm via shuffles/shifts; the AVX paths extract and add the
; upper lanes first. The final `# kill` comment marks truncation of %eax
; to the i8 return in %al.
; NOTE(review): the AVX2/AVX512 sequences perform the tail adds on full
; ymm/zmm registers even after narrowing to xmm-sized data — this reflects
; codegen at the time the checks were generated; confirm against current
; llc before relying on it.
907define i8 @test_v64i8(<64 x i8> %a0) {
908; SSE2-LABEL: test_v64i8:
909; SSE2: # %bb.0:
910; SSE2-NEXT: paddb %xmm3, %xmm1
911; SSE2-NEXT: paddb %xmm2, %xmm1
912; SSE2-NEXT: paddb %xmm0, %xmm1
913; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
914; SSE2-NEXT: paddb %xmm1, %xmm0
915; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
916; SSE2-NEXT: paddb %xmm0, %xmm1
917; SSE2-NEXT: movdqa %xmm1, %xmm0
918; SSE2-NEXT: psrld $16, %xmm0
919; SSE2-NEXT: paddb %xmm1, %xmm0
920; SSE2-NEXT: movdqa %xmm0, %xmm1
921; SSE2-NEXT: psrlw $8, %xmm1
922; SSE2-NEXT: paddb %xmm0, %xmm1
923; SSE2-NEXT: movd %xmm1, %eax
924; SSE2-NEXT: # kill: def $al killed $al killed $eax
925; SSE2-NEXT: retq
926;
927; SSE41-LABEL: test_v64i8:
928; SSE41: # %bb.0:
929; SSE41-NEXT: paddb %xmm3, %xmm1
930; SSE41-NEXT: paddb %xmm2, %xmm1
931; SSE41-NEXT: paddb %xmm0, %xmm1
932; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
933; SSE41-NEXT: paddb %xmm1, %xmm0
934; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
935; SSE41-NEXT: paddb %xmm0, %xmm1
936; SSE41-NEXT: movdqa %xmm1, %xmm0
937; SSE41-NEXT: psrld $16, %xmm0
938; SSE41-NEXT: paddb %xmm1, %xmm0
939; SSE41-NEXT: movdqa %xmm0, %xmm1
940; SSE41-NEXT: psrlw $8, %xmm1
941; SSE41-NEXT: paddb %xmm0, %xmm1
942; SSE41-NEXT: pextrb $0, %xmm1, %eax
943; SSE41-NEXT: # kill: def $al killed $al killed $eax
944; SSE41-NEXT: retq
945;
946; AVX1-LABEL: test_v64i8:
947; AVX1: # %bb.0:
948; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
949; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
950; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2
951; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1
952; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
953; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
954; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
955; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
956; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
957; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
958; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
959; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
960; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
961; AVX1-NEXT: vpextrb $0, %xmm0, %eax
962; AVX1-NEXT: # kill: def $al killed $al killed $eax
963; AVX1-NEXT: vzeroupper
964; AVX1-NEXT: retq
965;
966; AVX2-LABEL: test_v64i8:
967; AVX2: # %bb.0:
968; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
969; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
970; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
971; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
972; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
973; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
974; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
975; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
976; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
977; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
978; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
979; AVX2-NEXT: vpextrb $0, %xmm0, %eax
980; AVX2-NEXT: # kill: def $al killed $al killed $eax
981; AVX2-NEXT: vzeroupper
982; AVX2-NEXT: retq
983;
984; AVX512-LABEL: test_v64i8:
985; AVX512: # %bb.0:
986; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
987; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
988; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
989; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
990; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
991; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
992; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
993; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
994; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
995; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
996; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
997; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
998; AVX512-NEXT: vpextrb $0, %xmm0, %eax
999; AVX512-NEXT: # kill: def $al killed $al killed $eax
1000; AVX512-NEXT: vzeroupper
1001; AVX512-NEXT: retq
; The IR under test: a single vector.reduce.add intrinsic call, lowered
; by llc into the per-target sequences checked above.
1002 %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8> %a0)
1003 ret i8 %1
1004}
1005
; test_v128i8: horizontal add-reduction of <128 x i8> (8 xmm / 4 ymm /
; 2 zmm input registers, per ABI register split) down to a scalar i8.
; The CHECK lines below were autogenerated by
; utils/update_llc_test_checks.py (see file header); regenerate with that
; script rather than editing by hand.
; Each prefix first sums the input register halves pairwise, then reduces
; within a single xmm by shuffle/shift + paddb, finally extracting byte 0
; into %eax (`# kill` marks the i8 truncation to %al).
; NOTE(review): as with test_v64i8, the AVX2/AVX512 tails keep adding on
; full-width ymm/zmm registers after the data fits in xmm — codegen as of
; check generation; confirm against current llc before relying on it.
1006define i8 @test_v128i8(<128 x i8> %a0) {
1007; SSE2-LABEL: test_v128i8:
1008; SSE2: # %bb.0:
1009; SSE2-NEXT: paddb %xmm6, %xmm2
1010; SSE2-NEXT: paddb %xmm7, %xmm3
1011; SSE2-NEXT: paddb %xmm5, %xmm3
1012; SSE2-NEXT: paddb %xmm1, %xmm3
1013; SSE2-NEXT: paddb %xmm4, %xmm2
1014; SSE2-NEXT: paddb %xmm3, %xmm2
1015; SSE2-NEXT: paddb %xmm0, %xmm2
1016; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
1017; SSE2-NEXT: paddb %xmm2, %xmm0
1018; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1019; SSE2-NEXT: paddb %xmm0, %xmm1
1020; SSE2-NEXT: movdqa %xmm1, %xmm0
1021; SSE2-NEXT: psrld $16, %xmm0
1022; SSE2-NEXT: paddb %xmm1, %xmm0
1023; SSE2-NEXT: movdqa %xmm0, %xmm1
1024; SSE2-NEXT: psrlw $8, %xmm1
1025; SSE2-NEXT: paddb %xmm0, %xmm1
1026; SSE2-NEXT: movd %xmm1, %eax
1027; SSE2-NEXT: # kill: def $al killed $al killed $eax
1028; SSE2-NEXT: retq
1029;
1030; SSE41-LABEL: test_v128i8:
1031; SSE41: # %bb.0:
1032; SSE41-NEXT: paddb %xmm6, %xmm2
1033; SSE41-NEXT: paddb %xmm7, %xmm3
1034; SSE41-NEXT: paddb %xmm5, %xmm3
1035; SSE41-NEXT: paddb %xmm1, %xmm3
1036; SSE41-NEXT: paddb %xmm4, %xmm2
1037; SSE41-NEXT: paddb %xmm3, %xmm2
1038; SSE41-NEXT: paddb %xmm0, %xmm2
1039; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
1040; SSE41-NEXT: paddb %xmm2, %xmm0
1041; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1042; SSE41-NEXT: paddb %xmm0, %xmm1
1043; SSE41-NEXT: movdqa %xmm1, %xmm0
1044; SSE41-NEXT: psrld $16, %xmm0
1045; SSE41-NEXT: paddb %xmm1, %xmm0
1046; SSE41-NEXT: movdqa %xmm0, %xmm1
1047; SSE41-NEXT: psrlw $8, %xmm1
1048; SSE41-NEXT: paddb %xmm0, %xmm1
1049; SSE41-NEXT: pextrb $0, %xmm1, %eax
1050; SSE41-NEXT: # kill: def $al killed $al killed $eax
1051; SSE41-NEXT: retq
1052;
1053; AVX1-LABEL: test_v128i8:
1054; AVX1: # %bb.0:
1055; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm4
1056; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
1057; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1058; AVX1-NEXT: vpaddb %xmm3, %xmm1, %xmm1
1059; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
1060; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
1061; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1062; AVX1-NEXT: vpaddb %xmm1, %xmm3, %xmm1
1063; AVX1-NEXT: vpaddb %xmm4, %xmm2, %xmm2
1064; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
1065; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
1066; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1067; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
1068; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1069; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
1070; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
1071; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
1072; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1073; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0
1074; AVX1-NEXT: vpextrb $0, %xmm0, %eax
1075; AVX1-NEXT: # kill: def $al killed $al killed $eax
1076; AVX1-NEXT: vzeroupper
1077; AVX1-NEXT: retq
1078;
1079; AVX2-LABEL: test_v128i8:
1080; AVX2: # %bb.0:
1081; AVX2-NEXT: vpaddb %ymm3, %ymm1, %ymm1
1082; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
1083; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1084; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1085; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1086; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1087; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1088; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1089; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1090; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
1091; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1092; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1093; AVX2-NEXT: vpaddb %ymm1, %ymm0, %ymm0
1094; AVX2-NEXT: vpextrb $0, %xmm0, %eax
1095; AVX2-NEXT: # kill: def $al killed $al killed $eax
1096; AVX2-NEXT: vzeroupper
1097; AVX2-NEXT: retq
1098;
1099; AVX512-LABEL: test_v128i8:
1100; AVX512: # %bb.0:
1101; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1102; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1103; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1104; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1105; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1106; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1107; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1108; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1109; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1110; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
1111; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1112; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1113; AVX512-NEXT: vpaddb %zmm1, %zmm0, %zmm0
1114; AVX512-NEXT: vpextrb $0, %xmm0, %eax
1115; AVX512-NEXT: # kill: def $al killed $al killed $eax
1116; AVX512-NEXT: vzeroupper
1117; AVX512-NEXT: retq
; The IR under test: a single vector.reduce.add intrinsic call, lowered
; by llc into the per-target sequences checked above.
1118 %1 = call i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8> %a0)
1119 ret i8 %1
1120}
1121
; Declarations of the experimental add-reduction intrinsics exercised by
; the tests above, grouped by element type (i64/i32/i16/i8) and ordered
; by increasing vector width. Only the i8 variants are used in this
; visible chunk; the wider-element variants back the earlier tests.
1122declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
1123declare i64 @llvm.experimental.vector.reduce.add.i64.v4i64(<4 x i64>)
1124declare i64 @llvm.experimental.vector.reduce.add.i64.v8i64(<8 x i64>)
1125declare i64 @llvm.experimental.vector.reduce.add.i64.v16i64(<16 x i64>)
1126
1127declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
1128declare i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32>)
1129declare i32 @llvm.experimental.vector.reduce.add.i32.v16i32(<16 x i32>)
1130declare i32 @llvm.experimental.vector.reduce.add.i32.v32i32(<32 x i32>)
1131
1132declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
1133declare i16 @llvm.experimental.vector.reduce.add.i16.v16i16(<16 x i16>)
1134declare i16 @llvm.experimental.vector.reduce.add.i16.v32i16(<32 x i16>)
1135declare i16 @llvm.experimental.vector.reduce.add.i16.v64i16(<64 x i16>)
1136
1137declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
1138declare i8 @llvm.experimental.vector.reduce.add.i8.v32i8(<32 x i8>)
1139declare i8 @llvm.experimental.vector.reduce.add.i8.v64i8(<64 x i8>)
1140declare i8 @llvm.experimental.vector.reduce.add.i8.v128i8(<128 x i8>)