blob: 066ddb77a20feff2f55729a6a36e8ef1d9bd67af [file] [log] [blame]
Simon Pilgrime612ab02018-10-20 14:29:59 +00001; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
Craig Topper381b4fb2018-12-05 07:56:50 +00002; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
4; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
5; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
6; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
7; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
8; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
9; RUN: opt < %s -cost-model -mtriple=x86_64-apple-darwin -analyze -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
Simon Pilgrime612ab02018-10-20 14:29:59 +000010
; reduce_i64: cost-model expectations for integer add reductions over i64
; vectors of 1, 2, 4, 8 and 16 elements. Every call takes an undef operand and
; its result is unused; the function itself returns undef, so only the
; per-instruction cost printed by the analysis is being tested.
; Expectations are recorded separately for the SSE2, SSSE3 and SSE42 prefixes,
; while the shared AVX and AVX512 prefixes cover the remaining RUN
; configurations for this function.
11define i32 @reduce_i64(i32 %arg) {
12; SSE2-LABEL: 'reduce_i64'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000013; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
14; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
15; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
16; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
17; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000018; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
19;
20; SSSE3-LABEL: 'reduce_i64'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000021; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
22; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
23; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
24; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
25; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000026; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
27;
28; SSE42-LABEL: 'reduce_i64'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000029; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
30; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
31; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
32; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
33; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000034; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
35;
36; AVX-LABEL: 'reduce_i64'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000037; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
38; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
39; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
40; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
41; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000042; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
43;
44; AVX512-LABEL: 'reduce_i64'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000045; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
46; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
47; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
48; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
49; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000050; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
51;
; Instructions under test. The assertion lines above quote each of these
; verbatim (value name, intrinsic name and operand), so the two must stay in
; sync; regenerate the assertions with the script named in the file header
; rather than editing them by hand.
Sander de Smalen51c2fa02019-06-13 09:37:38 +000052 %V1 = call i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64> undef)
53 %V2 = call i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64> undef)
54 %V4 = call i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64> undef)
55 %V8 = call i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64> undef)
56 %V16 = call i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000057 ret i32 undef
58}
59
; reduce_i32: cost-model expectations for integer add reductions over i32
; vectors of 2, 4, 8, 16 and 32 elements. Operands are undef and results are
; unused; only the per-instruction cost printed by the analysis is tested.
; As written, the SSE2, SSSE3 and SSE42 prefixes each carry their own
; expectations, and the shared AVX and AVX512 prefixes cover the remaining
; RUN configurations for this function.
60define i32 @reduce_i32(i32 %arg) {
61; SSE2-LABEL: 'reduce_i32'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000062; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
63; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
64; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
65; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
66; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000067; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
68;
69; SSSE3-LABEL: 'reduce_i32'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000070; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
71; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
72; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
73; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
74; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000075; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
76;
77; SSE42-LABEL: 'reduce_i32'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000078; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
79; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
80; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
81; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
82; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000083; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
84;
85; AVX-LABEL: 'reduce_i32'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000086; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
87; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
88; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
89; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
90; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000091; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
92;
93; AVX512-LABEL: 'reduce_i32'
Sander de Smalen51c2fa02019-06-13 09:37:38 +000094; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
95; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
96; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
97; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
98; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +000099; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
100;
; Instructions under test. The assertion lines above quote each of these
; verbatim, so any change here requires regenerating the assertions.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000101 %V2 = call i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32> undef)
102 %V4 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> undef)
103 %V8 = call i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32> undef)
104 %V16 = call i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32> undef)
105 %V32 = call i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000106 ret i32 undef
107}
108
; reduce_i16: cost-model expectations for integer add reductions over i16
; vectors of 2, 4, 8, 16, 32 and 64 elements. Operands are undef and results
; are unused; only the per-instruction cost printed by the analysis is tested.
; Unlike the i64/i32 functions, the expectations here are split per feature
; level (AVX1, AVX2, AVX512F, AVX512BW, AVX512DQ) instead of using the shared
; AVX / AVX512 prefixes, because the recorded costs for the 16-element and
; wider reductions differ between those configurations.
109define i32 @reduce_i16(i32 %arg) {
110; SSE2-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000111; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
112; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
113; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
114; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
115; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
116; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000117; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
118;
119; SSSE3-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000120; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
121; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
122; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
123; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
124; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
125; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000126; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
127;
128; SSE42-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000129; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
130; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
131; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
132; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
133; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
134; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000135; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
136;
137; AVX1-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000138; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
139; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
140; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
141; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
142; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
143; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000144; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
145;
146; AVX2-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000147; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
148; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
149; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
150; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
151; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
152; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000153; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
154;
155; AVX512F-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000156; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
157; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
158; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
159; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
160; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
161; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000162; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
163;
164; AVX512BW-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000165; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
166; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
167; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
168; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
169; AVX512BW-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
170; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000171; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
172;
173; AVX512DQ-LABEL: 'reduce_i16'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000174; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
175; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
176; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
177; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
178; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
179; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000180; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
181;
; Instructions under test. The assertion lines above quote each of these
; verbatim, so any change here requires regenerating the assertions.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000182 %V2 = call i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16> undef)
183 %V4 = call i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16> undef)
184 %V8 = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> undef)
185 %V16 = call i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16> undef)
186 %V32 = call i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16> undef)
187 %V64 = call i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000188 ret i32 undef
189}
190
; reduce_i8: cost-model expectations for integer add reductions over i8
; vectors of 2, 4, 8, 16, 32, 64 and 128 elements. Operands are undef and
; results are unused; only the per-instruction cost printed by the analysis is
; tested.
; Expectations are split per feature level (AVX1, AVX2, AVX512F, AVX512BW,
; AVX512DQ) instead of using the shared AVX / AVX512 prefixes, because the
; recorded costs for the 32-element and wider reductions differ between those
; configurations.
191define i32 @reduce_i8(i32 %arg) {
192; SSE2-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000193; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
194; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
195; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
196; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
197; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
198; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
199; SSE2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000200; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
201;
202; SSSE3-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000203; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
204; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
205; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
206; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
207; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
208; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
209; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000210; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
211;
212; SSE42-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000213; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
214; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
215; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
216; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
217; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
218; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
219; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000220; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
221;
222; AVX1-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000223; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
224; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
225; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
226; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
227; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
228; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
229; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000230; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
231;
232; AVX2-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000233; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
234; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
235; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
236; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
237; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
238; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
239; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000240; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
241;
242; AVX512F-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000243; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
244; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
245; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
246; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
247; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
248; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
249; AVX512F-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000250; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
251;
252; AVX512BW-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000253; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
254; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
255; AVX512BW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
256; AVX512BW-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
257; AVX512BW-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
258; AVX512BW-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
259; AVX512BW-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000260; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
261;
262; AVX512DQ-LABEL: 'reduce_i8'
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000263; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
264; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
265; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
266; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
267; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
268; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
269; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000270; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
271;
; Instructions under test. The assertion lines above quote each of these
; verbatim, so any change here requires regenerating the assertions.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000272 %V2 = call i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8> undef)
273 %V4 = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> undef)
274 %V8 = call i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8> undef)
275 %V16 = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> undef)
276 %V32 = call i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8> undef)
277 %V64 = call i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8> undef)
278 %V128 = call i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8> undef)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000279 ret i32 undef
280}
281
; Declarations of the llvm.experimental.vector.reduce.add overloads called by
; the test functions in this file, grouped by element type. Each overload takes
; one vector operand and returns a scalar of the vector's element type; the
; name suffix (e.g. v4i32) spells out the operand's vector type.

; i64 element type: 1, 2, 4, 8 and 16 lanes.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000282declare i64 @llvm.experimental.vector.reduce.add.v1i64(<1 x i64>)
283declare i64 @llvm.experimental.vector.reduce.add.v2i64(<2 x i64>)
284declare i64 @llvm.experimental.vector.reduce.add.v4i64(<4 x i64>)
285declare i64 @llvm.experimental.vector.reduce.add.v8i64(<8 x i64>)
286declare i64 @llvm.experimental.vector.reduce.add.v16i64(<16 x i64>)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000287
; i32 element type: 2, 4, 8, 16 and 32 lanes.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000288declare i32 @llvm.experimental.vector.reduce.add.v2i32(<2 x i32>)
289declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>)
290declare i32 @llvm.experimental.vector.reduce.add.v8i32(<8 x i32>)
291declare i32 @llvm.experimental.vector.reduce.add.v16i32(<16 x i32>)
292declare i32 @llvm.experimental.vector.reduce.add.v32i32(<32 x i32>)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000293
; i16 element type: 2, 4, 8, 16, 32 and 64 lanes.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000294declare i16 @llvm.experimental.vector.reduce.add.v2i16(<2 x i16>)
295declare i16 @llvm.experimental.vector.reduce.add.v4i16(<4 x i16>)
296declare i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16>)
297declare i16 @llvm.experimental.vector.reduce.add.v16i16(<16 x i16>)
298declare i16 @llvm.experimental.vector.reduce.add.v32i16(<32 x i16>)
299declare i16 @llvm.experimental.vector.reduce.add.v64i16(<64 x i16>)
Simon Pilgrime612ab02018-10-20 14:29:59 +0000300
; i8 element type: 2, 4, 8, 16, 32, 64 and 128 lanes.
Sander de Smalen51c2fa02019-06-13 09:37:38 +0000301declare i8 @llvm.experimental.vector.reduce.add.v2i8(<2 x i8>)
302declare i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8>)
303declare i8 @llvm.experimental.vector.reduce.add.v8i8(<8 x i8>)
304declare i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8>)
305declare i8 @llvm.experimental.vector.reduce.add.v32i8(<32 x i8>)
306declare i8 @llvm.experimental.vector.reduce.add.v64i8(<64 x i8>)
307declare i8 @llvm.experimental.vector.reduce.add.v128i8(<128 x i8>)