blob: 6a68a7f330f87ffff91bb9e9db3f069ee7c12109 [file] [log] [blame]
1; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=SSSE3
2; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
3; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
4; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
5; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
6; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
7
; Module-wide target settings. The triple is the same
; x86_64-apple-macosx10.8.0 value that every run line passes to opt through
; -mtriple; the feature level under test is selected per run line with -mattr.
8target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
9target triple = "x86_64-apple-macosx10.8.0"
10
; fadd: expected cost-model output for vector fadd on undef operands.
; Operand types walk <4 x float>, <8 x float>, <16 x float>, then
; <2 x double>, <4 x double>, <8 x double> (128, 256 and 512 bits each).
; Each instruction is preceded by one expectation for each of the SSSE3,
; SSE42, AVX, AVX2 and AVX512 prefixes defined on the run lines.
; The i32 argument is unused and the function returns undef; only the
; per-instruction costs are checked.
11; CHECK-LABEL: 'fadd'
12define i32 @fadd(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
13 ; SSSE3: cost of 2 {{.*}} %A = fadd
14 ; SSE42: cost of 2 {{.*}} %A = fadd
15 ; AVX: cost of 2 {{.*}} %A = fadd
16 ; AVX2: cost of 2 {{.*}} %A = fadd
17 ; AVX512: cost of 2 {{.*}} %A = fadd
18 %A = fadd <4 x float> undef, undef
19 ; SSSE3: cost of 4 {{.*}} %B = fadd
20 ; SSE42: cost of 4 {{.*}} %B = fadd
21 ; AVX: cost of 2 {{.*}} %B = fadd
22 ; AVX2: cost of 2 {{.*}} %B = fadd
23 ; AVX512: cost of 2 {{.*}} %B = fadd
24 %B = fadd <8 x float> undef, undef
25 ; SSSE3: cost of 8 {{.*}} %C = fadd
26 ; SSE42: cost of 8 {{.*}} %C = fadd
27 ; AVX: cost of 4 {{.*}} %C = fadd
28 ; AVX2: cost of 4 {{.*}} %C = fadd
29 ; AVX512: cost of 2 {{.*}} %C = fadd
30 %C = fadd <16 x float> undef, undef
31
 ; Double-precision vectors: 128, 256, 512 bits.
32 ; SSSE3: cost of 2 {{.*}} %D = fadd
33 ; SSE42: cost of 2 {{.*}} %D = fadd
34 ; AVX: cost of 2 {{.*}} %D = fadd
35 ; AVX2: cost of 2 {{.*}} %D = fadd
36 ; AVX512: cost of 2 {{.*}} %D = fadd
37 %D = fadd <2 x double> undef, undef
38 ; SSSE3: cost of 4 {{.*}} %E = fadd
39 ; SSE42: cost of 4 {{.*}} %E = fadd
40 ; AVX: cost of 2 {{.*}} %E = fadd
41 ; AVX2: cost of 2 {{.*}} %E = fadd
42 ; AVX512: cost of 2 {{.*}} %E = fadd
43 %E = fadd <4 x double> undef, undef
44 ; SSSE3: cost of 8 {{.*}} %F = fadd
45 ; SSE42: cost of 8 {{.*}} %F = fadd
46 ; AVX: cost of 4 {{.*}} %F = fadd
47 ; AVX2: cost of 4 {{.*}} %F = fadd
48 ; AVX512: cost of 2 {{.*}} %F = fadd
49 %F = fadd <8 x double> undef, undef
50
51 ret i32 undef
52}
53
; fsub: expected cost-model output for vector fsub on undef operands.
; Same layout as the fadd test: three float widths, then three double widths
; (128, 256 and 512 bits), each instruction preceded by one expectation for
; each of the SSSE3, SSE42, AVX, AVX2 and AVX512 prefixes. The expected
; numbers are identical to the fadd ones in this file.
; The i32 argument is unused and the function returns undef.
54; CHECK-LABEL: 'fsub'
55define i32 @fsub(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
56 ; SSSE3: cost of 2 {{.*}} %A = fsub
57 ; SSE42: cost of 2 {{.*}} %A = fsub
58 ; AVX: cost of 2 {{.*}} %A = fsub
59 ; AVX2: cost of 2 {{.*}} %A = fsub
60 ; AVX512: cost of 2 {{.*}} %A = fsub
61 %A = fsub <4 x float> undef, undef
62 ; SSSE3: cost of 4 {{.*}} %B = fsub
63 ; SSE42: cost of 4 {{.*}} %B = fsub
64 ; AVX: cost of 2 {{.*}} %B = fsub
65 ; AVX2: cost of 2 {{.*}} %B = fsub
66 ; AVX512: cost of 2 {{.*}} %B = fsub
67 %B = fsub <8 x float> undef, undef
68 ; SSSE3: cost of 8 {{.*}} %C = fsub
69 ; SSE42: cost of 8 {{.*}} %C = fsub
70 ; AVX: cost of 4 {{.*}} %C = fsub
71 ; AVX2: cost of 4 {{.*}} %C = fsub
72 ; AVX512: cost of 2 {{.*}} %C = fsub
73 %C = fsub <16 x float> undef, undef
74
 ; Double-precision vectors: 128, 256, 512 bits.
75 ; SSSE3: cost of 2 {{.*}} %D = fsub
76 ; SSE42: cost of 2 {{.*}} %D = fsub
77 ; AVX: cost of 2 {{.*}} %D = fsub
78 ; AVX2: cost of 2 {{.*}} %D = fsub
79 ; AVX512: cost of 2 {{.*}} %D = fsub
80 %D = fsub <2 x double> undef, undef
81 ; SSSE3: cost of 4 {{.*}} %E = fsub
82 ; SSE42: cost of 4 {{.*}} %E = fsub
83 ; AVX: cost of 2 {{.*}} %E = fsub
84 ; AVX2: cost of 2 {{.*}} %E = fsub
85 ; AVX512: cost of 2 {{.*}} %E = fsub
86 %E = fsub <4 x double> undef, undef
87 ; SSSE3: cost of 8 {{.*}} %F = fsub
88 ; SSE42: cost of 8 {{.*}} %F = fsub
89 ; AVX: cost of 4 {{.*}} %F = fsub
90 ; AVX2: cost of 4 {{.*}} %F = fsub
91 ; AVX512: cost of 2 {{.*}} %F = fsub
92 %F = fsub <8 x double> undef, undef
93
94 ret i32 undef
95}
96
; fmul: expected cost-model output for vector fmul on undef operands.
; Same layout as the fadd test: three float widths, then three double widths
; (128, 256 and 512 bits), each instruction preceded by one expectation for
; each of the SSSE3, SSE42, AVX, AVX2 and AVX512 prefixes. The expected
; numbers are identical to the fadd ones in this file.
; The i32 argument is unused and the function returns undef.
97; CHECK-LABEL: 'fmul'
98define i32 @fmul(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
99 ; SSSE3: cost of 2 {{.*}} %A = fmul
100 ; SSE42: cost of 2 {{.*}} %A = fmul
101 ; AVX: cost of 2 {{.*}} %A = fmul
102 ; AVX2: cost of 2 {{.*}} %A = fmul
103 ; AVX512: cost of 2 {{.*}} %A = fmul
104 %A = fmul <4 x float> undef, undef
105 ; SSSE3: cost of 4 {{.*}} %B = fmul
106 ; SSE42: cost of 4 {{.*}} %B = fmul
107 ; AVX: cost of 2 {{.*}} %B = fmul
108 ; AVX2: cost of 2 {{.*}} %B = fmul
109 ; AVX512: cost of 2 {{.*}} %B = fmul
110 %B = fmul <8 x float> undef, undef
111 ; SSSE3: cost of 8 {{.*}} %C = fmul
112 ; SSE42: cost of 8 {{.*}} %C = fmul
113 ; AVX: cost of 4 {{.*}} %C = fmul
114 ; AVX2: cost of 4 {{.*}} %C = fmul
115 ; AVX512: cost of 2 {{.*}} %C = fmul
116 %C = fmul <16 x float> undef, undef
117
 ; Double-precision vectors: 128, 256, 512 bits.
118 ; SSSE3: cost of 2 {{.*}} %D = fmul
119 ; SSE42: cost of 2 {{.*}} %D = fmul
120 ; AVX: cost of 2 {{.*}} %D = fmul
121 ; AVX2: cost of 2 {{.*}} %D = fmul
122 ; AVX512: cost of 2 {{.*}} %D = fmul
123 %D = fmul <2 x double> undef, undef
124 ; SSSE3: cost of 4 {{.*}} %E = fmul
125 ; SSE42: cost of 4 {{.*}} %E = fmul
126 ; AVX: cost of 2 {{.*}} %E = fmul
127 ; AVX2: cost of 2 {{.*}} %E = fmul
128 ; AVX512: cost of 2 {{.*}} %E = fmul
129 %E = fmul <4 x double> undef, undef
130 ; SSSE3: cost of 8 {{.*}} %F = fmul
131 ; SSE42: cost of 8 {{.*}} %F = fmul
132 ; AVX: cost of 4 {{.*}} %F = fmul
133 ; AVX2: cost of 4 {{.*}} %F = fmul
134 ; AVX512: cost of 2 {{.*}} %F = fmul
135 %F = fmul <8 x double> undef, undef
136
137 ret i32 undef
138}
139
; fdiv: expected cost-model output for vector fdiv on undef operands.
; Same layout as the fadd test: three float widths, then three double widths
; (128, 256 and 512 bits), each instruction preceded by one expectation for
; each of the SSSE3, SSE42, AVX, AVX2 and AVX512 prefixes. The expected
; numbers are identical to the fadd ones in this file.
; The i32 argument is unused and the function returns undef.
140; CHECK-LABEL: 'fdiv'
141define i32 @fdiv(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
142 ; SSSE3: cost of 2 {{.*}} %A = fdiv
143 ; SSE42: cost of 2 {{.*}} %A = fdiv
144 ; AVX: cost of 2 {{.*}} %A = fdiv
145 ; AVX2: cost of 2 {{.*}} %A = fdiv
146 ; AVX512: cost of 2 {{.*}} %A = fdiv
147 %A = fdiv <4 x float> undef, undef
148 ; SSSE3: cost of 4 {{.*}} %B = fdiv
149 ; SSE42: cost of 4 {{.*}} %B = fdiv
150 ; AVX: cost of 2 {{.*}} %B = fdiv
151 ; AVX2: cost of 2 {{.*}} %B = fdiv
152 ; AVX512: cost of 2 {{.*}} %B = fdiv
153 %B = fdiv <8 x float> undef, undef
154 ; SSSE3: cost of 8 {{.*}} %C = fdiv
155 ; SSE42: cost of 8 {{.*}} %C = fdiv
156 ; AVX: cost of 4 {{.*}} %C = fdiv
157 ; AVX2: cost of 4 {{.*}} %C = fdiv
158 ; AVX512: cost of 2 {{.*}} %C = fdiv
159 %C = fdiv <16 x float> undef, undef
160
 ; Double-precision vectors: 128, 256, 512 bits.
161 ; SSSE3: cost of 2 {{.*}} %D = fdiv
162 ; SSE42: cost of 2 {{.*}} %D = fdiv
163 ; AVX: cost of 2 {{.*}} %D = fdiv
164 ; AVX2: cost of 2 {{.*}} %D = fdiv
165 ; AVX512: cost of 2 {{.*}} %D = fdiv
166 %D = fdiv <2 x double> undef, undef
167 ; SSSE3: cost of 4 {{.*}} %E = fdiv
168 ; SSE42: cost of 4 {{.*}} %E = fdiv
169 ; AVX: cost of 2 {{.*}} %E = fdiv
170 ; AVX2: cost of 2 {{.*}} %E = fdiv
171 ; AVX512: cost of 2 {{.*}} %E = fdiv
172 %E = fdiv <4 x double> undef, undef
173 ; SSSE3: cost of 8 {{.*}} %F = fdiv
174 ; SSE42: cost of 8 {{.*}} %F = fdiv
175 ; AVX: cost of 4 {{.*}} %F = fdiv
176 ; AVX2: cost of 4 {{.*}} %F = fdiv
177 ; AVX512: cost of 2 {{.*}} %F = fdiv
178 %F = fdiv <8 x double> undef, undef
179
180 ret i32 undef
181}
182
; frem: expected cost-model output for vector frem on undef operands.
; Same layout as the other arithmetic tests: three float widths, then three
; double widths (128, 256 and 512 bits), each instruction preceded by one
; expectation for each of the SSSE3, SSE42, AVX, AVX2 and AVX512 prefixes.
; The expected numbers are far larger than for fadd/fsub/fmul/fdiv in this
; file, and the AVX-level values are not plain multiples of the 128-bit one.
; NOTE(review): presumably frem is costed as a scalarized operation, with
; extra insert/extract overhead on wider registers; confirm against the X86
; cost model before relying on this explanation.
; The i32 argument is unused and the function returns undef.
183; CHECK-LABEL: 'frem'
184define i32 @frem(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
185 ; SSSE3: cost of 14 {{.*}} %A = frem
186 ; SSE42: cost of 14 {{.*}} %A = frem
187 ; AVX: cost of 14 {{.*}} %A = frem
188 ; AVX2: cost of 14 {{.*}} %A = frem
189 ; AVX512: cost of 14 {{.*}} %A = frem
190 %A = frem <4 x float> undef, undef
191 ; SSSE3: cost of 28 {{.*}} %B = frem
192 ; SSE42: cost of 28 {{.*}} %B = frem
193 ; AVX: cost of 30 {{.*}} %B = frem
194 ; AVX2: cost of 30 {{.*}} %B = frem
195 ; AVX512: cost of 30 {{.*}} %B = frem
196 %B = frem <8 x float> undef, undef
197 ; SSSE3: cost of 56 {{.*}} %C = frem
198 ; SSE42: cost of 56 {{.*}} %C = frem
199 ; AVX: cost of 60 {{.*}} %C = frem
200 ; AVX2: cost of 60 {{.*}} %C = frem
201 ; AVX512: cost of 62 {{.*}} %C = frem
202 %C = frem <16 x float> undef, undef
203
 ; Double-precision vectors: 128, 256, 512 bits.
204 ; SSSE3: cost of 6 {{.*}} %D = frem
205 ; SSE42: cost of 6 {{.*}} %D = frem
206 ; AVX: cost of 6 {{.*}} %D = frem
207 ; AVX2: cost of 6 {{.*}} %D = frem
208 ; AVX512: cost of 6 {{.*}} %D = frem
209 %D = frem <2 x double> undef, undef
210 ; SSSE3: cost of 12 {{.*}} %E = frem
211 ; SSE42: cost of 12 {{.*}} %E = frem
212 ; AVX: cost of 14 {{.*}} %E = frem
213 ; AVX2: cost of 14 {{.*}} %E = frem
214 ; AVX512: cost of 14 {{.*}} %E = frem
215 %E = frem <4 x double> undef, undef
216 ; SSSE3: cost of 24 {{.*}} %F = frem
217 ; SSE42: cost of 24 {{.*}} %F = frem
218 ; AVX: cost of 28 {{.*}} %F = frem
219 ; AVX2: cost of 28 {{.*}} %F = frem
220 ; AVX512: cost of 30 {{.*}} %F = frem
221 %F = frem <8 x double> undef, undef
222
223 ret i32 undef
224}
225
; fsqrt: expected cost-model output for calls to the llvm.sqrt vector
; intrinsics on undef operands. Covers v4f32, v8f32, v16f32, then v2f64,
; v4f64, v8f64 (128, 256 and 512 bits each); every call is preceded by one
; expectation for each of the SSSE3, SSE42, AVX, AVX2 and AVX512 prefixes.
; The patterns include the result type and intrinsic name, so each line can
; only match the call of that exact width.
; The i32 argument is unused and the function returns undef.
226; CHECK-LABEL: 'fsqrt'
227define i32 @fsqrt(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
228 ; SSSE3: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
229 ; SSE42: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
230 ; AVX: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
231 ; AVX2: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
232 ; AVX512: cost of 1 {{.*}} %A = call <4 x float> @llvm.sqrt.v4f32
233 %A = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
234 ; SSSE3: cost of 4 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
235 ; SSE42: cost of 4 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
236 ; AVX: cost of 1 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
237 ; AVX2: cost of 1 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
238 ; AVX512: cost of 1 {{.*}} %B = call <8 x float> @llvm.sqrt.v8f32
239 %B = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
240 ; SSSE3: cost of 8 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
241 ; SSE42: cost of 8 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
242 ; AVX: cost of 4 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
243 ; AVX2: cost of 4 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
244 ; AVX512: cost of 1 {{.*}} %C = call <16 x float> @llvm.sqrt.v16f32
245 %C = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
246
 ; Double-precision vectors: 128, 256, 512 bits.
247 ; SSSE3: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
248 ; SSE42: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
249 ; AVX: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
250 ; AVX2: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
251 ; AVX512: cost of 1 {{.*}} %D = call <2 x double> @llvm.sqrt.v2f64
252 %D = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
253 ; SSSE3: cost of 4 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
254 ; SSE42: cost of 4 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
255 ; AVX: cost of 1 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
256 ; AVX2: cost of 1 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
257 ; AVX512: cost of 1 {{.*}} %E = call <4 x double> @llvm.sqrt.v4f64
258 %E = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
259 ; SSSE3: cost of 8 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
260 ; SSE42: cost of 8 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
261 ; AVX: cost of 4 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
262 ; AVX2: cost of 4 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
263 ; AVX512: cost of 1 {{.*}} %F = call <8 x double> @llvm.sqrt.v8f64
264 %F = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
265
266 ret i32 undef
267}
268
; fma: expected cost-model output for calls to the llvm.fma vector intrinsics
; on undef operands. Covers v4f32, v8f32, v16f32, then v2f64, v4f64, v8f64
; (128, 256 and 512 bits each); every call is preceded by one expectation for
; each of the SSSE3, SSE42, AVX, AVX2 and AVX512 prefixes.
; The AVX and AVX2 run lines pass +fma in -mattr while the SSSE3 and SSE42 run
; lines do not, which lines up with the very large SSE-level expectations here
; versus the small AVX-level ones.
; The i32 argument is unused and the function returns undef.
269; CHECK-LABEL: 'fma'
270define i32 @fma(i32 %arg) {
 ; Single-precision vectors: 128, 256, 512 bits.
271 ; SSSE3: cost of 52 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
272 ; SSE42: cost of 52 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
273 ; AVX: cost of 1 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
274 ; AVX2: cost of 1 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
275 ; AVX512: cost of 1 {{.*}} %A = call <4 x float> @llvm.fma.v4f32
276 %A = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
277 ; SSSE3: cost of 104 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
278 ; SSE42: cost of 104 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
279 ; AVX: cost of 1 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
280 ; AVX2: cost of 1 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
281 ; AVX512: cost of 1 {{.*}} %B = call <8 x float> @llvm.fma.v8f32
282 %B = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
283 ; SSSE3: cost of 208 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
284 ; SSE42: cost of 208 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
285 ; AVX: cost of 4 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
286 ; AVX2: cost of 4 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
287 ; AVX512: cost of 1 {{.*}} %C = call <16 x float> @llvm.fma.v16f32
288 %C = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
289
 ; Double-precision vectors: 128, 256, 512 bits.
290 ; SSSE3: cost of 24 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
291 ; SSE42: cost of 24 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
292 ; AVX: cost of 1 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
293 ; AVX2: cost of 1 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
294 ; AVX512: cost of 1 {{.*}} %D = call <2 x double> @llvm.fma.v2f64
295 %D = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
296 ; SSSE3: cost of 48 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
297 ; SSE42: cost of 48 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
298 ; AVX: cost of 1 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
299 ; AVX2: cost of 1 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
300 ; AVX512: cost of 1 {{.*}} %E = call <4 x double> @llvm.fma.v4f64
301 %E = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
302 ; SSSE3: cost of 96 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
303 ; SSE42: cost of 96 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
304 ; AVX: cost of 4 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
305 ; AVX2: cost of 4 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
306 ; AVX512: cost of 1 {{.*}} %F = call <8 x double> @llvm.fma.v8f64
307 %F = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
308
309 ret i32 undef
310}
311
; Declarations of the llvm.sqrt and llvm.fma vector intrinsics called by
; @fsqrt and @fma: one overload per vector type used in those functions
; (v4f32, v8f32, v16f32, v2f64, v4f64, v8f64).

; llvm.sqrt takes a single vector operand and returns the same vector type.
312declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
313declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
314declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
315
316declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
317declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
318declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
319
; llvm.fma takes three vector operands of the same type as its result.
320declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
321declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
322declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
323
324declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
325declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
326declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)