; Cost-model regression test: x86 floating-point arithmetic (fadd/fsub/fmul/
; fdiv/frem) and FP intrinsics (sqrt/fabs/fma) across SSE2..AVX-512 levels.
; Each RUN line re-checks the same IR under a different -mattr feature set.
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE42
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"

; CHECK-LABEL: 'fadd'
; Scalar and 128/256/512-bit vector fadd costs per ISA level.
define i32 @fadd(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fadd
  ; SSE42: cost of 2 {{.*}} %F32 = fadd
  ; AVX: cost of 2 {{.*}} %F32 = fadd
  ; AVX2: cost of 2 {{.*}} %F32 = fadd
  ; AVX512: cost of 2 {{.*}} %F32 = fadd
  %F32 = fadd float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fadd
  ; SSE42: cost of 2 {{.*}} %V4F32 = fadd
  ; AVX: cost of 2 {{.*}} %V4F32 = fadd
  ; AVX2: cost of 2 {{.*}} %V4F32 = fadd
  ; AVX512: cost of 2 {{.*}} %V4F32 = fadd
  %V4F32 = fadd <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fadd
  ; SSE42: cost of 4 {{.*}} %V8F32 = fadd
  ; AVX: cost of 2 {{.*}} %V8F32 = fadd
  ; AVX2: cost of 2 {{.*}} %V8F32 = fadd
  ; AVX512: cost of 2 {{.*}} %V8F32 = fadd
  %V8F32 = fadd <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fadd
  ; SSE42: cost of 8 {{.*}} %V16F32 = fadd
  ; AVX: cost of 4 {{.*}} %V16F32 = fadd
  ; AVX2: cost of 4 {{.*}} %V16F32 = fadd
  ; AVX512: cost of 2 {{.*}} %V16F32 = fadd
  %V16F32 = fadd <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fadd
  ; SSE42: cost of 2 {{.*}} %F64 = fadd
  ; AVX: cost of 2 {{.*}} %F64 = fadd
  ; AVX2: cost of 2 {{.*}} %F64 = fadd
  ; AVX512: cost of 2 {{.*}} %F64 = fadd
  %F64 = fadd double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fadd
  ; SSE42: cost of 2 {{.*}} %V2F64 = fadd
  ; AVX: cost of 2 {{.*}} %V2F64 = fadd
  ; AVX2: cost of 2 {{.*}} %V2F64 = fadd
  ; AVX512: cost of 2 {{.*}} %V2F64 = fadd
  %V2F64 = fadd <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fadd
  ; SSE42: cost of 4 {{.*}} %V4F64 = fadd
  ; AVX: cost of 2 {{.*}} %V4F64 = fadd
  ; AVX2: cost of 2 {{.*}} %V4F64 = fadd
  ; AVX512: cost of 2 {{.*}} %V4F64 = fadd
  %V4F64 = fadd <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fadd
  ; SSE42: cost of 8 {{.*}} %V8F64 = fadd
  ; AVX: cost of 4 {{.*}} %V8F64 = fadd
  ; AVX2: cost of 4 {{.*}} %V8F64 = fadd
  ; AVX512: cost of 2 {{.*}} %V8F64 = fadd
  %V8F64 = fadd <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fsub'
; Scalar and 128/256/512-bit vector fsub costs per ISA level.
define i32 @fsub(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fsub
  ; SSE42: cost of 2 {{.*}} %F32 = fsub
  ; AVX: cost of 2 {{.*}} %F32 = fsub
  ; AVX2: cost of 2 {{.*}} %F32 = fsub
  ; AVX512: cost of 2 {{.*}} %F32 = fsub
  %F32 = fsub float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fsub
  ; SSE42: cost of 2 {{.*}} %V4F32 = fsub
  ; AVX: cost of 2 {{.*}} %V4F32 = fsub
  ; AVX2: cost of 2 {{.*}} %V4F32 = fsub
  ; AVX512: cost of 2 {{.*}} %V4F32 = fsub
  %V4F32 = fsub <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fsub
  ; SSE42: cost of 4 {{.*}} %V8F32 = fsub
  ; AVX: cost of 2 {{.*}} %V8F32 = fsub
  ; AVX2: cost of 2 {{.*}} %V8F32 = fsub
  ; AVX512: cost of 2 {{.*}} %V8F32 = fsub
  %V8F32 = fsub <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fsub
  ; SSE42: cost of 8 {{.*}} %V16F32 = fsub
  ; AVX: cost of 4 {{.*}} %V16F32 = fsub
  ; AVX2: cost of 4 {{.*}} %V16F32 = fsub
  ; AVX512: cost of 2 {{.*}} %V16F32 = fsub
  %V16F32 = fsub <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fsub
  ; SSE42: cost of 2 {{.*}} %F64 = fsub
  ; AVX: cost of 2 {{.*}} %F64 = fsub
  ; AVX2: cost of 2 {{.*}} %F64 = fsub
  ; AVX512: cost of 2 {{.*}} %F64 = fsub
  %F64 = fsub double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fsub
  ; SSE42: cost of 2 {{.*}} %V2F64 = fsub
  ; AVX: cost of 2 {{.*}} %V2F64 = fsub
  ; AVX2: cost of 2 {{.*}} %V2F64 = fsub
  ; AVX512: cost of 2 {{.*}} %V2F64 = fsub
  %V2F64 = fsub <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fsub
  ; SSE42: cost of 4 {{.*}} %V4F64 = fsub
  ; AVX: cost of 2 {{.*}} %V4F64 = fsub
  ; AVX2: cost of 2 {{.*}} %V4F64 = fsub
  ; AVX512: cost of 2 {{.*}} %V4F64 = fsub
  %V4F64 = fsub <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fsub
  ; SSE42: cost of 8 {{.*}} %V8F64 = fsub
  ; AVX: cost of 4 {{.*}} %V8F64 = fsub
  ; AVX2: cost of 4 {{.*}} %V8F64 = fsub
  ; AVX512: cost of 2 {{.*}} %V8F64 = fsub
  %V8F64 = fsub <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fmul'
; Scalar and 128/256/512-bit vector fmul costs per ISA level.
define i32 @fmul(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fmul
  ; SSE42: cost of 2 {{.*}} %F32 = fmul
  ; AVX: cost of 2 {{.*}} %F32 = fmul
  ; AVX2: cost of 2 {{.*}} %F32 = fmul
  ; AVX512: cost of 2 {{.*}} %F32 = fmul
  %F32 = fmul float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fmul
  ; SSE42: cost of 2 {{.*}} %V4F32 = fmul
  ; AVX: cost of 2 {{.*}} %V4F32 = fmul
  ; AVX2: cost of 2 {{.*}} %V4F32 = fmul
  ; AVX512: cost of 2 {{.*}} %V4F32 = fmul
  %V4F32 = fmul <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fmul
  ; SSE42: cost of 4 {{.*}} %V8F32 = fmul
  ; AVX: cost of 2 {{.*}} %V8F32 = fmul
  ; AVX2: cost of 2 {{.*}} %V8F32 = fmul
  ; AVX512: cost of 2 {{.*}} %V8F32 = fmul
  %V8F32 = fmul <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fmul
  ; SSE42: cost of 8 {{.*}} %V16F32 = fmul
  ; AVX: cost of 4 {{.*}} %V16F32 = fmul
  ; AVX2: cost of 4 {{.*}} %V16F32 = fmul
  ; AVX512: cost of 2 {{.*}} %V16F32 = fmul
  %V16F32 = fmul <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fmul
  ; SSE42: cost of 2 {{.*}} %F64 = fmul
  ; AVX: cost of 2 {{.*}} %F64 = fmul
  ; AVX2: cost of 2 {{.*}} %F64 = fmul
  ; AVX512: cost of 2 {{.*}} %F64 = fmul
  %F64 = fmul double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fmul
  ; SSE42: cost of 2 {{.*}} %V2F64 = fmul
  ; AVX: cost of 2 {{.*}} %V2F64 = fmul
  ; AVX2: cost of 2 {{.*}} %V2F64 = fmul
  ; AVX512: cost of 2 {{.*}} %V2F64 = fmul
  %V2F64 = fmul <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fmul
  ; SSE42: cost of 4 {{.*}} %V4F64 = fmul
  ; AVX: cost of 2 {{.*}} %V4F64 = fmul
  ; AVX2: cost of 2 {{.*}} %V4F64 = fmul
  ; AVX512: cost of 2 {{.*}} %V4F64 = fmul
  %V4F64 = fmul <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fmul
  ; SSE42: cost of 8 {{.*}} %V8F64 = fmul
  ; AVX: cost of 4 {{.*}} %V8F64 = fmul
  ; AVX2: cost of 4 {{.*}} %V8F64 = fmul
  ; AVX512: cost of 2 {{.*}} %V8F64 = fmul
  %V8F64 = fmul <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fdiv'
; Scalar and 128/256/512-bit vector fdiv costs per ISA level.
define i32 @fdiv(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = fdiv
  ; SSE42: cost of 2 {{.*}} %F32 = fdiv
  ; AVX: cost of 2 {{.*}} %F32 = fdiv
  ; AVX2: cost of 2 {{.*}} %F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %F32 = fdiv
  %F32 = fdiv float undef, undef
  ; SSE2: cost of 2 {{.*}} %V4F32 = fdiv
  ; SSE42: cost of 2 {{.*}} %V4F32 = fdiv
  ; AVX: cost of 2 {{.*}} %V4F32 = fdiv
  ; AVX2: cost of 2 {{.*}} %V4F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %V4F32 = fdiv
  %V4F32 = fdiv <4 x float> undef, undef
  ; SSE2: cost of 4 {{.*}} %V8F32 = fdiv
  ; SSE42: cost of 4 {{.*}} %V8F32 = fdiv
  ; AVX: cost of 2 {{.*}} %V8F32 = fdiv
  ; AVX2: cost of 2 {{.*}} %V8F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %V8F32 = fdiv
  %V8F32 = fdiv <8 x float> undef, undef
  ; SSE2: cost of 8 {{.*}} %V16F32 = fdiv
  ; SSE42: cost of 8 {{.*}} %V16F32 = fdiv
  ; AVX: cost of 4 {{.*}} %V16F32 = fdiv
  ; AVX2: cost of 4 {{.*}} %V16F32 = fdiv
  ; AVX512: cost of 2 {{.*}} %V16F32 = fdiv
  %V16F32 = fdiv <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = fdiv
  ; SSE42: cost of 2 {{.*}} %F64 = fdiv
  ; AVX: cost of 2 {{.*}} %F64 = fdiv
  ; AVX2: cost of 2 {{.*}} %F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %F64 = fdiv
  %F64 = fdiv double undef, undef
  ; SSE2: cost of 2 {{.*}} %V2F64 = fdiv
  ; SSE42: cost of 2 {{.*}} %V2F64 = fdiv
  ; AVX: cost of 2 {{.*}} %V2F64 = fdiv
  ; AVX2: cost of 2 {{.*}} %V2F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %V2F64 = fdiv
  %V2F64 = fdiv <2 x double> undef, undef
  ; SSE2: cost of 4 {{.*}} %V4F64 = fdiv
  ; SSE42: cost of 4 {{.*}} %V4F64 = fdiv
  ; AVX: cost of 2 {{.*}} %V4F64 = fdiv
  ; AVX2: cost of 2 {{.*}} %V4F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %V4F64 = fdiv
  %V4F64 = fdiv <4 x double> undef, undef
  ; SSE2: cost of 8 {{.*}} %V8F64 = fdiv
  ; SSE42: cost of 8 {{.*}} %V8F64 = fdiv
  ; AVX: cost of 4 {{.*}} %V8F64 = fdiv
  ; AVX2: cost of 4 {{.*}} %V8F64 = fdiv
  ; AVX512: cost of 2 {{.*}} %V8F64 = fdiv
  %V8F64 = fdiv <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'frem'
; frem has no hardware instruction; vector forms are scalarized, so costs
; grow with element count and include extract/insert overhead.
define i32 @frem(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = frem
  ; SSE42: cost of 2 {{.*}} %F32 = frem
  ; AVX: cost of 2 {{.*}} %F32 = frem
  ; AVX2: cost of 2 {{.*}} %F32 = frem
  ; AVX512: cost of 2 {{.*}} %F32 = frem
  %F32 = frem float undef, undef
  ; SSE2: cost of 14 {{.*}} %V4F32 = frem
  ; SSE42: cost of 14 {{.*}} %V4F32 = frem
  ; AVX: cost of 14 {{.*}} %V4F32 = frem
  ; AVX2: cost of 14 {{.*}} %V4F32 = frem
  ; AVX512: cost of 14 {{.*}} %V4F32 = frem
  %V4F32 = frem <4 x float> undef, undef
  ; SSE2: cost of 28 {{.*}} %V8F32 = frem
  ; SSE42: cost of 28 {{.*}} %V8F32 = frem
  ; AVX: cost of 30 {{.*}} %V8F32 = frem
  ; AVX2: cost of 30 {{.*}} %V8F32 = frem
  ; AVX512: cost of 30 {{.*}} %V8F32 = frem
  %V8F32 = frem <8 x float> undef, undef
  ; SSE2: cost of 56 {{.*}} %V16F32 = frem
  ; SSE42: cost of 56 {{.*}} %V16F32 = frem
  ; AVX: cost of 60 {{.*}} %V16F32 = frem
  ; AVX2: cost of 60 {{.*}} %V16F32 = frem
  ; AVX512: cost of 62 {{.*}} %V16F32 = frem
  %V16F32 = frem <16 x float> undef, undef

  ; SSE2: cost of 2 {{.*}} %F64 = frem
  ; SSE42: cost of 2 {{.*}} %F64 = frem
  ; AVX: cost of 2 {{.*}} %F64 = frem
  ; AVX2: cost of 2 {{.*}} %F64 = frem
  ; AVX512: cost of 2 {{.*}} %F64 = frem
  %F64 = frem double undef, undef
  ; SSE2: cost of 6 {{.*}} %V2F64 = frem
  ; SSE42: cost of 6 {{.*}} %V2F64 = frem
  ; AVX: cost of 6 {{.*}} %V2F64 = frem
  ; AVX2: cost of 6 {{.*}} %V2F64 = frem
  ; AVX512: cost of 6 {{.*}} %V2F64 = frem
  %V2F64 = frem <2 x double> undef, undef
  ; SSE2: cost of 12 {{.*}} %V4F64 = frem
  ; SSE42: cost of 12 {{.*}} %V4F64 = frem
  ; AVX: cost of 14 {{.*}} %V4F64 = frem
  ; AVX2: cost of 14 {{.*}} %V4F64 = frem
  ; AVX512: cost of 14 {{.*}} %V4F64 = frem
  %V4F64 = frem <4 x double> undef, undef
  ; SSE2: cost of 24 {{.*}} %V8F64 = frem
  ; SSE42: cost of 24 {{.*}} %V8F64 = frem
  ; AVX: cost of 28 {{.*}} %V8F64 = frem
  ; AVX2: cost of 28 {{.*}} %V8F64 = frem
  ; AVX512: cost of 30 {{.*}} %V8F64 = frem
  %V8F64 = frem <8 x double> undef, undef

  ret i32 undef
}

; CHECK-LABEL: 'fsqrt'
; llvm.sqrt intrinsic costs; wider-than-native vectors split (cost scales
; with the number of legal-width pieces).
define i32 @fsqrt(i32 %arg) {
  ; SSE2: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; SSE42: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; AVX: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  ; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.sqrt.f32
  %F32 = call float @llvm.sqrt.f32(float undef)
  ; SSE2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; SSE42: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  ; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
  %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
  ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  ; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
  %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
  ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
  %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)

  ; SSE2: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; SSE42: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; AVX: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  ; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.sqrt.f64
  %F64 = call double @llvm.sqrt.f64(double undef)
  ; SSE2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; SSE42: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  ; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
  %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
  ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  ; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
  %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
  ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
  %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)

  ret i32 undef
}

; CHECK-LABEL: 'fabs'
; llvm.fabs intrinsic costs per ISA level.
define i32 @fabs(i32 %arg) {
  ; SSE2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; SSE42: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; AVX: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  ; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
  %F32 = call float @llvm.fabs.f32(float undef)
  ; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; AVX: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  ; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
  %V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
  ; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; AVX: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  ; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
  %V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
  ; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  ; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
  %V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)

  ; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; SSE42: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; AVX: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  ; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
  %F64 = call double @llvm.fabs.f64(double undef)
  ; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; AVX: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  ; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
  %V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
  ; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; AVX: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  ; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
  %V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
  ; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  ; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
  %V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)

  ret i32 undef
}

; CHECK-LABEL: 'fma'
; llvm.fma intrinsic costs: cheap with the +fma/AVX-512 feature, expensive
; (scalarized libcall-style expansion) on SSE-only targets.
define i32 @fma(i32 %arg) {
  ; SSE2: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
  ; SSE42: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
  ; AVX: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
  ; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
  ; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
  %F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
  ; SSE2: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; SSE42: cost of 52 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; AVX: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  ; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
  %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
  ; SSE2: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; SSE42: cost of 104 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; AVX: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  ; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
  %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
  ; SSE2: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; SSE42: cost of 208 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; AVX: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  ; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
  %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)

  ; SSE2: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
  ; SSE42: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
  ; AVX: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
  ; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
  ; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
  %F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
  ; SSE2: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; SSE42: cost of 24 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; AVX: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  ; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
  %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
  ; SSE2: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; SSE42: cost of 48 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; AVX: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  ; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
  %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
  ; SSE2: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; SSE42: cost of 96 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; AVX: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  ; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
  %V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)

  ret i32 undef
}

; Intrinsic declarations referenced by the cost checks above.
declare float @llvm.sqrt.f32(float)
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)

declare double @llvm.sqrt.f64(double)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)

declare float @llvm.fabs.f32(float)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
declare <16 x float> @llvm.fabs.v16f32(<16 x float>)

declare double @llvm.fabs.f64(double)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
declare <8 x double> @llvm.fabs.v8f64(<8 x double>)

declare float @llvm.fma.f32(float, float, float)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)

declare double @llvm.fma.f64(double, double, double)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)