blob: cb75549a6b3efe522048745fb9b8058b13aa9099 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <cmath>
9#include <functional>
10#include <random>
11#include <vector>
12
13#include <cpuinfo.h>
14
15#include <benchmark/benchmark.h>
16#include "bench/utils.h"
17#include <xnnpack/AlignedAllocator.h>
18#include <xnnpack/common.h>
19#include <xnnpack/math-stubs.h>
20
21
22class Rounding : public benchmark::Fixture {
23 public:
24 inline Rounding()
25 {
26 cpuinfo_initialize();
27 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
28 const size_t l1d_reserve = 1024;
29 n_ = (l1d_size - l1d_reserve) / (2 * sizeof(float));
30 n_ = n_ / 16 * 16;
31 }
32
33 virtual void SetUp(const benchmark::State&) override
34 {
35 std::random_device random_device;
36 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070037 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
Marat Dukhanffbf96a2020-05-14 02:59:08 -070038
39 input_.resize(n());
40 std::generate(input_.begin(), input_.end(), std::ref(f32rng));
41 output_.resize(n());
42 std::fill(output_.begin(), output_.end(), 0xA5);
43 }
44
45 virtual void TearDown(benchmark::State& state) override
46 {
47 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
48 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * 2 * sizeof(float));
49 input_.clear();
50 output_.clear();
51 }
52
53 inline const float* input() const
54 {
55 return input_.data();
56 }
57
58 inline float* output()
59 {
60 return output_.data();
61 }
62
63 inline size_t n() const
64 {
65 return n_;
66 }
67
68 protected:
Marat Dukhane13e6392021-07-26 22:22:35 -070069 std::vector<float, AlignedAllocator<float, 64>> input_;
70 std::vector<float, AlignedAllocator<float, 64>> output_;
Marat Dukhanffbf96a2020-05-14 02:59:08 -070071 size_t n_;
72};
73
74class RoundingToNearestEven : public Rounding { };
75class RoundingDown : public Rounding { };
76class RoundingUp : public Rounding { };
77class RoundingTowardsZero : public Rounding { };
78
79BENCHMARK_F(RoundingToNearestEven, scalar_addsub)(benchmark::State& state) {
80 for (auto _ : state) {
81 xnn_math_f32_roundne__scalar_addsub(
82 n() * sizeof(float), input(), output());
83 }
84}
85
86BENCHMARK_F(RoundingToNearestEven, scalar_nearbyint)(benchmark::State& state) {
87 for (auto _ : state) {
88 xnn_math_f32_roundne__scalar_nearbyint(
89 n() * sizeof(float), input(), output());
90 }
91}
92
93BENCHMARK_F(RoundingToNearestEven, scalar_rint)(benchmark::State& state) {
94 for (auto _ : state) {
95 xnn_math_f32_roundne__scalar_rint(
96 n() * sizeof(float), input(), output());
97 }
98}
99
100BENCHMARK_F(RoundingDown, scalar_addsub)(benchmark::State& state) {
101 for (auto _ : state) {
102 xnn_math_f32_roundd__scalar_addsub(
103 n() * sizeof(float), input(), output());
104 }
105}
106
107BENCHMARK_F(RoundingDown, scalar_cvt)(benchmark::State& state) {
108 for (auto _ : state) {
109 xnn_math_f32_roundd__scalar_cvt(
110 n() * sizeof(float), input(), output());
111 }
112}
113
114BENCHMARK_F(RoundingDown, scalar_floor)(benchmark::State& state) {
115 for (auto _ : state) {
116 xnn_math_f32_roundd__scalar_floor(
117 n() * sizeof(float), input(), output());
118 }
119}
120
121BENCHMARK_F(RoundingUp, scalar_addsub)(benchmark::State& state) {
122 for (auto _ : state) {
123 xnn_math_f32_roundu__scalar_addsub(
124 n() * sizeof(float), input(), output());
125 }
126}
127
128BENCHMARK_F(RoundingUp, scalar_cvt)(benchmark::State& state) {
129 for (auto _ : state) {
130 xnn_math_f32_roundu__scalar_cvt(
131 n() * sizeof(float), input(), output());
132 }
133}
134
135BENCHMARK_F(RoundingUp, scalar_ceil)(benchmark::State& state) {
136 for (auto _ : state) {
137 xnn_math_f32_roundu__scalar_ceil(
138 n() * sizeof(float), input(), output());
139 }
140}
141
142BENCHMARK_F(RoundingTowardsZero, scalar_addsub)(benchmark::State& state) {
143 for (auto _ : state) {
144 xnn_math_f32_roundz__scalar_addsub(
145 n() * sizeof(float), input(), output());
146 }
147}
148
149BENCHMARK_F(RoundingTowardsZero, scalar_cvt)(benchmark::State& state) {
150 for (auto _ : state) {
151 xnn_math_f32_roundz__scalar_cvt(
152 n() * sizeof(float), input(), output());
153 }
154}
155
156BENCHMARK_F(RoundingTowardsZero, scalar_trunc)(benchmark::State& state) {
157 for (auto _ : state) {
158 xnn_math_f32_roundz__scalar_trunc(
159 n() * sizeof(float), input(), output());
160 }
161}
162
Marat Dukhan4c617792021-12-21 15:47:58 -0800163#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand3f3d872020-06-24 13:08:25 -0700164 BENCHMARK_F(RoundingToNearestEven, wasmsimd_addsub)(benchmark::State& state) {
165 for (auto _ : state) {
166 xnn_math_f32_roundne__wasmsimd_addsub(
167 n() * sizeof(float), input(), output());
168 }
169 }
170
Marat Dukhan33b4f752021-09-03 10:53:53 -0700171 BENCHMARK_F(RoundingToNearestEven, wasmsimd_native)(benchmark::State& state) {
172 for (auto _ : state) {
173 xnn_math_f32_roundne__wasmsimd_native(
174 n() * sizeof(float), input(), output());
175 }
176 }
177
Marat Dukhand3f3d872020-06-24 13:08:25 -0700178 BENCHMARK_F(RoundingDown, wasmsimd_addsub)(benchmark::State& state) {
179 for (auto _ : state) {
180 xnn_math_f32_roundd__wasmsimd_addsub(
181 n() * sizeof(float), input(), output());
182 }
183 }
184
Marat Dukhan066a0cb2021-08-05 22:39:40 -0700185 BENCHMARK_F(RoundingDown, wasmsimd_cvt)(benchmark::State& state) {
186 for (auto _ : state) {
187 xnn_math_f32_roundd__wasmsimd_cvt(
188 n() * sizeof(float), input(), output());
189 }
190 }
191
Marat Dukhan33b4f752021-09-03 10:53:53 -0700192 BENCHMARK_F(RoundingDown, wasmsimd_native)(benchmark::State& state) {
193 for (auto _ : state) {
194 xnn_math_f32_roundd__wasmsimd_native(
195 n() * sizeof(float), input(), output());
196 }
197 }
198
Marat Dukhand3f3d872020-06-24 13:08:25 -0700199 BENCHMARK_F(RoundingUp, wasmsimd_addsub)(benchmark::State& state) {
200 for (auto _ : state) {
201 xnn_math_f32_roundu__wasmsimd_addsub(
202 n() * sizeof(float), input(), output());
203 }
204 }
205
Marat Dukhan066a0cb2021-08-05 22:39:40 -0700206 BENCHMARK_F(RoundingUp, wasmsimd_cvt)(benchmark::State& state) {
207 for (auto _ : state) {
208 xnn_math_f32_roundu__wasmsimd_cvt(
209 n() * sizeof(float), input(), output());
210 }
211 }
212
Marat Dukhan33b4f752021-09-03 10:53:53 -0700213 BENCHMARK_F(RoundingUp, wasmsimd_native)(benchmark::State& state) {
214 for (auto _ : state) {
215 xnn_math_f32_roundu__wasmsimd_native(
216 n() * sizeof(float), input(), output());
217 }
218 }
219
Marat Dukhand3f3d872020-06-24 13:08:25 -0700220 BENCHMARK_F(RoundingTowardsZero, wasmsimd_addsub)(benchmark::State& state) {
221 for (auto _ : state) {
222 xnn_math_f32_roundz__wasmsimd_addsub(
223 n() * sizeof(float), input(), output());
224 }
225 }
Marat Dukhan066a0cb2021-08-05 22:39:40 -0700226
227 BENCHMARK_F(RoundingTowardsZero, wasmsimd_cvt)(benchmark::State& state) {
228 for (auto _ : state) {
229 xnn_math_f32_roundz__wasmsimd_cvt(
230 n() * sizeof(float), input(), output());
231 }
232 }
Marat Dukhan33b4f752021-09-03 10:53:53 -0700233
234 BENCHMARK_F(RoundingTowardsZero, wasmsimd_native)(benchmark::State& state) {
235 for (auto _ : state) {
236 xnn_math_f32_roundz__wasmsimd_native(
237 n() * sizeof(float), input(), output());
238 }
239 }
Marat Dukhan4c617792021-12-21 15:47:58 -0800240#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand3f3d872020-06-24 13:08:25 -0700241
Marat Dukhanffbf96a2020-05-14 02:59:08 -0700242#if XNN_ARCH_ARM || XNN_ARCH_ARM64
243 BENCHMARK_F(RoundingToNearestEven, neon_addsub)(benchmark::State& state) {
244 for (auto _ : state) {
245 xnn_math_f32_roundne__neon_addsub(
246 n() * sizeof(float), input(), output());
247 }
248 }
249
250 BENCHMARK_F(RoundingToNearestEven, neonv8)(benchmark::State& state) {
251 for (auto _ : state) {
252 xnn_math_f32_roundne__neonv8(
253 n() * sizeof(float), input(), output());
254 }
255 }
256
257 BENCHMARK_F(RoundingDown, neon_addsub)(benchmark::State& state) {
258 for (auto _ : state) {
259 xnn_math_f32_roundd__neon_addsub(
260 n() * sizeof(float), input(), output());
261 }
262 }
263
264 BENCHMARK_F(RoundingDown, neon_cvt)(benchmark::State& state) {
265 for (auto _ : state) {
266 xnn_math_f32_roundd__neon_cvt(
267 n() * sizeof(float), input(), output());
268 }
269 }
270
271 BENCHMARK_F(RoundingDown, neonv8)(benchmark::State& state) {
272 for (auto _ : state) {
273 xnn_math_f32_roundd__neonv8(
274 n() * sizeof(float), input(), output());
275 }
276 }
277
278 BENCHMARK_F(RoundingUp, neon_addsub)(benchmark::State& state) {
279 for (auto _ : state) {
280 xnn_math_f32_roundu__neon_addsub(
281 n() * sizeof(float), input(), output());
282 }
283 }
284
285 BENCHMARK_F(RoundingUp, neon_cvt)(benchmark::State& state) {
286 for (auto _ : state) {
287 xnn_math_f32_roundu__neon_cvt(
288 n() * sizeof(float), input(), output());
289 }
290 }
291
292 BENCHMARK_F(RoundingUp, neonv8)(benchmark::State& state) {
293 for (auto _ : state) {
294 xnn_math_f32_roundu__neonv8(
295 n() * sizeof(float), input(), output());
296 }
297 }
298
299 BENCHMARK_F(RoundingTowardsZero, neon_addsub)(benchmark::State& state) {
300 for (auto _ : state) {
301 xnn_math_f32_roundz__neon_addsub(
302 n() * sizeof(float), input(), output());
303 }
304 }
305
306 BENCHMARK_F(RoundingTowardsZero, neon_cvt)(benchmark::State& state) {
307 for (auto _ : state) {
308 xnn_math_f32_roundz__neon_cvt(
309 n() * sizeof(float), input(), output());
310 }
311 }
312
313 BENCHMARK_F(RoundingTowardsZero, neonv8)(benchmark::State& state) {
314 for (auto _ : state) {
315 xnn_math_f32_roundz__neonv8(
316 n() * sizeof(float), input(), output());
317 }
318 }
319#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
320
321#if XNN_ARCH_X86 || XNN_ARCH_X86_64
322 BENCHMARK_F(RoundingToNearestEven, sse_addsub)(benchmark::State& state) {
323 for (auto _ : state) {
324 xnn_math_f32_roundne__sse_addsub(
325 n() * sizeof(float), input(), output());
326 }
327 }
328
329 BENCHMARK_F(RoundingToNearestEven, sse2_cvt)(benchmark::State& state) {
330 for (auto _ : state) {
331 xnn_math_f32_roundne__sse2_cvt(
332 n() * sizeof(float), input(), output());
333 }
334 }
335
336 BENCHMARK_F(RoundingToNearestEven, sse4)(benchmark::State& state) {
337 for (auto _ : state) {
338 xnn_math_f32_roundne__sse41(
339 n() * sizeof(float), input(), output());
340 }
341 }
342
343 BENCHMARK_F(RoundingDown, sse_addsub)(benchmark::State& state) {
344 for (auto _ : state) {
345 xnn_math_f32_roundd__sse_addsub(
346 n() * sizeof(float), input(), output());
347 }
348 }
349
350 BENCHMARK_F(RoundingDown, sse2_cvt)(benchmark::State& state) {
351 for (auto _ : state) {
352 xnn_math_f32_roundd__sse2_cvt(
353 n() * sizeof(float), input(), output());
354 }
355 }
356
357 BENCHMARK_F(RoundingDown, sse4)(benchmark::State& state) {
358 for (auto _ : state) {
359 xnn_math_f32_roundd__sse41(
360 n() * sizeof(float), input(), output());
361 }
362 }
363
364 BENCHMARK_F(RoundingUp, sse_addsub)(benchmark::State& state) {
365 for (auto _ : state) {
366 xnn_math_f32_roundu__sse_addsub(
367 n() * sizeof(float), input(), output());
368 }
369 }
370
371 BENCHMARK_F(RoundingUp, sse2_cvt)(benchmark::State& state) {
372 for (auto _ : state) {
373 xnn_math_f32_roundu__sse2_cvt(
374 n() * sizeof(float), input(), output());
375 }
376 }
377
378 BENCHMARK_F(RoundingUp, sse4)(benchmark::State& state) {
379 for (auto _ : state) {
380 xnn_math_f32_roundu__sse41(
381 n() * sizeof(float), input(), output());
382 }
383 }
384
385 BENCHMARK_F(RoundingTowardsZero, sse_addsub)(benchmark::State& state) {
386 for (auto _ : state) {
387 xnn_math_f32_roundz__sse_addsub(
388 n() * sizeof(float), input(), output());
389 }
390 }
391
392 BENCHMARK_F(RoundingTowardsZero, sse2_cvt)(benchmark::State& state) {
393 for (auto _ : state) {
394 xnn_math_f32_roundz__sse2_cvt(
395 n() * sizeof(float), input(), output());
396 }
397 }
398
399 BENCHMARK_F(RoundingTowardsZero, sse4)(benchmark::State& state) {
400 for (auto _ : state) {
401 xnn_math_f32_roundz__sse41(
402 n() * sizeof(float), input(), output());
403 }
404 }
405#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
406
407
408#ifndef XNNPACK_BENCHMARK_NO_MAIN
409BENCHMARK_MAIN();
410#endif