blob: 5dd60d998cbda1caa9bca080cbfba8fcc5c106a8 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <cmath>
9#include <functional>
10#include <random>
11#include <vector>
12
13#include <cpuinfo.h>
14
15#include <benchmark/benchmark.h>
16#include "bench/utils.h"
17#include <xnnpack/AlignedAllocator.h>
18#include <xnnpack/common.h>
19#include <xnnpack/math-stubs.h>
20
21
22class Rounding : public benchmark::Fixture {
23 public:
24 inline Rounding()
25 {
26 cpuinfo_initialize();
27 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
28 const size_t l1d_reserve = 1024;
29 n_ = (l1d_size - l1d_reserve) / (2 * sizeof(float));
30 n_ = n_ / 16 * 16;
31 }
32
33 virtual void SetUp(const benchmark::State&) override
34 {
35 std::random_device random_device;
36 auto rng = std::mt19937(random_device());
37 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), rng);
38
39 input_.resize(n());
40 std::generate(input_.begin(), input_.end(), std::ref(f32rng));
41 output_.resize(n());
42 std::fill(output_.begin(), output_.end(), 0xA5);
43 }
44
45 virtual void TearDown(benchmark::State& state) override
46 {
47 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
48 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * 2 * sizeof(float));
49 input_.clear();
50 output_.clear();
51 }
52
53 inline const float* input() const
54 {
55 return input_.data();
56 }
57
58 inline float* output()
59 {
60 return output_.data();
61 }
62
63 inline size_t n() const
64 {
65 return n_;
66 }
67
68 protected:
69 std::vector<float, AlignedAllocator<float, 32>> input_;
70 std::vector<float, AlignedAllocator<float, 32>> output_;
71 size_t n_;
72};
73
74class RoundingToNearestEven : public Rounding { };
75class RoundingDown : public Rounding { };
76class RoundingUp : public Rounding { };
77class RoundingTowardsZero : public Rounding { };
78
79BENCHMARK_F(RoundingToNearestEven, scalar_addsub)(benchmark::State& state) {
80 for (auto _ : state) {
81 xnn_math_f32_roundne__scalar_addsub(
82 n() * sizeof(float), input(), output());
83 }
84}
85
86BENCHMARK_F(RoundingToNearestEven, scalar_nearbyint)(benchmark::State& state) {
87 for (auto _ : state) {
88 xnn_math_f32_roundne__scalar_nearbyint(
89 n() * sizeof(float), input(), output());
90 }
91}
92
93BENCHMARK_F(RoundingToNearestEven, scalar_rint)(benchmark::State& state) {
94 for (auto _ : state) {
95 xnn_math_f32_roundne__scalar_rint(
96 n() * sizeof(float), input(), output());
97 }
98}
99
100BENCHMARK_F(RoundingDown, scalar_addsub)(benchmark::State& state) {
101 for (auto _ : state) {
102 xnn_math_f32_roundd__scalar_addsub(
103 n() * sizeof(float), input(), output());
104 }
105}
106
107BENCHMARK_F(RoundingDown, scalar_cvt)(benchmark::State& state) {
108 for (auto _ : state) {
109 xnn_math_f32_roundd__scalar_cvt(
110 n() * sizeof(float), input(), output());
111 }
112}
113
114BENCHMARK_F(RoundingDown, scalar_floor)(benchmark::State& state) {
115 for (auto _ : state) {
116 xnn_math_f32_roundd__scalar_floor(
117 n() * sizeof(float), input(), output());
118 }
119}
120
121BENCHMARK_F(RoundingUp, scalar_addsub)(benchmark::State& state) {
122 for (auto _ : state) {
123 xnn_math_f32_roundu__scalar_addsub(
124 n() * sizeof(float), input(), output());
125 }
126}
127
128BENCHMARK_F(RoundingUp, scalar_cvt)(benchmark::State& state) {
129 for (auto _ : state) {
130 xnn_math_f32_roundu__scalar_cvt(
131 n() * sizeof(float), input(), output());
132 }
133}
134
135BENCHMARK_F(RoundingUp, scalar_ceil)(benchmark::State& state) {
136 for (auto _ : state) {
137 xnn_math_f32_roundu__scalar_ceil(
138 n() * sizeof(float), input(), output());
139 }
140}
141
142BENCHMARK_F(RoundingTowardsZero, scalar_addsub)(benchmark::State& state) {
143 for (auto _ : state) {
144 xnn_math_f32_roundz__scalar_addsub(
145 n() * sizeof(float), input(), output());
146 }
147}
148
149BENCHMARK_F(RoundingTowardsZero, scalar_cvt)(benchmark::State& state) {
150 for (auto _ : state) {
151 xnn_math_f32_roundz__scalar_cvt(
152 n() * sizeof(float), input(), output());
153 }
154}
155
156BENCHMARK_F(RoundingTowardsZero, scalar_trunc)(benchmark::State& state) {
157 for (auto _ : state) {
158 xnn_math_f32_roundz__scalar_trunc(
159 n() * sizeof(float), input(), output());
160 }
161}
162
Marat Dukhand3f3d872020-06-24 13:08:25 -0700163#if XNN_ARCH_WASMSIMD
164 BENCHMARK_F(RoundingToNearestEven, wasmsimd_addsub)(benchmark::State& state) {
165 for (auto _ : state) {
166 xnn_math_f32_roundne__wasmsimd_addsub(
167 n() * sizeof(float), input(), output());
168 }
169 }
170
171 BENCHMARK_F(RoundingDown, wasmsimd_addsub)(benchmark::State& state) {
172 for (auto _ : state) {
173 xnn_math_f32_roundd__wasmsimd_addsub(
174 n() * sizeof(float), input(), output());
175 }
176 }
177
178 BENCHMARK_F(RoundingUp, wasmsimd_addsub)(benchmark::State& state) {
179 for (auto _ : state) {
180 xnn_math_f32_roundu__wasmsimd_addsub(
181 n() * sizeof(float), input(), output());
182 }
183 }
184
185 BENCHMARK_F(RoundingTowardsZero, wasmsimd_addsub)(benchmark::State& state) {
186 for (auto _ : state) {
187 xnn_math_f32_roundz__wasmsimd_addsub(
188 n() * sizeof(float), input(), output());
189 }
190 }
191#endif // XNN_ARCH_WASMSIMD
192
Marat Dukhanffbf96a2020-05-14 02:59:08 -0700193#if XNN_ARCH_ARM || XNN_ARCH_ARM64
194 BENCHMARK_F(RoundingToNearestEven, neon_addsub)(benchmark::State& state) {
195 for (auto _ : state) {
196 xnn_math_f32_roundne__neon_addsub(
197 n() * sizeof(float), input(), output());
198 }
199 }
200
201 BENCHMARK_F(RoundingToNearestEven, neonv8)(benchmark::State& state) {
202 for (auto _ : state) {
203 xnn_math_f32_roundne__neonv8(
204 n() * sizeof(float), input(), output());
205 }
206 }
207
208 BENCHMARK_F(RoundingDown, neon_addsub)(benchmark::State& state) {
209 for (auto _ : state) {
210 xnn_math_f32_roundd__neon_addsub(
211 n() * sizeof(float), input(), output());
212 }
213 }
214
215 BENCHMARK_F(RoundingDown, neon_cvt)(benchmark::State& state) {
216 for (auto _ : state) {
217 xnn_math_f32_roundd__neon_cvt(
218 n() * sizeof(float), input(), output());
219 }
220 }
221
222 BENCHMARK_F(RoundingDown, neonv8)(benchmark::State& state) {
223 for (auto _ : state) {
224 xnn_math_f32_roundd__neonv8(
225 n() * sizeof(float), input(), output());
226 }
227 }
228
229 BENCHMARK_F(RoundingUp, neon_addsub)(benchmark::State& state) {
230 for (auto _ : state) {
231 xnn_math_f32_roundu__neon_addsub(
232 n() * sizeof(float), input(), output());
233 }
234 }
235
236 BENCHMARK_F(RoundingUp, neon_cvt)(benchmark::State& state) {
237 for (auto _ : state) {
238 xnn_math_f32_roundu__neon_cvt(
239 n() * sizeof(float), input(), output());
240 }
241 }
242
243 BENCHMARK_F(RoundingUp, neonv8)(benchmark::State& state) {
244 for (auto _ : state) {
245 xnn_math_f32_roundu__neonv8(
246 n() * sizeof(float), input(), output());
247 }
248 }
249
250 BENCHMARK_F(RoundingTowardsZero, neon_addsub)(benchmark::State& state) {
251 for (auto _ : state) {
252 xnn_math_f32_roundz__neon_addsub(
253 n() * sizeof(float), input(), output());
254 }
255 }
256
257 BENCHMARK_F(RoundingTowardsZero, neon_cvt)(benchmark::State& state) {
258 for (auto _ : state) {
259 xnn_math_f32_roundz__neon_cvt(
260 n() * sizeof(float), input(), output());
261 }
262 }
263
264 BENCHMARK_F(RoundingTowardsZero, neonv8)(benchmark::State& state) {
265 for (auto _ : state) {
266 xnn_math_f32_roundz__neonv8(
267 n() * sizeof(float), input(), output());
268 }
269 }
270#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
271
272#if XNN_ARCH_X86 || XNN_ARCH_X86_64
273 BENCHMARK_F(RoundingToNearestEven, sse_addsub)(benchmark::State& state) {
274 for (auto _ : state) {
275 xnn_math_f32_roundne__sse_addsub(
276 n() * sizeof(float), input(), output());
277 }
278 }
279
280 BENCHMARK_F(RoundingToNearestEven, sse2_cvt)(benchmark::State& state) {
281 for (auto _ : state) {
282 xnn_math_f32_roundne__sse2_cvt(
283 n() * sizeof(float), input(), output());
284 }
285 }
286
287 BENCHMARK_F(RoundingToNearestEven, sse4)(benchmark::State& state) {
288 for (auto _ : state) {
289 xnn_math_f32_roundne__sse41(
290 n() * sizeof(float), input(), output());
291 }
292 }
293
294 BENCHMARK_F(RoundingDown, sse_addsub)(benchmark::State& state) {
295 for (auto _ : state) {
296 xnn_math_f32_roundd__sse_addsub(
297 n() * sizeof(float), input(), output());
298 }
299 }
300
301 BENCHMARK_F(RoundingDown, sse2_cvt)(benchmark::State& state) {
302 for (auto _ : state) {
303 xnn_math_f32_roundd__sse2_cvt(
304 n() * sizeof(float), input(), output());
305 }
306 }
307
308 BENCHMARK_F(RoundingDown, sse4)(benchmark::State& state) {
309 for (auto _ : state) {
310 xnn_math_f32_roundd__sse41(
311 n() * sizeof(float), input(), output());
312 }
313 }
314
315 BENCHMARK_F(RoundingUp, sse_addsub)(benchmark::State& state) {
316 for (auto _ : state) {
317 xnn_math_f32_roundu__sse_addsub(
318 n() * sizeof(float), input(), output());
319 }
320 }
321
322 BENCHMARK_F(RoundingUp, sse2_cvt)(benchmark::State& state) {
323 for (auto _ : state) {
324 xnn_math_f32_roundu__sse2_cvt(
325 n() * sizeof(float), input(), output());
326 }
327 }
328
329 BENCHMARK_F(RoundingUp, sse4)(benchmark::State& state) {
330 for (auto _ : state) {
331 xnn_math_f32_roundu__sse41(
332 n() * sizeof(float), input(), output());
333 }
334 }
335
336 BENCHMARK_F(RoundingTowardsZero, sse_addsub)(benchmark::State& state) {
337 for (auto _ : state) {
338 xnn_math_f32_roundz__sse_addsub(
339 n() * sizeof(float), input(), output());
340 }
341 }
342
343 BENCHMARK_F(RoundingTowardsZero, sse2_cvt)(benchmark::State& state) {
344 for (auto _ : state) {
345 xnn_math_f32_roundz__sse2_cvt(
346 n() * sizeof(float), input(), output());
347 }
348 }
349
350 BENCHMARK_F(RoundingTowardsZero, sse4)(benchmark::State& state) {
351 for (auto _ : state) {
352 xnn_math_f32_roundz__sse41(
353 n() * sizeof(float), input(), output());
354 }
355 }
356#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
357
358
359#ifndef XNNPACK_BENCHMARK_NO_MAIN
360BENCHMARK_MAIN();
361#endif