blob: 40255aa23c94e0e37ec1787f77e3641ab7d2645c [file] [log] [blame]
Marat Dukhan515c9772019-10-17 18:07:57 -07001// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <cmath>
9#include <functional>
10#include <random>
11#include <vector>
12
13#include <benchmark/benchmark.h>
14#include <fp16/fp16.h>
15
16#include <xnnpack/AlignedAllocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/math-stubs.h>
19
20
21static void ExpError(benchmark::State& state,
22 xnn_f32_unary_math_function exp,
23 size_t tile_size)
24{
25 // The smallest x for which expf(x) is normalized (-0x1.5D589Ep6f).
26 const uint32_t min_input = 0xC2AEAC4FL;
27 const size_t num_tiles = 100;
28
29 double max_ulp_error = 0.0;
30 std::vector<float, AlignedAllocator<float, 64>> x(tile_size * num_tiles);
31 std::vector<float, AlignedAllocator<float, 64>> y(tile_size * num_tiles);
32 for (auto _ : state) {
33 for (uint32_t n = min_input; int32_t(n) < 0; n -= tile_size * num_tiles) {
34 for (uint32_t i = 0; i < tile_size * num_tiles; i++) {
35 x[i] = fp32_from_bits(std::max<uint32_t>(n - i, 0x80000000));
36 }
37 std::fill(y.begin(), y.end(), std::nanf(""));
38
39 exp(tile_size * num_tiles * sizeof(float), x.data(), y.data());
40
41 for (uint32_t i = 0; i < tile_size * num_tiles; i++) {
42 const double y_ref = std::exp(double(x[i]));
43 const double abs_error = std::abs(y_ref - double(y[i]));
44 const float y_abs = std::abs(y_ref);
45 const float y_ulp = fp32_from_bits(fp32_to_bits(y_abs) + 1) - y_abs;
46 max_ulp_error = std::max<double>(max_ulp_error, abs_error / y_ulp);
47 }
48 }
49 }
50
51 state.counters["ULPERROR"] = benchmark::Counter(max_ulp_error);
52}
53
54#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanffd68402019-11-15 15:19:11 -080055 static void f32_expminus__sse2_p5(benchmark::State& state) {
56 ExpError(state, xnn_math_f32_expminus__sse2_p5, 4);
57 }
Marat Dukhan515c9772019-10-17 18:07:57 -070058 static void f32_expminus__avx2_p5(benchmark::State& state) {
59 ExpError(state, xnn_math_f32_expminus__avx2_p5, 8);
60 }
61
Marat Dukhanffd68402019-11-15 15:19:11 -080062 BENCHMARK(f32_expminus__sse2_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
Marat Dukhan515c9772019-10-17 18:07:57 -070063 BENCHMARK(f32_expminus__avx2_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
64#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
65
Marat Dukhan346a9e52019-11-15 09:06:30 -080066#if XNN_ARCH_ARM || XNN_ARCH_ARM64
67 static void f32_expminus__neonfma_p5(benchmark::State& state) {
68 ExpError(state, xnn_math_f32_expminus__neonfma_p5, 4);
69 }
Marat Dukhan189ae802019-11-26 11:28:44 -080070 static void f32_expminus__neonfma_lut64_p2(benchmark::State& state) {
71 ExpError(state, xnn_math_f32_expminus__neonfma_lut64_p2, 4);
72 }
73 static void f32_expminus__neonfma_lut2048_p1(benchmark::State& state) {
74 ExpError(state, xnn_math_f32_expminus__neonfma_lut2048_p1, 4);
75 }
Marat Dukhan346a9e52019-11-15 09:06:30 -080076
77 BENCHMARK(f32_expminus__neonfma_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
Marat Dukhan189ae802019-11-26 11:28:44 -080078 BENCHMARK(f32_expminus__neonfma_lut64_p2)->Unit(benchmark::kMillisecond)->Iterations(1);
79 BENCHMARK(f32_expminus__neonfma_lut2048_p1)->Unit(benchmark::kMillisecond)->Iterations(1);
Marat Dukhan346a9e52019-11-15 09:06:30 -080080#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
81
Marat Dukhan5e9a91e2019-12-22 19:13:03 -080082static void f32_expminus__scalar_p5(benchmark::State& state) {
83 ExpError(state, xnn_math_f32_expminus__scalar_p5, 1);
84}
85static void f32_expminus__scalar_lut64_p2(benchmark::State& state) {
86 ExpError(state, xnn_math_f32_expminus__scalar_lut64_p2, 1);
87}
88static void f32_expminus__scalar_lut2048_p1(benchmark::State& state) {
89 ExpError(state, xnn_math_f32_expminus__scalar_lut2048_p1, 1);
90}
91
92BENCHMARK(f32_expminus__scalar_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
93BENCHMARK(f32_expminus__scalar_lut64_p2)->Unit(benchmark::kMillisecond)->Iterations(1);
94BENCHMARK(f32_expminus__scalar_lut2048_p1)->Unit(benchmark::kMillisecond)->Iterations(1);
95
Marat Dukhan515c9772019-10-17 18:07:57 -070096#ifndef XNNPACK_BENCHMARK_NO_MAIN
97BENCHMARK_MAIN();
98#endif