blob: 56def3873f3ee6573dc18c7c23da437a3eca688b [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
Marat Dukhan14bec502019-11-18 11:35:31 -080012#include <benchmark/benchmark.h>
13#include "bench/utils.h"
14#include <xnnpack/AlignedAllocator.h>
15#include <xnnpack/common.h>
16#include <xnnpack/params.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080017#include <xnnpack/vunary.h>
Marat Dukhan14bec502019-11-18 11:35:31 -080018
19
20static void f32_sigmoid(
21 benchmark::State& state,
Marat Dukhan1e782c42019-11-21 17:02:40 -080022 xnn_f32_vunary_ukernel_function sigmoid)
Marat Dukhan14bec502019-11-18 11:35:31 -080023{
24 const size_t elements = state.range(0);
25
26 std::random_device random_device;
27 auto rng = std::mt19937(random_device());
28 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), rng);
29
30 std::vector<float, AlignedAllocator<float, 64>> x(elements);
31 std::vector<float, AlignedAllocator<float, 64>> y(elements);
32 std::generate(x.begin(), x.end(), std::ref(f32rng));
33 std::fill(y.begin(), y.end(), std::nanf(""));
34
35 for (auto _ : state) {
36 sigmoid(elements * sizeof(float), x.data(), y.data(), nullptr /* params */);
37 }
38
39 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
40
41 const size_t elements_per_iteration = elements;
42 state.counters["elements"] =
43 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
44
45 const size_t bytes_per_iteration = 2 * elements * sizeof(float);
46 state.counters["bytes"] =
47 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
48}
49
#if XNN_ARCH_ARM64
  // Registers xnn_f32_sigmoid_ukernel__<variant> as benchmark
  // "f32_sigmoid/<variant>", swept over sizes from 1000 to 1000000 elements in
  // multiples of 10 and measured in real (wall-clock) time.
  #define BENCHMARK_F32_SIGMOID_UKERNEL(variant) \
    BENCHMARK_CAPTURE(f32_sigmoid, variant, xnn_f32_sigmoid_ukernel__##variant) \
      ->RangeMultiplier(10) \
      ->Range(1000, 1000000) \
      ->UseRealTime()

  // neonfma_rr1_p5_div variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_div_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_div_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_div_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_div_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_div_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_div_x24);

  // neonfma_rr1_lut64_p2_div variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_div_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_div_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_div_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_div_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_div_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_div_x24);

  // neonfma_rr1_lut2048_p1_div variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_div_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_div_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_div_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_div_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_div_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_div_x24);

  #undef BENCHMARK_F32_SIGMOID_UKERNEL
#endif  // XNN_ARCH_ARM64
126
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Registers xnn_f32_sigmoid_ukernel__<variant> as benchmark
  // "f32_sigmoid/<variant>", swept over sizes from 1000 to 1000000 elements in
  // multiples of 10 and measured in real (wall-clock) time.
  #define BENCHMARK_F32_SIGMOID_UKERNEL(variant) \
    BENCHMARK_CAPTURE(f32_sigmoid, variant, xnn_f32_sigmoid_ukernel__##variant) \
      ->RangeMultiplier(10) \
      ->Range(1000, 1000000) \
      ->UseRealTime()

  // neon_frac_p9_p10_nr1recps variant.
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_frac_p9_p10_nr1recps_x16);

  // neonfma_rr1_p5_nr2fma variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2fma_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2fma_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2fma_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2fma_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2fma_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2fma_x24);

  // neonfma_rr1_p5_nr1recps1fma variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr1recps1fma_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr1recps1fma_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr1recps1fma_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr1recps1fma_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr1recps1fma_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr1recps1fma_x24);

  // neonfma_rr1_p5_nr2recps variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2recps_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2recps_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2recps_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2recps_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2recps_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_p5_nr2recps_x24);

  // neon_rr2_p5_nr2recps variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_p5_nr2recps_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_p5_nr2recps_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_p5_nr2recps_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_p5_nr2recps_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_p5_nr2recps_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_p5_nr2recps_x24);

  // neonfma_rr1_lut64_p2_nr2fma variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2fma_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2fma_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2fma_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2fma_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2fma_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2fma_x24);

  // neonfma_rr1_lut64_p2_nr1recps1fma variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr1recps1fma_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr1recps1fma_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr1recps1fma_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr1recps1fma_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr1recps1fma_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr1recps1fma_x24);

  // neonfma_rr1_lut64_p2_nr2recps variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2recps_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2recps_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2recps_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2recps_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2recps_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut64_p2_nr2recps_x24);

  // neon_rr2_lut64_p2_nr2recps variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut64_p2_nr2recps_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut64_p2_nr2recps_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut64_p2_nr2recps_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut64_p2_nr2recps_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut64_p2_nr2recps_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut64_p2_nr2recps_x24);

  // neonfma_rr1_lut2048_p1_nr2fma variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2fma_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2fma_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2fma_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2fma_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2fma_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2fma_x24);

  // neonfma_rr1_lut2048_p1_nr1recps1fma variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr1recps1fma_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr1recps1fma_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr1recps1fma_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr1recps1fma_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr1recps1fma_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr1recps1fma_x24);

  // neonfma_rr1_lut2048_p1_nr2recps variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2recps_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2recps_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2recps_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2recps_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2recps_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neonfma_rr1_lut2048_p1_nr2recps_x24);

  // neon_rr2_lut2048_p1_nr2recps variants.
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut2048_p1_nr2recps_x4);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut2048_p1_nr2recps_x8);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut2048_p1_nr2recps_x12);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut2048_p1_nr2recps_x16);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut2048_p1_nr2recps_x20);
  BENCHMARK_F32_SIGMOID_UKERNEL(neon_rr2_lut2048_p1_nr2recps_x24);

  #undef BENCHMARK_F32_SIGMOID_UKERNEL
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
433
Marat Dukhan7bee7512019-11-18 15:15:48 -0800434#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhanfa0a4322020-01-06 16:14:29 -0800435 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x8, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x8)
436 ->RangeMultiplier(10)
437 ->Range(1000, 1000000)
438 ->UseRealTime();
439 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x16, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16)
440 ->RangeMultiplier(10)
441 ->Range(1000, 1000000)
442 ->UseRealTime();
443 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x24, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24)
444 ->RangeMultiplier(10)
445 ->Range(1000, 1000000)
446 ->UseRealTime();
447 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x32, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32)
448 ->RangeMultiplier(10)
449 ->Range(1000, 1000000)
450 ->UseRealTime();
451 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x40, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40)
452 ->RangeMultiplier(10)
453 ->Range(1000, 1000000)
454 ->UseRealTime();
455 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x48, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48)
456 ->RangeMultiplier(10)
457 ->Range(1000, 1000000)
458 ->UseRealTime();
459 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x56, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56)
460 ->RangeMultiplier(10)
461 ->Range(1000, 1000000)
462 ->UseRealTime();
463 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x64, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64)
464 ->RangeMultiplier(10)
465 ->Range(1000, 1000000)
466 ->UseRealTime();
467 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x72, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72)
468 ->RangeMultiplier(10)
469 ->Range(1000, 1000000)
470 ->UseRealTime();
471 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x80, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80)
472 ->RangeMultiplier(10)
473 ->Range(1000, 1000000)
474 ->UseRealTime();
475
476 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x8, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x8)
477 ->RangeMultiplier(10)
478 ->Range(1000, 1000000)
479 ->UseRealTime();
480 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x16, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16)
481 ->RangeMultiplier(10)
482 ->Range(1000, 1000000)
483 ->UseRealTime();
484 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x24, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24)
485 ->RangeMultiplier(10)
486 ->Range(1000, 1000000)
487 ->UseRealTime();
488 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x32, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32)
489 ->RangeMultiplier(10)
490 ->Range(1000, 1000000)
491 ->UseRealTime();
492 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x40, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40)
493 ->RangeMultiplier(10)
494 ->Range(1000, 1000000)
495 ->UseRealTime();
496 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x48, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48)
497 ->RangeMultiplier(10)
498 ->Range(1000, 1000000)
499 ->UseRealTime();
500 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x56, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56)
501 ->RangeMultiplier(10)
502 ->Range(1000, 1000000)
503 ->UseRealTime();
504 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x64, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64)
505 ->RangeMultiplier(10)
506 ->Range(1000, 1000000)
507 ->UseRealTime();
508 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x72, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72)
509 ->RangeMultiplier(10)
510 ->Range(1000, 1000000)
511 ->UseRealTime();
512 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x80, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80)
513 ->RangeMultiplier(10)
514 ->Range(1000, 1000000)
515 ->UseRealTime();
516
517 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x8, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x8)
518 ->RangeMultiplier(10)
519 ->Range(1000, 1000000)
520 ->UseRealTime();
521 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x16, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16)
522 ->RangeMultiplier(10)
523 ->Range(1000, 1000000)
524 ->UseRealTime();
525 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x24, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24)
526 ->RangeMultiplier(10)
527 ->Range(1000, 1000000)
528 ->UseRealTime();
529 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x32, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32)
530 ->RangeMultiplier(10)
531 ->Range(1000, 1000000)
532 ->UseRealTime();
533 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x40, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40)
534 ->RangeMultiplier(10)
535 ->Range(1000, 1000000)
536 ->UseRealTime();
537 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x48, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48)
538 ->RangeMultiplier(10)
539 ->Range(1000, 1000000)
540 ->UseRealTime();
541 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x56, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56)
542 ->RangeMultiplier(10)
543 ->Range(1000, 1000000)
544 ->UseRealTime();
545 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x64, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64)
546 ->RangeMultiplier(10)
547 ->Range(1000, 1000000)
548 ->UseRealTime();
549 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x72, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72)
550 ->RangeMultiplier(10)
551 ->Range(1000, 1000000)
552 ->UseRealTime();
553 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x80, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80)
554 ->RangeMultiplier(10)
555 ->Range(1000, 1000000)
556 ->UseRealTime();
557
Marat Dukhan7bee7512019-11-18 15:15:48 -0800558 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x8, xnn_f32_sigmoid_ukernel__sse2_p5_div_x8)
559 ->RangeMultiplier(10)
560 ->Range(1000, 1000000)
561 ->UseRealTime();
562 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x16, xnn_f32_sigmoid_ukernel__sse2_p5_div_x16)
563 ->RangeMultiplier(10)
564 ->Range(1000, 1000000)
565 ->UseRealTime();
Marat Dukhan496e7352019-11-21 15:48:40 -0800566 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x8, xnn_f32_sigmoid_ukernel__sse41_p5_div_x8)
567 ->RangeMultiplier(10)
568 ->Range(1000, 1000000)
569 ->UseRealTime();
570 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x16, xnn_f32_sigmoid_ukernel__sse41_p5_div_x16)
571 ->RangeMultiplier(10)
572 ->Range(1000, 1000000)
573 ->UseRealTime();
Marat Dukhan7bee7512019-11-18 15:15:48 -0800574#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
575
Marat Dukhan8d3c07e2020-01-02 01:20:59 -0800576#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
577 BENCHMARK_CAPTURE(f32_sigmoid, psimd_p5_div_x4, xnn_f32_sigmoid_ukernel__psimd_p5_div_x4)
578 ->RangeMultiplier(10)
579 ->Range(1000, 1000000)
580 ->UseRealTime();
581 BENCHMARK_CAPTURE(f32_sigmoid, psimd_p5_div_x8, xnn_f32_sigmoid_ukernel__psimd_p5_div_x8)
582 ->RangeMultiplier(10)
583 ->Range(1000, 1000000)
584 ->UseRealTime();
585 BENCHMARK_CAPTURE(f32_sigmoid, psimd_p5_div_x12, xnn_f32_sigmoid_ukernel__psimd_p5_div_x12)
586 ->RangeMultiplier(10)
587 ->Range(1000, 1000000)
588 ->UseRealTime();
589 BENCHMARK_CAPTURE(f32_sigmoid, psimd_p5_div_x16, xnn_f32_sigmoid_ukernel__psimd_p5_div_x16)
590 ->RangeMultiplier(10)
591 ->Range(1000, 1000000)
592 ->UseRealTime();
593 BENCHMARK_CAPTURE(f32_sigmoid, psimd_p5_div_x20, xnn_f32_sigmoid_ukernel__psimd_p5_div_x20)
594 ->RangeMultiplier(10)
595 ->Range(1000, 1000000)
596 ->UseRealTime();
597 BENCHMARK_CAPTURE(f32_sigmoid, psimd_p5_div_x24, xnn_f32_sigmoid_ukernel__psimd_p5_div_x24)
598 ->RangeMultiplier(10)
599 ->Range(1000, 1000000)
600 ->UseRealTime();
601#endif // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
602
Marat Dukhan3a77ea72019-12-23 12:10:24 -0800603BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut2048_p1_div_x1, xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x1)
604 ->RangeMultiplier(10)
605 ->Range(1000, 1000000)
606 ->UseRealTime();
607BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut2048_p1_div_x2, xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2)
608 ->RangeMultiplier(10)
609 ->Range(1000, 1000000)
610 ->UseRealTime();
611BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut2048_p1_div_x4, xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4)
612 ->RangeMultiplier(10)
613 ->Range(1000, 1000000)
614 ->UseRealTime();
615
616BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut64_p2_div_x1, xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x1)
617 ->RangeMultiplier(10)
618 ->Range(1000, 1000000)
619 ->UseRealTime();
620BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut64_p2_div_x2, xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2)
621 ->RangeMultiplier(10)
622 ->Range(1000, 1000000)
623 ->UseRealTime();
624BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut64_p2_div_x4, xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4)
625 ->RangeMultiplier(10)
626 ->Range(1000, 1000000)
627 ->UseRealTime();
628
629BENCHMARK_CAPTURE(f32_sigmoid, scalar_p5_div_x1, xnn_f32_sigmoid_ukernel__scalar_p5_div_x1)
630 ->RangeMultiplier(10)
631 ->Range(1000, 1000000)
632 ->UseRealTime();
633BENCHMARK_CAPTURE(f32_sigmoid, scalar_p5_div_x2, xnn_f32_sigmoid_ukernel__scalar_p5_div_x2)
634 ->RangeMultiplier(10)
635 ->Range(1000, 1000000)
636 ->UseRealTime();
637BENCHMARK_CAPTURE(f32_sigmoid, scalar_p5_div_x4, xnn_f32_sigmoid_ukernel__scalar_p5_div_x4)
638 ->RangeMultiplier(10)
639 ->Range(1000, 1000000)
640 ->UseRealTime();
641
Marat Dukhan14bec502019-11-18 11:35:31 -0800642#ifndef XNNPACK_BENCHMARK_NO_MAIN
643BENCHMARK_MAIN();
644#endif