blob: 15a49dfaa8a58fb82eccd17296356b8864b9c880 [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
Marat Dukhan14bec502019-11-18 11:35:31 -080012#include <benchmark/benchmark.h>
13#include "bench/utils.h"
14#include <xnnpack/AlignedAllocator.h>
15#include <xnnpack/common.h>
16#include <xnnpack/params.h>
Marat Dukhan1e782c42019-11-21 17:02:40 -080017#include <xnnpack/vunary.h>
Marat Dukhan14bec502019-11-18 11:35:31 -080018
19
20static void f32_sigmoid(
21 benchmark::State& state,
Marat Dukhan1e782c42019-11-21 17:02:40 -080022 xnn_f32_vunary_ukernel_function sigmoid)
Marat Dukhan14bec502019-11-18 11:35:31 -080023{
24 const size_t elements = state.range(0);
25
26 std::random_device random_device;
27 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070028 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
Marat Dukhan14bec502019-11-18 11:35:31 -080029
30 std::vector<float, AlignedAllocator<float, 64>> x(elements);
31 std::vector<float, AlignedAllocator<float, 64>> y(elements);
32 std::generate(x.begin(), x.end(), std::ref(f32rng));
33 std::fill(y.begin(), y.end(), std::nanf(""));
34
35 for (auto _ : state) {
36 sigmoid(elements * sizeof(float), x.data(), y.data(), nullptr /* params */);
37 }
38
39 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
40
41 const size_t elements_per_iteration = elements;
42 state.counters["elements"] =
43 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
44
45 const size_t bytes_per_iteration = 2 * elements * sizeof(float);
46 state.counters["bytes"] =
47 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
48}
49
#if XNN_ARCH_ARM64
  // Registers f32_sigmoid under the case name `variant`, bound to the
  // micro-kernel xnn_f32_sigmoid_ukernel__<variant>. Every registration uses
  // the same settings: sizes from 1000 to 1000000 with a range multiplier of
  // 10, timed with UseRealTime(). Keep each invocation on its own line.
  #define XNN_BENCHMARK_F32_SIGMOID(variant) \
    BENCHMARK_CAPTURE(f32_sigmoid, variant, xnn_f32_sigmoid_ukernel__##variant) \
      ->RangeMultiplier(10) \
      ->Range(1000, 1000000) \
      ->UseRealTime()

  // NEON FMA, p5 polynomial, division-based variants.
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_div_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_div_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_div_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_div_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_div_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_div_x24);

  // NEON FMA, lut64 + p2, division-based variants.
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_div_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_div_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_div_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_div_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_div_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_div_x24);

  // NEON FMA, lut2048 + p1, division-based variants.
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_div_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_div_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_div_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_div_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_div_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_div_x24);

  #undef XNN_BENCHMARK_F32_SIGMOID
#endif  // XNN_ARCH_ARM64
126
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Registers f32_sigmoid under the case name `variant`, bound to the
  // micro-kernel xnn_f32_sigmoid_ukernel__<variant>. Every registration uses
  // the same settings: sizes from 1000 to 1000000 with a range multiplier of
  // 10, timed with UseRealTime(). Keep each invocation on its own line.
  #define XNN_BENCHMARK_F32_SIGMOID(variant) \
    BENCHMARK_CAPTURE(f32_sigmoid, variant, xnn_f32_sigmoid_ukernel__##variant) \
      ->RangeMultiplier(10) \
      ->Range(1000, 1000000) \
      ->UseRealTime()

  XNN_BENCHMARK_F32_SIGMOID(neon_frac_p9_p10_nr1recps_x16);

  // p5 polynomial variants.
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2fma_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2fma_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2fma_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2fma_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2fma_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2fma_x24);

  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr1recps1fma_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr1recps1fma_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr1recps1fma_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr1recps1fma_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr1recps1fma_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr1recps1fma_x24);

  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2recps_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2recps_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2recps_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2recps_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2recps_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_p5_nr2recps_x24);

  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_p5_nr2recps_x4);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_p5_nr2recps_x8);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_p5_nr2recps_x12);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_p5_nr2recps_x16);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_p5_nr2recps_x20);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_p5_nr2recps_x24);

  // lut64 + p2 variants.
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2fma_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2fma_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2fma_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2fma_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2fma_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2fma_x24);

  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr1recps1fma_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr1recps1fma_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr1recps1fma_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr1recps1fma_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr1recps1fma_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr1recps1fma_x24);

  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2recps_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2recps_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2recps_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2recps_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2recps_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut64_p2_nr2recps_x24);

  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut64_p2_nr2recps_x4);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut64_p2_nr2recps_x8);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut64_p2_nr2recps_x12);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut64_p2_nr2recps_x16);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut64_p2_nr2recps_x20);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut64_p2_nr2recps_x24);

  // lut2048 + p1 variants.
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2fma_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2fma_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2fma_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2fma_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2fma_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2fma_x24);

  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr1recps1fma_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr1recps1fma_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr1recps1fma_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr1recps1fma_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr1recps1fma_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr1recps1fma_x24);

  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2recps_x4);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2recps_x8);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2recps_x12);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2recps_x16);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2recps_x20);
  XNN_BENCHMARK_F32_SIGMOID(neonfma_rr1_lut2048_p1_nr2recps_x24);

  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut2048_p1_nr2recps_x4);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut2048_p1_nr2recps_x8);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut2048_p1_nr2recps_x12);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut2048_p1_nr2recps_x16);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut2048_p1_nr2recps_x20);
  XNN_BENCHMARK_F32_SIGMOID(neon_rr2_lut2048_p1_nr2recps_x24);

  #undef XNN_BENCHMARK_F32_SIGMOID
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
433
Marat Dukhan7bee7512019-11-18 15:15:48 -0800434#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhand9ca7e62020-09-23 23:45:29 -0700435 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x16, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x16)
436 ->RangeMultiplier(10)
437 ->Range(1000, 1000000)
438 ->UseRealTime();
439 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x32, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x32)
440 ->RangeMultiplier(10)
441 ->Range(1000, 1000000)
442 ->UseRealTime();
443 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x48, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x48)
444 ->RangeMultiplier(10)
445 ->Range(1000, 1000000)
446 ->UseRealTime();
447 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x64, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x64)
448 ->RangeMultiplier(10)
449 ->Range(1000, 1000000)
450 ->UseRealTime();
451 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x80, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x80)
452 ->RangeMultiplier(10)
453 ->Range(1000, 1000000)
454 ->UseRealTime();
455 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x96, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x96)
456 ->RangeMultiplier(10)
457 ->Range(1000, 1000000)
458 ->UseRealTime();
459 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x112, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x112)
460 ->RangeMultiplier(10)
461 ->Range(1000, 1000000)
462 ->UseRealTime();
463 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_div_x128, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_div_x128)
464 ->RangeMultiplier(10)
465 ->Range(1000, 1000000)
466 ->UseRealTime();
467
468 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x16, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x16)
469 ->RangeMultiplier(10)
470 ->Range(1000, 1000000)
471 ->UseRealTime();
472 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x32, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x32)
473 ->RangeMultiplier(10)
474 ->Range(1000, 1000000)
475 ->UseRealTime();
476 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x48, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x48)
477 ->RangeMultiplier(10)
478 ->Range(1000, 1000000)
479 ->UseRealTime();
480 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x64, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x64)
481 ->RangeMultiplier(10)
482 ->Range(1000, 1000000)
483 ->UseRealTime();
484 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x80, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x80)
485 ->RangeMultiplier(10)
486 ->Range(1000, 1000000)
487 ->UseRealTime();
488 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x96, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x96)
489 ->RangeMultiplier(10)
490 ->Range(1000, 1000000)
491 ->UseRealTime();
492 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x112, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x112)
493 ->RangeMultiplier(10)
494 ->Range(1000, 1000000)
495 ->UseRealTime();
496 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_p5_scalef_nr1fma_x128, xnn_f32_sigmoid_ukernel__avx512f_rr1_p5_scalef_nr1fma_x128)
497 ->RangeMultiplier(10)
498 ->Range(1000, 1000000)
499 ->UseRealTime();
500
501 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x16, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x16)
502 ->RangeMultiplier(10)
503 ->Range(1000, 1000000)
504 ->UseRealTime();
505 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x32, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x32)
506 ->RangeMultiplier(10)
507 ->Range(1000, 1000000)
508 ->UseRealTime();
509 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x48, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x48)
510 ->RangeMultiplier(10)
511 ->Range(1000, 1000000)
512 ->UseRealTime();
513 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x64, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x64)
514 ->RangeMultiplier(10)
515 ->Range(1000, 1000000)
516 ->UseRealTime();
517 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x80, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x80)
518 ->RangeMultiplier(10)
519 ->Range(1000, 1000000)
520 ->UseRealTime();
521 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x96, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x96)
522 ->RangeMultiplier(10)
523 ->Range(1000, 1000000)
524 ->UseRealTime();
525 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x112, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x112)
526 ->RangeMultiplier(10)
527 ->Range(1000, 1000000)
528 ->UseRealTime();
529 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_div_x128, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_div_x128)
530 ->RangeMultiplier(10)
531 ->Range(1000, 1000000)
532 ->UseRealTime();
533
534 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x16, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x16)
535 ->RangeMultiplier(10)
536 ->Range(1000, 1000000)
537 ->UseRealTime();
538 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x32, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x32)
539 ->RangeMultiplier(10)
540 ->Range(1000, 1000000)
541 ->UseRealTime();
542 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x48, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x48)
543 ->RangeMultiplier(10)
544 ->Range(1000, 1000000)
545 ->UseRealTime();
546 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x64, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x64)
547 ->RangeMultiplier(10)
548 ->Range(1000, 1000000)
549 ->UseRealTime();
550 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x80, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x80)
551 ->RangeMultiplier(10)
552 ->Range(1000, 1000000)
553 ->UseRealTime();
554 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x96, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x96)
555 ->RangeMultiplier(10)
556 ->Range(1000, 1000000)
557 ->UseRealTime();
558 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x112, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x112)
559 ->RangeMultiplier(10)
560 ->Range(1000, 1000000)
561 ->UseRealTime();
562 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut16_p3_perm_scalef_nr1fma_x128, xnn_f32_sigmoid_ukernel__avx512f_rr1_lut16_p3_perm_scalef_nr1fma_x128)
563 ->RangeMultiplier(10)
564 ->Range(1000, 1000000)
565 ->UseRealTime();
566
567 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x16, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x16)
568 ->RangeMultiplier(10)
569 ->Range(1000, 1000000)
570 ->UseRealTime();
571 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x32, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x32)
572 ->RangeMultiplier(10)
573 ->Range(1000, 1000000)
574 ->UseRealTime();
575 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x48, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x48)
576 ->RangeMultiplier(10)
577 ->Range(1000, 1000000)
578 ->UseRealTime();
579 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x64, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x64)
580 ->RangeMultiplier(10)
581 ->Range(1000, 1000000)
582 ->UseRealTime();
583 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x80, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x80)
584 ->RangeMultiplier(10)
585 ->Range(1000, 1000000)
586 ->UseRealTime();
587 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x96, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x96)
588 ->RangeMultiplier(10)
589 ->Range(1000, 1000000)
590 ->UseRealTime();
591 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x112, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x112)
592 ->RangeMultiplier(10)
593 ->Range(1000, 1000000)
594 ->UseRealTime();
595 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_div_x128, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_x128)
596 ->RangeMultiplier(10)
597 ->Range(1000, 1000000)
598 ->UseRealTime();
599
600 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x16, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x16)
601 ->RangeMultiplier(10)
602 ->Range(1000, 1000000)
603 ->UseRealTime();
604 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x32, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x32)
605 ->RangeMultiplier(10)
606 ->Range(1000, 1000000)
607 ->UseRealTime();
608 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x48, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x48)
609 ->RangeMultiplier(10)
610 ->Range(1000, 1000000)
611 ->UseRealTime();
612 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x64, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x64)
613 ->RangeMultiplier(10)
614 ->Range(1000, 1000000)
615 ->UseRealTime();
616 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x80, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x80)
617 ->RangeMultiplier(10)
618 ->Range(1000, 1000000)
619 ->UseRealTime();
620 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x96, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x96)
621 ->RangeMultiplier(10)
622 ->Range(1000, 1000000)
623 ->UseRealTime();
624 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x112, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x112)
625 ->RangeMultiplier(10)
626 ->Range(1000, 1000000)
627 ->UseRealTime();
628 BENCHMARK_CAPTURE(f32_sigmoid, avx512f_lut32_p2_perm2_scalef_nr1fma_x128, xnn_f32_sigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_nr1fma_x128)
629 ->RangeMultiplier(10)
630 ->Range(1000, 1000000)
631 ->UseRealTime();
632
Marat Dukhanfa0a4322020-01-06 16:14:29 -0800633 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x8, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x8)
634 ->RangeMultiplier(10)
635 ->Range(1000, 1000000)
636 ->UseRealTime();
637 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x16, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x16)
638 ->RangeMultiplier(10)
639 ->Range(1000, 1000000)
640 ->UseRealTime();
641 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x24, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x24)
642 ->RangeMultiplier(10)
643 ->Range(1000, 1000000)
644 ->UseRealTime();
645 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x32, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x32)
646 ->RangeMultiplier(10)
647 ->Range(1000, 1000000)
648 ->UseRealTime();
649 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x40, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x40)
650 ->RangeMultiplier(10)
651 ->Range(1000, 1000000)
652 ->UseRealTime();
653 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x48, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x48)
654 ->RangeMultiplier(10)
655 ->Range(1000, 1000000)
656 ->UseRealTime();
657 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x56, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x56)
658 ->RangeMultiplier(10)
659 ->Range(1000, 1000000)
660 ->UseRealTime();
661 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x64, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x64)
662 ->RangeMultiplier(10)
663 ->Range(1000, 1000000)
664 ->UseRealTime();
665 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x72, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x72)
666 ->RangeMultiplier(10)
667 ->Range(1000, 1000000)
668 ->UseRealTime();
669 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_div_x80, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_div_x80)
670 ->RangeMultiplier(10)
671 ->Range(1000, 1000000)
672 ->UseRealTime();
673
674 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x8, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x8)
675 ->RangeMultiplier(10)
676 ->Range(1000, 1000000)
677 ->UseRealTime();
678 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x16, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x16)
679 ->RangeMultiplier(10)
680 ->Range(1000, 1000000)
681 ->UseRealTime();
682 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x24, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x24)
683 ->RangeMultiplier(10)
684 ->Range(1000, 1000000)
685 ->UseRealTime();
686 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x32, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x32)
687 ->RangeMultiplier(10)
688 ->Range(1000, 1000000)
689 ->UseRealTime();
690 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x40, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x40)
691 ->RangeMultiplier(10)
692 ->Range(1000, 1000000)
693 ->UseRealTime();
694 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x48, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x48)
695 ->RangeMultiplier(10)
696 ->Range(1000, 1000000)
697 ->UseRealTime();
698 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x56, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x56)
699 ->RangeMultiplier(10)
700 ->Range(1000, 1000000)
701 ->UseRealTime();
702 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x64, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x64)
703 ->RangeMultiplier(10)
704 ->Range(1000, 1000000)
705 ->UseRealTime();
706 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x72, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x72)
707 ->RangeMultiplier(10)
708 ->Range(1000, 1000000)
709 ->UseRealTime();
710 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr1fma_x80, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr1fma_x80)
711 ->RangeMultiplier(10)
712 ->Range(1000, 1000000)
713 ->UseRealTime();
714
715 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x8, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x8)
716 ->RangeMultiplier(10)
717 ->Range(1000, 1000000)
718 ->UseRealTime();
719 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x16, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x16)
720 ->RangeMultiplier(10)
721 ->Range(1000, 1000000)
722 ->UseRealTime();
723 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x24, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x24)
724 ->RangeMultiplier(10)
725 ->Range(1000, 1000000)
726 ->UseRealTime();
727 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x32, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x32)
728 ->RangeMultiplier(10)
729 ->Range(1000, 1000000)
730 ->UseRealTime();
731 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x40, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x40)
732 ->RangeMultiplier(10)
733 ->Range(1000, 1000000)
734 ->UseRealTime();
735 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x48, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x48)
736 ->RangeMultiplier(10)
737 ->Range(1000, 1000000)
738 ->UseRealTime();
739 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x56, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x56)
740 ->RangeMultiplier(10)
741 ->Range(1000, 1000000)
742 ->UseRealTime();
743 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x64, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x64)
744 ->RangeMultiplier(10)
745 ->Range(1000, 1000000)
746 ->UseRealTime();
747 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x72, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x72)
748 ->RangeMultiplier(10)
749 ->Range(1000, 1000000)
750 ->UseRealTime();
751 BENCHMARK_CAPTURE(f32_sigmoid, avx2_p5_nr2fma_x80, xnn_f32_sigmoid_ukernel__avx2_rr1_p5_nr2fma_x80)
752 ->RangeMultiplier(10)
753 ->Range(1000, 1000000)
754 ->UseRealTime();
755
Marat Dukhand243c1a2020-09-17 18:48:10 -0700756 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x4, xnn_f32_sigmoid_ukernel__sse41_p5_div_x4)
Marat Dukhan7bee7512019-11-18 15:15:48 -0800757 ->RangeMultiplier(10)
758 ->Range(1000, 1000000)
759 ->UseRealTime();
Marat Dukhan496e7352019-11-21 15:48:40 -0800760 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x8, xnn_f32_sigmoid_ukernel__sse41_p5_div_x8)
761 ->RangeMultiplier(10)
762 ->Range(1000, 1000000)
763 ->UseRealTime();
Marat Dukhand243c1a2020-09-17 18:48:10 -0700764 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x12, xnn_f32_sigmoid_ukernel__sse41_p5_div_x12)
765 ->RangeMultiplier(10)
766 ->Range(1000, 1000000)
767 ->UseRealTime();
Marat Dukhan496e7352019-11-21 15:48:40 -0800768 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x16, xnn_f32_sigmoid_ukernel__sse41_p5_div_x16)
769 ->RangeMultiplier(10)
770 ->Range(1000, 1000000)
771 ->UseRealTime();
Marat Dukhand243c1a2020-09-17 18:48:10 -0700772 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x20, xnn_f32_sigmoid_ukernel__sse41_p5_div_x20)
773 ->RangeMultiplier(10)
774 ->Range(1000, 1000000)
775 ->UseRealTime();
776 BENCHMARK_CAPTURE(f32_sigmoid, sse41_p5_div_x24, xnn_f32_sigmoid_ukernel__sse41_p5_div_x24)
777 ->RangeMultiplier(10)
778 ->Range(1000, 1000000)
779 ->UseRealTime();
780
781 BENCHMARK_CAPTURE(f32_sigmoid, sse41_lut64_p2_div_x4, xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x4)
782 ->RangeMultiplier(10)
783 ->Range(1000, 1000000)
784 ->UseRealTime();
785 BENCHMARK_CAPTURE(f32_sigmoid, sse41_lut64_p2_div_x8, xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8)
786 ->RangeMultiplier(10)
787 ->Range(1000, 1000000)
788 ->UseRealTime();
789 BENCHMARK_CAPTURE(f32_sigmoid, sse41_lut64_p2_div_x12, xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12)
790 ->RangeMultiplier(10)
791 ->Range(1000, 1000000)
792 ->UseRealTime();
793 BENCHMARK_CAPTURE(f32_sigmoid, sse41_lut64_p2_div_x16, xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16)
794 ->RangeMultiplier(10)
795 ->Range(1000, 1000000)
796 ->UseRealTime();
797 BENCHMARK_CAPTURE(f32_sigmoid, sse41_lut64_p2_div_x20, xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20)
798 ->RangeMultiplier(10)
799 ->Range(1000, 1000000)
800 ->UseRealTime();
801 BENCHMARK_CAPTURE(f32_sigmoid, sse41_lut64_p2_div_x24, xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24)
802 ->RangeMultiplier(10)
803 ->Range(1000, 1000000)
804 ->UseRealTime();
805
806 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x4, xnn_f32_sigmoid_ukernel__sse2_p5_div_x4)
807 ->RangeMultiplier(10)
808 ->Range(1000, 1000000)
809 ->UseRealTime();
810 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x8, xnn_f32_sigmoid_ukernel__sse2_p5_div_x8)
811 ->RangeMultiplier(10)
812 ->Range(1000, 1000000)
813 ->UseRealTime();
814 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x12, xnn_f32_sigmoid_ukernel__sse2_p5_div_x12)
815 ->RangeMultiplier(10)
816 ->Range(1000, 1000000)
817 ->UseRealTime();
818 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x16, xnn_f32_sigmoid_ukernel__sse2_p5_div_x16)
819 ->RangeMultiplier(10)
820 ->Range(1000, 1000000)
821 ->UseRealTime();
822 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x20, xnn_f32_sigmoid_ukernel__sse2_p5_div_x20)
823 ->RangeMultiplier(10)
824 ->Range(1000, 1000000)
825 ->UseRealTime();
826 BENCHMARK_CAPTURE(f32_sigmoid, sse2_p5_div_x24, xnn_f32_sigmoid_ukernel__sse2_p5_div_x24)
827 ->RangeMultiplier(10)
828 ->Range(1000, 1000000)
829 ->UseRealTime();
830
831 BENCHMARK_CAPTURE(f32_sigmoid, sse2_lut64_p2_div_x4, xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x4)
832 ->RangeMultiplier(10)
833 ->Range(1000, 1000000)
834 ->UseRealTime();
835 BENCHMARK_CAPTURE(f32_sigmoid, sse2_lut64_p2_div_x8, xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x8)
836 ->RangeMultiplier(10)
837 ->Range(1000, 1000000)
838 ->UseRealTime();
839 BENCHMARK_CAPTURE(f32_sigmoid, sse2_lut64_p2_div_x12, xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x12)
840 ->RangeMultiplier(10)
841 ->Range(1000, 1000000)
842 ->UseRealTime();
843 BENCHMARK_CAPTURE(f32_sigmoid, sse2_lut64_p2_div_x16, xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x16)
844 ->RangeMultiplier(10)
845 ->Range(1000, 1000000)
846 ->UseRealTime();
847 BENCHMARK_CAPTURE(f32_sigmoid, sse2_lut64_p2_div_x20, xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x20)
848 ->RangeMultiplier(10)
849 ->Range(1000, 1000000)
850 ->UseRealTime();
851 BENCHMARK_CAPTURE(f32_sigmoid, sse2_lut64_p2_div_x24, xnn_f32_sigmoid_ukernel__sse2_lut64_p2_div_x24)
852 ->RangeMultiplier(10)
853 ->Range(1000, 1000000)
854 ->UseRealTime();
Marat Dukhan7bee7512019-11-18 15:15:48 -0800855#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
856
Marat Dukhanb3635ed2020-07-16 12:36:28 -0700857#if XNN_ARCH_WASMSIMD
Marat Dukhand187a5b2020-07-20 01:07:17 -0700858 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_lut64_p2_div_x4, xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x4)
859 ->RangeMultiplier(10)
860 ->Range(1000, 1000000)
861 ->UseRealTime();
862 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_lut64_p2_div_x8, xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8)
863 ->RangeMultiplier(10)
864 ->Range(1000, 1000000)
865 ->UseRealTime();
866 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_lut64_p2_div_x12, xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12)
867 ->RangeMultiplier(10)
868 ->Range(1000, 1000000)
869 ->UseRealTime();
870 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_lut64_p2_div_x16, xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x16)
871 ->RangeMultiplier(10)
872 ->Range(1000, 1000000)
873 ->UseRealTime();
874 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_lut64_p2_div_x20, xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x20)
875 ->RangeMultiplier(10)
876 ->Range(1000, 1000000)
877 ->UseRealTime();
878 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_lut64_p2_div_x24, xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x24)
879 ->RangeMultiplier(10)
880 ->Range(1000, 1000000)
881 ->UseRealTime();
882
Marat Dukhanb3635ed2020-07-16 12:36:28 -0700883 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_p5_div_x4, xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x4)
884 ->RangeMultiplier(10)
885 ->Range(1000, 1000000)
886 ->UseRealTime();
887 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_p5_div_x8, xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x8)
888 ->RangeMultiplier(10)
889 ->Range(1000, 1000000)
890 ->UseRealTime();
891 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_p5_div_x12, xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x12)
892 ->RangeMultiplier(10)
893 ->Range(1000, 1000000)
894 ->UseRealTime();
895 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_p5_div_x16, xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x16)
896 ->RangeMultiplier(10)
897 ->Range(1000, 1000000)
898 ->UseRealTime();
899 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_p5_div_x20, xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x20)
900 ->RangeMultiplier(10)
901 ->Range(1000, 1000000)
902 ->UseRealTime();
903 BENCHMARK_CAPTURE(f32_sigmoid, wasmsimd_p5_div_x24, xnn_f32_sigmoid_ukernel__wasmsimd_p5_div_x24)
904 ->RangeMultiplier(10)
905 ->Range(1000, 1000000)
906 ->UseRealTime();
907#endif // XNN_ARCH_WASMSIMD
908
Marat Dukhan3a77ea72019-12-23 12:10:24 -0800909BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut2048_p1_div_x1, xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x1)
910 ->RangeMultiplier(10)
911 ->Range(1000, 1000000)
912 ->UseRealTime();
913BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut2048_p1_div_x2, xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x2)
914 ->RangeMultiplier(10)
915 ->Range(1000, 1000000)
916 ->UseRealTime();
917BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut2048_p1_div_x4, xnn_f32_sigmoid_ukernel__scalar_lut2048_p1_div_x4)
918 ->RangeMultiplier(10)
919 ->Range(1000, 1000000)
920 ->UseRealTime();
921
922BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut64_p2_div_x1, xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x1)
923 ->RangeMultiplier(10)
924 ->Range(1000, 1000000)
925 ->UseRealTime();
926BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut64_p2_div_x2, xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x2)
927 ->RangeMultiplier(10)
928 ->Range(1000, 1000000)
929 ->UseRealTime();
930BENCHMARK_CAPTURE(f32_sigmoid, scalar_lut64_p2_div_x4, xnn_f32_sigmoid_ukernel__scalar_lut64_p2_div_x4)
931 ->RangeMultiplier(10)
932 ->Range(1000, 1000000)
933 ->UseRealTime();
934
935BENCHMARK_CAPTURE(f32_sigmoid, scalar_p5_div_x1, xnn_f32_sigmoid_ukernel__scalar_p5_div_x1)
936 ->RangeMultiplier(10)
937 ->Range(1000, 1000000)
938 ->UseRealTime();
939BENCHMARK_CAPTURE(f32_sigmoid, scalar_p5_div_x2, xnn_f32_sigmoid_ukernel__scalar_p5_div_x2)
940 ->RangeMultiplier(10)
941 ->Range(1000, 1000000)
942 ->UseRealTime();
943BENCHMARK_CAPTURE(f32_sigmoid, scalar_p5_div_x4, xnn_f32_sigmoid_ukernel__scalar_p5_div_x4)
944 ->RangeMultiplier(10)
945 ->Range(1000, 1000000)
946 ->UseRealTime();
947
Marat Dukhan14bec502019-11-18 11:35:31 -0800948#ifndef XNNPACK_BENCHMARK_NO_MAIN
949BENCHMARK_MAIN();
950#endif