blob: f3393b46d88d4a25ed3df9c62d5c627e2190532d [file] [log] [blame]
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <benchmark/benchmark.h>
13#include "bench/utils.h"
14
15#include <xnnpack/AlignedAllocator.h>
16#include <xnnpack/common.h>
17#include <xnnpack/vunary.h>
18#include <xnnpack/params.h>
19#include <xnnpack/params-init.h>
20
21
22static void f32_elu(
23 benchmark::State& state,
24 xnn_f32_velu_ukernel_function elu,
25 benchmark::utils::IsaCheckFunction isa_check = nullptr)
26{
27 if (isa_check && !isa_check(state)) {
28 return;
29 }
30
31 const size_t elements = state.range(0);
32
33 std::random_device random_device;
34 auto rng = std::mt19937(random_device());
35 auto f32rng = std::bind(std::uniform_real_distribution<float>(-20.0f, 10.0f), std::ref(rng));
36
37 std::vector<float, AlignedAllocator<float, 64>> x(elements);
38 std::vector<float, AlignedAllocator<float, 64>> y(elements);
39 std::generate(x.begin(), x.end(), std::ref(f32rng));
40 std::fill(y.begin(), y.end(), std::nanf(""));
41
42 const union xnn_f32_elu_params params =
43 xnn_init_f32_elu_params(1.0f /* prescale */, 1.0f /* alpha */, 1.0f /* beta */);
44 for (auto _ : state) {
45 elu(elements * sizeof(float), x.data(), y.data(), &params);
46 }
47
48 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
49 if (cpu_frequency != 0) {
50 state.counters["cpufreq"] = cpu_frequency;
51 }
52
53 const size_t elements_per_iteration = elements;
54 state.counters["elements"] =
55 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
56
57 const size_t bytes_per_iteration = 2 * elements * sizeof(float);
58 state.counters["bytes"] =
59 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
60}
61
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Sweep settings shared by every ARM registration below:
  // RangeMultiplier(10), Range(1000, 1000000), measured in real time.
  static void ArmEluBenchmarkSizes(benchmark::internal::Benchmark* b) {
    b->RangeMultiplier(10)->Range(1000, 1000000)->UseRealTime();
  }

  // neonfma_rr1_lut16_p3 kernels (gated by CheckNEONFMA).
  BENCHMARK_CAPTURE(f32_elu, neonfma_lut16_p3_x4, xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_lut16_p3_x8, xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_lut16_p3_x12, xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_lut16_p3_x16, xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_lut16_p3_x20, xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_lut16_p3_x24, xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);

  // neonfma_rr1_p6 kernels (gated by CheckNEONFMA).
  BENCHMARK_CAPTURE(f32_elu, neonfma_p6_x4, xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_p6_x8, xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_p6_x12, xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_p6_x16, xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_p6_x20, xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neonfma_p6_x24, xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, benchmark::utils::CheckNEONFMA)->Apply(ArmEluBenchmarkSizes);

  // neon_rr2_lut16_p3 kernels (gated by CheckNEON).
  BENCHMARK_CAPTURE(f32_elu, neon_lut16_p3_x4, xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_lut16_p3_x8, xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_lut16_p3_x12, xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_lut16_p3_x16, xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_lut16_p3_x20, xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_lut16_p3_x24, xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);

  // neon_rr2_p6 kernels (gated by CheckNEON).
  BENCHMARK_CAPTURE(f32_elu, neon_p6_x4, xnn_f32_velu_ukernel__neon_rr2_p6_x4, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_p6_x8, xnn_f32_velu_ukernel__neon_rr2_p6_x8, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_p6_x12, xnn_f32_velu_ukernel__neon_rr2_p6_x12, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_p6_x16, xnn_f32_velu_ukernel__neon_rr2_p6_x16, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_p6_x20, xnn_f32_velu_ukernel__neon_rr2_p6_x20, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, neon_p6_x24, xnn_f32_velu_ukernel__neon_rr2_p6_x24, benchmark::utils::CheckNEON)->Apply(ArmEluBenchmarkSizes);
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
163
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Sweep settings shared by every x86 registration below:
  // RangeMultiplier(10), Range(1000, 1000000), measured in real time.
  static void X86EluBenchmarkSizes(benchmark::internal::Benchmark* b) {
    b->RangeMultiplier(10)->Range(1000, 1000000)->UseRealTime();
  }

  // avx512f_rr1_lut16_p3_perm kernels (gated by CheckAVX512F).
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x16, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x32, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x48, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x64, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x80, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x96, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x112, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_lut16_p3_x128, xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);

  // avx512f_rr1_p6 kernels (gated by CheckAVX512F).
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x16, xnn_f32_velu_ukernel__avx512f_rr1_p6_x16, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x32, xnn_f32_velu_ukernel__avx512f_rr1_p6_x32, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x48, xnn_f32_velu_ukernel__avx512f_rr1_p6_x48, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x64, xnn_f32_velu_ukernel__avx512f_rr1_p6_x64, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x80, xnn_f32_velu_ukernel__avx512f_rr1_p6_x80, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x96, xnn_f32_velu_ukernel__avx512f_rr1_p6_x96, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x112, xnn_f32_velu_ukernel__avx512f_rr1_p6_x112, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx512f_p6_x128, xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, benchmark::utils::CheckAVX512F)->Apply(X86EluBenchmarkSizes);

  // avx2_rr1_lut4_p4_perm kernels (gated by CheckAVX2).
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x8, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x16, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x24, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x32, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x40, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x48, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x56, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x64, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x72, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut4_p4_x80, xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);

  // avx2_rr1_lut8_p4_perm kernels (gated by CheckAVX2).
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x8, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x16, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x24, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x32, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x40, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x48, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x56, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x64, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x72, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut8_p4_x80, xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);

  // avx2_rr1_lut16_p3_gather kernels (gated by CheckAVX2).
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x8, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x16, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x24, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x32, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x40, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x48, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x56, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x64, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x72, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_lut16_p3_x80, xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);

  // avx2_rr1_p6 kernels (gated by CheckAVX2).
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x8, xnn_f32_velu_ukernel__avx2_rr1_p6_x8, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x16, xnn_f32_velu_ukernel__avx2_rr1_p6_x16, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x24, xnn_f32_velu_ukernel__avx2_rr1_p6_x24, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x32, xnn_f32_velu_ukernel__avx2_rr1_p6_x32, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x40, xnn_f32_velu_ukernel__avx2_rr1_p6_x40, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x48, xnn_f32_velu_ukernel__avx2_rr1_p6_x48, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x56, xnn_f32_velu_ukernel__avx2_rr1_p6_x56, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x64, xnn_f32_velu_ukernel__avx2_rr1_p6_x64, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x72, xnn_f32_velu_ukernel__avx2_rr1_p6_x72, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx2_p6_x80, xnn_f32_velu_ukernel__avx2_rr1_p6_x80, benchmark::utils::CheckAVX2)->Apply(X86EluBenchmarkSizes);

  // avx_rr2_lut4_p4_perm kernels (gated by CheckAVX).
  BENCHMARK_CAPTURE(f32_elu, avx_lut4_p4_x8, xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut4_p4_x16, xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut4_p4_x24, xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut4_p4_x32, xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut4_p4_x40, xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut4_p4_x48, xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);

  // avx_rr2_lut16_p3 kernels (gated by CheckAVX).
  BENCHMARK_CAPTURE(f32_elu, avx_lut16_p3_x8, xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut16_p3_x16, xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut16_p3_x24, xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut16_p3_x32, xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut16_p3_x40, xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_lut16_p3_x48, xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);

  // avx_rr2_p6 kernels (gated by CheckAVX).
  BENCHMARK_CAPTURE(f32_elu, avx_p6_x8, xnn_f32_velu_ukernel__avx_rr2_p6_x8, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_p6_x16, xnn_f32_velu_ukernel__avx_rr2_p6_x16, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_p6_x24, xnn_f32_velu_ukernel__avx_rr2_p6_x24, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_p6_x32, xnn_f32_velu_ukernel__avx_rr2_p6_x32, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_p6_x40, xnn_f32_velu_ukernel__avx_rr2_p6_x40, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, avx_p6_x48, xnn_f32_velu_ukernel__avx_rr2_p6_x48, benchmark::utils::CheckAVX)->Apply(X86EluBenchmarkSizes);

  // sse41_rr2_lut16_p3 kernels (gated by CheckSSE41).
  BENCHMARK_CAPTURE(f32_elu, sse41_lut16_p3_x4, xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_lut16_p3_x8, xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_lut16_p3_x12, xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_lut16_p3_x16, xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_lut16_p3_x20, xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_lut16_p3_x24, xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);

  // sse41_rr2_p6 kernels (gated by CheckSSE41).
  BENCHMARK_CAPTURE(f32_elu, sse41_p6_x4, xnn_f32_velu_ukernel__sse41_rr2_p6_x4, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_p6_x8, xnn_f32_velu_ukernel__sse41_rr2_p6_x8, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_p6_x12, xnn_f32_velu_ukernel__sse41_rr2_p6_x12, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_p6_x16, xnn_f32_velu_ukernel__sse41_rr2_p6_x16, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_p6_x20, xnn_f32_velu_ukernel__sse41_rr2_p6_x20, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse41_p6_x24, xnn_f32_velu_ukernel__sse41_rr2_p6_x24, benchmark::utils::CheckSSE41)->Apply(X86EluBenchmarkSizes);

  // sse2_rr2_lut16_p3 kernels (registered without an ISA check).
  BENCHMARK_CAPTURE(f32_elu, sse2_lut16_p3_x4, xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_lut16_p3_x8, xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_lut16_p3_x12, xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_lut16_p3_x16, xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_lut16_p3_x20, xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_lut16_p3_x24, xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24)->Apply(X86EluBenchmarkSizes);

  // sse2_rr2_p6 kernels (registered without an ISA check).
  BENCHMARK_CAPTURE(f32_elu, sse2_p6_x4, xnn_f32_velu_ukernel__sse2_rr2_p6_x4)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_p6_x8, xnn_f32_velu_ukernel__sse2_rr2_p6_x8)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_p6_x12, xnn_f32_velu_ukernel__sse2_rr2_p6_x12)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_p6_x16, xnn_f32_velu_ukernel__sse2_rr2_p6_x16)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_p6_x20, xnn_f32_velu_ukernel__sse2_rr2_p6_x20)->Apply(X86EluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, sse2_p6_x24, xnn_f32_velu_ukernel__sse2_rr2_p6_x24)->Apply(X86EluBenchmarkSizes);
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
570
#if XNN_ARCH_WASMSIMD
  // Sweep settings shared by every WAsm SIMD registration below:
  // RangeMultiplier(10), Range(1000, 1000000), measured in real time.
  static void WasmSimdEluBenchmarkSizes(benchmark::internal::Benchmark* b) {
    b->RangeMultiplier(10)->Range(1000, 1000000)->UseRealTime();
  }

  // wasmsimd_arm_rr2_lut16_p3 kernels.
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_lut16_p3_x4, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_lut16_p3_x8, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_lut16_p3_x12, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_lut16_p3_x16, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_lut16_p3_x20, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_lut16_p3_x24, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24)->Apply(WasmSimdEluBenchmarkSizes);

  // wasmsimd_x86_rr2_lut16_p3 kernels.
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_lut16_p3_x4, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_lut16_p3_x8, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_lut16_p3_x12, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_lut16_p3_x16, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_lut16_p3_x20, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_lut16_p3_x24, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24)->Apply(WasmSimdEluBenchmarkSizes);

  // wasmsimd_arm_rr2_p6 kernels.
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_p6_x4, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_p6_x8, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_p6_x12, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_p6_x16, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_p6_x20, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_arm_p6_x24, xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24)->Apply(WasmSimdEluBenchmarkSizes);

  // wasmsimd_x86_rr2_p6 kernels.
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_p6_x4, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_p6_x8, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_p6_x12, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_p6_x16, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_p6_x20, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20)->Apply(WasmSimdEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasmsimd_x86_p6_x24, xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24)->Apply(WasmSimdEluBenchmarkSizes);
#endif  // XNN_ARCH_WASMSIMD
672
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
  // Sweep settings shared by every WAsm registration below:
  // RangeMultiplier(10), Range(1000, 1000000), measured in real time.
  static void WasmEluBenchmarkSizes(benchmark::internal::Benchmark* b) {
    b->RangeMultiplier(10)->Range(1000, 1000000)->UseRealTime();
  }

  // wasm_rr2_lut16_p3 kernels.
  BENCHMARK_CAPTURE(f32_elu, wasm_lut16_p3_x1, xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_lut16_p3_x2, xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_lut16_p3_x3, xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_lut16_p3_x4, xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_lut16_p3_x5, xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_lut16_p3_x6, xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6)->Apply(WasmEluBenchmarkSizes);

  // wasm_rr2_p6 kernels.
  BENCHMARK_CAPTURE(f32_elu, wasm_p6_x1, xnn_f32_velu_ukernel__wasm_rr2_p6_x1)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_p6_x2, xnn_f32_velu_ukernel__wasm_rr2_p6_x2)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_p6_x3, xnn_f32_velu_ukernel__wasm_rr2_p6_x3)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_p6_x4, xnn_f32_velu_ukernel__wasm_rr2_p6_x4)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_p6_x5, xnn_f32_velu_ukernel__wasm_rr2_p6_x5)->Apply(WasmEluBenchmarkSizes);
  BENCHMARK_CAPTURE(f32_elu, wasm_p6_x6, xnn_f32_velu_ukernel__wasm_rr2_p6_x6)->Apply(WasmEluBenchmarkSizes);
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
724
725BENCHMARK_CAPTURE(f32_elu, scalar_lut16_p3_x1, xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1)
726 ->RangeMultiplier(10)
727 ->Range(1000, 1000000)
728 ->UseRealTime();
729BENCHMARK_CAPTURE(f32_elu, scalar_lut16_p3_x2, xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2)
730 ->RangeMultiplier(10)
731 ->Range(1000, 1000000)
732 ->UseRealTime();
733BENCHMARK_CAPTURE(f32_elu, scalar_lut16_p3_x3, xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3)
734 ->RangeMultiplier(10)
735 ->Range(1000, 1000000)
736 ->UseRealTime();
737BENCHMARK_CAPTURE(f32_elu, scalar_lut16_p3_x4, xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4)
738 ->RangeMultiplier(10)
739 ->Range(1000, 1000000)
740 ->UseRealTime();
741BENCHMARK_CAPTURE(f32_elu, scalar_lut16_p3_x5, xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5)
742 ->RangeMultiplier(10)
743 ->Range(1000, 1000000)
744 ->UseRealTime();
745BENCHMARK_CAPTURE(f32_elu, scalar_lut16_p3_x6, xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6)
746 ->RangeMultiplier(10)
747 ->Range(1000, 1000000)
748 ->UseRealTime();
749
750BENCHMARK_CAPTURE(f32_elu, scalar_p6_x1, xnn_f32_velu_ukernel__scalar_rr2_p6_x1)
751 ->RangeMultiplier(10)
752 ->Range(1000, 1000000)
753 ->UseRealTime();
754BENCHMARK_CAPTURE(f32_elu, scalar_p6_x2, xnn_f32_velu_ukernel__scalar_rr2_p6_x2)
755 ->RangeMultiplier(10)
756 ->Range(1000, 1000000)
757 ->UseRealTime();
758BENCHMARK_CAPTURE(f32_elu, scalar_p6_x3, xnn_f32_velu_ukernel__scalar_rr2_p6_x3)
759 ->RangeMultiplier(10)
760 ->Range(1000, 1000000)
761 ->UseRealTime();
762BENCHMARK_CAPTURE(f32_elu, scalar_p6_x4, xnn_f32_velu_ukernel__scalar_rr2_p6_x4)
763 ->RangeMultiplier(10)
764 ->Range(1000, 1000000)
765 ->UseRealTime();
766BENCHMARK_CAPTURE(f32_elu, scalar_p6_x5, xnn_f32_velu_ukernel__scalar_rr2_p6_x5)
767 ->RangeMultiplier(10)
768 ->Range(1000, 1000000)
769 ->UseRealTime();
770BENCHMARK_CAPTURE(f32_elu, scalar_p6_x6, xnn_f32_velu_ukernel__scalar_rr2_p6_x6)
771 ->RangeMultiplier(10)
772 ->Range(1000, 1000000)
773 ->UseRealTime();
774
775#ifndef XNNPACK_BENCHMARK_NO_MAIN
776BENCHMARK_MAIN();
777#endif