// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <benchmark/benchmark.h>
13#include "bench/utils.h"
14
15#include <xnnpack/AlignedAllocator.h>
16#include <xnnpack/common.h>
Marat Dukhanf4db2f32020-06-30 10:55:30 -070017#include <xnnpack/params.h>
18#include <xnnpack/params-init.h>
Marat Dukhan5aeb32b2021-08-06 22:08:39 -070019#include <xnnpack/vunary.h>
Marat Dukhanf4db2f32020-06-30 10:55:30 -070020
21
22static void f32_vsqrt(
23 benchmark::State& state,
24 xnn_f32_vsqrt_ukernel_function vsqrt,
Marat Dukhane72b2822021-12-30 14:46:58 -080025 xnn_init_f32_sqrt_params_fn init_params = nullptr,
Marat Dukhanf4db2f32020-06-30 10:55:30 -070026 benchmark::utils::IsaCheckFunction isa_check = nullptr)
27{
28 if (isa_check && !isa_check(state)) {
29 return;
30 }
31
Marat Dukhan5aeb32b2021-08-06 22:08:39 -070032 const size_t num_elements = state.range(0);
33 std::vector<float, AlignedAllocator<float, 64>> input(num_elements);
34 std::vector<float, AlignedAllocator<float, 64>> output(num_elements);
Marat Dukhanf4db2f32020-06-30 10:55:30 -070035
36 std::random_device random_device;
37 auto rng = std::mt19937(random_device());
38 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 10.0f), std::ref(rng));
39 std::generate(input.begin(), input.end(), std::ref(f32rng));
40 std::fill(output.begin(), output.end(), std::nanf(""));
41
Marat Dukhanf56f4c42021-05-17 01:47:20 -070042 union xnn_f32_sqrt_params params;
Marat Dukhane72b2822021-12-30 14:46:58 -080043 if (init_params != nullptr) {
44 init_params(&params);
45 }
Marat Dukhanf4db2f32020-06-30 10:55:30 -070046 for (auto _ : state) {
Marat Dukhan5aeb32b2021-08-06 22:08:39 -070047 vsqrt(num_elements * sizeof(float), input.data(), output.data(), &params);
Marat Dukhanf4db2f32020-06-30 10:55:30 -070048 }
49
50 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
51 if (cpu_frequency != 0) {
52 state.counters["cpufreq"] = cpu_frequency;
53 }
54
Marat Dukhan5aeb32b2021-08-06 22:08:39 -070055 const size_t elements_per_iteration = num_elements;
Marat Dukhanf4db2f32020-06-30 10:55:30 -070056 state.counters["elements"] =
57 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
58
Marat Dukhan5aeb32b2021-08-06 22:08:39 -070059 const size_t bytes_per_iteration = 2 * num_elements * sizeof(float);
Marat Dukhanf4db2f32020-06-30 10:55:30 -070060 state.counters["bytes"] =
61 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
62}
63
#if XNN_ARCH_ARM64
  // neon_sqrt kernels: compiled only for AArch64 targets and registered with
  // the default (null) init_params and isa_check arguments of f32_vsqrt.
  BENCHMARK_CAPTURE(f32_vsqrt, neon_sqrt_x4, xnn_f32_vsqrt_ukernel__neon_sqrt_x4)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neon_sqrt_x8, xnn_f32_vsqrt_ukernel__neon_sqrt_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM64
74
// neonfma kernels are registered for both 32-bit ARM and AArch64 builds; the
// CheckNEONFMA predicate passed to f32_vsqrt decides at run time whether each
// benchmark actually executes. These kernels take no params initializer.
// (The guard previously repeated XNN_ARCH_ARM64 twice, which excluded 32-bit
// ARM builds from this whole section.)
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x4,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x8,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x12,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x16,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x20,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x24,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x28,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x32,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x36,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr1rsqrts1fma1adj_x40,
                    xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x4,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x8,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x12,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x16,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x20,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x24,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x28,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x32,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x36,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, neonfma_nr2fma1adj_x40,
                    xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40,
                    nullptr /* init params */,
                    benchmark::utils::CheckNEONFMA)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
198
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // avx512f_nr1fma1adj kernels: params set up by xnn_init_f32_sqrt_avx512_params,
  // each run gated by the CheckAVX512F predicate.
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x16, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x32, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x48, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x64, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x80, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x96, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x112, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx512f_nr1fma1adj_x128, xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128,
      xnn_init_f32_sqrt_avx512_params, benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // fma3_nr1fma1adj kernels: params set up by xnn_init_f32_sqrt_fma_params,
  // each run gated by the CheckFMA3 predicate.
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x8, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x16, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x24, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x32, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x40, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x48, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x56, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, fma3_nr1fma1adj_x64, xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64,
      xnn_init_f32_sqrt_fma_params, benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // avx_sqrt kernels: params set up by xnn_init_f32_sqrt_avx_params,
  // each run gated by the CheckAVX predicate.
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx_sqrt_x8, xnn_f32_vsqrt_ukernel__avx_sqrt_x8,
      xnn_init_f32_sqrt_avx_params, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(
      f32_vsqrt, avx_sqrt_x16, xnn_f32_vsqrt_ukernel__avx_sqrt_x16,
      xnn_init_f32_sqrt_avx_params, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();

  // sse_sqrt kernels: registered with the default (null) init_params and isa_check.
  BENCHMARK_CAPTURE(f32_vsqrt, sse_sqrt_x4, xnn_f32_vsqrt_ukernel__sse_sqrt_x4)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, sse_sqrt_x8, xnn_f32_vsqrt_ukernel__sse_sqrt_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
320
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // wasmsimd_sqrt kernels: registered with the default (null) init_params and isa_check.
  BENCHMARK_CAPTURE(f32_vsqrt, wasmsimd_sqrt_x4, xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x4)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
  BENCHMARK_CAPTURE(f32_vsqrt, wasmsimd_sqrt_x8, xnn_f32_vsqrt_ukernel__wasmsimd_sqrt_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanf4db2f32020-06-30 10:55:30 -0700331
Marat Dukhan5aeb32b2021-08-06 22:08:39 -0700332BENCHMARK_CAPTURE(f32_vsqrt, scalar_sqrt_x1,
333 xnn_f32_vsqrt_ukernel__scalar_sqrt_x1)
334 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
Marat Dukhanf4db2f32020-06-30 10:55:30 -0700335 ->UseRealTime();
Marat Dukhan5aeb32b2021-08-06 22:08:39 -0700336BENCHMARK_CAPTURE(f32_vsqrt, scalar_sqrt_x2,
337 xnn_f32_vsqrt_ukernel__scalar_sqrt_x2)
338 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
Marat Dukhanf4db2f32020-06-30 10:55:30 -0700339 ->UseRealTime();
Marat Dukhan5aeb32b2021-08-06 22:08:39 -0700340BENCHMARK_CAPTURE(f32_vsqrt, scalar_sqrt_x4,
341 xnn_f32_vsqrt_ukernel__scalar_sqrt_x4)
342 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
Marat Dukhanf4db2f32020-06-30 10:55:30 -0700343 ->UseRealTime();
344
345#ifndef XNNPACK_BENCHMARK_NO_MAIN
346BENCHMARK_MAIN();
347#endif