blob: c027d06b3fb83ff70bb79a4508d9b33ea7bbcf00 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <benchmark/benchmark.h>
13#include "bench/utils.h"
14
15#include <fp16/fp16.h>
16#include <xnnpack/AlignedAllocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/params.h>
Marat Dukhanb7c1b712021-12-30 07:23:57 -080019#include <xnnpack/params-init.h>
Marat Dukhand77f77d2021-10-24 15:39:59 -070020#include <xnnpack/vcvt.h>
21
22
23static void f32_f16_vcvt(
24 benchmark::State& state,
25 xnn_f32_f16_vcvt_ukernel_function cvt,
Marat Dukhanb7c1b712021-12-30 07:23:57 -080026 xnn_init_f32_f16_cvt_params_fn init_params = nullptr,
Marat Dukhand77f77d2021-10-24 15:39:59 -070027 benchmark::utils::IsaCheckFunction isa_check = nullptr)
28{
29 if (isa_check && !isa_check(state)) {
30 return;
31 }
32
33 const size_t num_elements = state.range(0);
34
35 std::random_device random_device;
36 auto rng = std::mt19937(random_device());
37 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
38
39 std::vector<float, AlignedAllocator<float, 64>> x(num_elements + XNN_EXTRA_BYTES / sizeof(float));
40 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> y(num_elements);
41 std::generate(x.begin(), x.end(), std::ref(f32rng));
42 std::fill(y.begin(), y.end(), UINT16_C(0x7E00));
43
Marat Dukhanb7c1b712021-12-30 07:23:57 -080044 xnn_f32_f16_cvt_params params;
45 if (init_params != nullptr) {
46 init_params(&params);
47 }
Marat Dukhand77f77d2021-10-24 15:39:59 -070048 for (auto _ : state) {
Marat Dukhanb7c1b712021-12-30 07:23:57 -080049 cvt(num_elements * sizeof(uint16_t), x.data(), y.data(), &params);
Marat Dukhand77f77d2021-10-24 15:39:59 -070050 }
51
52 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
53 if (cpu_frequency != 0) {
54 state.counters["cpufreq"] = cpu_frequency;
55 }
56
57 const size_t elements_per_iteration = num_elements;
58 state.counters["elements"] =
59 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
60
61 const size_t bytes_per_iteration = num_elements * (sizeof(uint16_t) + sizeof(float));
62 state.counters["bytes"] =
63 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
64}
65
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // neonfp16 kernels: registered without a params initializer, gated on CheckNEONFP16.
  BENCHMARK_CAPTURE(f32_f16_vcvt, neonfp16_x8, xnn_f32_f16_vcvt_ukernel__neonfp16_x8,
                    /*init_params=*/nullptr, benchmark::utils::CheckNEONFP16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, neonfp16_x16, xnn_f32_f16_vcvt_ukernel__neonfp16_x16,
                    /*init_params=*/nullptr, benchmark::utils::CheckNEONFP16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // neon kernels: parameters come from xnn_init_f32_f16_cvt_neon_params, gated on CheckNEON.
  BENCHMARK_CAPTURE(f32_f16_vcvt, neon_x8, xnn_f32_f16_vcvt_ukernel__neon_x8,
                    xnn_init_f32_f16_cvt_neon_params, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, neon_x16, xnn_f32_f16_vcvt_ukernel__neon_x16,
                    xnn_init_f32_f16_cvt_neon_params, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, neon_x24, xnn_f32_f16_vcvt_ukernel__neon_x24,
                    xnn_init_f32_f16_cvt_neon_params, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, neon_x32, xnn_f32_f16_vcvt_ukernel__neon_x32,
                    xnn_init_f32_f16_cvt_neon_params, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
105
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // avx512skx kernels: registered without a params initializer, gated on CheckAVX512SKX.
  BENCHMARK_CAPTURE(f32_f16_vcvt, avx512skx_x16, xnn_f32_f16_vcvt_ukernel__avx512skx_x16,
                    /*init_params=*/nullptr, benchmark::utils::CheckAVX512SKX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, avx512skx_x32, xnn_f32_f16_vcvt_ukernel__avx512skx_x32,
                    /*init_params=*/nullptr, benchmark::utils::CheckAVX512SKX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // f16c kernels: parameters come from xnn_init_f32_f16_cvt_f16c_params, gated on CheckF16C.
  BENCHMARK_CAPTURE(f32_f16_vcvt, f16c_x8, xnn_f32_f16_vcvt_ukernel__f16c_x8,
                    xnn_init_f32_f16_cvt_f16c_params, benchmark::utils::CheckF16C)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, f16c_x16, xnn_f32_f16_vcvt_ukernel__f16c_x16,
                    xnn_init_f32_f16_cvt_f16c_params, benchmark::utils::CheckF16C)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // avx kernels: use the sse2 params initializer, gated on CheckAVX.
  BENCHMARK_CAPTURE(f32_f16_vcvt, avx_x8, xnn_f32_f16_vcvt_ukernel__avx_x8,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, avx_x16, xnn_f32_f16_vcvt_ukernel__avx_x16,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, avx_x24, xnn_f32_f16_vcvt_ukernel__avx_x24,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, avx_x32, xnn_f32_f16_vcvt_ukernel__avx_x32,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse41 kernels: use the sse2 params initializer, gated on CheckSSE41.
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse41_x8, xnn_f32_f16_vcvt_ukernel__sse41_x8,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse41_x16, xnn_f32_f16_vcvt_ukernel__sse41_x16,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse41_x24, xnn_f32_f16_vcvt_ukernel__sse41_x24,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse41_x32, xnn_f32_f16_vcvt_ukernel__sse41_x32,
                    xnn_init_f32_f16_cvt_sse2_params, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse2 kernels: sse2 params initializer, registered without an ISA check.
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse2_x8, xnn_f32_f16_vcvt_ukernel__sse2_x8,
                    xnn_init_f32_f16_cvt_sse2_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse2_x16, xnn_f32_f16_vcvt_ukernel__sse2_x16,
                    xnn_init_f32_f16_cvt_sse2_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse2_x24, xnn_f32_f16_vcvt_ukernel__sse2_x24,
                    xnn_init_f32_f16_cvt_sse2_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, sse2_x32, xnn_f32_f16_vcvt_ukernel__sse2_x32,
                    xnn_init_f32_f16_cvt_sse2_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
204
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // wasmsimd kernels: parameters come from xnn_init_f32_f16_cvt_wasmsimd_params,
  // registered without an ISA check.
  BENCHMARK_CAPTURE(f32_f16_vcvt, wasmsimd_x8, xnn_f32_f16_vcvt_ukernel__wasmsimd_x8,
                    xnn_init_f32_f16_cvt_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, wasmsimd_x16, xnn_f32_f16_vcvt_ukernel__wasmsimd_x16,
                    xnn_init_f32_f16_cvt_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, wasmsimd_x24, xnn_f32_f16_vcvt_ukernel__wasmsimd_x24,
                    xnn_init_f32_f16_cvt_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_f16_vcvt, wasmsimd_x32, xnn_f32_f16_vcvt_ukernel__wasmsimd_x32,
                    xnn_init_f32_f16_cvt_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan22e31c82021-11-09 00:00:28 -0800227
Marat Dukhan1fe89952021-11-10 01:27:15 -0800228BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_bitcast_x1,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800229 xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x1,
230 xnn_init_f32_f16_cvt_scalar_bitcast_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800231 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
232 ->UseRealTime();
233BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_bitcast_x2,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800234 xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x2,
235 xnn_init_f32_f16_cvt_scalar_bitcast_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800236 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
237 ->UseRealTime();
238BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_bitcast_x3,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800239 xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x3,
240 xnn_init_f32_f16_cvt_scalar_bitcast_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800241 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
242 ->UseRealTime();
243BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_bitcast_x4,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800244 xnn_f32_f16_vcvt_ukernel__scalar_bitcast_x4,
245 xnn_init_f32_f16_cvt_scalar_bitcast_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800246 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
247 ->UseRealTime();
248
249BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_fabsf_x1,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800250 xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x1,
251 xnn_init_f32_f16_cvt_scalar_fabsf_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800252 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
253 ->UseRealTime();
254BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_fabsf_x2,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800255 xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x2,
256 xnn_init_f32_f16_cvt_scalar_fabsf_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800257 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
258 ->UseRealTime();
259BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_fabsf_x3,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800260 xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x3,
261 xnn_init_f32_f16_cvt_scalar_fabsf_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800262 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
263 ->UseRealTime();
264BENCHMARK_CAPTURE(f32_f16_vcvt, scalar_fabsf_x4,
Marat Dukhanb7c1b712021-12-30 07:23:57 -0800265 xnn_f32_f16_vcvt_ukernel__scalar_fabsf_x4,
266 xnn_init_f32_f16_cvt_scalar_fabsf_params)
Marat Dukhan1fe89952021-11-10 01:27:15 -0800267 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
268 ->UseRealTime();
269
Marat Dukhand77f77d2021-10-24 15:39:59 -0700270#ifndef XNNPACK_BENCHMARK_NO_MAIN
271BENCHMARK_MAIN();
272#endif