blob: 2f73ce41ca985dd0cf530d58b6a0c4e755b887d9 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <benchmark/benchmark.h>
13#include "bench/utils.h"
14
15#include <fp16/fp16.h>
16#include <xnnpack/AlignedAllocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/params.h>
19#include <xnnpack/vcvt.h>
20
21
22static void f16_f32_vcvt(
23 benchmark::State& state,
24 xnn_f16_f32_vcvt_ukernel_function cvt,
25 benchmark::utils::IsaCheckFunction isa_check = nullptr)
26{
27 if (isa_check && !isa_check(state)) {
28 return;
29 }
30
31 const size_t num_elements = state.range(0);
32
33 std::random_device random_device;
34 auto rng = std::mt19937(random_device());
35 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
36 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
37
Marat Dukhand77f77d2021-10-24 15:39:59 -070038 std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> x(num_elements + XNN_EXTRA_BYTES / sizeof(uint16_t));
Marat Dukhan434352f2021-10-16 18:28:55 -070039 std::vector<float, AlignedAllocator<float, 64>> y(num_elements);
40 std::generate(x.begin(), x.end(), std::ref(f16rng));
41 std::fill(y.begin(), y.end(), std::nanf(""));
42
43 for (auto _ : state) {
44 cvt(num_elements * sizeof(float), x.data(), y.data(), nullptr /* params */);
45 }
46
47 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
48 if (cpu_frequency != 0) {
49 state.counters["cpufreq"] = cpu_frequency;
50 }
51
52 const size_t elements_per_iteration = num_elements;
53 state.counters["elements"] =
54 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
55
56 const size_t bytes_per_iteration = num_elements * (sizeof(uint16_t) + sizeof(float));
57 state.counters["bytes"] =
58 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
59}
60
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Registrations compiled only for ARM / ARM64 builds. Each one passes an ISA
  // check to f16_f32_vcvt, which returns early when that check fails.

  // neonfp16 variants (checked with CheckNEONFP16).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neonfp16_x8,
      xnn_f16_f32_vcvt_ukernel__neonfp16_x8, benchmark::utils::CheckNEONFP16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neonfp16_x16,
      xnn_f16_f32_vcvt_ukernel__neonfp16_x16, benchmark::utils::CheckNEONFP16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // neon_int16 variants (checked with CheckNEON).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x8,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x8, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x16,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x16, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x24,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x24, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int16_x32,
      xnn_f16_f32_vcvt_ukernel__neon_int16_x32, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // neon_int32 variants (checked with CheckNEON).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x8,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x8, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x16,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x16, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x24,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x24, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, neon_int32_x32,
      xnn_f16_f32_vcvt_ukernel__neon_int32_x32, benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
115
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Registrations compiled only for x86 / x86-64 builds. All groups except the
  // sse2 ones pass an ISA check to f16_f32_vcvt, which returns early when that
  // check fails.

  // avx512skx variants (checked with CheckAVX512SKX).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx512skx_x16,
      xnn_f16_f32_vcvt_ukernel__avx512skx_x16, benchmark::utils::CheckAVX512SKX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx512skx_x32,
      xnn_f16_f32_vcvt_ukernel__avx512skx_x32, benchmark::utils::CheckAVX512SKX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // f16c variants (checked with CheckF16C).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, f16c_x8,
      xnn_f16_f32_vcvt_ukernel__f16c_x8, benchmark::utils::CheckF16C)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, f16c_x16,
      xnn_f16_f32_vcvt_ukernel__f16c_x16, benchmark::utils::CheckF16C)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // avx_int16 variants (checked with CheckAVX).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x8,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x8, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x16,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x16, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x24,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x24, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int16_x32,
      xnn_f16_f32_vcvt_ukernel__avx_int16_x32, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // avx_int32 variants (checked with CheckAVX).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x8,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x8, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x16,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x16, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x24,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x24, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, avx_int32_x32,
      xnn_f16_f32_vcvt_ukernel__avx_int32_x32, benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse41_int16 variants (checked with CheckSSE41).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x8,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x8, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x16,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x16, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x24,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x24, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int16_x32,
      xnn_f16_f32_vcvt_ukernel__sse41_int16_x32, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse41_int32 variants (checked with CheckSSE41).
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x8,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x8, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x16,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x16, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x24,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x24, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(
      f16_f32_vcvt, sse41_int32_x32,
      xnn_f16_f32_vcvt_ukernel__sse41_int32_x32, benchmark::utils::CheckSSE41)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse2_int16 variants (registered without an ISA check).
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int16_x8, xnn_f16_f32_vcvt_ukernel__sse2_int16_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int16_x16, xnn_f16_f32_vcvt_ukernel__sse2_int16_x16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int16_x24, xnn_f16_f32_vcvt_ukernel__sse2_int16_x24)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int16_x32, xnn_f16_f32_vcvt_ukernel__sse2_int16_x32)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // sse2_int32 variants (registered without an ISA check).
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int32_x8, xnn_f16_f32_vcvt_ukernel__sse2_int32_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int32_x16, xnn_f16_f32_vcvt_ukernel__sse2_int32_x16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int32_x24, xnn_f16_f32_vcvt_ukernel__sse2_int32_x24)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, sse2_int32_x32, xnn_f16_f32_vcvt_ukernel__sse2_int32_x32)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
257
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Registrations compiled only for WAsm SIMD / WAsm Relaxed SIMD builds.
  // None of them passes an ISA check.

  // wasmsimd_int16 variants.
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int16_x8, xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int16_x16, xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int16_x24, xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x24)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int16_x32, xnn_f16_f32_vcvt_ukernel__wasmsimd_int16_x32)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();

  // wasmsimd_int32 variants.
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int32_x8, xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x8)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int32_x16, xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x16)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int32_x24, xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x24)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f16_f32_vcvt, wasmsimd_int32_x32, xnn_f16_f32_vcvt_ukernel__wasmsimd_int32_x32)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan434352f2021-10-16 18:28:55 -0700293
Marat Dukhane2c00012021-10-17 22:02:35 -0700294BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_float_x1,
295 xnn_f16_f32_vcvt_ukernel__scalar_float_x1)
296 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
297 ->UseRealTime();
298BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_float_x2,
299 xnn_f16_f32_vcvt_ukernel__scalar_float_x2)
300 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
301 ->UseRealTime();
302BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_float_x3,
303 xnn_f16_f32_vcvt_ukernel__scalar_float_x3)
304 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
305 ->UseRealTime();
306BENCHMARK_CAPTURE(f16_f32_vcvt, scalar_float_x4,
307 xnn_f16_f32_vcvt_ukernel__scalar_float_x4)
308 ->Apply(benchmark::utils::UnaryElementwiseParameters<uint16_t, float>)
309 ->UseRealTime();
310
Marat Dukhan434352f2021-10-16 18:28:55 -0700311#ifndef XNNPACK_BENCHMARK_NO_MAIN
312BENCHMARK_MAIN();
313#endif