// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <benchmark/benchmark.h>
#include "bench/utils.h"

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/params.h>
#include <xnnpack/params-init.h>
#include <xnnpack/vunary.h>


Marat Dukhan6674d692021-05-05 22:27:00 -070022static void f32_vhswish(
Marat Dukhana11ca342020-06-25 23:45:07 -070023 benchmark::State& state,
Marat Dukhan6674d692021-05-05 22:27:00 -070024 xnn_f32_vhswish_ukernel_function hswish,
Marat Dukhan0d10cc72021-12-23 19:49:19 -080025 xnn_init_f32_hswish_params_fn init_params,
Marat Dukhana11ca342020-06-25 23:45:07 -070026 benchmark::utils::IsaCheckFunction isa_check = nullptr)
27{
28 if (isa_check && !isa_check(state)) {
29 return;
30 }
31
Marat Dukhan8634f7e2021-08-06 22:09:51 -070032 const size_t num_elements = state.range(0);
33 std::vector<float, AlignedAllocator<float, 64>> input(num_elements);
34 std::vector<float, AlignedAllocator<float, 64>> output(num_elements);
Marat Dukhana11ca342020-06-25 23:45:07 -070035
36 std::random_device random_device;
37 auto rng = std::mt19937(random_device());
38 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
39 std::generate(input.begin(), input.end(), std::ref(f32rng));
40 std::fill(output.begin(), output.end(), std::nanf(""));
41
Marat Dukhanf56f4c42021-05-17 01:47:20 -070042 union xnn_f32_hswish_params params;
Marat Dukhan0d10cc72021-12-23 19:49:19 -080043 init_params(&params);
Marat Dukhana11ca342020-06-25 23:45:07 -070044 for (auto _ : state) {
Marat Dukhan8634f7e2021-08-06 22:09:51 -070045 hswish(num_elements * sizeof(float), input.data(), output.data(), &params);
Marat Dukhana11ca342020-06-25 23:45:07 -070046 }
47
48 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
49 if (cpu_frequency != 0) {
50 state.counters["cpufreq"] = cpu_frequency;
51 }
52
Marat Dukhan8634f7e2021-08-06 22:09:51 -070053 const size_t elements_per_iteration = num_elements;
Marat Dukhana11ca342020-06-25 23:45:07 -070054 state.counters["elements"] =
55 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
56
Marat Dukhan8634f7e2021-08-06 22:09:51 -070057 const size_t bytes_per_iteration = 2 * num_elements * sizeof(float);
Marat Dukhana11ca342020-06-25 23:45:07 -070058 state.counters["bytes"] =
59 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
60}
61
// NEON kernels (x4/x8/x16 variants), registered only for ARM/ARM64 builds.
// Each registration passes CheckNEON as the isa_check argument.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  BENCHMARK_CAPTURE(f32_vhswish, neon_x4,
                    xnn_f32_vhswish_ukernel__neon_x4,
                    xnn_init_f32_hswish_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, neon_x8,
                    xnn_f32_vhswish_ukernel__neon_x8,
                    xnn_init_f32_hswish_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, neon_x16,
                    xnn_f32_vhswish_ukernel__neon_x16,
                    xnn_init_f32_hswish_scalar_params,
                    benchmark::utils::CheckNEON)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

// x86/x86-64 kernels. The SSE variants are registered without an isa_check;
// the AVX, FMA3 and AVX512F variants pass the matching Check* predicate.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  BENCHMARK_CAPTURE(f32_vhswish, sse_x4,
                    xnn_f32_vhswish_ukernel__sse_x4,
                    xnn_init_f32_hswish_sse_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, sse_x8,
                    xnn_f32_vhswish_ukernel__sse_x8,
                    xnn_init_f32_hswish_sse_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_vhswish, avx_x8,
                    xnn_f32_vhswish_ukernel__avx_x8,
                    xnn_init_f32_hswish_avx_params,
                    benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, avx_x16,
                    xnn_f32_vhswish_ukernel__avx_x16,
                    xnn_init_f32_hswish_avx_params,
                    benchmark::utils::CheckAVX)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  // The FMA3 kernels are initialized with the AVX parameter initializer.
  BENCHMARK_CAPTURE(f32_vhswish, fma3_x8,
                    xnn_f32_vhswish_ukernel__fma3_x8,
                    xnn_init_f32_hswish_avx_params,
                    benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, fma3_x16,
                    xnn_f32_vhswish_ukernel__fma3_x16,
                    xnn_init_f32_hswish_avx_params,
                    benchmark::utils::CheckFMA3)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();

  BENCHMARK_CAPTURE(f32_vhswish, avx512f_x16,
                    xnn_f32_vhswish_ukernel__avx512f_x16,
                    xnn_init_f32_hswish_avx512_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, avx512f_x32,
                    xnn_f32_vhswish_ukernel__avx512f_x32,
                    xnn_init_f32_hswish_avx512_params,
                    benchmark::utils::CheckAVX512F)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

// WAsm SIMD kernels (x4/x8/x16 variants), registered for WAsm SIMD and
// WAsm Relaxed SIMD builds. No isa_check is passed.
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  BENCHMARK_CAPTURE(f32_vhswish, wasmsimd_x4,
                    xnn_f32_vhswish_ukernel__wasmsimd_x4,
                    xnn_init_f32_hswish_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, wasmsimd_x8,
                    xnn_f32_vhswish_ukernel__wasmsimd_x8,
                    xnn_init_f32_hswish_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, wasmsimd_x16,
                    xnn_f32_vhswish_ukernel__wasmsimd_x16,
                    xnn_init_f32_hswish_wasmsimd_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

// WAsm kernels (x1/x2/x4 variants), registered for every WAsm flavour.
// They use the scalar parameter initializer and pass no isa_check.
#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  BENCHMARK_CAPTURE(f32_vhswish, wasm_x1,
                    xnn_f32_vhswish_ukernel__wasm_x1,
                    xnn_init_f32_hswish_scalar_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, wasm_x2,
                    xnn_f32_vhswish_ukernel__wasm_x2,
                    xnn_init_f32_hswish_scalar_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
  BENCHMARK_CAPTURE(f32_vhswish, wasm_x4,
                    xnn_f32_vhswish_ukernel__wasm_x4,
                    xnn_init_f32_hswish_scalar_params)
    ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
    ->UseRealTime();
#endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

Marat Dukhan8634f7e2021-08-06 22:09:51 -0700171BENCHMARK_CAPTURE(f32_vhswish, scalar_x1,
Marat Dukhan0d10cc72021-12-23 19:49:19 -0800172 xnn_f32_vhswish_ukernel__scalar_x1,
173 xnn_init_f32_hswish_scalar_params)
Marat Dukhan8634f7e2021-08-06 22:09:51 -0700174 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
Marat Dukhana11ca342020-06-25 23:45:07 -0700175 ->UseRealTime();
Marat Dukhan8634f7e2021-08-06 22:09:51 -0700176BENCHMARK_CAPTURE(f32_vhswish, scalar_x2,
Marat Dukhan0d10cc72021-12-23 19:49:19 -0800177 xnn_f32_vhswish_ukernel__scalar_x2,
178 xnn_init_f32_hswish_scalar_params)
Marat Dukhan8634f7e2021-08-06 22:09:51 -0700179 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
Marat Dukhana11ca342020-06-25 23:45:07 -0700180 ->UseRealTime();
Marat Dukhan8634f7e2021-08-06 22:09:51 -0700181BENCHMARK_CAPTURE(f32_vhswish, scalar_x4,
Marat Dukhan0d10cc72021-12-23 19:49:19 -0800182 xnn_f32_vhswish_ukernel__scalar_x4,
183 xnn_init_f32_hswish_scalar_params)
Marat Dukhan8634f7e2021-08-06 22:09:51 -0700184 ->Apply(benchmark::utils::UnaryElementwiseParameters<float, float>)
Marat Dukhana11ca342020-06-25 23:45:07 -0700185 ->UseRealTime();
186
187#ifndef XNNPACK_BENCHMARK_NO_MAIN
188BENCHMARK_MAIN();
189#endif