blob: 8b8aa3d4ef7777dd2833c9a90b6fa6ca9ec072eb [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cfloat>
11#include <cmath>
12#include <functional>
13#include <random>
14#include <vector>
15
16#include <cpuinfo.h>
17
18#include <benchmark/benchmark.h>
19#include "bench/utils.h"
20#include <xnnpack/AlignedAllocator.h>
21#include <xnnpack/common.h>
22#include <xnnpack/requantization-stubs.h>
23
24
25class Requantization : public benchmark::Fixture {
26 public:
27 inline Requantization()
28 {
29 cpuinfo_initialize();
30 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
31 const size_t l1d_reserve = 1024;
32 n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(int8_t));
33 n_ = n_ / 16 * 16;
34 }
35
Frank Barchardbd3c9aa2021-07-15 16:02:59 -070036 virtual void SetUp(benchmark::State& state) override
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070037 {
38 std::random_device random_device;
39 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070040 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070041
42 input_.resize(n());
Marat Dukhanecd83112020-08-03 21:50:28 -070043 std::generate(input_.begin(), input_.end(), std::ref(i32rng));
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070044 output_.resize(n());
45 std::fill(output_.begin(), output_.end(), 0xA5);
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070046
Frank Barchardbd3c9aa2021-07-15 16:02:59 -070047 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
48 if (cpu_frequency != 0) {
49 state.counters["cpufreq"] = cpu_frequency;
50 }
51 }
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070052 virtual void TearDown(benchmark::State& state) override
53 {
54 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
55 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(int8_t)));
56 input_.clear();
57 output_.clear();
58 }
59
60 inline const int32_t* input() const
61 {
62 return input_.data();
63 }
64
65 inline int8_t* output()
66 {
67 return output_.data();
68 }
69
70 inline size_t n() const
71 {
72 return n_;
73 }
74
75 protected:
Marat Dukhane13e6392021-07-26 22:22:35 -070076 std::vector<int32_t, AlignedAllocator<int32_t, 64>> input_;
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070077 std::vector<int8_t> output_;
78 size_t n_;
79};
80
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070081
82#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070083 BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
84 for (auto _ : state) {
85 xnn_qs8_requantize_fp32__neon(
86 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
87 }
88 }
89
Marat Dukhan9976cd82021-05-24 23:15:45 -070090 BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070091 for (auto _ : state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -070092 xnn_qs8_requantize_gemmlowp__neon(
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070093 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
94 }
95 }
Marat Dukhan9976cd82021-05-24 23:15:45 -070096
Marat Dukhan06716242021-05-26 15:56:39 -070097 BENCHMARK_F(Requantization, rndna__neon)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -070098 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -070099 xnn_qs8_requantize_rndna__neon(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700100 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
101 }
102 }
Marat Dukhan062bee32021-05-27 20:31:07 -0700103
Marat Dukhand3d818c2021-07-16 17:56:54 -0700104 BENCHMARK_F(Requantization, rndnu__neon_mull)(benchmark::State& state) {
Marat Dukhan062bee32021-05-27 20:31:07 -0700105 for (auto _ : state) {
Marat Dukhand3d818c2021-07-16 17:56:54 -0700106 xnn_qs8_requantize_rndnu__neon_mull(
Marat Dukhan062bee32021-05-27 20:31:07 -0700107 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
108 }
109 }
Marat Dukhan7b1aeb92021-07-16 15:13:51 -0700110
Marat Dukhand3d818c2021-07-16 17:56:54 -0700111 BENCHMARK_F(Requantization, rndnu__neon_qdmulh)(benchmark::State& state) {
Marat Dukhan7b1aeb92021-07-16 15:13:51 -0700112 for (auto _ : state) {
Marat Dukhand3d818c2021-07-16 17:56:54 -0700113 xnn_qs8_requantize_rndnu__neon_qdmulh(
Marat Dukhan7b1aeb92021-07-16 15:13:51 -0700114 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
115 }
116 }
Marat Dukhan9976cd82021-05-24 23:15:45 -0700117#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
118
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700119
120#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9976cd82021-05-24 23:15:45 -0700121 BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
122 for (auto _ : state) {
123 xnn_qs8_requantize_fp32__sse2(
124 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
125 }
126 }
127
128 BENCHMARK_F(Requantization, fp32__sse4)(benchmark::State& state) {
129 for (auto _ : state) {
130 xnn_qs8_requantize_fp32__sse4(
131 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
132 }
133 }
134
135 BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
136 for (auto _ : state) {
137 xnn_qs8_requantize_gemmlowp__sse2(
138 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
139 }
140 }
141
142 BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
143 for (auto _ : state) {
144 xnn_qs8_requantize_gemmlowp__ssse3(
145 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
146 }
147 }
148
149 BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
150 for (auto _ : state) {
151 xnn_qs8_requantize_gemmlowp__sse4(
152 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
153 }
154 }
155
Marat Dukhan06716242021-05-26 15:56:39 -0700156 BENCHMARK_F(Requantization, rndna__sse2)(benchmark::State& state) {
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700157 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700158 xnn_qs8_requantize_rndna__sse2(
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700159 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
160 }
161 }
162
Marat Dukhan06716242021-05-26 15:56:39 -0700163 BENCHMARK_F(Requantization, rndna__ssse3)(benchmark::State& state) {
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700164 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700165 xnn_qs8_requantize_rndna__ssse3(
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700166 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
167 }
168 }
169
Marat Dukhan06716242021-05-26 15:56:39 -0700170 BENCHMARK_F(Requantization, rndna__sse4)(benchmark::State& state) {
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700171 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700172 xnn_qs8_requantize_rndna__sse4(
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700173 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
174 }
175 }
Marat Dukhan062bee32021-05-27 20:31:07 -0700176
Marat Dukhan0d979d52021-06-09 13:21:18 -0700177 BENCHMARK_F(Requantization, rndnu__sse4_sra)(benchmark::State& state) {
Marat Dukhan062bee32021-05-27 20:31:07 -0700178 for (auto _ : state) {
Marat Dukhan0d979d52021-06-09 13:21:18 -0700179 xnn_qs8_requantize_rndnu__sse4_sra(
180 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
181 }
182 }
183
184 BENCHMARK_F(Requantization, rndnu__sse4_srl)(benchmark::State& state) {
185 for (auto _ : state) {
186 xnn_qs8_requantize_rndnu__sse4_srl(
Marat Dukhan062bee32021-05-27 20:31:07 -0700187 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
188 }
189 }
Marat Dukhan9976cd82021-05-24 23:15:45 -0700190#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700191
Marat Dukhan9976cd82021-05-24 23:15:45 -0700192
Marat Dukhan4c617792021-12-21 15:47:58 -0800193#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9976cd82021-05-24 23:15:45 -0700194 BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700195 for (auto _ : state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700196 xnn_qs8_requantize_fp32__wasmsimd(
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700197 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
198 }
199 }
200
Marat Dukhan9976cd82021-05-24 23:15:45 -0700201 BENCHMARK_F(Requantization, gemmlowp__wasmsimd)(benchmark::State& state) {
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700202 for (auto _ : state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700203 xnn_qs8_requantize_gemmlowp__wasmsimd(
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700204 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
205 }
206 }
Marat Dukhan4c617792021-12-21 15:47:58 -0800207#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700208
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700209
Marat Dukhan9976cd82021-05-24 23:15:45 -0700210BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
211 for (auto _ : state) {
212 xnn_qs8_requantize_fp32__scalar_lrintf(
213 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700214 }
Marat Dukhan9976cd82021-05-24 23:15:45 -0700215}
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700216
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800217BENCHMARK_F(Requantization, fp32__scalar_fmagic)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700218 for (auto _ : state) {
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800219 xnn_qs8_requantize_fp32__scalar_fmagic(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700220 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700221 }
Marat Dukhan9976cd82021-05-24 23:15:45 -0700222}
223
224BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
225 for (auto _ : state) {
226 xnn_qs8_requantize_gemmlowp__scalar(
227 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
228 }
229}
230
Marat Dukhan06716242021-05-26 15:56:39 -0700231BENCHMARK_F(Requantization, rndna__scalar_signed64)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700232 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700233 xnn_qs8_requantize_rndna__scalar_signed64(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700234 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
235 }
236}
237
Marat Dukhan06716242021-05-26 15:56:39 -0700238BENCHMARK_F(Requantization, rndna__scalar_unsigned32)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700239 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700240 xnn_qs8_requantize_rndna__scalar_unsigned32(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700241 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
242 }
243}
244
Marat Dukhan06716242021-05-26 15:56:39 -0700245BENCHMARK_F(Requantization, rndna__scalar_unsigned64)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700246 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700247 xnn_qs8_requantize_rndna__scalar_unsigned64(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700248 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
249 }
250}
251
Marat Dukhan062bee32021-05-27 20:31:07 -0700252BENCHMARK_F(Requantization, rndnu__scalar)(benchmark::State& state) {
253 for (auto _ : state) {
254 xnn_qs8_requantize_rndnu__scalar(
255 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
256 }
257}
258
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700259
260#ifndef XNNPACK_BENCHMARK_NO_MAIN
261BENCHMARK_MAIN();
262#endif