blob: c20f2b49eba8f584afff03e62def57022cffd356 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cfloat>
11#include <cmath>
12#include <functional>
13#include <random>
14#include <vector>
15
16#include <cpuinfo.h>
17
18#include <benchmark/benchmark.h>
19#include "bench/utils.h"
20#include <xnnpack/AlignedAllocator.h>
21#include <xnnpack/common.h>
22#include <xnnpack/requantization-stubs.h>
23
24
25class Requantization : public benchmark::Fixture {
26 public:
27 inline Requantization()
28 {
29 cpuinfo_initialize();
30 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
31 const size_t l1d_reserve = 1024;
32 n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(int8_t));
33 n_ = n_ / 16 * 16;
34 }
35
36 virtual void SetUp(const benchmark::State&) override
37 {
38 std::random_device random_device;
39 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070040 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070041
42 input_.resize(n());
Marat Dukhanecd83112020-08-03 21:50:28 -070043 std::generate(input_.begin(), input_.end(), std::ref(i32rng));
Marat Dukhan56bdd4a2020-08-03 19:47:04 -070044 output_.resize(n());
45 std::fill(output_.begin(), output_.end(), 0xA5);
46 }
47
48 virtual void TearDown(benchmark::State& state) override
49 {
50 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
51 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(int8_t)));
52 input_.clear();
53 output_.clear();
54 }
55
56 inline const int32_t* input() const
57 {
58 return input_.data();
59 }
60
61 inline int8_t* output()
62 {
63 return output_.data();
64 }
65
66 inline size_t n() const
67 {
68 return n_;
69 }
70
71 protected:
72 std::vector<int32_t, AlignedAllocator<int32_t, 32>> input_;
73 std::vector<int8_t> output_;
74 size_t n_;
75};
76
77BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state) {
78 for (auto _ : state) {
79 xnn_qs8_requantize_precise__scalar_unsigned32(
80 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
81 }
82}
83
84BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state) {
85 for (auto _ : state) {
86 xnn_qs8_requantize_precise__scalar_unsigned64(
87 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
88 }
89}
90
91BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state) {
92 for (auto _ : state) {
93 xnn_qs8_requantize_precise__scalar_signed64(
94 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
95 }
96}
97
98BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
99 for (auto _ : state) {
100 xnn_qs8_requantize_fp32__scalar_lrintf(
101 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
102 }
103}
104
105BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state) {
106 for (auto _ : state) {
107 xnn_qs8_requantize_fp32__scalar_magic(
108 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
109 }
110}
111
Marat Dukhan3e07b912020-08-03 20:14:20 -0700112BENCHMARK_F(Requantization, q31__scalar)(benchmark::State& state) {
113 for (auto _ : state) {
114 xnn_qs8_requantize_q31__scalar(
115 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
116 }
117}
118
Marat Dukhan56bdd4a2020-08-03 19:47:04 -0700119#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
120 BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state) {
121 for (auto _ : state) {
122 xnn_qs8_requantize_precise__psimd(
123 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
124 }
125 }
126
127 BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state) {
128 for (auto _ : state) {
129 xnn_qs8_requantize_fp32__psimd(
130 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
131 }
132 }
133#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
134
135
136#if XNN_ARCH_WASMSIMD
137 BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
138 for (auto _ : state) {
139 xnn_qs8_requantize_fp32__wasmsimd(
140 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
141 }
142 }
143
144 BENCHMARK_F(Requantization, q31__wasmsimd)(benchmark::State& state) {
145 for (auto _ : state) {
146 xnn_qs8_requantize_q31__wasmsimd(
147 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
148 }
149 }
150#endif
151
152
153#if XNN_ARCH_ARM || XNN_ARCH_ARM64
154 BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state) {
155 for (auto _ : state) {
156 xnn_qs8_requantize_precise__neon(
157 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
158 }
159 }
160
161 BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
162 for (auto _ : state) {
163 xnn_qs8_requantize_fp32__neon(
164 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
165 }
166 }
167
168 BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state) {
169 for (auto _ : state) {
170 xnn_qs8_requantize_q31__neon(
171 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
172 }
173 }
174#endif
175
176#if XNN_ARCH_X86 || XNN_ARCH_X86_64
177 BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state) {
178 for (auto _ : state) {
179 xnn_qs8_requantize_precise__sse2(
180 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
181 }
182 }
183
184 BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state) {
185 for (auto _ : state) {
186 xnn_qs8_requantize_precise__ssse3(
187 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
188 }
189 }
190
191 BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state) {
192 for (auto _ : state) {
193 xnn_qs8_requantize_precise__sse4(
194 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
195 }
196 }
197
198 BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
199 for (auto _ : state) {
200 xnn_qs8_requantize_fp32__sse2(
201 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
202 }
203 }
204
205 BENCHMARK_F(Requantization, fp32__sse4)(benchmark::State& state) {
206 for (auto _ : state) {
207 xnn_qs8_requantize_fp32__sse4(
208 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
209 }
210 }
211
212 BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state) {
213 for (auto _ : state) {
214 xnn_qs8_requantize_q31__sse2(
215 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
216 }
217 }
218
219 BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state) {
220 for (auto _ : state) {
221 xnn_qs8_requantize_q31__ssse3(
222 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
223 }
224 }
225
226 BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state) {
227 for (auto _ : state) {
228 xnn_qs8_requantize_q31__sse4(
229 n(), input(), 0x1.0p-12f /* scale */, -1 /* zero point */, -127 /* qmin */, 126 /* qmax */, output());
230 }
231 }
232#endif
233
234#ifndef XNNPACK_BENCHMARK_NO_MAIN
235BENCHMARK_MAIN();
236#endif