// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <cpuinfo.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/requantization-stubs.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023
Marat Dukhanfe7acb62020-03-09 19:30:05 -070024
// Integer division of x by q, rounded up to the next whole quotient.
// q must be non-zero: the function divides by it unconditionally.
inline uint32_t divide_round_up(uint32_t x, uint32_t q) {
  const uint32_t quotient = x / q;
  const uint32_t remainder = x % q;
  // A non-zero remainder means one more (partial) group of q is required.
  return remainder == 0 ? quotient : quotient + 1;
}
28
Marat Dukhanfe7acb62020-03-09 19:30:05 -070029inline uint32_t round_up(uint32_t x, uint32_t q) {
30 return q * divide_round_up(x, q);
XNNPACK Teamb455b122019-09-27 18:10:33 -070031}
32
// Returns the smaller of two unsigned 32-bit values.
inline uint32_t min(uint32_t a, uint32_t b) {
  return b < a ? b : a;
}
36
37class Requantization : public benchmark::Fixture {
38 public:
39 inline Requantization()
40 {
41 cpuinfo_initialize();
42 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
43 const size_t l1d_reserve = 1024;
44 n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
45 n_ = n_ / 16 * 16;
46 }
47
48 virtual void SetUp(const benchmark::State&) override
49 {
50 std::random_device random_device;
51 auto rng = std::mt19937(random_device());
52 auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(), rng);
53
54 input_.resize(n());
55 std::generate(input_.begin(), input_.end(), std::ref(s32rng));
56 output_.resize(n());
57 std::fill(output_.begin(), output_.end(), 0xA5);
58 }
59
60 virtual void TearDown(benchmark::State& state) override
61 {
62 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
63 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
64 input_.clear();
65 output_.clear();
66 }
67
68 inline const int32_t* input() const
69 {
70 return input_.data();
71 }
72
73 inline uint8_t* output()
74 {
75 return output_.data();
76 }
77
78 inline size_t n() const
79 {
80 return n_;
81 }
82
83 protected:
84 std::vector<int32_t, AlignedAllocator<int32_t, 32>> input_;
85 std::vector<uint8_t> output_;
86 size_t n_;
87};
88
Marat Dukhanfe7acb62020-03-09 19:30:05 -070089BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070090 for (auto _ : state) {
91 xnn_requantize_precise__scalar_unsigned32(
92 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
93 }
94}
95
Marat Dukhanfe7acb62020-03-09 19:30:05 -070096BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070097 for (auto _ : state) {
98 xnn_requantize_precise__scalar_unsigned64(
99 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
100 }
101}
102
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700103BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700104 for (auto _ : state) {
105 xnn_requantize_precise__scalar_signed64(
106 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
107 }
108}
109
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700110BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700111 for (auto _ : state) {
112 xnn_requantize_fp32__scalar_lrintf(
113 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
114 }
115}
116
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700117BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118 for (auto _ : state) {
119 xnn_requantize_fp32__scalar_magic(
120 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
121 }
122}
123
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700124BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700125 for (auto _ : state) {
126 xnn_requantize_gemmlowp__scalar(
127 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
128 }
129}
130
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700131#if !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
132 BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state) {
133 for (auto _ : state) {
134 xnn_requantize_precise__psimd(
135 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
136 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700137 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700138
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700139 BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state) {
140 for (auto _ : state) {
141 xnn_requantize_fp32__psimd(
142 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
143 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700144 }
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700145#endif // !XNN_ARCH_ASMJS && !XNN_ARCH_WASM
146
XNNPACK Teamb455b122019-09-27 18:10:33 -0700147
// NEON benchmarks; compiled only for ARM and ARM64 targets.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Times xnn_requantize_precise__neon over the fixture's n() elements.
  BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_precise__neon(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_fp32__neon over the fixture's n() elements.
  BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_fp32__neon(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_q31__neon over the fixture's n() elements.
  BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_q31__neon(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_gemmlowp__neon over the fixture's n() elements.
  BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_gemmlowp__neon(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
177
// SSE2/SSSE3/SSE4 benchmarks; compiled only for x86 and x86-64 targets.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Times xnn_requantize_precise__sse2 over the fixture's n() elements.
  BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_precise__sse2(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_precise__ssse3 over the fixture's n() elements.
  BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_precise__ssse3(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_precise__sse4 over the fixture's n() elements.
  BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_precise__sse4(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_fp32__sse2 over the fixture's n() elements.
  BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_fp32__sse2(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_q31__sse2 over the fixture's n() elements.
  BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_q31__sse2(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_q31__ssse3 over the fixture's n() elements.
  BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_q31__ssse3(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_q31__sse4 over the fixture's n() elements.
  BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_q31__sse4(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_gemmlowp__sse2 over the fixture's n() elements.
  BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_gemmlowp__sse2(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_gemmlowp__ssse3 over the fixture's n() elements.
  BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_gemmlowp__ssse3(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }

  // Times xnn_requantize_gemmlowp__sse4 over the fixture's n() elements.
  BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
    // Fixed requantization parameters passed on every timed call.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;
    for (auto _ : state) {
      xnn_requantize_gemmlowp__sse4(
        n(), input(), kScale, kZeroPoint, kQMin, kQMax, output());
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
249
250#ifndef XNNPACK_BENCHMARK_NO_MAIN
251BENCHMARK_MAIN();
252#endif