blob: 1008155ca5abd2b7fa5036fd8f2a288a8b52950a [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <cpuinfo.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/requantization-stubs.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023
Marat Dukhanfe7acb62020-03-09 19:30:05 -070024
// Returns ceil(x / q) for unsigned integers.
//
// Computed as quotient plus a remainder-based correction rather than
// (x + q - 1) / q, so it cannot overflow for large x.
// q must be non-zero (division by zero is undefined behavior).
inline uint32_t divide_round_up(uint32_t x, uint32_t q) {
  const uint32_t quotient = x / q;
  return (x % q == 0) ? quotient : quotient + 1;
}
28
// Rounds x up to the nearest multiple of q.
//
// Returns x unchanged when it is already a multiple of q; otherwise adds the
// distance to the next multiple. The result wraps modulo 2^32 if the next
// multiple does not fit in uint32_t.
// q must be non-zero (division by zero is undefined behavior).
inline uint32_t round_up(uint32_t x, uint32_t q) {
  const uint32_t remainder = x % q;
  if (remainder == 0) {
    return x;
  }
  return x + (q - remainder);
}
32
// Returns the smaller of two unsigned 32-bit values.
inline uint32_t min(uint32_t a, uint32_t b) {
  return b < a ? b : a;
}
36
37class Requantization : public benchmark::Fixture {
38 public:
39 inline Requantization()
40 {
41 cpuinfo_initialize();
42 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
43 const size_t l1d_reserve = 1024;
44 n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
45 n_ = n_ / 16 * 16;
46 }
47
48 virtual void SetUp(const benchmark::State&) override
49 {
50 std::random_device random_device;
51 auto rng = std::mt19937(random_device());
52 auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(), rng);
53
54 input_.resize(n());
55 std::generate(input_.begin(), input_.end(), std::ref(s32rng));
56 output_.resize(n());
57 std::fill(output_.begin(), output_.end(), 0xA5);
58 }
59
60 virtual void TearDown(benchmark::State& state) override
61 {
62 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
63 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
64 input_.clear();
65 output_.clear();
66 }
67
68 inline const int32_t* input() const
69 {
70 return input_.data();
71 }
72
73 inline uint8_t* output()
74 {
75 return output_.data();
76 }
77
78 inline size_t n() const
79 {
80 return n_;
81 }
82
83 protected:
84 std::vector<int32_t, AlignedAllocator<int32_t, 32>> input_;
85 std::vector<uint8_t> output_;
86 size_t n_;
87};
88
Marat Dukhanfe7acb62020-03-09 19:30:05 -070089BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070090 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -070091 xnn_qu8_requantize_precise__scalar_unsigned32(
XNNPACK Teamb455b122019-09-27 18:10:33 -070092 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
93 }
94}
95
Marat Dukhanfe7acb62020-03-09 19:30:05 -070096BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070097 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -070098 xnn_qu8_requantize_precise__scalar_unsigned64(
XNNPACK Teamb455b122019-09-27 18:10:33 -070099 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
100 }
101}
102
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700103BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700104 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700105 xnn_qu8_requantize_precise__scalar_signed64(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
107 }
108}
109
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700110BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700111 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700112 xnn_qu8_requantize_fp32__scalar_lrintf(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700113 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
114 }
115}
116
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700117BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700118 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700119 xnn_qu8_requantize_fp32__scalar_magic(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700120 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
121 }
122}
123
Marat Dukhana199d492020-07-24 15:01:25 -0700124#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700125 BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state) {
126 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700127 xnn_qu8_requantize_precise__psimd(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700128 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
129 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700130 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700131
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700132 BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state) {
133 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700134 xnn_qu8_requantize_fp32__psimd(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700135 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
136 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700137 }
Marat Dukhana199d492020-07-24 15:01:25 -0700138#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700139
XNNPACK Teamb455b122019-09-27 18:10:33 -0700140
#if XNN_ARCH_WASMSIMD
  // Times xnn_qu8_requantize_fp32__wasmsimd; one kernel call per iteration.
  BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
    for (auto _ : state) {
      xnn_qu8_requantize_fp32__wasmsimd(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }
#endif  // XNN_ARCH_WASMSIMD
149
150
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Each NEON benchmark is skipped (reported as an error) when cpuinfo says
  // the CPU lacks NEON, instead of executing unsupported instructions.

  // Times xnn_qu8_requantize_precise__neon; one kernel call per iteration.
  BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state) {
    if (!cpuinfo_has_arm_neon()) {
      state.SkipWithError("no NEON extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_precise__neon(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_fp32__neon; one kernel call per iteration.
  BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
    if (!cpuinfo_has_arm_neon()) {
      state.SkipWithError("no NEON extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_fp32__neon(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_q31__neon; one kernel call per iteration.
  BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state) {
    if (!cpuinfo_has_arm_neon()) {
      state.SkipWithError("no NEON extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_q31__neon(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
173
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // SSSE3 and SSE4.1 benchmarks are skipped (reported as an error) when
  // cpuinfo says the CPU lacks the extension, instead of executing
  // unsupported instructions. SSE2 benchmarks run unconditionally.

  // Times xnn_qu8_requantize_precise__sse2; one kernel call per iteration.
  BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state) {
    for (auto _ : state) {
      xnn_qu8_requantize_precise__sse2(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_precise__ssse3; one kernel call per iteration.
  BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state) {
    if (!cpuinfo_has_x86_ssse3()) {
      state.SkipWithError("no SSSE3 extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_precise__ssse3(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_precise__sse4; one kernel call per iteration.
  BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state) {
    if (!cpuinfo_has_x86_sse4_1()) {
      state.SkipWithError("no SSE4.1 extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_precise__sse4(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_fp32__sse2; one kernel call per iteration.
  BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
    for (auto _ : state) {
      xnn_qu8_requantize_fp32__sse2(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_q31__sse2; one kernel call per iteration.
  BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state) {
    for (auto _ : state) {
      xnn_qu8_requantize_q31__sse2(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_q31__ssse3; one kernel call per iteration.
  BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state) {
    if (!cpuinfo_has_x86_ssse3()) {
      state.SkipWithError("no SSSE3 extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_q31__ssse3(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }

  // Times xnn_qu8_requantize_q31__sse4; one kernel call per iteration.
  BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state) {
    if (!cpuinfo_has_x86_sse4_1()) {
      state.SkipWithError("no SSE4.1 extension");
      return;
    }
    for (auto _ : state) {
      xnn_qu8_requantize_q31__sse4(
          n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
224
225#ifndef XNNPACK_BENCHMARK_NO_MAIN
226BENCHMARK_MAIN();
227#endif