// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <cpuinfo.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/requantization-stubs.h>


XNNPACK Teamb455b122019-09-27 18:10:33 -070025class Requantization : public benchmark::Fixture {
26 public:
27 inline Requantization()
28 {
29 cpuinfo_initialize();
30 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
31 const size_t l1d_reserve = 1024;
32 n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
33 n_ = n_ / 16 * 16;
34 }
35
36 virtual void SetUp(const benchmark::State&) override
37 {
38 std::random_device random_device;
39 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070040 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070041
42 input_.resize(n());
Marat Dukhanecd83112020-08-03 21:50:28 -070043 std::generate(input_.begin(), input_.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070044 output_.resize(n());
45 std::fill(output_.begin(), output_.end(), 0xA5);
46 }
47
48 virtual void TearDown(benchmark::State& state) override
49 {
50 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
51 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
52 input_.clear();
53 output_.clear();
54 }
55
56 inline const int32_t* input() const
57 {
58 return input_.data();
59 }
60
61 inline uint8_t* output()
62 {
63 return output_.data();
64 }
65
66 inline size_t n() const
67 {
68 return n_;
69 }
70
71 protected:
72 std::vector<int32_t, AlignedAllocator<int32_t, 32>> input_;
73 std::vector<uint8_t> output_;
74 size_t n_;
75};
76
Marat Dukhanfe7acb62020-03-09 19:30:05 -070077BENCHMARK_F(Requantization, precise__scalar_unsigned32)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070078 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -070079 xnn_qu8_requantize_precise__scalar_unsigned32(
XNNPACK Teamb455b122019-09-27 18:10:33 -070080 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
81 }
82}
83
Marat Dukhanfe7acb62020-03-09 19:30:05 -070084BENCHMARK_F(Requantization, precise__scalar_unsigned64)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -070086 xnn_qu8_requantize_precise__scalar_unsigned64(
XNNPACK Teamb455b122019-09-27 18:10:33 -070087 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
88 }
89}
90
Marat Dukhanfe7acb62020-03-09 19:30:05 -070091BENCHMARK_F(Requantization, precise__scalar_signed64)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070092 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -070093 xnn_qu8_requantize_precise__scalar_signed64(
XNNPACK Teamb455b122019-09-27 18:10:33 -070094 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
95 }
96}
97
Marat Dukhanfe7acb62020-03-09 19:30:05 -070098BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -070099 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700100 xnn_qu8_requantize_fp32__scalar_lrintf(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700101 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
102 }
103}
104
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700105BENCHMARK_F(Requantization, fp32__scalar_magic)(benchmark::State& state) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700107 xnn_qu8_requantize_fp32__scalar_magic(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700108 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
109 }
110}
111
Marat Dukhandfcaa3c2020-08-03 20:20:11 -0700112BENCHMARK_F(Requantization, q31__scalar)(benchmark::State& state) {
113 for (auto _ : state) {
114 xnn_qu8_requantize_q31__scalar(
115 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
116 }
117}
118
Marat Dukhana199d492020-07-24 15:01:25 -0700119#if !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700120 BENCHMARK_F(Requantization, precise__psimd)(benchmark::State& state) {
121 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700122 xnn_qu8_requantize_precise__psimd(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700123 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
124 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700125 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700126
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700127 BENCHMARK_F(Requantization, fp32__psimd)(benchmark::State& state) {
128 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -0700129 xnn_qu8_requantize_fp32__psimd(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700130 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
131 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700132 }
Marat Dukhana199d492020-07-24 15:01:25 -0700133#endif // !XNN_ARCH_WASM && !XNN_COMPILER_MSVC && !XNN_COMPILER_ICC
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700134
XNNPACK Teamb455b122019-09-27 18:10:33 -0700135
// WebAssembly SIMD kernels: compiled only when XNN_ARCH_WASMSIMD is set.
#if XNN_ARCH_WASMSIMD
  // Benchmarks xnn_qu8_requantize_fp32__wasmsimd on the fixture's buffers.
  BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_fp32__wasmsimd(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_q31__wasmsimd on the fixture's buffers.
  BENCHMARK_F(Requantization, q31__wasmsimd)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_q31__wasmsimd(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }
#endif  // XNN_ARCH_WASMSIMD


// NEON kernels: compiled only for ARM and ARM64 targets.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Benchmarks xnn_qu8_requantize_precise__neon on the fixture's buffers.
  BENCHMARK_F(Requantization, precise__neon)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_precise__neon(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_fp32__neon on the fixture's buffers.
  BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_fp32__neon(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_q31__neon on the fixture's buffers.
  BENCHMARK_F(Requantization, q31__neon)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_q31__neon(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

// SSE2 / SSSE3 / SSE4 kernels: compiled only for x86 and x86-64 targets.
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Benchmarks xnn_qu8_requantize_precise__sse2 on the fixture's buffers.
  BENCHMARK_F(Requantization, precise__sse2)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_precise__sse2(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_precise__ssse3 on the fixture's buffers.
  BENCHMARK_F(Requantization, precise__ssse3)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_precise__ssse3(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_precise__sse4 on the fixture's buffers.
  BENCHMARK_F(Requantization, precise__sse4)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_precise__sse4(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_fp32__sse2 on the fixture's buffers.
  BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_fp32__sse2(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_q31__sse2 on the fixture's buffers.
  BENCHMARK_F(Requantization, q31__sse2)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_q31__sse2(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_q31__ssse3 on the fixture's buffers.
  BENCHMARK_F(Requantization, q31__ssse3)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_q31__ssse3(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }

  // Benchmarks xnn_qu8_requantize_q31__sse4 on the fixture's buffers.
  BENCHMARK_F(Requantization, q31__sse4)(benchmark::State& state) {
    // Fixed requantization parameters for this benchmark.
    constexpr float kScale = 0x1.0p-12f;
    constexpr int kZeroPoint = 128;
    constexpr int kQMin = 1;
    constexpr int kQMax = 254;

    for (auto _ : state) {
      xnn_qu8_requantize_q31__sse4(
        n(),
        input(),
        kScale,
        kZeroPoint,
        kQMin,
        kQMax,
        output());
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64

227#ifndef XNNPACK_BENCHMARK_NO_MAIN
228BENCHMARK_MAIN();
229#endif