blob: f0fad8b59faa1800f18c6ce83d84e3c30d2e361e [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
9#include <algorithm>
10#include <cfloat>
11#include <cmath>
12#include <functional>
13#include <random>
14#include <vector>
15
16#include <cpuinfo.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070017
18#include <benchmark/benchmark.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070019#include "bench/utils.h"
20#include <xnnpack/AlignedAllocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070021#include <xnnpack/common.h>
Frank Barchardbb4c18b2019-09-30 11:05:52 -070022#include <xnnpack/requantization-stubs.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023
Marat Dukhanfe7acb62020-03-09 19:30:05 -070024
XNNPACK Teamb455b122019-09-27 18:10:33 -070025class Requantization : public benchmark::Fixture {
26 public:
27 inline Requantization()
28 {
29 cpuinfo_initialize();
30 const size_t l1d_size = cpuinfo_get_l1d_cache(0)->size;
31 const size_t l1d_reserve = 1024;
32 n_ = (l1d_size - l1d_reserve) / (sizeof(int32_t) + sizeof(uint8_t));
33 n_ = n_ / 16 * 16;
34 }
35
Frank Barchardbd3c9aa2021-07-15 16:02:59 -070036 virtual void SetUp(benchmark::State& state) override
XNNPACK Teamb455b122019-09-27 18:10:33 -070037 {
38 std::random_device random_device;
39 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070040 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070041
42 input_.resize(n());
Marat Dukhanecd83112020-08-03 21:50:28 -070043 std::generate(input_.begin(), input_.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070044 output_.resize(n());
45 std::fill(output_.begin(), output_.end(), 0xA5);
Frank Barchardbd3c9aa2021-07-15 16:02:59 -070046
47 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
48 if (cpu_frequency != 0) {
49 state.counters["cpufreq"] = cpu_frequency;
50 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070051 }
52
53 virtual void TearDown(benchmark::State& state) override
54 {
55 state.SetItemsProcessed(uint64_t(state.iterations()) * n());
56 state.SetBytesProcessed(uint64_t(state.iterations()) * n() * (sizeof(int32_t) + sizeof(uint8_t)));
57 input_.clear();
58 output_.clear();
59 }
60
61 inline const int32_t* input() const
62 {
63 return input_.data();
64 }
65
66 inline uint8_t* output()
67 {
68 return output_.data();
69 }
70
71 inline size_t n() const
72 {
73 return n_;
74 }
75
76 protected:
Marat Dukhane13e6392021-07-26 22:22:35 -070077 std::vector<int32_t, AlignedAllocator<int32_t, 64>> input_;
XNNPACK Teamb455b122019-09-27 18:10:33 -070078 std::vector<uint8_t> output_;
79 size_t n_;
80};
81
Marat Dukhan290055c2020-06-09 12:24:29 -070082
Marat Dukhan1dadbf72019-10-01 10:46:20 -070083#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhanfe7acb62020-03-09 19:30:05 -070084 BENCHMARK_F(Requantization, fp32__neon)(benchmark::State& state) {
85 for (auto _ : state) {
Marat Dukhan5b69f8b2020-07-24 15:26:48 -070086 xnn_qu8_requantize_fp32__neon(
Marat Dukhanfe7acb62020-03-09 19:30:05 -070087 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
88 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070089 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070090
Marat Dukhan9976cd82021-05-24 23:15:45 -070091 BENCHMARK_F(Requantization, gemmlowp__neon)(benchmark::State& state) {
Marat Dukhanfe7acb62020-03-09 19:30:05 -070092 for (auto _ : state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -070093 xnn_qu8_requantize_gemmlowp__neon(
Marat Dukhanfe7acb62020-03-09 19:30:05 -070094 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
95 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070096 }
Marat Dukhan9976cd82021-05-24 23:15:45 -070097
Marat Dukhan06716242021-05-26 15:56:39 -070098 BENCHMARK_F(Requantization, rndna__neon)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -070099 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700100 xnn_qu8_requantize_rndna__neon(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700101 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
102 }
103 }
104#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
105
XNNPACK Teamb455b122019-09-27 18:10:33 -0700106
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700107#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan9976cd82021-05-24 23:15:45 -0700108 BENCHMARK_F(Requantization, fp32__sse2)(benchmark::State& state) {
109 for (auto _ : state) {
110 xnn_qu8_requantize_fp32__sse2(
111 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
112 }
113 }
114
115 BENCHMARK_F(Requantization, gemmlowp__sse2)(benchmark::State& state) {
116 for (auto _ : state) {
117 xnn_qu8_requantize_gemmlowp__sse2(
118 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
119 }
120 }
121
122 BENCHMARK_F(Requantization, gemmlowp__ssse3)(benchmark::State& state) {
123 for (auto _ : state) {
124 xnn_qu8_requantize_gemmlowp__ssse3(
125 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
126 }
127 }
128
129 BENCHMARK_F(Requantization, gemmlowp__sse4)(benchmark::State& state) {
130 for (auto _ : state) {
131 xnn_qu8_requantize_gemmlowp__sse4(
132 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
133 }
134 }
135
Marat Dukhan06716242021-05-26 15:56:39 -0700136 BENCHMARK_F(Requantization, rndna__sse2)(benchmark::State& state) {
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700137 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700138 xnn_qu8_requantize_rndna__sse2(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700139 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
140 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700141 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700142
Marat Dukhan06716242021-05-26 15:56:39 -0700143 BENCHMARK_F(Requantization, rndna__ssse3)(benchmark::State& state) {
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700144 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700145 xnn_qu8_requantize_rndna__ssse3(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700146 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
147 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700148 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700149
Marat Dukhan06716242021-05-26 15:56:39 -0700150 BENCHMARK_F(Requantization, rndna__sse4)(benchmark::State& state) {
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700151 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700152 xnn_qu8_requantize_rndna__sse4(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700153 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
154 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700155 }
Marat Dukhan9976cd82021-05-24 23:15:45 -0700156#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700157
Marat Dukhan9976cd82021-05-24 23:15:45 -0700158
Marat Dukhan4c617792021-12-21 15:47:58 -0800159#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan9976cd82021-05-24 23:15:45 -0700160 BENCHMARK_F(Requantization, fp32__wasmsimd)(benchmark::State& state) {
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700161 for (auto _ : state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700162 xnn_qu8_requantize_fp32__wasmsimd(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700163 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
164 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700165 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700166
Marat Dukhan9976cd82021-05-24 23:15:45 -0700167 BENCHMARK_F(Requantization, gemmlowp__wasmsimd)(benchmark::State& state) {
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700168 for (auto _ : state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700169 xnn_qu8_requantize_gemmlowp__wasmsimd(
Marat Dukhanfe7acb62020-03-09 19:30:05 -0700170 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
171 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700172 }
Marat Dukhan4c617792021-12-21 15:47:58 -0800173#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
XNNPACK Teamb455b122019-09-27 18:10:33 -0700174
XNNPACK Teamb455b122019-09-27 18:10:33 -0700175
Marat Dukhan9976cd82021-05-24 23:15:45 -0700176BENCHMARK_F(Requantization, fp32__scalar_lrintf)(benchmark::State& state) {
177 for (auto _ : state) {
178 xnn_qu8_requantize_fp32__scalar_lrintf(
179 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700180 }
Marat Dukhan9976cd82021-05-24 23:15:45 -0700181}
182
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800183BENCHMARK_F(Requantization, fp32__scalar_fmagic)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700184 for (auto _ : state) {
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800185 xnn_qu8_requantize_fp32__scalar_fmagic(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700186 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
187 }
188}
189
190BENCHMARK_F(Requantization, gemmlowp__scalar)(benchmark::State& state) {
191 for (auto _ : state) {
192 xnn_qu8_requantize_gemmlowp__scalar(
193 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
194 }
195}
196
Marat Dukhan06716242021-05-26 15:56:39 -0700197BENCHMARK_F(Requantization, rndna__scalar_signed64)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700198 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700199 xnn_qu8_requantize_rndna__scalar_signed64(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700200 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
201 }
202}
203
Marat Dukhan06716242021-05-26 15:56:39 -0700204BENCHMARK_F(Requantization, rndna__scalar_unsigned32)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700205 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700206 xnn_qu8_requantize_rndna__scalar_unsigned32(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700207 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
208 }
209}
210
Marat Dukhan06716242021-05-26 15:56:39 -0700211BENCHMARK_F(Requantization, rndna__scalar_unsigned64)(benchmark::State& state) {
Marat Dukhan9976cd82021-05-24 23:15:45 -0700212 for (auto _ : state) {
Marat Dukhan06716242021-05-26 15:56:39 -0700213 xnn_qu8_requantize_rndna__scalar_unsigned64(
Marat Dukhan9976cd82021-05-24 23:15:45 -0700214 n(), input(), 0x1.0p-12f /* scale */, 128 /* zero point */, 1 /* qmin */, 254 /* qmax */, output());
215 }
216}
217
XNNPACK Teamb455b122019-09-27 18:10:33 -0700218
219#ifndef XNNPACK_BENCHMARK_NO_MAIN
220BENCHMARK_MAIN();
221#endif