blob: ef9ca3838894bc6f6d4b40b9248a1ec37e940078 [file] [log] [blame]
Marat Dukhan595e1702020-07-31 10:12:52 -07001// Copyright 2020 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <chrono>
9#include <cmath>
10#include <functional>
11#include <limits>
12#include <mutex>
13#include <random>
14#include <vector>
15
16#include <cpuinfo.h>
17
18#include <benchmark/benchmark.h>
Frank Barchard31328cb2020-10-12 11:55:18 -070019#ifdef BENCHMARK_RUY
20#include "ruy/ruy.h"
21#endif // BENCHMARK_RUY
Marat Dukhan595e1702020-07-31 10:12:52 -070022#include "bench/gemm.h"
23#include "bench/utils.h"
24#include <xnnpack/AlignedAllocator.h>
25#include <xnnpack/common.h>
26#include <xnnpack/gemm.h>
27#include <xnnpack/pack.h>
28#include <xnnpack/params-init.h>
29#include <xnnpack/params.h>
30
Marat Dukhan595e1702020-07-31 10:12:52 -070031static void GEMMBenchmark(benchmark::State& state,
Marat Dukhane3d17bf2021-05-24 22:22:43 -070032 xnn_qs8_gemm_minmax_ukernel_function gemm,
Marat Dukhan1566fee2020-08-02 21:55:41 -070033 size_t mr, size_t nr, size_t kr, size_t sr,
Marat Dukhane3d17bf2021-05-24 22:22:43 -070034 xnn_init_qs8_conv_minmax_params_fn init_params,
Marat Dukhand5694df2021-05-20 17:10:40 -070035 benchmark::utils::IsaCheckFunction isa_check = nullptr,
36 bool extended_weights = false)
Marat Dukhan595e1702020-07-31 10:12:52 -070037{
38 if (!cpuinfo_initialize()) {
39 state.SkipWithError("cpuinfo initialization failed");
40 return;
41 }
Marat Dukhan1566fee2020-08-02 21:55:41 -070042 if (isa_check && !isa_check(state)) {
43 return;
44 }
Marat Dukhan595e1702020-07-31 10:12:52 -070045
46 const size_t mc = state.range(0);
47 const size_t nc = state.range(1);
48 const size_t kc = state.range(2);
49
50 const size_t nc_stride = benchmark::utils::RoundUp(nc, nr);
Marat Dukhanfbd67a72022-01-31 18:03:50 -080051 const size_t kc_stride = benchmark::utils::RoundUp(kc, kr * sr);
Marat Dukhan595e1702020-07-31 10:12:52 -070052
53 std::random_device random_device;
54 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -070055 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
56 auto i8rng = std::bind(
Marat Dukhanb6576052021-07-26 22:13:32 -070057 std::uniform_int_distribution<int32_t>(-std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::max()), std::ref(rng));
Marat Dukhan595e1702020-07-31 10:12:52 -070058
Marat Dukhan91351ef2021-08-04 16:32:28 -070059 std::vector<int8_t> a(mc * kc + XNN_EXTRA_BYTES / sizeof(int8_t));
Marat Dukhanecd83112020-08-03 21:50:28 -070060 std::generate(a.begin(), a.end(), std::ref(i8rng));
Marat Dukhan595e1702020-07-31 10:12:52 -070061 std::vector<int8_t> k(nc * kc);
Marat Dukhanecd83112020-08-03 21:50:28 -070062 std::generate(k.begin(), k.end(), std::ref(i8rng));
Marat Dukhan595e1702020-07-31 10:12:52 -070063 std::vector<int32_t> b(nc);
Marat Dukhanecd83112020-08-03 21:50:28 -070064 std::generate(b.begin(), b.end(), std::ref(i32rng));
Marat Dukhan595e1702020-07-31 10:12:52 -070065
Marat Dukhand5694df2021-05-20 17:10:40 -070066 const size_t w_element_size = extended_weights ? sizeof(int16_t) : sizeof(int8_t);
67 const size_t w_size = nc_stride * sizeof(int32_t) + kc_stride * nc_stride * w_element_size;
Marat Dukhan595e1702020-07-31 10:12:52 -070068 const size_t c_elements = mc * nc;
69 const size_t num_buffers = 1 +
Marat Dukhan683fab32020-08-03 19:42:52 -070070 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(), w_size + c_elements * sizeof(int8_t));
Marat Dukhan595e1702020-07-31 10:12:52 -070071
Marat Dukhane13e6392021-07-26 22:22:35 -070072 std::vector<char, AlignedAllocator<char, 64>> w(w_size * num_buffers);
Marat Dukhan595e1702020-07-31 10:12:52 -070073 std::fill(w.begin(), w.end(), 0);
74 const xnn_qs8_packing_params packing_params = { 127 };
Marat Dukhand5694df2021-05-20 17:10:40 -070075 if (extended_weights) {
Marat Dukhan0b043742021-06-02 18:29:11 -070076 xnn_pack_qs8_gemm_xw_goi_w(1 /* groups */, nc, kc, nr, kr, sr, k.data(), b.data(), w.data(), 0, &packing_params);
Marat Dukhand5694df2021-05-20 17:10:40 -070077 } else {
Marat Dukhan0b043742021-06-02 18:29:11 -070078 xnn_pack_qs8_gemm_goi_w(1 /* groups */, nc, kc, nr, kr, sr, k.data(), b.data(), w.data(), 0, &packing_params);
Marat Dukhand5694df2021-05-20 17:10:40 -070079 }
Marat Dukhan595e1702020-07-31 10:12:52 -070080 std::vector<int8_t> c(c_elements * num_buffers);
81 std::fill(c.begin(), c.end(), 0xA5);
82
Marat Dukhane3d17bf2021-05-24 22:22:43 -070083 union xnn_qs8_conv_minmax_params quantization_params;
Marat Dukhan725f47e2021-05-22 10:06:19 -070084 init_params(&quantization_params, 0.75f, 127, -127, 126);
Marat Dukhan595e1702020-07-31 10:12:52 -070085
86 size_t buffer_index = 0;
87 for (auto _ : state) {
88 // Use circular buffers (exceeding cache size) and prefetch to control cache state:
89 // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
90 // - W is not in cache (for any cache level)
91 // - C is not in cache (for any cache level)
92 state.PauseTiming();
93 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(int8_t));
94 buffer_index = (buffer_index + 1) % num_buffers;
95 state.ResumeTiming();
96
97 for (uint32_t m = 0; m < mc; m += mr) {
98 const uint32_t mb = min(mc - m, mr);
99 for (uint32_t n = 0; n < nc; n += nr) {
100 const uint32_t nb = min(nc - n, nr);
101 gemm(
102 mb, nb, kc * sizeof(int8_t),
103 a.data() + m * kc, kc * sizeof(int8_t),
Marat Dukhand5694df2021-05-20 17:10:40 -0700104 w.data() + w_size * buffer_index + n * (kc_stride * w_element_size + sizeof(int32_t)),
Marat Dukhan595e1702020-07-31 10:12:52 -0700105 c.data() + (mc * buffer_index + m) * nc + n, nc * sizeof(int8_t), nr * sizeof(int8_t),
106 &quantization_params);
107 }
108 }
109 }
110
Marat Dukhand713e8a2020-12-04 14:23:12 -0800111 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
112 if (cpu_frequency != 0) {
113 state.counters["cpufreq"] = cpu_frequency;
114 }
115
Marat Dukhan595e1702020-07-31 10:12:52 -0700116 state.counters["OPS"] = benchmark::Counter(
117 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
118}
119
Frank Barchard31328cb2020-10-12 11:55:18 -0700120#ifdef BENCHMARK_RUY
121static void RuyBenchmark(benchmark::State& state, size_t threads)
122{
123 const size_t mc = state.range(0);
124 const size_t nc = state.range(1);
125 const size_t kc = state.range(2);
126
127 std::random_device random_device;
128 auto rng = std::mt19937(random_device());
129 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
130 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
131
132 const size_t num_buffers = 1 +
133 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
134 nc * (sizeof(int8_t) * (mc + kc) + sizeof(int32_t)));
135
136 std::vector<int8_t> a(mc * kc);
137 std::generate(a.begin(), a.end(), std::ref(u8rng));
138 std::vector<int8_t> k(num_buffers * nc * kc);
139 std::generate(k.begin(), k.end(), std::ref(u8rng));
140 std::vector<int32_t> b(num_buffers * nc);
141 std::generate(b.begin(), b.end(), std::ref(i32rng));
142 std::vector<int8_t> c(num_buffers * nc * mc);
143 std::fill(c.begin(), c.end(), std::nanf(""));
144
145 // Note: context must be static to avoid the cost of re-creating it for each benchmark.
146 static ruy::Context context;
147 context.set_max_num_threads(threads);
148
149 ruy::Matrix<int8_t> ruy_a;
150 ruy::MakeSimpleLayout(nc, kc, ruy::Order::kRowMajor, ruy_a.mutable_layout());
151 ruy_a.set_zero_point(127);
152 ruy::Matrix<int8_t> ruy_b;
153 ruy::MakeSimpleLayout(kc, mc, ruy::Order::kColMajor, ruy_b.mutable_layout());
154 ruy_b.set_data(a.data());
155 ruy_b.set_zero_point(127);
156 ruy::Matrix<int8_t> ruy_c;
157 ruy::MakeSimpleLayout(nc, mc, ruy::Order::kColMajor, ruy_c.mutable_layout());
158 ruy_c.set_zero_point(127);
159
160 ruy::MulParams<int32_t, int8_t> mul_params;
161 mul_params.set_multiplier_fixedpoint(0x40000000);
162
163 // ruy::Context uses deferred initialization, which affects percieved GEMM performance. Initialization happens during
164 // the first GEMM calls, and per Benoit Jacob it takes up to ~250 milliseconds for performance to stabilize.
165 // Thus, on the first benchmark, we compute GEMM for 500 milliseconds (to be safe) without recording performance, and
166 // keep the ruy::Context object initialized (by being static) between subsequent benchmarks.
167 static std::once_flag warmup;
168 std::call_once(warmup, [&](){
169 auto start = std::chrono::steady_clock::now();
170 do {
171 ruy_a.set_data(k.data());
172 ruy_c.set_data(c.data());
173 mul_params.set_bias(b.data());
174
175 ruy::Mul(ruy_a, ruy_b, mul_params, &context, &ruy_c);
176 } while (std::chrono::duration<double>(std::chrono::steady_clock::now() - start).count() < 0.5);
177 });
178
179 size_t buffer_index = 0;
180 for (auto _ : state) {
181 // Use circular buffers (exceeding cache size) and prefetch to control cache state:
182 // - A is always in L1 cache (if fits, otherwise L2, L3, etc)
183 // - K is not in cache (for any cache level)
184 // - B is not in cache (for any cache level)
185 // - C is not in cache (for any cache level)
186 state.PauseTiming();
187 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(int8_t));
188 buffer_index = (buffer_index + 1) % num_buffers;
189 state.ResumeTiming();
190
191 ruy_a.set_data(k.data() + buffer_index * nc * kc);
192 ruy_c.set_data(c.data() + buffer_index * mc * nc);
193 mul_params.set_bias(b.data() + buffer_index * nc);
194
195 ruy::Mul(ruy_a, ruy_b, mul_params, &context, &ruy_c);
196 }
197
Marat Dukhand713e8a2020-12-04 14:23:12 -0800198 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
199 if (cpu_frequency != 0) {
200 state.counters["cpufreq"] = cpu_frequency;
201 }
202
Frank Barchard31328cb2020-10-12 11:55:18 -0700203 state.counters["OPS"] = benchmark::Counter(
204 uint64_t(state.iterations()) * 2 * mc * nc * kc, benchmark::Counter::kIsRate);
205}
206
207static void ruy_st(benchmark::State& state, const char* net)
208{
209 RuyBenchmark(state, 1);
210}
211#endif // BENCHMARK_RUY
Marat Dukhan595e1702020-07-31 10:12:52 -0700212
Zhi An Ng1bef0f22022-01-07 16:13:31 -0800213#if XNN_ARCH_ARM && XNN_PLATFORM_JIT && XNN_ENABLE_JIT
214 static void GEMMBenchmark(benchmark::State& state,
Zhi An Ng83844ae2022-01-14 09:52:25 -0800215 xnn_jit_gemm_code_generator_function generator,
Zhi An Ng1bef0f22022-01-07 16:13:31 -0800216 size_t mr, size_t nr, size_t kr, size_t sr,
217 xnn_init_qs8_conv_minmax_params_fn init_params,
218 benchmark::utils::IsaCheckFunction isa_check = nullptr)
219 {
220 xnn_code_buffer code_buffer;
221 xnn_allocate_code_memory(&code_buffer, XNN_DEFAULT_CODE_BUFFER_SIZE);
Zhi An Ng83844ae2022-01-14 09:52:25 -0800222 const size_t nc = state.range(1);
223 const size_t kc = state.range(2);
224 generator(&code_buffer, nc, kc, nullptr);
Zhi An Ng1bef0f22022-01-07 16:13:31 -0800225 GEMMBenchmark(
226 state,
227 reinterpret_cast<xnn_qs8_gemm_minmax_ukernel_function>(code_buffer.code),
Frank Barchardca510902022-02-02 23:21:53 -0800228 mr, nr, kr, sr, init_params, isa_check);
Zhi An Ng1bef0f22022-01-07 16:13:31 -0800229 xnn_release_code_memory(&code_buffer);
230 }
231
232 static void jit_qs8_gemm_4x8c4__aarch32_neondot_ld64(benchmark::State& state, const char* net) {
233 GEMMBenchmark(state, xnn_generate_qs8_gemm_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, 4, 8, 4, 1,
234 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
235 }
236 static void jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, const char* net) {
237 GEMMBenchmark(state, xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, 4, 8, 1, 1,
238 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
239 }
240 static void jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* net) {
241 GEMMBenchmark(state, xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, 4, 8, 1, 1,
242 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
243 }
244 BENCHMARK_GEMM(jit_qs8_gemm_4x8c4__aarch32_neondot_ld64)
245 BENCHMARK_GEMM(jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64)
246 BENCHMARK_GEMM(jit_qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64)
247#endif // XNN_ARCH_ARM && XNN_PLATFORM_JIT && XNN_ENABLE_JIT
248
Frank Barchardda7b2e22021-12-13 23:50:53 -0800249#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchard9f3f4202021-12-16 18:13:51 -0800250 static void qs8_gemm_4x8c4__aarch32_neondot_ld64(benchmark::State& state, const char* net) {
251 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64, 4, 8, 4, 1,
252 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
253 }
Frank Barchard0f294ad2022-01-24 10:48:38 -0800254 static void qs8_gemm_4x8c4__aarch32_neondot_cortex_a55(benchmark::State& state, const char* net) {
255 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, 4, 8, 4, 1,
256 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
257 }
Frank Barchard6cb0fd02022-02-02 23:36:02 -0800258 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* net) {
259 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53, 4, 8, 1, 1,
260 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
261 }
262 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const char* net) {
263 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, 4, 8, 1, 1,
264 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
265 }
266 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, const char* net) {
267 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7, 4, 8, 1, 1,
268 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
269 }
270 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, const char* net) {
271 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7, 4, 8, 1, 1,
272 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
273 }
Frank Barchardda7b2e22021-12-13 23:50:53 -0800274 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, const char* net) {
275 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, 4, 8, 1, 1,
276 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
277 }
278 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* net) {
279 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, 4, 8, 1, 1,
280 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
281 }
Frank Barchard9f3f4202021-12-16 18:13:51 -0800282 BENCHMARK_GEMM(qs8_gemm_4x8c4__aarch32_neondot_ld64)
Frank Barchard0f294ad2022-01-24 10:48:38 -0800283 BENCHMARK_GEMM(qs8_gemm_4x8c4__aarch32_neondot_cortex_a55)
Frank Barchard6cb0fd02022-02-02 23:36:02 -0800284 BENCHMARK_GEMM(qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53)
285 BENCHMARK_GEMM(qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53)
286 BENCHMARK_GEMM(qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7)
287 BENCHMARK_GEMM(qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800288 BENCHMARK_GEMM(qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64)
289 BENCHMARK_GEMM(qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64)
290#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
291
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700292#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard889ed102021-08-20 15:01:29 -0700293 static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, const char* net) {
294 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, 4, 16, 4, 1,
295 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700296 }
Frank Barchard889ed102021-08-20 15:01:29 -0700297 static void qs8_gemm_1x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
298 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld32, 1, 16, 4, 1,
299 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700300 }
Frank Barchard889ed102021-08-20 15:01:29 -0700301 static void qs8_gemm_1x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
302 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64, 1, 16, 4, 1,
303 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700304 }
Frank Barchard889ed102021-08-20 15:01:29 -0700305 static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, const char* net) {
306 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32, 4, 16, 4, 1,
307 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700308 }
Frank Barchard889ed102021-08-20 15:01:29 -0700309 static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, const char* net) {
310 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64, 4, 16, 4, 1,
311 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700312 }
Frank Barchard889ed102021-08-20 15:01:29 -0700313 static void qs8_gemm_4x16c4__aarch64_neondot_ld128(benchmark::State& state, const char* net) {
314 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128, 4, 16, 4, 1,
315 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700316 }
Frank Barchard914f57b2021-12-13 12:31:42 -0800317 static void qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64(benchmark::State& state, const char* net) {
318 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, 4, 8, 1, 1,
319 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
320 }
321 static void qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* net) {
322 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, 4, 8, 1, 1,
323 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
324 }
Frank Barchard889ed102021-08-20 15:01:29 -0700325 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, const char* net) {
326 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, 4, 16, 1, 1,
327 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700328 }
Frank Barchard889ed102021-08-20 15:01:29 -0700329 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, const char* net) {
330 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, 4, 16, 1, 1,
331 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700332 }
Frank Barchard5cffb642021-11-22 13:59:43 -0800333 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64(benchmark::State& state, const char* net) {
334 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, 4, 16, 1, 1,
335 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
336 }
337 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, const char* net) {
338 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, 4, 16, 1, 1,
339 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
340 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800341 static void qs8_gemm_1x8c8__aarch64_neon_mlal_prfm(benchmark::State& state, const char* net) {
342 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm, 1, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700343 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700344 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800345 static void qs8_gemm_1x8c8__aarch64_neon_mlal(benchmark::State& state, const char* net) {
346 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal, 1, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700347 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700348 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800349 static void qs8_gemm_1x8c8__aarch64_neon_mlal_cortex_a53(benchmark::State& state, const char* net) {
350 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, 1, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700351 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700352 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800353 static void qs8_gemm_1x8c8__aarch64_neon_mlal_prfm_cortex_a53(benchmark::State& state, const char* net) {
354 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53, 1, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700355 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700356 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800357 static void qs8_gemm_2x8c8__aarch64_neon_mull(benchmark::State& state, const char* net) {
358 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, 2, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700359 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700360 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800361 static void qs8_gemm_2x8c8__aarch64_neon_mlal(benchmark::State& state, const char* net) {
362 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal, 2, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700363 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700364 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800365 static void qs8_gemm_2x8c8__aarch64_neon_mlal_prfm(benchmark::State& state, const char* net) {
366 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, 2, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700367 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700368 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800369 static void qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53(benchmark::State& state, const char* net) {
370 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53, 2, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700371 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700372 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800373 static void qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53(benchmark::State& state, const char* net) {
374 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53, 2, 8, 8, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700375 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700376 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800377 static void qs8_gemm_2x8c16__aarch64_neon_mlal(benchmark::State& state, const char* net) {
378 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__aarch64_neon_mlal, 2, 8, 16, 1,
Frank Barchard889ed102021-08-20 15:01:29 -0700379 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700380 }
381
Frank Barchard889ed102021-08-20 15:01:29 -0700382 BENCHMARK_GEMM(qs8_gemm_1x16c4__aarch64_neondot_ld32)
383 BENCHMARK_GEMM(qs8_gemm_1x16c4__aarch64_neondot_ld64)
384 BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_ld32)
385 BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_ld64)
386 BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_ld128)
387 BENCHMARK_GEMM(qs8_gemm_4x16c4__aarch64_neondot_cortex_a55)
Frank Barchard914f57b2021-12-13 12:31:42 -0800388 BENCHMARK_GEMM(qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64)
389 BENCHMARK_GEMM(qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64)
Frank Barchard889ed102021-08-20 15:01:29 -0700390 BENCHMARK_GEMM(qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53)
391 BENCHMARK_GEMM(qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
Frank Barchard5cffb642021-11-22 13:59:43 -0800392 BENCHMARK_GEMM(qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64)
393 BENCHMARK_GEMM(qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64)
Frank Barcharde22685a2021-11-12 11:36:58 -0800394 BENCHMARK_GEMM(qs8_gemm_1x8c8__aarch64_neon_mlal_prfm)
395 BENCHMARK_GEMM(qs8_gemm_1x8c8__aarch64_neon_mlal)
396 BENCHMARK_GEMM(qs8_gemm_1x8c8__aarch64_neon_mlal_prfm_cortex_a53)
397 BENCHMARK_GEMM(qs8_gemm_1x8c8__aarch64_neon_mlal_cortex_a53)
398 BENCHMARK_GEMM(qs8_gemm_2x8c8__aarch64_neon_mull)
399 BENCHMARK_GEMM(qs8_gemm_2x8c8__aarch64_neon_mlal)
400 BENCHMARK_GEMM(qs8_gemm_2x8c8__aarch64_neon_mlal_prfm)
401 BENCHMARK_GEMM(qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53)
402 BENCHMARK_GEMM(qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53)
403 BENCHMARK_GEMM(qs8_gemm_2x8c16__aarch64_neon_mlal)
Frank Barchard4c3e5a92021-08-16 19:17:39 -0700404#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
405
406
Marat Dukhanab671422020-08-05 16:39:04 -0700407#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard27bf92c2021-11-24 15:47:52 -0800408 static void qs8_gemm_1x8__neon_mlal_lane(benchmark::State& state, const char* net) {
409 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, 1, 8, 1, 1,
410 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barcharda93765f2021-01-27 16:25:34 -0800411 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800412 static void qs8_gemm_2x8__neon_mlal_lane(benchmark::State& state, const char* net) {
413 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, 2, 8, 1, 1,
414 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Marat Dukhanab671422020-08-05 16:39:04 -0700415 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800416 static void qs8_gemm_3x8__neon_mlal_lane(benchmark::State& state, const char* net) {
417 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, 3, 8, 1, 1,
418 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardec0bf142021-01-15 10:14:05 -0800419 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800420 static void qs8_gemm_4x8__neon_mlal_lane(benchmark::State& state, const char* net) {
421 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, 4, 8, 1, 1,
422 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardcfbc8492021-01-12 15:43:19 -0800423 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800424 static void qs8_gemm_6x8__neon_mlal_lane(benchmark::State& state, const char* net) {
425 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, 6, 8, 1, 1,
426 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard55497352021-04-30 11:47:56 -0700427 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800428 static void qs8_gemm_1x16__neon_mlal_lane(benchmark::State& state, const char* net) {
429 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, 1, 16, 1, 1,
430 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barcharda93765f2021-01-27 16:25:34 -0800431 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800432 static void qs8_gemm_2x16__neon_mlal_lane(benchmark::State& state, const char* net) {
433 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane, 2, 16, 1, 1,
434 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Marat Dukhanab671422020-08-05 16:39:04 -0700435 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800436 static void qs8_gemm_3x16__neon_mlal_lane(benchmark::State& state, const char* net) {
437 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, 3, 16, 1, 1,
438 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardec0bf142021-01-15 10:14:05 -0800439 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800440 static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, const char* net) {
441 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, 4, 16, 1, 1,
442 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardcfbc8492021-01-12 15:43:19 -0800443 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800444 static void qs8_gemm_6x16__neon_mlal_lane(benchmark::State& state, const char* net) {
445 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, 6, 16, 1, 1,
446 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard55497352021-04-30 11:47:56 -0700447 }
Frank Barchardf82ea822021-12-01 15:43:37 -0800448 static void qs8_gemm_1x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
449 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm, 1, 8, 1, 1,
450 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
451 }
452 static void qs8_gemm_2x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
453 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane_prfm, 2, 8, 1, 1,
454 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
455 }
456 static void qs8_gemm_3x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
457 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm, 3, 8, 1, 1,
458 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
459 }
460 static void qs8_gemm_4x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
461 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane_prfm, 4, 8, 1, 1,
462 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
463 }
464 static void qs8_gemm_6x8__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
465 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm, 6, 8, 1, 1,
466 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
467 }
468 static void qs8_gemm_1x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
469 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, 1, 16, 1, 1,
470 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
471 }
472 static void qs8_gemm_2x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
473 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm, 2, 16, 1, 1,
474 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
475 }
476 static void qs8_gemm_3x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
477 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm, 3, 16, 1, 1,
478 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
479 }
480 static void qs8_gemm_4x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
481 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm, 4, 16, 1, 1,
482 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
483 }
484 static void qs8_gemm_6x16__neon_mlal_lane_prfm(benchmark::State& state, const char* net) {
485 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, 6, 16, 1, 1,
486 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
487 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800488 static void qs8_gemm_1x8c2__neon_mull_dup(benchmark::State& state, const char* net) {
489 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, 1, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700490 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barcharda93765f2021-01-27 16:25:34 -0800491 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800492 static void qs8_gemm_2x8c2__neon_mull_dup(benchmark::State& state, const char* net) {
493 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup, 2, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700494 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800495 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800496 static void qs8_gemm_3x8c2__neon_mull_dup(benchmark::State& state, const char* net) {
497 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, 3, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700498 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard2302ffd2021-01-22 14:08:02 -0800499 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800500 static void qs8_gemm_4x8c2__neon_mull_dup(benchmark::State& state, const char* net) {
501 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, 4, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700502 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard2302ffd2021-01-22 14:08:02 -0800503 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800504 static void qs8_gemm_1x16c2__neon_mull_dup(benchmark::State& state, const char* net) {
505 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, 1, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700506 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barcharda93765f2021-01-27 16:25:34 -0800507 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800508 static void qs8_gemm_2x16c2__neon_mull_dup(benchmark::State& state, const char* net) {
509 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup, 2, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700510 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard2302ffd2021-01-22 14:08:02 -0800511 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800512 static void qs8_gemm_3x16c2__neon_mull_dup(benchmark::State& state, const char* net) {
513 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, 3, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700514 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard2302ffd2021-01-22 14:08:02 -0800515 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800516 static void qs8_gemm_4x16c2__neon_mull_dup(benchmark::State& state, const char* net) {
517 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, 4, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700518 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard2302ffd2021-01-22 14:08:02 -0800519 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800520 static void qs8_gemm_1x8c2__neon_mlal_dup(benchmark::State& state, const char* net) {
521 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup, 1, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700522 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800523 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800524 static void qs8_gemm_2x8c2__neon_mlal_dup(benchmark::State& state, const char* net) {
525 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, 2, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700526 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800527 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800528 static void qs8_gemm_3x8c2__neon_mlal_dup(benchmark::State& state, const char* net) {
529 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, 3, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700530 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800531 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800532 static void qs8_gemm_4x8c2__neon_mlal_dup(benchmark::State& state, const char* net) {
533 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, 4, 8, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700534 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800535 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800536 static void qs8_gemm_1x16c2__neon_mlal_dup(benchmark::State& state, const char* net) {
537 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, 1, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700538 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800539 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800540 static void qs8_gemm_2x16c2__neon_mlal_dup(benchmark::State& state, const char* net) {
541 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, 2, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700542 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800543 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800544 static void qs8_gemm_3x16c2__neon_mlal_dup(benchmark::State& state, const char* net) {
545 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup, 3, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700546 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800547 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800548 static void qs8_gemm_4x16c2__neon_mlal_dup(benchmark::State& state, const char* net) {
549 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup, 4, 16, 2, 1,
Frank Barchard1d412472021-10-25 17:27:21 -0700550 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard8247e212021-02-03 18:12:33 -0800551 }
Frank Barchard15eec022021-11-17 13:26:20 -0800552 static void qs8_gemm_1x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
553 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, 1, 8, 2, 1,
554 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
555 }
556 static void qs8_gemm_2x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
557 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, 2, 8, 2, 1,
558 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
559 }
560 static void qs8_gemm_3x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
561 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r, 3, 8, 2, 1,
562 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
563 }
564 static void qs8_gemm_4x8c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
565 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, 4, 8, 2, 1,
566 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
567 }
568 static void qs8_gemm_1x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
569 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, 1, 16, 2, 1,
570 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
571 }
572 static void qs8_gemm_2x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
573 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r, 2, 16, 2, 1,
574 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
575 }
576 static void qs8_gemm_3x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
577 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r, 3, 16, 2, 1,
578 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
579 }
580 static void qs8_gemm_4x16c2__neon_mull_ld1r(benchmark::State& state, const char* net) {
581 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r, 4, 16, 2, 1,
582 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
583 }
584 static void qs8_gemm_1x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
585 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r, 1, 8, 2, 1,
586 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
587 }
588 static void qs8_gemm_2x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
589 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, 2, 8, 2, 1,
590 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
591 }
592 static void qs8_gemm_3x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
593 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r, 3, 8, 2, 1,
594 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
595 }
596 static void qs8_gemm_4x8c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
597 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r, 4, 8, 2, 1,
598 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
599 }
600 static void qs8_gemm_1x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
601 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r, 1, 16, 2, 1,
602 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
603 }
604 static void qs8_gemm_2x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
605 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r, 2, 16, 2, 1,
606 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
607 }
608 static void qs8_gemm_3x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
609 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r, 3, 16, 2, 1,
610 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
611 }
612 static void qs8_gemm_4x16c2__neon_mlal_ld1r(benchmark::State& state, const char* net) {
613 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, 4, 16, 2, 1,
614 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
615 }
616 static void qs8_gemm_1x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
617 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, 1, 8, 2, 1,
618 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
619 }
620 static void qs8_gemm_2x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
621 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, 2, 8, 2, 1,
622 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
623 }
624 static void qs8_gemm_3x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
625 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r, 3, 8, 2, 1,
626 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
627 }
628 static void qs8_gemm_4x8c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
629 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, 4, 8, 2, 1,
630 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
631 }
632 static void qs8_gemm_1x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
633 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r, 1, 16, 2, 1,
634 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
635 }
636 static void qs8_gemm_2x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
637 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r, 2, 16, 2, 1,
638 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
639 }
640 static void qs8_gemm_3x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
641 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, 3, 16, 2, 1,
642 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
643 }
644 static void qs8_gemm_4x16c2__neon_mull_ld2r(benchmark::State& state, const char* net) {
645 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r, 4, 16, 2, 1,
646 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
647 }
648 static void qs8_gemm_1x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
649 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, 1, 8, 2, 1,
650 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
651 }
652 static void qs8_gemm_2x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
653 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r, 2, 8, 2, 1,
654 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
655 }
656 static void qs8_gemm_3x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
657 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r, 3, 8, 2, 1,
658 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
659 }
660 static void qs8_gemm_4x8c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
661 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r, 4, 8, 2, 1,
662 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
663 }
664 static void qs8_gemm_1x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
665 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r, 1, 16, 2, 1,
666 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
667 }
668 static void qs8_gemm_2x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
669 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r, 2, 16, 2, 1,
670 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
671 }
672 static void qs8_gemm_3x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
673 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r, 3, 16, 2, 1,
674 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
675 }
676 static void qs8_gemm_4x16c2__neon_mlal_ld2r(benchmark::State& state, const char* net) {
677 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r, 4, 16, 2, 1,
678 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
679 }
Frank Barchard42f5c502021-11-16 10:04:21 -0800680 static void qs8_gemm_1x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
681 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r, 1, 8, 2, 1,
682 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
683 }
684 static void qs8_gemm_2x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
685 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r, 2, 8, 2, 1,
686 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
687 }
688 static void qs8_gemm_3x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
689 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r, 3, 8, 2, 1,
690 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
691 }
692 static void qs8_gemm_4x8c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
693 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r, 4, 8, 2, 1,
694 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
695 }
696 static void qs8_gemm_1x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
697 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, 1, 16, 2, 1,
698 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
699 }
700 static void qs8_gemm_2x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
701 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, 2, 16, 2, 1,
702 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
703 }
704 static void qs8_gemm_3x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
705 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, 3, 16, 2, 1,
706 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
707 }
708 static void qs8_gemm_4x16c2__neon_mull_ld4r(benchmark::State& state, const char* net) {
709 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, 4, 16, 2, 1,
710 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
711 }
712 static void qs8_gemm_1x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
713 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r, 1, 8, 2, 1,
714 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
715 }
716 static void qs8_gemm_2x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
717 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r, 2, 8, 2, 1,
718 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
719 }
720 static void qs8_gemm_3x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
721 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r, 3, 8, 2, 1,
722 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
723 }
724 static void qs8_gemm_4x8c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
725 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r, 4, 8, 2, 1,
726 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
727 }
728 static void qs8_gemm_1x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
729 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, 1, 16, 2, 1,
730 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
731 }
732 static void qs8_gemm_2x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
733 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, 2, 16, 2, 1,
734 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
735 }
736 static void qs8_gemm_3x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
737 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, 3, 16, 2, 1,
738 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
739 }
740 static void qs8_gemm_4x16c2__neon_mlal_ld4r(benchmark::State& state, const char* net) {
741 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, 4, 16, 2, 1,
742 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
743 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800744 static void qs8_gemm_1x8c2s4__neon_mull(benchmark::State& state, const char* net) {
745 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, 1, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800746 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
747 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800748 static void qs8_gemm_2x8c2s4__neon_mull(benchmark::State& state, const char* net) {
749 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull, 2, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800750 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
751 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800752 static void qs8_gemm_3x8c2s4__neon_mull(benchmark::State& state, const char* net) {
753 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull, 3, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800754 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
755 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800756 static void qs8_gemm_4x8c2s4__neon_mull(benchmark::State& state, const char* net) {
757 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, 4, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800758 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
759 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800760 static void qs8_gemm_1x16c2s4__neon_mull(benchmark::State& state, const char* net) {
761 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull, 1, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800762 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
763 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800764 static void qs8_gemm_2x16c2s4__neon_mull(benchmark::State& state, const char* net) {
765 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull, 2, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800766 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
767 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800768 static void qs8_gemm_3x16c2s4__neon_mull(benchmark::State& state, const char* net) {
769 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, 3, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800770 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
771 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800772 static void qs8_gemm_4x16c2s4__neon_mull(benchmark::State& state, const char* net) {
773 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull, 4, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800774 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
775 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800776 static void qs8_gemm_1x8c2s4__neon_mlal(benchmark::State& state, const char* net) {
777 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal, 1, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800778 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
779 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800780 static void qs8_gemm_2x8c2s4__neon_mlal(benchmark::State& state, const char* net) {
781 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal, 2, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800782 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
783 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800784 static void qs8_gemm_3x8c2s4__neon_mlal(benchmark::State& state, const char* net) {
785 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, 3, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800786 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
787 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800788 static void qs8_gemm_4x8c2s4__neon_mlal(benchmark::State& state, const char* net) {
789 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, 4, 8, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800790 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
791 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800792 static void qs8_gemm_1x16c2s4__neon_mlal(benchmark::State& state, const char* net) {
793 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal, 1, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800794 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
795 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800796 static void qs8_gemm_2x16c2s4__neon_mlal(benchmark::State& state, const char* net) {
797 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, 2, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800798 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
799 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800800 static void qs8_gemm_3x16c2s4__neon_mlal(benchmark::State& state, const char* net) {
801 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, 3, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800802 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
803 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800804 static void qs8_gemm_4x16c2s4__neon_mlal(benchmark::State& state, const char* net) {
805 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal, 4, 16, 2, 4,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800806 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
807 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800808 static void qs8_gemm_1x8c4__neon_mull_dup(benchmark::State& state, const char* net) {
809 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, 1, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700810 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
811 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800812 static void qs8_gemm_2x8c4__neon_mull_dup(benchmark::State& state, const char* net) {
813 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup, 2, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700814 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
815 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800816 static void qs8_gemm_3x8c4__neon_mull_dup(benchmark::State& state, const char* net) {
817 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, 3, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700818 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
819 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800820 static void qs8_gemm_4x8c4__neon_mull_dup(benchmark::State& state, const char* net) {
821 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup, 4, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700822 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
823 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800824 static void qs8_gemm_1x16c4__neon_mull_dup(benchmark::State& state, const char* net) {
825 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, 1, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700826 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
827 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800828 static void qs8_gemm_2x16c4__neon_mull_dup(benchmark::State& state, const char* net) {
829 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup, 2, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700830 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
831 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800832 static void qs8_gemm_3x16c4__neon_mull_dup(benchmark::State& state, const char* net) {
833 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup, 3, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700834 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
835 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800836 static void qs8_gemm_4x16c4__neon_mull_dup(benchmark::State& state, const char* net) {
837 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup, 4, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700838 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
839 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800840 static void qs8_gemm_1x8c4__neon_mlal_dup(benchmark::State& state, const char* net) {
841 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup, 1, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700842 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
843 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800844 static void qs8_gemm_2x8c4__neon_mlal_dup(benchmark::State& state, const char* net) {
845 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup, 2, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700846 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
847 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800848 static void qs8_gemm_3x8c4__neon_mlal_dup(benchmark::State& state, const char* net) {
849 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, 3, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700850 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
851 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800852 static void qs8_gemm_4x8c4__neon_mlal_dup(benchmark::State& state, const char* net) {
853 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, 4, 8, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700854 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
855 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800856 static void qs8_gemm_1x16c4__neon_mlal_dup(benchmark::State& state, const char* net) {
857 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup, 1, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700858 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
859 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800860 static void qs8_gemm_2x16c4__neon_mlal_dup(benchmark::State& state, const char* net) {
861 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, 2, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700862 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
863 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800864 static void qs8_gemm_3x16c4__neon_mlal_dup(benchmark::State& state, const char* net) {
865 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup, 3, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700866 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
867 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800868 static void qs8_gemm_4x16c4__neon_mlal_dup(benchmark::State& state, const char* net) {
869 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup, 4, 16, 4, 1,
Frank Barchard51320102021-11-05 16:01:30 -0700870 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
871 }
Frank Barchard64ab1b72021-11-22 10:57:40 -0800872 static void qs8_gemm_1x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
873 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r, 1, 8, 4, 1,
874 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
875 }
876 static void qs8_gemm_2x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
877 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, 2, 8, 4, 1,
878 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
879 }
880 static void qs8_gemm_3x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
881 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r, 3, 8, 4, 1,
882 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
883 }
884 static void qs8_gemm_4x8c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
885 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, 4, 8, 4, 1,
886 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
887 }
888 static void qs8_gemm_1x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
889 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r, 1, 16, 4, 1,
890 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
891 }
892 static void qs8_gemm_2x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
893 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r, 2, 16, 4, 1,
894 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
895 }
896 static void qs8_gemm_3x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
897 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, 3, 16, 4, 1,
898 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
899 }
900 static void qs8_gemm_4x16c4__neon_mull_ld1r(benchmark::State& state, const char* net) {
901 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r, 4, 16, 4, 1,
902 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
903 }
904 static void qs8_gemm_1x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
905 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r, 1, 8, 4, 1,
906 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
907 }
908 static void qs8_gemm_2x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
909 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, 2, 8, 4, 1,
910 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
911 }
912 static void qs8_gemm_3x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
913 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r, 3, 8, 4, 1,
914 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
915 }
916 static void qs8_gemm_4x8c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
917 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r, 4, 8, 4, 1,
918 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
919 }
920 static void qs8_gemm_1x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
921 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r, 1, 16, 4, 1,
922 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
923 }
924 static void qs8_gemm_2x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
925 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r, 2, 16, 4, 1,
926 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
927 }
928 static void qs8_gemm_3x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
929 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r, 3, 16, 4, 1,
930 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
931 }
932 static void qs8_gemm_4x16c4__neon_mlal_ld1r(benchmark::State& state, const char* net) {
933 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r, 4, 16, 4, 1,
934 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
935 }
936 static void qs8_gemm_1x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
937 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, 1, 8, 4, 1,
938 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
939 }
940 static void qs8_gemm_2x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
941 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r, 2, 8, 4, 1,
942 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
943 }
944 static void qs8_gemm_3x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
945 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, 3, 8, 4, 1,
946 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
947 }
948 static void qs8_gemm_4x8c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
949 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r, 4, 8, 4, 1,
950 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
951 }
952 static void qs8_gemm_1x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
953 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r, 1, 16, 4, 1,
954 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
955 }
956 static void qs8_gemm_2x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
957 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r, 2, 16, 4, 1,
958 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
959 }
960 static void qs8_gemm_3x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
961 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r, 3, 16, 4, 1,
962 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
963 }
964 static void qs8_gemm_4x16c4__neon_mull_ld2r(benchmark::State& state, const char* net) {
965 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r, 4, 16, 4, 1,
966 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
967 }
968 static void qs8_gemm_1x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
969 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, 1, 8, 4, 1,
970 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
971 }
972 static void qs8_gemm_2x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
973 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r, 2, 8, 4, 1,
974 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
975 }
976 static void qs8_gemm_3x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
977 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r, 3, 8, 4, 1,
978 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
979 }
980 static void qs8_gemm_4x8c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
981 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r, 4, 8, 4, 1,
982 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
983 }
984 static void qs8_gemm_1x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
985 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r, 1, 16, 4, 1,
986 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
987 }
988 static void qs8_gemm_2x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
989 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, 2, 16, 4, 1,
990 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
991 }
992 static void qs8_gemm_3x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
993 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r, 3, 16, 4, 1,
994 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
995 }
996 static void qs8_gemm_4x16c4__neon_mlal_ld2r(benchmark::State& state, const char* net) {
997 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r, 4, 16, 4, 1,
998 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
999 }
Marat Dukhan89991902021-12-06 00:54:36 -08001000 static void qs8_gemm_1x8c8__neon_mull(benchmark::State& state, const char* net) {
1001 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mull, 1, 8, 8, 1,
1002 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001003 }
Marat Dukhan89991902021-12-06 00:54:36 -08001004 static void qs8_gemm_2x8c8__neon_mull(benchmark::State& state, const char* net) {
1005 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, 2, 8, 8, 1,
1006 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001007 }
Marat Dukhan89991902021-12-06 00:54:36 -08001008 static void qs8_gemm_3x8c8__neon_mull(benchmark::State& state, const char* net) {
1009 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, 3, 8, 8, 1,
1010 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001011 }
Marat Dukhan89991902021-12-06 00:54:36 -08001012 static void qs8_gemm_4x8c8__neon_mull(benchmark::State& state, const char* net) {
1013 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, 4, 8, 8, 1,
1014 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001015 }
Marat Dukhan89991902021-12-06 00:54:36 -08001016 static void qs8_gemm_1x16c8__neon_mull(benchmark::State& state, const char* net) {
1017 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull, 1, 16, 8, 1,
1018 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001019 }
Marat Dukhan89991902021-12-06 00:54:36 -08001020 static void qs8_gemm_2x16c8__neon_mull(benchmark::State& state, const char* net) {
1021 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull, 2, 16, 8, 1,
1022 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001023 }
Marat Dukhan89991902021-12-06 00:54:36 -08001024 static void qs8_gemm_3x16c8__neon_mull(benchmark::State& state, const char* net) {
1025 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull, 3, 16, 8, 1,
1026 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001027 }
Marat Dukhan89991902021-12-06 00:54:36 -08001028 static void qs8_gemm_4x16c8__neon_mull(benchmark::State& state, const char* net) {
1029 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull, 4, 16, 8, 1,
1030 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard58990122021-01-29 16:09:56 -08001031 }
Marat Dukhan89991902021-12-06 00:54:36 -08001032 static void qs8_gemm_1x8c8__neon_mlal(benchmark::State& state, const char* net) {
1033 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, 1, 8, 8, 1,
1034 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001035 }
Marat Dukhan89991902021-12-06 00:54:36 -08001036 static void qs8_gemm_2x8c8__neon_mlal(benchmark::State& state, const char* net) {
1037 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, 2, 8, 8, 1,
1038 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001039 }
Marat Dukhan89991902021-12-06 00:54:36 -08001040 static void qs8_gemm_3x8c8__neon_mlal(benchmark::State& state, const char* net) {
1041 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal, 3, 8, 8, 1,
1042 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001043 }
Marat Dukhan89991902021-12-06 00:54:36 -08001044 static void qs8_gemm_4x8c8__neon_mlal(benchmark::State& state, const char* net) {
1045 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, 4, 8, 8, 1,
1046 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001047 }
Marat Dukhan89991902021-12-06 00:54:36 -08001048 static void qs8_gemm_1x16c8__neon_mlal(benchmark::State& state, const char* net) {
1049 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal, 1, 16, 8, 1,
1050 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001051 }
Marat Dukhan89991902021-12-06 00:54:36 -08001052 static void qs8_gemm_2x16c8__neon_mlal(benchmark::State& state, const char* net) {
1053 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal, 2, 16, 8, 1,
1054 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001055 }
Marat Dukhan89991902021-12-06 00:54:36 -08001056 static void qs8_gemm_3x16c8__neon_mlal(benchmark::State& state, const char* net) {
1057 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal, 3, 16, 8, 1,
1058 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001059 }
Marat Dukhan89991902021-12-06 00:54:36 -08001060 static void qs8_gemm_4x16c8__neon_mlal(benchmark::State& state, const char* net) {
1061 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, 4, 16, 8, 1,
1062 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchardda78da12021-03-02 14:28:00 -08001063 }
Marat Dukhan89991902021-12-06 00:54:36 -08001064 static void qs8_gemm_1x8c16__neon_mlal(benchmark::State& state, const char* net) {
1065 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal, 1, 8, 16, 1,
1066 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001067 }
Marat Dukhan89991902021-12-06 00:54:36 -08001068 static void qs8_gemm_2x8c16__neon_mlal(benchmark::State& state, const char* net) {
1069 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal, 2, 8, 16, 1,
1070 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001071 }
Marat Dukhan89991902021-12-06 00:54:36 -08001072 static void qs8_gemm_3x8c16__neon_mlal(benchmark::State& state, const char* net) {
1073 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal, 3, 8, 16, 1,
1074 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001075 }
Marat Dukhan89991902021-12-06 00:54:36 -08001076 static void qs8_gemm_4x8c16__neon_mlal(benchmark::State& state, const char* net) {
1077 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal, 4, 8, 16, 1,
1078 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001079 }
Marat Dukhan89991902021-12-06 00:54:36 -08001080 static void qs8_gemm_1x16c16__neon_mlal(benchmark::State& state, const char* net) {
1081 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, 1, 16, 16, 1,
1082 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001083 }
Marat Dukhan89991902021-12-06 00:54:36 -08001084 static void qs8_gemm_2x16c16__neon_mlal(benchmark::State& state, const char* net) {
1085 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal, 2, 16, 16, 1,
1086 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001087 }
Marat Dukhan89991902021-12-06 00:54:36 -08001088 static void qs8_gemm_3x16c16__neon_mlal(benchmark::State& state, const char* net) {
1089 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal, 3, 16, 16, 1,
1090 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001091 }
Marat Dukhan89991902021-12-06 00:54:36 -08001092 static void qs8_gemm_4x16c16__neon_mlal(benchmark::State& state, const char* net) {
1093 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, 4, 16, 16, 1,
1094 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEON);
Frank Barchard71c4d1a2021-02-19 10:07:23 -08001095 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001096 static void qs8_gemm_1x8c4__neondot(benchmark::State& state, const char* net) {
1097 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, 1, 8, 4, 1,
1098 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchardf1fd89e2020-10-06 23:27:11 -07001099 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001100 static void qs8_gemm_4x8c4__neondot(benchmark::State& state, const char* net) {
1101 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neondot, 4, 8, 4, 1,
1102 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barcharda48848f2020-09-24 00:59:24 -07001103 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001104 static void qs8_gemm_6x8c4__neondot(benchmark::State& state, const char* net) {
1105 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, 6, 8, 4, 1,
1106 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barcharda48848f2020-09-24 00:59:24 -07001107 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001108 static void qs8_gemm_8x8c4__neondot(benchmark::State& state, const char* net) {
1109 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, 8, 8, 4, 1,
1110 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Benoit Jacoba9644732020-08-13 12:48:55 -07001111 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001112 static void qs8_gemm_1x16c4__neondot(benchmark::State& state, const char* net) {
1113 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot, 1, 16, 4, 1,
1114 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchardf1fd89e2020-10-06 23:27:11 -07001115 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001116 static void qs8_gemm_4x16c4__neondot(benchmark::State& state, const char* net) {
1117 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, 4, 16, 4, 1,
1118 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Benoit Jacoba9644732020-08-13 12:48:55 -07001119 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001120 static void qs8_gemm_6x16c4__neondot(benchmark::State& state, const char* net) {
1121 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, 6, 16, 4, 1,
1122 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barchard2fa17452020-09-22 09:51:46 -07001123 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001124 static void qs8_gemm_8x16c4__neondot(benchmark::State& state, const char* net) {
1125 GEMMBenchmark(state, xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, 8, 16, 4, 1,
1126 xnn_init_qs8_conv_minmax_rndnu_neon_params, benchmark::utils::CheckNEONDOT);
Frank Barcharda48848f2020-09-24 00:59:24 -07001127 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001128 BENCHMARK_GEMM(qs8_gemm_1x8c4__neon_mull_dup)
1129 BENCHMARK_GEMM(qs8_gemm_2x8c4__neon_mull_dup)
1130 BENCHMARK_GEMM(qs8_gemm_3x8c4__neon_mull_dup)
1131 BENCHMARK_GEMM(qs8_gemm_4x8c4__neon_mull_dup)
1132 BENCHMARK_GEMM(qs8_gemm_1x16c4__neon_mull_dup)
1133 BENCHMARK_GEMM(qs8_gemm_2x16c4__neon_mull_dup)
1134 BENCHMARK_GEMM(qs8_gemm_3x16c4__neon_mull_dup)
1135 BENCHMARK_GEMM(qs8_gemm_4x16c4__neon_mull_dup)
1136 BENCHMARK_GEMM(qs8_gemm_1x8c4__neon_mlal_dup)
1137 BENCHMARK_GEMM(qs8_gemm_2x8c4__neon_mlal_dup)
1138 BENCHMARK_GEMM(qs8_gemm_3x8c4__neon_mlal_dup)
1139 BENCHMARK_GEMM(qs8_gemm_4x8c4__neon_mlal_dup)
1140 BENCHMARK_GEMM(qs8_gemm_1x16c4__neon_mlal_dup)
1141 BENCHMARK_GEMM(qs8_gemm_2x16c4__neon_mlal_dup)
1142 BENCHMARK_GEMM(qs8_gemm_3x16c4__neon_mlal_dup)
1143 BENCHMARK_GEMM(qs8_gemm_4x16c4__neon_mlal_dup)
Frank Barchard64ab1b72021-11-22 10:57:40 -08001144 BENCHMARK_GEMM(qs8_gemm_1x8c4__neon_mull_ld1r)
1145 BENCHMARK_GEMM(qs8_gemm_2x8c4__neon_mull_ld1r)
1146 BENCHMARK_GEMM(qs8_gemm_3x8c4__neon_mull_ld1r)
1147 BENCHMARK_GEMM(qs8_gemm_4x8c4__neon_mull_ld1r)
1148 BENCHMARK_GEMM(qs8_gemm_1x16c4__neon_mull_ld1r)
1149 BENCHMARK_GEMM(qs8_gemm_2x16c4__neon_mull_ld1r)
1150 BENCHMARK_GEMM(qs8_gemm_3x16c4__neon_mull_ld1r)
1151 BENCHMARK_GEMM(qs8_gemm_4x16c4__neon_mull_ld1r)
1152 BENCHMARK_GEMM(qs8_gemm_1x8c4__neon_mlal_ld1r)
1153 BENCHMARK_GEMM(qs8_gemm_2x8c4__neon_mlal_ld1r)
1154 BENCHMARK_GEMM(qs8_gemm_3x8c4__neon_mlal_ld1r)
1155 BENCHMARK_GEMM(qs8_gemm_4x8c4__neon_mlal_ld1r)
1156 BENCHMARK_GEMM(qs8_gemm_1x16c4__neon_mlal_ld1r)
1157 BENCHMARK_GEMM(qs8_gemm_2x16c4__neon_mlal_ld1r)
1158 BENCHMARK_GEMM(qs8_gemm_3x16c4__neon_mlal_ld1r)
1159 BENCHMARK_GEMM(qs8_gemm_4x16c4__neon_mlal_ld1r)
1160 BENCHMARK_GEMM(qs8_gemm_1x8c4__neon_mull_ld2r)
1161 BENCHMARK_GEMM(qs8_gemm_2x8c4__neon_mull_ld2r)
1162 BENCHMARK_GEMM(qs8_gemm_3x8c4__neon_mull_ld2r)
1163 BENCHMARK_GEMM(qs8_gemm_4x8c4__neon_mull_ld2r)
1164 BENCHMARK_GEMM(qs8_gemm_1x16c4__neon_mull_ld2r)
1165 BENCHMARK_GEMM(qs8_gemm_2x16c4__neon_mull_ld2r)
1166 BENCHMARK_GEMM(qs8_gemm_3x16c4__neon_mull_ld2r)
1167 BENCHMARK_GEMM(qs8_gemm_4x16c4__neon_mull_ld2r)
1168 BENCHMARK_GEMM(qs8_gemm_1x8c4__neon_mlal_ld2r)
1169 BENCHMARK_GEMM(qs8_gemm_2x8c4__neon_mlal_ld2r)
1170 BENCHMARK_GEMM(qs8_gemm_3x8c4__neon_mlal_ld2r)
1171 BENCHMARK_GEMM(qs8_gemm_4x8c4__neon_mlal_ld2r)
1172 BENCHMARK_GEMM(qs8_gemm_1x16c4__neon_mlal_ld2r)
1173 BENCHMARK_GEMM(qs8_gemm_2x16c4__neon_mlal_ld2r)
1174 BENCHMARK_GEMM(qs8_gemm_3x16c4__neon_mlal_ld2r)
1175 BENCHMARK_GEMM(qs8_gemm_4x16c4__neon_mlal_ld2r)
Frank Barcharde22685a2021-11-12 11:36:58 -08001176 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mull_dup)
1177 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mull_dup)
1178 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mull_dup)
1179 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mull_dup)
1180 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mull_dup)
1181 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mull_dup)
1182 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mull_dup)
1183 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mull_dup)
1184 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mlal_dup)
1185 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mlal_dup)
1186 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mlal_dup)
1187 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mlal_dup)
1188 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mlal_dup)
1189 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mlal_dup)
1190 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mlal_dup)
1191 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mlal_dup)
Frank Barchard15eec022021-11-17 13:26:20 -08001192 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mull_ld1r)
1193 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mull_ld1r)
1194 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mull_ld1r)
1195 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mull_ld1r)
1196 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mull_ld1r)
1197 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mull_ld1r)
1198 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mull_ld1r)
1199 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mull_ld1r)
1200 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mlal_ld1r)
1201 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mlal_ld1r)
1202 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mlal_ld1r)
1203 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mlal_ld1r)
1204 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mlal_ld1r)
1205 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mlal_ld1r)
1206 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mlal_ld1r)
1207 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mlal_ld1r)
1208 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mull_ld2r)
1209 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mull_ld2r)
1210 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mull_ld2r)
1211 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mull_ld2r)
1212 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mull_ld2r)
1213 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mull_ld2r)
1214 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mull_ld2r)
1215 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mull_ld2r)
1216 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mlal_ld2r)
1217 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mlal_ld2r)
1218 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mlal_ld2r)
1219 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mlal_ld2r)
1220 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mlal_ld2r)
1221 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mlal_ld2r)
1222 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mlal_ld2r)
1223 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mlal_ld2r)
Frank Barchard42f5c502021-11-16 10:04:21 -08001224 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mull_ld4r)
1225 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mull_ld4r)
1226 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mull_ld4r)
1227 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mull_ld4r)
1228 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mull_ld4r)
1229 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mull_ld4r)
1230 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mull_ld4r)
1231 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mull_ld4r)
1232 BENCHMARK_GEMM(qs8_gemm_1x8c2__neon_mlal_ld4r)
1233 BENCHMARK_GEMM(qs8_gemm_2x8c2__neon_mlal_ld4r)
1234 BENCHMARK_GEMM(qs8_gemm_3x8c2__neon_mlal_ld4r)
1235 BENCHMARK_GEMM(qs8_gemm_4x8c2__neon_mlal_ld4r)
1236 BENCHMARK_GEMM(qs8_gemm_1x16c2__neon_mlal_ld4r)
1237 BENCHMARK_GEMM(qs8_gemm_2x16c2__neon_mlal_ld4r)
1238 BENCHMARK_GEMM(qs8_gemm_3x16c2__neon_mlal_ld4r)
1239 BENCHMARK_GEMM(qs8_gemm_4x16c2__neon_mlal_ld4r)
Frank Barcharde22685a2021-11-12 11:36:58 -08001240 BENCHMARK_GEMM(qs8_gemm_1x8c2s4__neon_mull)
1241 BENCHMARK_GEMM(qs8_gemm_2x8c2s4__neon_mull)
1242 BENCHMARK_GEMM(qs8_gemm_3x8c2s4__neon_mull)
1243 BENCHMARK_GEMM(qs8_gemm_4x8c2s4__neon_mull)
1244 BENCHMARK_GEMM(qs8_gemm_1x16c2s4__neon_mull)
1245 BENCHMARK_GEMM(qs8_gemm_2x16c2s4__neon_mull)
1246 BENCHMARK_GEMM(qs8_gemm_3x16c2s4__neon_mull)
1247 BENCHMARK_GEMM(qs8_gemm_4x16c2s4__neon_mull)
1248 BENCHMARK_GEMM(qs8_gemm_1x8c2s4__neon_mlal)
1249 BENCHMARK_GEMM(qs8_gemm_2x8c2s4__neon_mlal)
1250 BENCHMARK_GEMM(qs8_gemm_3x8c2s4__neon_mlal)
1251 BENCHMARK_GEMM(qs8_gemm_4x8c2s4__neon_mlal)
1252 BENCHMARK_GEMM(qs8_gemm_1x16c2s4__neon_mlal)
1253 BENCHMARK_GEMM(qs8_gemm_2x16c2s4__neon_mlal)
1254 BENCHMARK_GEMM(qs8_gemm_3x16c2s4__neon_mlal)
1255 BENCHMARK_GEMM(qs8_gemm_4x16c2s4__neon_mlal)
Frank Barchard27bf92c2021-11-24 15:47:52 -08001256 BENCHMARK_GEMM(qs8_gemm_1x8__neon_mlal_lane)
1257 BENCHMARK_GEMM(qs8_gemm_2x8__neon_mlal_lane)
1258 BENCHMARK_GEMM(qs8_gemm_3x8__neon_mlal_lane)
1259 BENCHMARK_GEMM(qs8_gemm_4x8__neon_mlal_lane)
1260 BENCHMARK_GEMM(qs8_gemm_6x8__neon_mlal_lane)
1261 BENCHMARK_GEMM(qs8_gemm_1x16__neon_mlal_lane)
1262 BENCHMARK_GEMM(qs8_gemm_2x16__neon_mlal_lane)
1263 BENCHMARK_GEMM(qs8_gemm_3x16__neon_mlal_lane)
1264 BENCHMARK_GEMM(qs8_gemm_4x16__neon_mlal_lane)
1265 BENCHMARK_GEMM(qs8_gemm_6x16__neon_mlal_lane)
Frank Barchardf82ea822021-12-01 15:43:37 -08001266 BENCHMARK_GEMM(qs8_gemm_1x8__neon_mlal_lane_prfm)
1267 BENCHMARK_GEMM(qs8_gemm_2x8__neon_mlal_lane_prfm)
1268 BENCHMARK_GEMM(qs8_gemm_3x8__neon_mlal_lane_prfm)
1269 BENCHMARK_GEMM(qs8_gemm_4x8__neon_mlal_lane_prfm)
1270 BENCHMARK_GEMM(qs8_gemm_6x8__neon_mlal_lane_prfm)
1271 BENCHMARK_GEMM(qs8_gemm_1x16__neon_mlal_lane_prfm)
1272 BENCHMARK_GEMM(qs8_gemm_2x16__neon_mlal_lane_prfm)
1273 BENCHMARK_GEMM(qs8_gemm_3x16__neon_mlal_lane_prfm)
1274 BENCHMARK_GEMM(qs8_gemm_4x16__neon_mlal_lane_prfm)
1275 BENCHMARK_GEMM(qs8_gemm_6x16__neon_mlal_lane_prfm)
Marat Dukhan89991902021-12-06 00:54:36 -08001276 BENCHMARK_GEMM(qs8_gemm_1x8c8__neon_mull)
1277 BENCHMARK_GEMM(qs8_gemm_2x8c8__neon_mull)
1278 BENCHMARK_GEMM(qs8_gemm_3x8c8__neon_mull)
1279 BENCHMARK_GEMM(qs8_gemm_4x8c8__neon_mull)
1280 BENCHMARK_GEMM(qs8_gemm_1x16c8__neon_mull)
1281 BENCHMARK_GEMM(qs8_gemm_2x16c8__neon_mull)
1282 BENCHMARK_GEMM(qs8_gemm_3x16c8__neon_mull)
1283 BENCHMARK_GEMM(qs8_gemm_4x16c8__neon_mull)
1284 BENCHMARK_GEMM(qs8_gemm_1x8c8__neon_mlal)
1285 BENCHMARK_GEMM(qs8_gemm_2x8c8__neon_mlal)
1286 BENCHMARK_GEMM(qs8_gemm_3x8c8__neon_mlal)
1287 BENCHMARK_GEMM(qs8_gemm_4x8c8__neon_mlal)
1288 BENCHMARK_GEMM(qs8_gemm_1x16c8__neon_mlal)
1289 BENCHMARK_GEMM(qs8_gemm_2x16c8__neon_mlal)
1290 BENCHMARK_GEMM(qs8_gemm_3x16c8__neon_mlal)
1291 BENCHMARK_GEMM(qs8_gemm_4x16c8__neon_mlal)
1292 BENCHMARK_GEMM(qs8_gemm_1x8c16__neon_mlal)
1293 BENCHMARK_GEMM(qs8_gemm_2x8c16__neon_mlal)
1294 BENCHMARK_GEMM(qs8_gemm_3x8c16__neon_mlal)
1295 BENCHMARK_GEMM(qs8_gemm_4x8c16__neon_mlal)
1296 BENCHMARK_GEMM(qs8_gemm_1x16c16__neon_mlal)
1297 BENCHMARK_GEMM(qs8_gemm_2x16c16__neon_mlal)
1298 BENCHMARK_GEMM(qs8_gemm_3x16c16__neon_mlal)
1299 BENCHMARK_GEMM(qs8_gemm_4x16c16__neon_mlal)
Marat Dukhan4486f872021-08-07 15:22:50 -07001300
1301 BENCHMARK_GEMM(qs8_gemm_1x8c4__neondot)
1302 BENCHMARK_GEMM(qs8_gemm_4x8c4__neondot)
1303 BENCHMARK_GEMM(qs8_gemm_6x8c4__neondot)
1304 BENCHMARK_GEMM(qs8_gemm_8x8c4__neondot)
1305 BENCHMARK_GEMM(qs8_gemm_1x16c4__neondot)
1306 BENCHMARK_GEMM(qs8_gemm_4x16c4__neondot)
1307 BENCHMARK_GEMM(qs8_gemm_6x16c4__neondot)
1308 BENCHMARK_GEMM(qs8_gemm_8x16c4__neondot)
Marat Dukhanab671422020-08-05 16:39:04 -07001309#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1310
Marat Dukhan725f47e2021-05-22 10:06:19 -07001311
Marat Dukhan595e1702020-07-31 10:12:52 -07001312#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan529d2c12021-08-06 15:37:03 -07001313 static void qs8_gemm_2x16c8__avx512skx(benchmark::State& state, const char* net) {
Marat Dukhan81025932021-05-26 09:01:05 -07001314 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx, 2, 16, 8, 1,
1315 xnn_init_qs8_conv_minmax_fp32_avx512_params, benchmark::utils::CheckAVX512SKX);
1316 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001317 static void qs8_gemm_3x16c8__avx512skx(benchmark::State& state, const char* net) {
Marat Dukhan81025932021-05-26 09:01:05 -07001318 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx, 3, 16, 8, 1,
1319 xnn_init_qs8_conv_minmax_fp32_avx512_params, benchmark::utils::CheckAVX512SKX);
1320 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001321 static void qs8_gemm_4x16c8__avx512skx(benchmark::State& state, const char* net) {
Marat Dukhan81025932021-05-26 09:01:05 -07001322 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx, 4, 16, 8, 1,
1323 xnn_init_qs8_conv_minmax_fp32_avx512_params, benchmark::utils::CheckAVX512SKX);
Marat Dukhanbb00b1d2020-08-10 11:37:23 -07001324 }
1325
Marat Dukhan529d2c12021-08-06 15:37:03 -07001326 static void qs8_gemm_2x8c8__avx2(benchmark::State& state, const char* net) {
Marat Dukhan81025932021-05-26 09:01:05 -07001327 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2, 2, 8, 8, 1,
1328 xnn_init_qs8_conv_minmax_fp32_avx2_params, benchmark::utils::CheckAVX2);
1329 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001330 static void qs8_gemm_3x8c8__avx2(benchmark::State& state, const char* net) {
Marat Dukhan81025932021-05-26 09:01:05 -07001331 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2, 3, 8, 8, 1,
1332 xnn_init_qs8_conv_minmax_fp32_avx2_params, benchmark::utils::CheckAVX2);
1333 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001334
Marat Dukhan0ff79892021-08-06 16:05:06 -07001335 static void qs8_gemm_xw_2x8c8__avx2(benchmark::State& state, const char* net) {
1336 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x8c8__avx2, 2, 8, 8, 1,
1337 xnn_init_qs8_conv_minmax_fp32_avx2_params, benchmark::utils::CheckAVX2, true);
Marat Dukhan683fab32020-08-03 19:42:52 -07001338 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001339 static void qs8_gemm_xw_3x8c8__avx2(benchmark::State& state, const char* net) {
1340 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x8c8__avx2, 3, 8, 8, 1,
1341 xnn_init_qs8_conv_minmax_fp32_avx2_params, benchmark::utils::CheckAVX2, true);
Marat Dukhan683fab32020-08-03 19:42:52 -07001342 }
Marat Dukhan12809522020-08-02 22:23:51 -07001343
Marat Dukhan529d2c12021-08-06 15:37:03 -07001344 static void qs8_gemm_2x4c2__xop_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001345 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64, 2, 4, 2, 1,
1346 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1347 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001348 static void qs8_gemm_3x4c2__xop_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001349 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64, 3, 4, 2, 1,
1350 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1351 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001352 static void qs8_gemm_4x4c2__xop_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001353 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64, 4, 4, 2, 1,
1354 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1355 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001356
Marat Dukhan529d2c12021-08-06 15:37:03 -07001357 static void qs8_gemm_2x4c2__xop_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001358 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128, 2, 4, 2, 1,
1359 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1360 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001361 static void qs8_gemm_3x4c2__xop_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001362 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128, 3, 4, 2, 1,
1363 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1364 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001365 static void qs8_gemm_4x4c2__xop_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001366 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128, 4, 4, 2, 1,
1367 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1368 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001369
Marat Dukhan0ff79892021-08-06 16:05:06 -07001370 static void qs8_gemm_xw_2x4c2__xop(benchmark::State& state, const char* net) {
1371 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__xop, 2, 4, 2, 1,
1372 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001373 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001374 static void qs8_gemm_xw_3x4c2__xop(benchmark::State& state, const char* net) {
1375 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__xop, 3, 4, 2, 1,
1376 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001377 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001378 static void qs8_gemm_xw_4x4c2__xop(benchmark::State& state, const char* net) {
1379 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__xop, 4, 4, 2, 1,
1380 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001381 }
1382
Marat Dukhan529d2c12021-08-06 15:37:03 -07001383 static void qs8_gemm_2x4c8__xop_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001384 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64, 2, 4, 8, 1,
1385 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1386 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001387 static void qs8_gemm_3x4c8__xop_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001388 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64, 3, 4, 8, 1,
1389 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1390 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001391
Marat Dukhan529d2c12021-08-06 15:37:03 -07001392 static void qs8_gemm_2x4c8__xop_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001393 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128, 2, 4, 8, 1,
1394 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1395 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001396 static void qs8_gemm_3x4c8__xop_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001397 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128, 3, 4, 8, 1,
1398 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP);
1399 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001400
Marat Dukhan0ff79892021-08-06 16:05:06 -07001401 static void qs8_gemm_xw_2x4c8__xop(benchmark::State& state, const char* net) {
1402 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__xop, 2, 4, 8, 1,
1403 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001404 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001405 static void qs8_gemm_xw_3x4c8__xop(benchmark::State& state, const char* net) {
1406 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__xop, 3, 4, 8, 1,
1407 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckXOP, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001408 }
1409
Marat Dukhan529d2c12021-08-06 15:37:03 -07001410 static void qs8_gemm_2x4c2__avx_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001411 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64, 2, 4, 2, 1,
1412 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1413 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001414 static void qs8_gemm_3x4c2__avx_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001415 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64, 3, 4, 2, 1,
1416 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1417 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001418 static void qs8_gemm_4x4c2__avx_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001419 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64, 4, 4, 2, 1,
1420 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1421 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001422
Marat Dukhan529d2c12021-08-06 15:37:03 -07001423 static void qs8_gemm_2x4c2__avx_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001424 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128, 2, 4, 2, 1,
1425 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1426 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001427 static void qs8_gemm_3x4c2__avx_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001428 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128, 3, 4, 2, 1,
1429 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1430 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001431 static void qs8_gemm_4x4c2__avx_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001432 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128, 4, 4, 2, 1,
1433 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1434 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001435
Marat Dukhan0ff79892021-08-06 16:05:06 -07001436 static void qs8_gemm_xw_2x4c2__avx(benchmark::State& state, const char* net) {
1437 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__avx, 2, 4, 2, 1,
1438 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001439 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001440 static void qs8_gemm_xw_3x4c2__avx(benchmark::State& state, const char* net) {
1441 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__avx, 3, 4, 2, 1,
1442 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001443 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001444 static void qs8_gemm_xw_4x4c2__avx(benchmark::State& state, const char* net) {
1445 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__avx, 4, 4, 2, 1,
1446 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001447 }
1448
Marat Dukhan529d2c12021-08-06 15:37:03 -07001449 static void qs8_gemm_2x4c8__avx_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001450 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64, 2, 4, 8, 1,
1451 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1452 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001453 static void qs8_gemm_3x4c8__avx_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001454 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64, 3, 4, 8, 1,
1455 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1456 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001457
Marat Dukhan529d2c12021-08-06 15:37:03 -07001458 static void qs8_gemm_2x4c8__avx_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001459 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128, 2, 4, 8, 1,
1460 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1461 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001462 static void qs8_gemm_3x4c8__avx_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001463 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128, 3, 4, 8, 1,
1464 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX);
1465 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001466
Marat Dukhan0ff79892021-08-06 16:05:06 -07001467 static void qs8_gemm_xw_2x4c8__avx(benchmark::State& state, const char* net) {
1468 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__avx, 2, 4, 8, 1,
1469 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001470 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001471 static void qs8_gemm_xw_3x4c8__avx(benchmark::State& state, const char* net) {
1472 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__avx, 3, 4, 8, 1,
1473 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckAVX, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001474 }
1475
Marat Dukhan529d2c12021-08-06 15:37:03 -07001476 static void qs8_gemm_2x4c2__sse41_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001477 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64, 2, 4, 2, 1,
1478 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1479 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001480 static void qs8_gemm_3x4c2__sse41_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001481 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64, 3, 4, 2, 1,
1482 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1483 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001484 static void qs8_gemm_4x4c2__sse41_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001485 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64, 4, 4, 2, 1,
1486 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1487 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001488
Marat Dukhan529d2c12021-08-06 15:37:03 -07001489 static void qs8_gemm_2x4c2__sse41_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001490 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128, 2, 4, 2, 1,
1491 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1492 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001493 static void qs8_gemm_3x4c2__sse41_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001494 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128, 3, 4, 2, 1,
1495 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1496 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001497 static void qs8_gemm_4x4c2__sse41_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001498 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128, 4, 4, 2, 1,
1499 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1500 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001501
Marat Dukhan0ff79892021-08-06 16:05:06 -07001502 static void qs8_gemm_xw_2x4c2__sse41(benchmark::State& state, const char* net) {
1503 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse41, 2, 4, 2, 1,
1504 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001505 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001506 static void qs8_gemm_xw_3x4c2__sse41(benchmark::State& state, const char* net) {
1507 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse41, 3, 4, 2, 1,
1508 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001509 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001510 static void qs8_gemm_xw_4x4c2__sse41(benchmark::State& state, const char* net) {
1511 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__sse41, 4, 4, 2, 1,
1512 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001513 }
1514
Marat Dukhan529d2c12021-08-06 15:37:03 -07001515 static void qs8_gemm_2x4c8__sse41_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001516 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64, 2, 4, 8, 1,
1517 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1518 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001519 static void qs8_gemm_3x4c8__sse41_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001520 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64, 3, 4, 8, 1,
1521 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1522 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001523
Marat Dukhan529d2c12021-08-06 15:37:03 -07001524 static void qs8_gemm_2x4c8__sse41_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001525 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128, 2, 4, 8, 1,
1526 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1527 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001528 static void qs8_gemm_3x4c8__sse41_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001529 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128, 3, 4, 8, 1,
1530 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41);
1531 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001532
Marat Dukhan0ff79892021-08-06 16:05:06 -07001533 static void qs8_gemm_xw_2x4c8__sse41(benchmark::State& state, const char* net) {
1534 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse41, 2, 4, 8, 1,
1535 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001536 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001537 static void qs8_gemm_xw_3x4c8__sse41(benchmark::State& state, const char* net) {
1538 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse41, 3, 4, 8, 1,
1539 xnn_init_qs8_conv_minmax_fp32_sse4_params, benchmark::utils::CheckSSE41, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001540 }
1541
Marat Dukhan529d2c12021-08-06 15:37:03 -07001542 static void qs8_gemm_2x4c8__ssse3_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001543 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64, 2, 4, 8, 1,
1544 xnn_init_qs8_conv_minmax_fp32_sse2_params, benchmark::utils::CheckSSSE3);
1545 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001546 static void qs8_gemm_3x4c8__ssse3_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001547 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64, 3, 4, 8, 1,
1548 xnn_init_qs8_conv_minmax_fp32_sse2_params, benchmark::utils::CheckSSSE3);
1549 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001550
Marat Dukhan529d2c12021-08-06 15:37:03 -07001551 static void qs8_gemm_2x4c8__ssse3_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001552 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128, 2, 4, 8, 1,
1553 xnn_init_qs8_conv_minmax_fp32_sse2_params, benchmark::utils::CheckSSSE3);
1554 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001555 static void qs8_gemm_3x4c8__ssse3_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001556 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128, 3, 4, 8, 1,
1557 xnn_init_qs8_conv_minmax_fp32_sse2_params, benchmark::utils::CheckSSSE3);
1558 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001559
Marat Dukhan0ff79892021-08-06 16:05:06 -07001560 static void qs8_gemm_xw_2x4c8__ssse3(benchmark::State& state, const char* net) {
1561 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__ssse3, 2, 4, 8, 1,
1562 xnn_init_qs8_conv_minmax_fp32_sse2_params, benchmark::utils::CheckSSSE3, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001563 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001564 static void qs8_gemm_xw_3x4c8__ssse3(benchmark::State& state, const char* net) {
1565 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__ssse3, 3, 4, 8, 1,
1566 xnn_init_qs8_conv_minmax_fp32_sse2_params, benchmark::utils::CheckSSSE3, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001567 }
1568
Marat Dukhan529d2c12021-08-06 15:37:03 -07001569 static void qs8_gemm_2x4c2__sse2_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001570 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64, 2, 4, 2, 1,
1571 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1572 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001573 static void qs8_gemm_3x4c2__sse2_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001574 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64, 3, 4, 2, 1,
1575 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1576 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001577 static void qs8_gemm_4x4c2__sse2_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001578 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64, 4, 4, 2, 1,
1579 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1580 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001581
Marat Dukhan529d2c12021-08-06 15:37:03 -07001582 static void qs8_gemm_2x4c2__sse2_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001583 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128, 2, 4, 2, 1,
1584 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1585 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001586 static void qs8_gemm_3x4c2__sse2_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001587 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128, 3, 4, 2, 1,
1588 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1589 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001590 static void qs8_gemm_4x4c2__sse2_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001591 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128, 4, 4, 2, 1,
1592 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1593 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001594
Marat Dukhan0ff79892021-08-06 16:05:06 -07001595 static void qs8_gemm_xw_2x4c2__sse2(benchmark::State& state, const char* net) {
1596 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__sse2, 2, 4, 2, 1,
1597 xnn_init_qs8_conv_minmax_fp32_sse2_params, nullptr, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001598 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001599 static void qs8_gemm_xw_3x4c2__sse2(benchmark::State& state, const char* net) {
1600 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__sse2, 3, 4, 2, 1,
1601 xnn_init_qs8_conv_minmax_fp32_sse2_params, nullptr, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001602 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001603 static void qs8_gemm_xw_4x4c2__sse2(benchmark::State& state, const char* net) {
1604 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__sse2, 4, 4, 2, 1,
1605 xnn_init_qs8_conv_minmax_fp32_sse2_params, nullptr, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001606 }
1607
Marat Dukhan529d2c12021-08-06 15:37:03 -07001608 static void qs8_gemm_2x4c8__sse2_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001609 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64, 2, 4, 8, 1,
1610 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1611 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001612 static void qs8_gemm_3x4c8__sse2_ld64(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001613 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64, 3, 4, 8, 1,
1614 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1615 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001616
Marat Dukhan529d2c12021-08-06 15:37:03 -07001617 static void qs8_gemm_2x4c8__sse2_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001618 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128, 2, 4, 8, 1,
1619 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1620 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001621 static void qs8_gemm_3x4c8__sse2_ld128(benchmark::State& state, const char* net) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07001622 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128, 3, 4, 8, 1,
1623 xnn_init_qs8_conv_minmax_fp32_sse2_params);
1624 }
Marat Dukhan2e427872021-04-09 23:40:07 -07001625
Marat Dukhan0ff79892021-08-06 16:05:06 -07001626 static void qs8_gemm_xw_2x4c8__sse2(benchmark::State& state, const char* net) {
1627 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__sse2, 2, 4, 8, 1,
1628 xnn_init_qs8_conv_minmax_fp32_sse2_params, nullptr, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001629 }
Marat Dukhan0ff79892021-08-06 16:05:06 -07001630 static void qs8_gemm_xw_3x4c8__sse2(benchmark::State& state, const char* net) {
1631 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__sse2, 3, 4, 8, 1,
1632 xnn_init_qs8_conv_minmax_fp32_sse2_params, nullptr, true);
Marat Dukhan2e427872021-04-09 23:40:07 -07001633 }
1634
// AVX512SKX wrappers handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001635 BENCHMARK_GEMM(qs8_gemm_2x16c8__avx512skx)
1636 BENCHMARK_GEMM(qs8_gemm_3x16c8__avx512skx)
1637 BENCHMARK_GEMM(qs8_gemm_4x16c8__avx512skx)
Marat Dukhanbb00b1d2020-08-10 11:37:23 -07001638
// AVX2 wrappers (regular and xw) handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001639 BENCHMARK_GEMM(qs8_gemm_2x8c8__avx2)
1640 BENCHMARK_GEMM(qs8_gemm_3x8c8__avx2)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001641 BENCHMARK_GEMM(qs8_gemm_xw_2x8c8__avx2)
1642 BENCHMARK_GEMM(qs8_gemm_xw_3x8c8__avx2)
Marat Dukhan683fab32020-08-03 19:42:52 -07001643
// XOP wrappers (c2/c8, ld64/ld128 and xw) handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001644 BENCHMARK_GEMM(qs8_gemm_2x4c2__xop_ld64)
1645 BENCHMARK_GEMM(qs8_gemm_3x4c2__xop_ld64)
1646 BENCHMARK_GEMM(qs8_gemm_4x4c2__xop_ld64)
1647 BENCHMARK_GEMM(qs8_gemm_2x4c2__xop_ld128)
1648 BENCHMARK_GEMM(qs8_gemm_3x4c2__xop_ld128)
1649 BENCHMARK_GEMM(qs8_gemm_4x4c2__xop_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001650 BENCHMARK_GEMM(qs8_gemm_xw_2x4c2__xop)
1651 BENCHMARK_GEMM(qs8_gemm_xw_3x4c2__xop)
1652 BENCHMARK_GEMM(qs8_gemm_xw_4x4c2__xop)
Marat Dukhan529d2c12021-08-06 15:37:03 -07001653 BENCHMARK_GEMM(qs8_gemm_2x4c8__xop_ld64)
1654 BENCHMARK_GEMM(qs8_gemm_3x4c8__xop_ld64)
1655 BENCHMARK_GEMM(qs8_gemm_2x4c8__xop_ld128)
1656 BENCHMARK_GEMM(qs8_gemm_3x4c8__xop_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001657 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__xop)
1658 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__xop)
Marat Dukhan683fab32020-08-03 19:42:52 -07001659
// AVX wrappers (c2/c8, ld64/ld128 and xw) handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001660 BENCHMARK_GEMM(qs8_gemm_2x4c2__avx_ld64)
1661 BENCHMARK_GEMM(qs8_gemm_3x4c2__avx_ld64)
1662 BENCHMARK_GEMM(qs8_gemm_4x4c2__avx_ld64)
1663 BENCHMARK_GEMM(qs8_gemm_2x4c2__avx_ld128)
1664 BENCHMARK_GEMM(qs8_gemm_3x4c2__avx_ld128)
1665 BENCHMARK_GEMM(qs8_gemm_4x4c2__avx_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001666 BENCHMARK_GEMM(qs8_gemm_xw_2x4c2__avx)
1667 BENCHMARK_GEMM(qs8_gemm_xw_3x4c2__avx)
1668 BENCHMARK_GEMM(qs8_gemm_xw_4x4c2__avx)
Marat Dukhan529d2c12021-08-06 15:37:03 -07001669 BENCHMARK_GEMM(qs8_gemm_2x4c8__avx_ld64)
1670 BENCHMARK_GEMM(qs8_gemm_3x4c8__avx_ld64)
1671 BENCHMARK_GEMM(qs8_gemm_2x4c8__avx_ld128)
1672 BENCHMARK_GEMM(qs8_gemm_3x4c8__avx_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001673 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__avx)
1674 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__avx)
Marat Dukhana3c16332021-04-02 15:03:27 -07001675
// SSE4.1 wrappers (c2/c8, ld64/ld128 and xw) handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001676 BENCHMARK_GEMM(qs8_gemm_2x4c2__sse41_ld64)
1677 BENCHMARK_GEMM(qs8_gemm_3x4c2__sse41_ld64)
1678 BENCHMARK_GEMM(qs8_gemm_4x4c2__sse41_ld64)
1679 BENCHMARK_GEMM(qs8_gemm_2x4c2__sse41_ld128)
1680 BENCHMARK_GEMM(qs8_gemm_3x4c2__sse41_ld128)
1681 BENCHMARK_GEMM(qs8_gemm_4x4c2__sse41_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001682 BENCHMARK_GEMM(qs8_gemm_xw_2x4c2__sse41)
1683 BENCHMARK_GEMM(qs8_gemm_xw_3x4c2__sse41)
1684 BENCHMARK_GEMM(qs8_gemm_xw_4x4c2__sse41)
Marat Dukhan529d2c12021-08-06 15:37:03 -07001685 BENCHMARK_GEMM(qs8_gemm_2x4c8__sse41_ld64)
1686 BENCHMARK_GEMM(qs8_gemm_3x4c8__sse41_ld64)
1687 BENCHMARK_GEMM(qs8_gemm_2x4c8__sse41_ld128)
1688 BENCHMARK_GEMM(qs8_gemm_3x4c8__sse41_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001689 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__sse41)
1690 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__sse41)
Marat Dukhan683fab32020-08-03 19:42:52 -07001691
// SSSE3 wrappers (c8 ld64/ld128 and xw) handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001692 BENCHMARK_GEMM(qs8_gemm_2x4c8__ssse3_ld64)
1693 BENCHMARK_GEMM(qs8_gemm_3x4c8__ssse3_ld64)
1694 BENCHMARK_GEMM(qs8_gemm_2x4c8__ssse3_ld128)
1695 BENCHMARK_GEMM(qs8_gemm_3x4c8__ssse3_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001696 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__ssse3)
1697 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__ssse3)
Marat Dukhan683fab32020-08-03 19:42:52 -07001698
// SSE2 wrappers (c2/c8, ld64/ld128 and xw) handed to BENCHMARK_GEMM (macro defined outside this chunk; presumably registers each benchmark — confirm).
Marat Dukhan529d2c12021-08-06 15:37:03 -07001699 BENCHMARK_GEMM(qs8_gemm_2x4c2__sse2_ld64)
1700 BENCHMARK_GEMM(qs8_gemm_3x4c2__sse2_ld64)
1701 BENCHMARK_GEMM(qs8_gemm_4x4c2__sse2_ld64)
1702 BENCHMARK_GEMM(qs8_gemm_2x4c2__sse2_ld128)
1703 BENCHMARK_GEMM(qs8_gemm_3x4c2__sse2_ld128)
1704 BENCHMARK_GEMM(qs8_gemm_4x4c2__sse2_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001705 BENCHMARK_GEMM(qs8_gemm_xw_2x4c2__sse2)
1706 BENCHMARK_GEMM(qs8_gemm_xw_3x4c2__sse2)
1707 BENCHMARK_GEMM(qs8_gemm_xw_4x4c2__sse2)
Marat Dukhan529d2c12021-08-06 15:37:03 -07001708 BENCHMARK_GEMM(qs8_gemm_2x4c8__sse2_ld64)
1709 BENCHMARK_GEMM(qs8_gemm_3x4c8__sse2_ld64)
1710 BENCHMARK_GEMM(qs8_gemm_2x4c8__sse2_ld128)
1711 BENCHMARK_GEMM(qs8_gemm_3x4c8__sse2_ld128)
Marat Dukhan0ff79892021-08-06 16:05:06 -07001712 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__sse2)
1713 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__sse2)
Marat Dukhan27203da2020-08-05 15:19:03 -07001714#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1715
Marat Dukhan725f47e2021-05-22 10:06:19 -07001716
Marat Dukhan4c617792021-12-21 15:47:58 -08001717#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -07001718 static void qs8_gemm_2x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1719 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64, 2, 4, 2, 1,
1720 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1721 }
1722 static void qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1723 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64, 3, 4, 2, 1,
1724 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1725 }
1726 static void qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1727 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64, 4, 4, 2, 1,
1728 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1729 }
1730
1731 static void qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1732 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128, 2, 4, 2, 1,
1733 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1734 }
1735 static void qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1736 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128, 3, 4, 2, 1,
1737 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1738 }
1739 static void qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1740 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128, 4, 4, 2, 1,
1741 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1742 }
1743
1744 static void qs8_gemm_xw_2x4c2__wasmsimd_dot16x2(benchmark::State& state, const char* net) {
1745 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2, 2, 4, 2, 1,
1746 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
1747 }
1748 static void qs8_gemm_xw_3x4c2__wasmsimd_dot16x2(benchmark::State& state, const char* net) {
1749 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2, 3, 4, 2, 1,
1750 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
1751 }
1752 static void qs8_gemm_xw_4x4c2__wasmsimd_dot16x2(benchmark::State& state, const char* net) {
1753 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2, 4, 4, 2, 1,
1754 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
1755 }
1756
Marat Dukhan348c3772022-02-01 00:36:50 -08001757 static void qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1758 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64, 2, 4, 2, 4,
1759 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1760 }
1761 static void qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1762 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64, 3, 4, 2, 4,
1763 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1764 }
1765 static void qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1766 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64, 4, 4, 2, 4,
1767 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1768 }
1769
1770 static void qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1771 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128, 2, 4, 2, 4,
1772 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1773 }
1774 static void qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1775 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128, 3, 4, 2, 4,
1776 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1777 }
1778 static void qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1779 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128, 4, 4, 2, 4,
1780 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1781 }
1782
Marat Dukhan8dc106e2021-08-31 15:23:02 -07001783 static void qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1784 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64, 2, 4, 8, 1,
1785 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1786 }
1787 static void qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1788 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64, 3, 4, 8, 1,
1789 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1790 }
1791 static void qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, const char* net) {
1792 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64, 4, 4, 8, 1,
1793 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1794 }
1795
1796 static void qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1797 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128, 2, 4, 8, 1,
1798 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1799 }
1800 static void qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1801 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128, 3, 4, 8, 1,
1802 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1803 }
1804 static void qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, const char* net) {
1805 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128, 4, 4, 8, 1,
1806 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
1807 }
1808
1809 static void qs8_gemm_xw_2x4c8__wasmsimd_dot16x2(benchmark::State& state, const char* net) {
1810 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2, 2, 4, 8, 1,
1811 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
1812 }
1813 static void qs8_gemm_xw_3x4c8__wasmsimd_dot16x2(benchmark::State& state, const char* net) {
1814 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2, 3, 4, 8, 1,
1815 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
1816 }
1817 static void qs8_gemm_xw_4x4c8__wasmsimd_dot16x2(benchmark::State& state, const char* net) {
1818 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2, 4, 4, 8, 1,
1819 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
1820 }
1821
Marat Dukhandfc2db02021-08-08 21:19:07 -07001822 static void qs8_gemm_2x4c8__wasmsimd_mul16_ld64(benchmark::State& state, const char* net) {
1823 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64, 2, 4, 8, 1,
Marat Dukhan86746292021-08-06 17:27:18 -07001824 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
Marat Dukhan27203da2020-08-05 15:19:03 -07001825 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07001826 static void qs8_gemm_3x4c8__wasmsimd_mul16_ld64(benchmark::State& state, const char* net) {
1827 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64, 3, 4, 8, 1,
Marat Dukhan86746292021-08-06 17:27:18 -07001828 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
Marat Dukhan27203da2020-08-05 15:19:03 -07001829 }
1830
Marat Dukhandfc2db02021-08-08 21:19:07 -07001831 static void qs8_gemm_2x4c8__wasmsimd_mul16_ld128(benchmark::State& state, const char* net) {
1832 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128, 2, 4, 8, 1,
Marat Dukhan86746292021-08-06 17:27:18 -07001833 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
Marat Dukhan27203da2020-08-05 15:19:03 -07001834 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07001835 static void qs8_gemm_3x4c8__wasmsimd_mul16_ld128(benchmark::State& state, const char* net) {
1836 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128, 3, 4, 8, 1,
Marat Dukhan86746292021-08-06 17:27:18 -07001837 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params);
Marat Dukhan27203da2020-08-05 15:19:03 -07001838 }
1839
Marat Dukhandfc2db02021-08-08 21:19:07 -07001840 static void qs8_gemm_xw_2x4c8__wasmsimd_mul16(benchmark::State& state, const char* net) {
1841 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16, 2, 4, 8, 1,
Marat Dukhan86746292021-08-06 17:27:18 -07001842 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
Marat Dukhan27203da2020-08-05 15:19:03 -07001843 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07001844 static void qs8_gemm_xw_3x4c8__wasmsimd_mul16(benchmark::State& state, const char* net) {
1845 GEMMBenchmark(state, xnn_qs8_gemm_xw_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16, 3, 4, 8, 1,
Marat Dukhan86746292021-08-06 17:27:18 -07001846 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params, nullptr, true);
Marat Dukhan27203da2020-08-05 15:19:03 -07001847 }
1848
Marat Dukhan8dc106e2021-08-31 15:23:02 -07001849 BENCHMARK_GEMM(qs8_gemm_2x4c2__wasmsimd_dot16x2_ld64)
1850 BENCHMARK_GEMM(qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64)
1851 BENCHMARK_GEMM(qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64)
1852 BENCHMARK_GEMM(qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128)
1853 BENCHMARK_GEMM(qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128)
1854 BENCHMARK_GEMM(qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128)
1855 BENCHMARK_GEMM(qs8_gemm_xw_2x4c2__wasmsimd_dot16x2)
1856 BENCHMARK_GEMM(qs8_gemm_xw_3x4c2__wasmsimd_dot16x2)
1857 BENCHMARK_GEMM(qs8_gemm_xw_4x4c2__wasmsimd_dot16x2)
1858
Marat Dukhan348c3772022-02-01 00:36:50 -08001859 BENCHMARK_GEMM(qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64)
1860 BENCHMARK_GEMM(qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64)
1861 BENCHMARK_GEMM(qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64)
1862 BENCHMARK_GEMM(qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128)
1863 BENCHMARK_GEMM(qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128)
1864 BENCHMARK_GEMM(qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128)
1865
Marat Dukhan8dc106e2021-08-31 15:23:02 -07001866 BENCHMARK_GEMM(qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64)
1867 BENCHMARK_GEMM(qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64)
1868 BENCHMARK_GEMM(qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64)
1869 BENCHMARK_GEMM(qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128)
1870 BENCHMARK_GEMM(qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128)
1871 BENCHMARK_GEMM(qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128)
1872 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__wasmsimd_dot16x2)
1873 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__wasmsimd_dot16x2)
1874 BENCHMARK_GEMM(qs8_gemm_xw_4x4c8__wasmsimd_dot16x2)
1875
Marat Dukhandfc2db02021-08-08 21:19:07 -07001876 BENCHMARK_GEMM(qs8_gemm_2x4c8__wasmsimd_mul16_ld64)
1877 BENCHMARK_GEMM(qs8_gemm_3x4c8__wasmsimd_mul16_ld64)
1878 BENCHMARK_GEMM(qs8_gemm_2x4c8__wasmsimd_mul16_ld128)
1879 BENCHMARK_GEMM(qs8_gemm_3x4c8__wasmsimd_mul16_ld128)
1880 BENCHMARK_GEMM(qs8_gemm_xw_2x4c8__wasmsimd_mul16)
1881 BENCHMARK_GEMM(qs8_gemm_xw_3x4c8__wasmsimd_mul16)
Marat Dukhan4c617792021-12-21 15:47:58 -08001882#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan595e1702020-07-31 10:12:52 -07001883
Marat Dukhan725f47e2021-05-22 10:06:19 -07001884
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001885#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1886 static void qs8_gemm_2x2__wasm_fmagic(benchmark::State& state, const char* net) {
1887 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic, 2, 2, 1, 1,
1888 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
1889 }
1890 static void qs8_gemm_3x2__wasm_fmagic(benchmark::State& state, const char* net) {
1891 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic, 3, 2, 1, 1,
1892 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
1893 }
1894 static void qs8_gemm_4x2__wasm_fmagic(benchmark::State& state, const char* net) {
1895 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic, 4, 2, 1, 1,
1896 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
1897 }
1898 static void qs8_gemm_2x4__wasm_fmagic(benchmark::State& state, const char* net) {
1899 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic, 2, 4, 1, 1,
1900 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
1901 }
1902 static void qs8_gemm_3x4__wasm_fmagic(benchmark::State& state, const char* net) {
1903 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic, 3, 4, 1, 1,
1904 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
1905 }
1906 static void qs8_gemm_4x4__wasm_fmagic(benchmark::State& state, const char* net) {
1907 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic, 4, 4, 1, 1,
1908 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
1909 }
1910
1911 BENCHMARK_GEMM(qs8_gemm_2x2__wasm_fmagic)
1912 BENCHMARK_GEMM(qs8_gemm_3x2__wasm_fmagic)
1913 BENCHMARK_GEMM(qs8_gemm_4x2__wasm_fmagic)
1914 BENCHMARK_GEMM(qs8_gemm_2x4__wasm_fmagic)
1915 BENCHMARK_GEMM(qs8_gemm_3x4__wasm_fmagic)
1916 BENCHMARK_GEMM(qs8_gemm_4x4__wasm_fmagic)
1917#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1918
1919
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001920static void qs8_gemm_2x2__scalar_fmagic(benchmark::State& state, const char* net) {
1921 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic, 2, 2, 1, 1,
1922 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
Marat Dukhana1a4e782021-05-07 17:49:47 -07001923}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001924static void qs8_gemm_3x2__scalar_fmagic(benchmark::State& state, const char* net) {
1925 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic, 3, 2, 1, 1,
1926 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
Marat Dukhana1a4e782021-05-07 17:49:47 -07001927}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001928static void qs8_gemm_4x2__scalar_fmagic(benchmark::State& state, const char* net) {
1929 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic, 4, 2, 1, 1,
1930 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
Marat Dukhana1a4e782021-05-07 17:49:47 -07001931}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001932static void qs8_gemm_2x4__scalar_fmagic(benchmark::State& state, const char* net) {
1933 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic, 2, 4, 1, 1,
1934 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
Marat Dukhana1a4e782021-05-07 17:49:47 -07001935}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001936static void qs8_gemm_3x4__scalar_fmagic(benchmark::State& state, const char* net) {
1937 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic, 3, 4, 1, 1,
1938 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
Marat Dukhana1a4e782021-05-07 17:49:47 -07001939}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001940static void qs8_gemm_4x4__scalar_fmagic(benchmark::State& state, const char* net) {
1941 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic, 4, 4, 1, 1,
1942 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params);
Marat Dukhana1a4e782021-05-07 17:49:47 -07001943}
1944
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001945static void qs8_gemm_2x2__scalar_imagic(benchmark::State& state, const char* net) {
1946 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic, 2, 2, 1, 1,
1947 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params);
1948}
1949static void qs8_gemm_3x2__scalar_imagic(benchmark::State& state, const char* net) {
1950 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic, 3, 2, 1, 1,
1951 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params);
1952}
1953static void qs8_gemm_4x2__scalar_imagic(benchmark::State& state, const char* net) {
1954 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic, 4, 2, 1, 1,
1955 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params);
1956}
1957static void qs8_gemm_2x4__scalar_imagic(benchmark::State& state, const char* net) {
1958 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic, 2, 4, 1, 1,
1959 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params);
1960}
1961static void qs8_gemm_3x4__scalar_imagic(benchmark::State& state, const char* net) {
1962 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic, 3, 4, 1, 1,
1963 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params);
1964}
1965static void qs8_gemm_4x4__scalar_imagic(benchmark::State& state, const char* net) {
1966 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic, 4, 4, 1, 1,
1967 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params);
1968}
1969
1970static void qs8_gemm_2x2__scalar_lrintf(benchmark::State& state, const char* net) {
1971 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf, 2, 2, 1, 1,
1972 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params);
1973}
1974static void qs8_gemm_3x2__scalar_lrintf(benchmark::State& state, const char* net) {
1975 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf, 3, 2, 1, 1,
1976 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params);
1977}
1978static void qs8_gemm_4x2__scalar_lrintf(benchmark::State& state, const char* net) {
1979 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf, 4, 2, 1, 1,
1980 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params);
1981}
1982static void qs8_gemm_2x4__scalar_lrintf(benchmark::State& state, const char* net) {
1983 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf, 2, 4, 1, 1,
1984 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params);
1985}
1986static void qs8_gemm_3x4__scalar_lrintf(benchmark::State& state, const char* net) {
1987 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf, 3, 4, 1, 1,
1988 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params);
1989}
1990static void qs8_gemm_4x4__scalar_lrintf(benchmark::State& state, const char* net) {
1991 GEMMBenchmark(state, xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf, 4, 4, 1, 1,
1992 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params);
1993}
1994
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001995BENCHMARK_GEMM(qs8_gemm_2x2__scalar_fmagic)
1996BENCHMARK_GEMM(qs8_gemm_3x2__scalar_fmagic)
1997BENCHMARK_GEMM(qs8_gemm_4x2__scalar_fmagic)
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001998BENCHMARK_GEMM(qs8_gemm_2x4__scalar_fmagic)
1999BENCHMARK_GEMM(qs8_gemm_3x4__scalar_fmagic)
2000BENCHMARK_GEMM(qs8_gemm_4x4__scalar_fmagic)
Marat Dukhana1a4e782021-05-07 17:49:47 -07002001
Marat Dukhan440e8ed2022-01-04 15:30:57 -08002002BENCHMARK_GEMM(qs8_gemm_2x2__scalar_imagic)
2003BENCHMARK_GEMM(qs8_gemm_3x2__scalar_imagic)
2004BENCHMARK_GEMM(qs8_gemm_4x2__scalar_imagic)
2005BENCHMARK_GEMM(qs8_gemm_2x4__scalar_imagic)
2006BENCHMARK_GEMM(qs8_gemm_3x4__scalar_imagic)
2007BENCHMARK_GEMM(qs8_gemm_4x4__scalar_imagic)
2008
2009BENCHMARK_GEMM(qs8_gemm_2x2__scalar_lrintf)
2010BENCHMARK_GEMM(qs8_gemm_3x2__scalar_lrintf)
2011BENCHMARK_GEMM(qs8_gemm_4x2__scalar_lrintf)
2012BENCHMARK_GEMM(qs8_gemm_2x4__scalar_lrintf)
2013BENCHMARK_GEMM(qs8_gemm_3x4__scalar_lrintf)
2014BENCHMARK_GEMM(qs8_gemm_4x4__scalar_lrintf)
2015
Marat Dukhan725f47e2021-05-22 10:06:19 -07002016
// ruy_st is registered only in builds that define BENCHMARK_RUY.
#if defined(BENCHMARK_RUY)
BENCHMARK_GEMM(ruy_st)
#endif  // defined(BENCHMARK_RUY)
2020
Marat Dukhan595e1702020-07-31 10:12:52 -07002021#ifndef XNNPACK_BENCHMARK_NO_MAIN
2022BENCHMARK_MAIN();
2023#endif