blob: c44a13d4c0ec2f3953ccd83d9e1a06dbe41e15b1 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <xnnpack.h>
13
14#include <benchmark/benchmark.h>
15
16#include "bench/end2end.h"
17#include "bench/utils.h"
18#include "models/models.h"
19#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/params.h>
Marat Dukhan81025932021-05-26 09:01:05 -070022#include <xnnpack/params-init.h>
Frank Barcharddc909cb2021-02-08 13:59:31 -080023
24
25static void GEMMEnd2EndBenchmark(
26 benchmark::State& state,
27 models::ExecutionPlanFactory model_factory,
Marat Dukhane3d17bf2021-05-24 22:22:43 -070028 xnn_qs8_gemm_minmax_ukernel_function gemm,
29 xnn_qs8_igemm_minmax_ukernel_function igemm,
30 xnn_qs8_gemm_minmax_ukernel_function gemm1,
31 xnn_qs8_igemm_minmax_ukernel_function igemm1,
Marat Dukhan81025932021-05-26 09:01:05 -070032 xnn_init_qs8_conv_minmax_params_fn init_params,
Frank Barcharddc909cb2021-02-08 13:59:31 -080033 uint8_t mr, uint8_t nr, uint8_t log2_kr = 0, uint8_t log2_sr = 0,
34 benchmark::utils::IsaCheckFunction isa_check = nullptr)
35{
36 if (isa_check && !isa_check(state)) {
37 return;
38 }
39 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
40 state.SkipWithError("failed to initialize XNNPACK");
41 return;
42 }
43
44 // Override microkernels chosen in xnn_initialize
45 // Note: do not directly assign to xnn_params.qs8.gemm because it breaks older gcc.
46 xnn_params.qs8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function(gemm));
47 xnn_params.qs8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function(igemm));
48 xnn_params.qs8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function(gemm1));
49 xnn_params.qs8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function(igemm1));
Marat Dukhan81025932021-05-26 09:01:05 -070050 xnn_params.qs8.gemm.init.qs8 = init_params;
Frank Barcharddc909cb2021-02-08 13:59:31 -080051 xnn_params.qs8.gemm.mr = mr;
52 xnn_params.qs8.gemm.nr = nr;
53 xnn_params.qs8.gemm.log2_kr = log2_kr;
54 xnn_params.qs8.gemm.log2_sr = log2_sr;
55
56 auto execution_plan = model_factory(nullptr);
57 if (execution_plan.empty()) {
58 state.SkipWithError("failed to create a model");
59 return;
60 }
61
62 for (auto _ : state) {
63 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
64 xnn_status status = xnn_run_operator(op.get(), nullptr);
65 if (status != xnn_status_success) {
66 state.SkipWithError("failed to run a model");
67 return;
68 }
69 }
70 }
71
72 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
73 if (cpu_frequency != 0) {
74 state.counters["cpufreq"] = cpu_frequency;
75 }
76}
77
Frank Barchardda7b2e22021-12-13 23:50:53 -080078#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchard9e4d2aa2022-02-02 00:31:21 -080079 static void qs8_gemm_4x8c4__aarch32_neondot_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
80 GEMMEnd2EndBenchmark(state, model,
81 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55,
82 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55,
83 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
84 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
85 xnn_init_qs8_conv_minmax_rndnu_neon_params,
86 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
87 benchmark::utils::CheckNEONDOT);
88 }
Frank Barchard9f3f4202021-12-16 18:13:51 -080089 static void qs8_gemm_4x8c4__aarch32_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
90 GEMMEnd2EndBenchmark(state, model,
91 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64,
Frank Barchard48410212021-12-20 17:14:00 -080092 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_ld64,
Frank Barchard9f3f4202021-12-16 18:13:51 -080093 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
94 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
95 xnn_init_qs8_conv_minmax_rndnu_neon_params,
96 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
97 benchmark::utils::CheckNEONDOT);
98 }
Frank Barchard9e4d2aa2022-02-02 00:31:21 -080099 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard0f294ad2022-01-24 10:48:38 -0800100 GEMMEnd2EndBenchmark(state, model,
Frank Barchard9e4d2aa2022-02-02 00:31:21 -0800101 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
102 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
103 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
104 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
Frank Barchard0f294ad2022-01-24 10:48:38 -0800105 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard9e4d2aa2022-02-02 00:31:21 -0800106 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
107 benchmark::utils::CheckNEON);
108 }
109 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
110 GEMMEnd2EndBenchmark(state, model,
111 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
112 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
113 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
114 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
115 xnn_init_qs8_conv_minmax_rndnu_neon_params,
116 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
117 benchmark::utils::CheckNEON);
Frank Barchard0f294ad2022-01-24 10:48:38 -0800118 }
Frank Barchard34251d82022-02-02 11:57:11 -0800119 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, models::ExecutionPlanFactory model) {
120 GEMMEnd2EndBenchmark(state, model,
121 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7,
122 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
123 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
124 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
125 xnn_init_qs8_conv_minmax_rndnu_neon_params,
126 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
127 benchmark::utils::CheckNEON);
128 }
129 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, models::ExecutionPlanFactory model) {
130 GEMMEnd2EndBenchmark(state, model,
131 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
132 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
133 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
134 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
135 xnn_init_qs8_conv_minmax_rndnu_neon_params,
136 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
137 benchmark::utils::CheckNEON);
138 }
Frank Barchardda7b2e22021-12-13 23:50:53 -0800139 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
140 GEMMEnd2EndBenchmark(state, model,
141 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
Frank Barcharde48b5c12021-12-21 07:22:45 -0800142 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
Frank Barchardda7b2e22021-12-13 23:50:53 -0800143 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
144 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
145 xnn_init_qs8_conv_minmax_rndnu_neon_params,
146 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
147 benchmark::utils::CheckNEON);
148 }
149 static void qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
150 GEMMEnd2EndBenchmark(state, model,
151 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
Frank Barcharde48b5c12021-12-21 07:22:45 -0800152 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
Frank Barchardda7b2e22021-12-13 23:50:53 -0800153 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
154 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
155 xnn_init_qs8_conv_minmax_rndnu_neon_params,
156 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
157 benchmark::utils::CheckNEON);
158 }
Frank Barchard0f294ad2022-01-24 10:48:38 -0800159 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__aarch32_neondot_cortex_a55)
Frank Barchard9e4d2aa2022-02-02 00:31:21 -0800160 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__aarch32_neondot_ld64)
161 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53)
162 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53)
Frank Barchard34251d82022-02-02 11:57:11 -0800163 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7)
164 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7)
Frank Barchardda7b2e22021-12-13 23:50:53 -0800165 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64)
166 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch32_neon_mlal_lane_ld64)
167#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
168
Frank Barcharddc909cb2021-02-08 13:59:31 -0800169#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Marat Dukhan4486f872021-08-07 15:22:50 -0700170 static void qs8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard4a4be4e2021-03-01 15:00:28 -0800171 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan4486f872021-08-07 15:22:50 -0700172 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
173 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
Frank Barcharddc020ff2021-08-07 11:09:57 -0700174 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64,
175 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
176 xnn_init_qs8_conv_minmax_rndnu_neon_params,
177 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
178 benchmark::utils::CheckNEONDOT);
179 }
Marat Dukhan4486f872021-08-07 15:22:50 -0700180 static void qs8_gemm_4x16c4__aarch64_neondot_ld32(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard40c0eaa2021-06-29 22:50:47 -0700181 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan4486f872021-08-07 15:22:50 -0700182 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld32,
183 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot,
184 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld32,
185 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
186 xnn_init_qs8_conv_minmax_rndnu_neon_params,
187 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
188 benchmark::utils::CheckNEONDOT);
189 }
190 static void qs8_gemm_4x16c4__aarch64_neondot_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
191 GEMMEnd2EndBenchmark(state, model,
192 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64,
193 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld64,
194 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64,
195 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
196 xnn_init_qs8_conv_minmax_rndnu_neon_params,
197 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
198 benchmark::utils::CheckNEONDOT);
199 }
200 static void qs8_gemm_4x16c4__aarch64_neondot_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
201 GEMMEnd2EndBenchmark(state, model,
202 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
203 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
204 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__aarch64_neondot_ld64,
205 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
206 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard40c0eaa2021-06-29 22:50:47 -0700207 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
208 benchmark::utils::CheckNEONDOT);
209 }
Frank Barchard914f57b2021-12-13 12:31:42 -0800210 static void qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
211 GEMMEnd2EndBenchmark(state, model,
212 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64,
213 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64,
214 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
215 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
216 xnn_init_qs8_conv_minmax_rndnu_neon_params,
217 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
218 benchmark::utils::CheckNEON);
219 }
220 static void qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
221 GEMMEnd2EndBenchmark(state, model,
222 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64,
223 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64,
224 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
225 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
226 xnn_init_qs8_conv_minmax_rndnu_neon_params,
227 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
228 benchmark::utils::CheckNEON);
229 }
Frank Barchard889ed102021-08-20 15:01:29 -0700230 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd4416d62021-05-17 15:51:37 -0700231 GEMMEnd2EndBenchmark(state, model,
Frank Barchard889ed102021-08-20 15:01:29 -0700232 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
233 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
234 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
235 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
236 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd4416d62021-05-17 15:51:37 -0700237 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
238 benchmark::utils::CheckNEON);
239 }
Frank Barchard889ed102021-08-20 15:01:29 -0700240 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd4416d62021-05-17 15:51:37 -0700241 GEMMEnd2EndBenchmark(state, model,
Frank Barchard889ed102021-08-20 15:01:29 -0700242 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
243 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
244 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
245 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
246 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd4416d62021-05-17 15:51:37 -0700247 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
248 benchmark::utils::CheckNEON);
249 }
Frank Barchard5cffb642021-11-22 13:59:43 -0800250 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
251 GEMMEnd2EndBenchmark(state, model,
252 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
253 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
254 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
255 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
256 xnn_init_qs8_conv_minmax_rndnu_neon_params,
257 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
258 benchmark::utils::CheckNEON);
259 }
260 static void qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
261 GEMMEnd2EndBenchmark(state, model,
262 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
263 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
264 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
265 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
266 xnn_init_qs8_conv_minmax_rndnu_neon_params,
267 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
268 benchmark::utils::CheckNEON);
269 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800270 static void qs8_gemm_2x8c8__aarch64_neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard2f061502021-03-11 02:18:15 -0800271 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800272 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal,
273 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal,
274 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
275 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
Frank Barchard889ed102021-08-20 15:01:29 -0700276 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard2f061502021-03-11 02:18:15 -0800277 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
278 benchmark::utils::CheckNEON);
279 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800280 static void qs8_gemm_2x8c8__aarch64_neon_mlal_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard2f061502021-03-11 02:18:15 -0800281 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800282 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm,
283 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm,
284 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm,
285 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm,
Frank Barchard889ed102021-08-20 15:01:29 -0700286 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard2f061502021-03-11 02:18:15 -0800287 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
288 benchmark::utils::CheckNEON);
289 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800290 static void qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard4a352042021-04-13 15:52:08 -0700291 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800292 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_cortex_a53,
293 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal,
294 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
295 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal,
Frank Barchard889ed102021-08-20 15:01:29 -0700296 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard4a352042021-04-13 15:52:08 -0700297 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
298 benchmark::utils::CheckNEON);
299 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800300 static void qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd68e1142021-04-26 15:15:08 -0700301 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800302 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
303 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm_cortex_a53,
304 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
305 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_prfm_cortex_a53,
Frank Barchard889ed102021-08-20 15:01:29 -0700306 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd68e1142021-04-26 15:15:08 -0700307 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
308 benchmark::utils::CheckNEON);
309 }
Frank Barchard5655cb72021-03-09 20:57:14 -0800310
Marat Dukhan4486f872021-08-07 15:22:50 -0700311 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__aarch64_neondot_cortex_a55)
312 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__aarch64_neondot_ld32)
313 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__aarch64_neondot_ld64)
314 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__aarch64_neondot_ld128)
Frank Barchard914f57b2021-12-13 12:31:42 -0800315 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch64_neon_mlal_lane_prfm_ld64)
316 BENCHMARK_QS8_END2END(qs8_gemm_4x8__aarch64_neon_mlal_lane_ld64)
Frank Barchard889ed102021-08-20 15:01:29 -0700317 BENCHMARK_QS8_END2END(qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53)
318 BENCHMARK_QS8_END2END(qs8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53)
Frank Barchard5cffb642021-11-22 13:59:43 -0800319 BENCHMARK_QS8_END2END(qs8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64)
320 BENCHMARK_QS8_END2END(qs8_gemm_4x16__aarch64_neon_mlal_lane_ld64)
Frank Barcharde22685a2021-11-12 11:36:58 -0800321 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__aarch64_neon_mlal_prfm_cortex_a53)
322 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__aarch64_neon_mlal_cortex_a53)
323 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__aarch64_neon_mlal_prfm)
324 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__aarch64_neon_mlal)
Frank Barcharddc909cb2021-02-08 13:59:31 -0800325#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
326
Frank Barcharddc909cb2021-02-08 13:59:31 -0800327#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard27bf92c2021-11-24 15:47:52 -0800328 static void qs8_gemm_2x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -0800329 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800330 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane,
331 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane,
332 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
333 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
334 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -0800335 2 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
336 benchmark::utils::CheckNEON);
337 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800338 static void qs8_gemm_2x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barcharddc909cb2021-02-08 13:59:31 -0800339 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800340 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
341 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
342 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
343 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
344 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barcharddc909cb2021-02-08 13:59:31 -0800345 2 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
346 benchmark::utils::CheckNEON);
347 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800348 static void qs8_gemm_3x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -0800349 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800350 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
351 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
352 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
353 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
354 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -0800355 3 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
356 benchmark::utils::CheckNEON);
357 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800358 static void qs8_gemm_3x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -0800359 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800360 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
361 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
362 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
363 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
364 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -0800365 3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
366 benchmark::utils::CheckNEON);
367 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800368 static void qs8_gemm_4x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -0800369 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800370 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
371 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
372 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
373 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
374 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -0800375 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
376 benchmark::utils::CheckNEON);
377 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800378 static void qs8_gemm_4x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -0800379 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800380 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
381 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
382 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
383 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
384 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -0800385 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
386 benchmark::utils::CheckNEON);
387 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800388 static void qs8_gemm_6x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard55497352021-04-30 11:47:56 -0700389 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800390 xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
391 xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
392 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
393 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
394 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard55497352021-04-30 11:47:56 -0700395 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
396 benchmark::utils::CheckNEON);
397 }
Frank Barchard27bf92c2021-11-24 15:47:52 -0800398 static void qs8_gemm_6x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard55497352021-04-30 11:47:56 -0700399 GEMMEnd2EndBenchmark(state, model,
Frank Barchard27bf92c2021-11-24 15:47:52 -0800400 xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
401 xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
402 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
403 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
404 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard55497352021-04-30 11:47:56 -0700405 6 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
406 benchmark::utils::CheckNEON);
407 }
Frank Barchardf82ea822021-12-01 15:43:37 -0800408 static void qs8_gemm_2x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
409 GEMMEnd2EndBenchmark(state, model,
410 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane_prfm,
411 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane_prfm,
412 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
413 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
414 xnn_init_qs8_conv_minmax_rndnu_neon_params,
415 2 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
416 benchmark::utils::CheckNEON);
417 }
418 static void qs8_gemm_2x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
419 GEMMEnd2EndBenchmark(state, model,
420 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm,
421 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane_prfm,
422 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
423 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
424 xnn_init_qs8_conv_minmax_rndnu_neon_params,
425 2 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
426 benchmark::utils::CheckNEON);
427 }
428 static void qs8_gemm_3x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
429 GEMMEnd2EndBenchmark(state, model,
430 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm,
431 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane_prfm,
432 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
433 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
434 xnn_init_qs8_conv_minmax_rndnu_neon_params,
435 3 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
436 benchmark::utils::CheckNEON);
437 }
438 static void qs8_gemm_3x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
439 GEMMEnd2EndBenchmark(state, model,
440 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
441 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane_prfm,
442 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
443 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
444 xnn_init_qs8_conv_minmax_rndnu_neon_params,
445 3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
446 benchmark::utils::CheckNEON);
447 }
448 static void qs8_gemm_4x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
449 GEMMEnd2EndBenchmark(state, model,
450 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane_prfm,
451 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane_prfm,
452 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
453 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
454 xnn_init_qs8_conv_minmax_rndnu_neon_params,
455 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
456 benchmark::utils::CheckNEON);
457 }
458 static void qs8_gemm_4x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
459 GEMMEnd2EndBenchmark(state, model,
460 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm,
461 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane_prfm,
462 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
463 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
464 xnn_init_qs8_conv_minmax_rndnu_neon_params,
465 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
466 benchmark::utils::CheckNEON);
467 }
468 static void qs8_gemm_6x8__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
469 GEMMEnd2EndBenchmark(state, model,
470 xnn_qs8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
471 xnn_qs8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane_prfm,
472 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
473 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane_prfm,
474 xnn_init_qs8_conv_minmax_rndnu_neon_params,
475 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
476 benchmark::utils::CheckNEON);
477 }
478 static void qs8_gemm_6x16__neon_mlal_lane_prfm(benchmark::State& state, models::ExecutionPlanFactory model) {
479 GEMMEnd2EndBenchmark(state, model,
480 xnn_qs8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
481 xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm,
482 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
483 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm,
484 xnn_init_qs8_conv_minmax_rndnu_neon_params,
485 6 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
486 benchmark::utils::CheckNEON);
487 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800488 static void qs8_gemm_2x8c2__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -0800489 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800490 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup,
491 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup,
492 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup,
493 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup,
Frank Barchard1d412472021-10-25 17:27:21 -0700494 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -0800495 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
496 benchmark::utils::CheckNEON);
497 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800498 static void qs8_gemm_2x16c2__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -0800499 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800500 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
501 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup,
502 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup,
503 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup,
Frank Barchard1d412472021-10-25 17:27:21 -0700504 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -0800505 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
506 benchmark::utils::CheckNEON);
507 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800508 static void qs8_gemm_3x8c2__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -0800509 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800510 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup,
511 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup,
512 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup,
513 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup,
Frank Barchard1d412472021-10-25 17:27:21 -0700514 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -0800515 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
516 benchmark::utils::CheckNEON);
517 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800518 static void qs8_gemm_3x16c2__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -0800519 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800520 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup,
521 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_dup,
522 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup,
523 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup,
Frank Barchard1d412472021-10-25 17:27:21 -0700524 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -0800525 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
526 benchmark::utils::CheckNEON);
527 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800528 static void qs8_gemm_4x8c2__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -0800529 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800530 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
531 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup,
532 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup,
533 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_dup,
Frank Barchard1d412472021-10-25 17:27:21 -0700534 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -0800535 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
536 benchmark::utils::CheckNEON);
537 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800538 static void qs8_gemm_4x16c2__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -0800539 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800540 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup,
541 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_dup,
542 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup,
543 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup,
Frank Barchard1d412472021-10-25 17:27:21 -0700544 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -0800545 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
546 benchmark::utils::CheckNEON);
547 }
Frank Barchard15eec022021-11-17 13:26:20 -0800548 static void qs8_gemm_2x8c2__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
549 GEMMEnd2EndBenchmark(state, model,
550 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
551 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r,
552 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r,
553 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r,
554 xnn_init_qs8_conv_minmax_rndnu_neon_params,
555 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
556 benchmark::utils::CheckNEON);
557 }
558 static void qs8_gemm_2x16c2__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
559 GEMMEnd2EndBenchmark(state, model,
560 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r,
561 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld1r,
562 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r,
563 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r,
564 xnn_init_qs8_conv_minmax_rndnu_neon_params,
565 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
566 benchmark::utils::CheckNEON);
567 }
568 static void qs8_gemm_3x8c2__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
569 GEMMEnd2EndBenchmark(state, model,
570 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r,
571 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld1r,
572 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r,
573 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r,
574 xnn_init_qs8_conv_minmax_rndnu_neon_params,
575 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
576 benchmark::utils::CheckNEON);
577 }
578 static void qs8_gemm_3x16c2__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
579 GEMMEnd2EndBenchmark(state, model,
580 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r,
581 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld1r,
582 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r,
583 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r,
584 xnn_init_qs8_conv_minmax_rndnu_neon_params,
585 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
586 benchmark::utils::CheckNEON);
587 }
588 static void qs8_gemm_4x8c2__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
589 GEMMEnd2EndBenchmark(state, model,
590 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
591 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld1r,
592 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r,
593 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld1r,
594 xnn_init_qs8_conv_minmax_rndnu_neon_params,
595 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
596 benchmark::utils::CheckNEON);
597 }
598 static void qs8_gemm_4x16c2__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
599 GEMMEnd2EndBenchmark(state, model,
600 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r,
601 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r,
602 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r,
603 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld1r,
604 xnn_init_qs8_conv_minmax_rndnu_neon_params,
605 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
606 benchmark::utils::CheckNEON);
607 }
608 static void qs8_gemm_2x8c2__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
609 GEMMEnd2EndBenchmark(state, model,
610 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r,
611 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld2r,
612 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
613 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
614 xnn_init_qs8_conv_minmax_rndnu_neon_params,
615 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
616 benchmark::utils::CheckNEON);
617 }
618 static void qs8_gemm_2x16c2__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
619 GEMMEnd2EndBenchmark(state, model,
620 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
621 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld2r,
622 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r,
623 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r,
624 xnn_init_qs8_conv_minmax_rndnu_neon_params,
625 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
626 benchmark::utils::CheckNEON);
627 }
628 static void qs8_gemm_3x8c2__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
629 GEMMEnd2EndBenchmark(state, model,
630 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r,
631 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld2r,
632 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
633 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
634 xnn_init_qs8_conv_minmax_rndnu_neon_params,
635 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
636 benchmark::utils::CheckNEON);
637 }
638 static void qs8_gemm_3x16c2__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
639 GEMMEnd2EndBenchmark(state, model,
640 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r,
641 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld2r,
642 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r,
643 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r,
644 xnn_init_qs8_conv_minmax_rndnu_neon_params,
645 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
646 benchmark::utils::CheckNEON);
647 }
648 static void qs8_gemm_4x8c2__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
649 GEMMEnd2EndBenchmark(state, model,
650 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r,
651 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld2r,
652 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
653 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r,
654 xnn_init_qs8_conv_minmax_rndnu_neon_params,
655 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
656 benchmark::utils::CheckNEON);
657 }
658 static void qs8_gemm_4x16c2__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
659 GEMMEnd2EndBenchmark(state, model,
660 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r,
661 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld2r,
662 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r,
663 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld2r,
664 xnn_init_qs8_conv_minmax_rndnu_neon_params,
665 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
666 benchmark::utils::CheckNEON);
667 }
Frank Barchard42f5c502021-11-16 10:04:21 -0800668 static void qs8_gemm_2x8c2__neon_mlal_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
669 GEMMEnd2EndBenchmark(state, model,
670 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r,
671 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld4r,
672 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
673 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
674 xnn_init_qs8_conv_minmax_rndnu_neon_params,
675 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
676 benchmark::utils::CheckNEON);
677 }
678 static void qs8_gemm_2x16c2__neon_mlal_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
679 GEMMEnd2EndBenchmark(state, model,
680 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r,
681 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r,
682 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
683 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
684 xnn_init_qs8_conv_minmax_rndnu_neon_params,
685 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
686 benchmark::utils::CheckNEON);
687 }
688 static void qs8_gemm_3x8c2__neon_mlal_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
689 GEMMEnd2EndBenchmark(state, model,
690 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r,
691 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_ld4r,
692 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
693 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
694 xnn_init_qs8_conv_minmax_rndnu_neon_params,
695 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
696 benchmark::utils::CheckNEON);
697 }
698 static void qs8_gemm_3x16c2__neon_mlal_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
699 GEMMEnd2EndBenchmark(state, model,
700 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
701 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r,
702 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
703 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
704 xnn_init_qs8_conv_minmax_rndnu_neon_params,
705 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
706 benchmark::utils::CheckNEON);
707 }
708 static void qs8_gemm_4x8c2__neon_mlal_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
709 GEMMEnd2EndBenchmark(state, model,
710 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r,
711 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_ld4r,
712 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
713 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld4r,
714 xnn_init_qs8_conv_minmax_rndnu_neon_params,
715 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
716 benchmark::utils::CheckNEON);
717 }
718 static void qs8_gemm_4x16c2__neon_mlal_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
719 GEMMEnd2EndBenchmark(state, model,
720 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
721 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r,
722 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
723 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r,
724 xnn_init_qs8_conv_minmax_rndnu_neon_params,
725 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
726 benchmark::utils::CheckNEON);
727 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800728 static void qs8_gemm_2x8c2s4__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -0800729 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800730 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal,
731 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mlal,
732 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal,
733 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800734 xnn_init_qs8_conv_minmax_rndnu_neon_params,
735 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
736 benchmark::utils::CheckNEON);
737 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800738 static void qs8_gemm_2x16c2s4__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -0800739 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800740 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
741 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal,
742 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal,
743 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800744 xnn_init_qs8_conv_minmax_rndnu_neon_params,
745 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
746 benchmark::utils::CheckNEON);
747 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800748 static void qs8_gemm_3x8c2s4__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -0800749 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800750 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
751 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal,
752 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal,
753 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800754 xnn_init_qs8_conv_minmax_rndnu_neon_params,
755 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
756 benchmark::utils::CheckNEON);
757 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800758 static void qs8_gemm_3x16c2s4__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -0800759 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800760 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
761 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal,
762 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal,
763 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800764 xnn_init_qs8_conv_minmax_rndnu_neon_params,
765 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
766 benchmark::utils::CheckNEON);
767 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800768 static void qs8_gemm_4x8c2s4__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -0800769 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800770 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
771 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal,
772 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal,
773 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mlal,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800774 xnn_init_qs8_conv_minmax_rndnu_neon_params,
775 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
776 benchmark::utils::CheckNEON);
777 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800778 static void qs8_gemm_4x16c2s4__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -0800779 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800780 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal,
781 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mlal,
782 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal,
783 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mlal,
Frank Barchardc7a032d2021-11-10 12:37:49 -0800784 xnn_init_qs8_conv_minmax_rndnu_neon_params,
785 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
786 benchmark::utils::CheckNEON);
787 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800788 static void qs8_gemm_2x8c4__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -0700789 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800790 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup,
791 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_dup,
792 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup,
793 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup,
Frank Barchard51320102021-11-05 16:01:30 -0700794 xnn_init_qs8_conv_minmax_rndnu_neon_params,
795 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
796 benchmark::utils::CheckNEON);
797 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800798 static void qs8_gemm_2x16c4__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -0700799 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800800 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
801 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup,
802 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
803 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
Frank Barchard51320102021-11-05 16:01:30 -0700804 xnn_init_qs8_conv_minmax_rndnu_neon_params,
805 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
806 benchmark::utils::CheckNEON);
807 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800808 static void qs8_gemm_3x8c4__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -0700809 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800810 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup,
811 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup,
812 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup,
813 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup,
Frank Barchard51320102021-11-05 16:01:30 -0700814 xnn_init_qs8_conv_minmax_rndnu_neon_params,
815 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
816 benchmark::utils::CheckNEON);
817 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800818 static void qs8_gemm_3x16c4__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -0700819 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800820 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup,
821 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_dup,
822 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
823 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
Frank Barchard51320102021-11-05 16:01:30 -0700824 xnn_init_qs8_conv_minmax_rndnu_neon_params,
825 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
826 benchmark::utils::CheckNEON);
827 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800828 static void qs8_gemm_4x8c4__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -0700829 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800830 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
831 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup,
832 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup,
833 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_dup,
Frank Barchard51320102021-11-05 16:01:30 -0700834 xnn_init_qs8_conv_minmax_rndnu_neon_params,
835 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
836 benchmark::utils::CheckNEON);
837 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800838 static void qs8_gemm_4x16c4__neon_mlal_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -0700839 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800840 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup,
841 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_dup,
842 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
843 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_dup,
Frank Barchard51320102021-11-05 16:01:30 -0700844 xnn_init_qs8_conv_minmax_rndnu_neon_params,
845 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
846 benchmark::utils::CheckNEON);
847 }
Frank Barchard64ab1b72021-11-22 10:57:40 -0800848 static void qs8_gemm_2x8c4__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
849 GEMMEnd2EndBenchmark(state, model,
850 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
851 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r,
852 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r,
853 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r,
854 xnn_init_qs8_conv_minmax_rndnu_neon_params,
855 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
856 benchmark::utils::CheckNEON);
857 }
858 static void qs8_gemm_2x16c4__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
859 GEMMEnd2EndBenchmark(state, model,
860 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r,
861 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld1r,
862 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r,
863 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r,
864 xnn_init_qs8_conv_minmax_rndnu_neon_params,
865 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
866 benchmark::utils::CheckNEON);
867 }
868 static void qs8_gemm_3x8c4__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
869 GEMMEnd2EndBenchmark(state, model,
870 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r,
871 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld1r,
872 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r,
873 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r,
874 xnn_init_qs8_conv_minmax_rndnu_neon_params,
875 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
876 benchmark::utils::CheckNEON);
877 }
878 static void qs8_gemm_3x16c4__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
879 GEMMEnd2EndBenchmark(state, model,
880 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r,
881 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld1r,
882 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r,
883 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r,
884 xnn_init_qs8_conv_minmax_rndnu_neon_params,
885 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
886 benchmark::utils::CheckNEON);
887 }
888 static void qs8_gemm_4x8c4__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
889 GEMMEnd2EndBenchmark(state, model,
890 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r,
891 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld1r,
892 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r,
893 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld1r,
894 xnn_init_qs8_conv_minmax_rndnu_neon_params,
895 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
896 benchmark::utils::CheckNEON);
897 }
898 static void qs8_gemm_4x16c4__neon_mlal_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
899 GEMMEnd2EndBenchmark(state, model,
900 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
901 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld1r,
902 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r,
903 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld1r,
904 xnn_init_qs8_conv_minmax_rndnu_neon_params,
905 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
906 benchmark::utils::CheckNEON);
907 }
908 static void qs8_gemm_2x8c4__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
909 GEMMEnd2EndBenchmark(state, model,
910 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r,
911 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld2r,
912 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
913 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
914 xnn_init_qs8_conv_minmax_rndnu_neon_params,
915 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
916 benchmark::utils::CheckNEON);
917 }
918 static void qs8_gemm_2x16c4__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
919 GEMMEnd2EndBenchmark(state, model,
920 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r,
921 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r,
922 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r,
923 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r,
924 xnn_init_qs8_conv_minmax_rndnu_neon_params,
925 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
926 benchmark::utils::CheckNEON);
927 }
928 static void qs8_gemm_3x8c4__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
929 GEMMEnd2EndBenchmark(state, model,
930 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r,
931 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_ld2r,
932 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
933 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
934 xnn_init_qs8_conv_minmax_rndnu_neon_params,
935 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
936 benchmark::utils::CheckNEON);
937 }
938 static void qs8_gemm_3x16c4__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
939 GEMMEnd2EndBenchmark(state, model,
940 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r,
941 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mlal_ld2r,
942 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r,
943 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r,
944 xnn_init_qs8_conv_minmax_rndnu_neon_params,
945 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
946 benchmark::utils::CheckNEON);
947 }
948 static void qs8_gemm_4x8c4__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
949 GEMMEnd2EndBenchmark(state, model,
950 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r,
951 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_ld2r,
952 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
953 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r,
954 xnn_init_qs8_conv_minmax_rndnu_neon_params,
955 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
956 benchmark::utils::CheckNEON);
957 }
958 static void qs8_gemm_4x16c4__neon_mlal_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
959 GEMMEnd2EndBenchmark(state, model,
960 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r,
961 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mlal_ld2r,
962 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r,
963 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mlal_ld2r,
964 xnn_init_qs8_conv_minmax_rndnu_neon_params,
965 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
966 benchmark::utils::CheckNEON);
967 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800968 static void qs8_gemm_2x8c4s2__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -0800969 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800970 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal,
971 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mlal,
972 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal,
973 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal,
Frank Barchardeb704f72021-11-12 01:26:50 -0800974 xnn_init_qs8_conv_minmax_rndnu_neon_params,
975 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
976 benchmark::utils::CheckNEON);
977 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800978 static void qs8_gemm_2x16c4s2__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -0800979 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800980 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal,
981 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mlal,
982 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal,
983 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal,
Frank Barchardeb704f72021-11-12 01:26:50 -0800984 xnn_init_qs8_conv_minmax_rndnu_neon_params,
985 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
986 benchmark::utils::CheckNEON);
987 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800988 static void qs8_gemm_3x8c4s2__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -0800989 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -0800990 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal,
991 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal,
992 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal,
993 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal,
Frank Barchardeb704f72021-11-12 01:26:50 -0800994 xnn_init_qs8_conv_minmax_rndnu_neon_params,
995 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
996 benchmark::utils::CheckNEON);
997 }
Frank Barcharde22685a2021-11-12 11:36:58 -0800998 static void qs8_gemm_3x16c4s2__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -0800999 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001000 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal,
1001 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal,
1002 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal,
1003 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal,
Frank Barchardeb704f72021-11-12 01:26:50 -08001004 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1005 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1006 benchmark::utils::CheckNEON);
1007 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001008 static void qs8_gemm_4x8c4s2__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001009 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001010 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal,
1011 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mlal,
1012 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal,
1013 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mlal,
Frank Barchardeb704f72021-11-12 01:26:50 -08001014 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1015 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1016 benchmark::utils::CheckNEON);
1017 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001018 static void qs8_gemm_4x16c4s2__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001019 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001020 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal,
1021 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal,
1022 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal,
1023 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mlal,
Frank Barchardeb704f72021-11-12 01:26:50 -08001024 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1025 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1026 benchmark::utils::CheckNEON);
1027 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001028 static void qs8_gemm_2x8c2__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -08001029 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001030 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup,
1031 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_dup,
1032 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
1033 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
Frank Barchard1d412472021-10-25 17:27:21 -07001034 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -08001035 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1036 benchmark::utils::CheckNEON);
1037 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001038 static void qs8_gemm_2x16c2__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -08001039 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001040 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup,
1041 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_dup,
1042 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
1043 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
Frank Barchard1d412472021-10-25 17:27:21 -07001044 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -08001045 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1046 benchmark::utils::CheckNEON);
1047 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001048 static void qs8_gemm_3x8c2__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -08001049 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001050 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
1051 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup,
1052 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
1053 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
Frank Barchard1d412472021-10-25 17:27:21 -07001054 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -08001055 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1056 benchmark::utils::CheckNEON);
1057 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001058 static void qs8_gemm_3x16c2__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -08001059 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001060 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup,
1061 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup,
1062 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
1063 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
Frank Barchard1d412472021-10-25 17:27:21 -07001064 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -08001065 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1066 benchmark::utils::CheckNEON);
1067 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001068 static void qs8_gemm_4x8c2__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -08001069 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001070 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup,
1071 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup,
1072 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
1073 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup,
Frank Barchard1d412472021-10-25 17:27:21 -07001074 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -08001075 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1076 benchmark::utils::CheckNEON);
1077 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001078 static void qs8_gemm_4x16c2__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard32389c62021-02-16 11:04:36 -08001079 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001080 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup,
1081 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup,
1082 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
1083 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup,
Frank Barchard1d412472021-10-25 17:27:21 -07001084 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard32389c62021-02-16 11:04:36 -08001085 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1086 benchmark::utils::CheckNEON);
1087 }
Frank Barchard15eec022021-11-17 13:26:20 -08001088 static void qs8_gemm_2x8c2__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1089 GEMMEnd2EndBenchmark(state, model,
1090 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r,
1091 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r,
1092 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r,
1093 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r,
1094 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1095 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1096 benchmark::utils::CheckNEON);
1097 }
1098 static void qs8_gemm_2x16c2__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1099 GEMMEnd2EndBenchmark(state, model,
1100 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r,
1101 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld1r,
1102 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r,
1103 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r,
1104 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1105 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1106 benchmark::utils::CheckNEON);
1107 }
1108 static void qs8_gemm_3x8c2__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1109 GEMMEnd2EndBenchmark(state, model,
1110 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r,
1111 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld1r,
1112 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r,
1113 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r,
1114 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1115 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1116 benchmark::utils::CheckNEON);
1117 }
1118 static void qs8_gemm_3x16c2__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1119 GEMMEnd2EndBenchmark(state, model,
1120 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
1121 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld1r,
1122 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r,
1123 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r,
1124 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1125 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1126 benchmark::utils::CheckNEON);
1127 }
1128 static void qs8_gemm_4x8c2__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1129 GEMMEnd2EndBenchmark(state, model,
1130 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r,
1131 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r,
1132 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r,
1133 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r,
1134 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1135 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1136 benchmark::utils::CheckNEON);
1137 }
1138 static void qs8_gemm_4x16c2__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1139 GEMMEnd2EndBenchmark(state, model,
1140 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r,
1141 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld1r,
1142 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r,
1143 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r,
1144 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1145 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1146 benchmark::utils::CheckNEON);
1147 }
1148 static void qs8_gemm_2x8c2__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1149 GEMMEnd2EndBenchmark(state, model,
1150 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r,
1151 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r,
1152 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
1153 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
1154 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1155 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1156 benchmark::utils::CheckNEON);
1157 }
1158 static void qs8_gemm_2x16c2__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1159 GEMMEnd2EndBenchmark(state, model,
1160 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r,
1161 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld2r,
1162 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r,
1163 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r,
1164 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1165 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1166 benchmark::utils::CheckNEON);
1167 }
1168 static void qs8_gemm_3x8c2__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1169 GEMMEnd2EndBenchmark(state, model,
1170 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r,
1171 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld2r,
1172 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
1173 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
1174 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1175 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1176 benchmark::utils::CheckNEON);
1177 }
1178 static void qs8_gemm_3x16c2__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1179 GEMMEnd2EndBenchmark(state, model,
1180 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r,
1181 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r,
1182 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r,
1183 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r,
1184 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1185 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1186 benchmark::utils::CheckNEON);
1187 }
1188 static void qs8_gemm_4x8c2__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1189 GEMMEnd2EndBenchmark(state, model,
1190 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r,
1191 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r,
1192 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
1193 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r,
1194 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1195 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1196 benchmark::utils::CheckNEON);
1197 }
1198 static void qs8_gemm_4x16c2__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1199 GEMMEnd2EndBenchmark(state, model,
1200 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
1201 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld2r,
1202 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r,
1203 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld2r,
1204 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1205 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1206 benchmark::utils::CheckNEON);
1207 }
Frank Barchard42f5c502021-11-16 10:04:21 -08001208 static void qs8_gemm_2x8c2__neon_mull_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
1209 GEMMEnd2EndBenchmark(state, model,
1210 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r,
1211 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld4r,
1212 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
1213 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
1214 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1215 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1216 benchmark::utils::CheckNEON);
1217 }
1218 static void qs8_gemm_2x16c2__neon_mull_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
1219 GEMMEnd2EndBenchmark(state, model,
1220 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
1221 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r,
1222 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
1223 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
1224 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1225 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1226 benchmark::utils::CheckNEON);
1227 }
1228 static void qs8_gemm_3x8c2__neon_mull_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
1229 GEMMEnd2EndBenchmark(state, model,
1230 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
1231 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_ld4r,
1232 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
1233 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
1234 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1235 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1236 benchmark::utils::CheckNEON);
1237 }
1238 static void qs8_gemm_3x16c2__neon_mull_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
1239 GEMMEnd2EndBenchmark(state, model,
1240 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r,
1241 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r,
1242 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
1243 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
1244 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1245 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1246 benchmark::utils::CheckNEON);
1247 }
1248 static void qs8_gemm_4x8c2__neon_mull_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
1249 GEMMEnd2EndBenchmark(state, model,
1250 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
1251 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld4r,
1252 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
1253 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld4r,
1254 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1255 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1256 benchmark::utils::CheckNEON);
1257 }
1258 static void qs8_gemm_4x16c2__neon_mull_ld4r(benchmark::State& state, models::ExecutionPlanFactory model) {
1259 GEMMEnd2EndBenchmark(state, model,
1260 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
1261 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r,
1262 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
1263 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r,
1264 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1265 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
1266 benchmark::utils::CheckNEON);
1267 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001268 static void qs8_gemm_2x8c2s4__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08001269 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001270 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull,
1271 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2s4__neon_mull,
1272 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull,
1273 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull,
Frank Barchardc7a032d2021-11-10 12:37:49 -08001274 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1275 2 /* mr */, 8 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
1276 benchmark::utils::CheckNEON);
1277 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001278 static void qs8_gemm_2x16c2s4__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08001279 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001280 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull,
1281 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mull,
1282 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull,
1283 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull,
Frank Barchardc7a032d2021-11-10 12:37:49 -08001284 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1285 2 /* mr */, 16 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
1286 benchmark::utils::CheckNEON);
1287 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001288 static void qs8_gemm_3x8c2s4__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08001289 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001290 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull,
1291 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mull,
1292 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull,
1293 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull,
Frank Barchardc7a032d2021-11-10 12:37:49 -08001294 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1295 3 /* mr */, 8 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
1296 benchmark::utils::CheckNEON);
1297 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001298 static void qs8_gemm_3x16c2s4__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08001299 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001300 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull,
1301 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull,
1302 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull,
1303 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull,
Frank Barchardc7a032d2021-11-10 12:37:49 -08001304 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1305 3 /* mr */, 16 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
1306 benchmark::utils::CheckNEON);
1307 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001308 static void qs8_gemm_4x8c2s4__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08001309 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001310 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull,
1311 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull,
1312 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull,
1313 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull,
Frank Barchardc7a032d2021-11-10 12:37:49 -08001314 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1315 4 /* mr */, 8 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
1316 benchmark::utils::CheckNEON);
1317 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001318 static void qs8_gemm_4x16c2s4__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08001319 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001320 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull,
1321 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2s4__neon_mull,
1322 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull,
1323 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2s4__neon_mull,
Frank Barchardc7a032d2021-11-10 12:37:49 -08001324 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1325 4 /* mr */, 16 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */,
1326 benchmark::utils::CheckNEON);
1327 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001328 static void qs8_gemm_2x8c4__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -07001329 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001330 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup,
1331 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_dup,
1332 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
1333 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
Frank Barchard51320102021-11-05 16:01:30 -07001334 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1335 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1336 benchmark::utils::CheckNEON);
1337 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001338 static void qs8_gemm_2x16c4__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -07001339 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001340 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup,
1341 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_dup,
1342 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup,
1343 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup,
Frank Barchard51320102021-11-05 16:01:30 -07001344 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1345 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1346 benchmark::utils::CheckNEON);
1347 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001348 static void qs8_gemm_3x8c4__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -07001349 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001350 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
1351 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup,
1352 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
1353 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
Frank Barchard51320102021-11-05 16:01:30 -07001354 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1355 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1356 benchmark::utils::CheckNEON);
1357 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001358 static void qs8_gemm_3x16c4__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -07001359 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001360 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup,
1361 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_dup,
1362 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup,
1363 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup,
Frank Barchard51320102021-11-05 16:01:30 -07001364 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1365 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1366 benchmark::utils::CheckNEON);
1367 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001368 static void qs8_gemm_4x8c4__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -07001369 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001370 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup,
1371 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_dup,
1372 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
1373 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup,
Frank Barchard51320102021-11-05 16:01:30 -07001374 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1375 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1376 benchmark::utils::CheckNEON);
1377 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001378 static void qs8_gemm_4x16c4__neon_mull_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard51320102021-11-05 16:01:30 -07001379 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001380 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup,
1381 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_dup,
1382 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup,
1383 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup,
Frank Barchard51320102021-11-05 16:01:30 -07001384 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1385 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1386 benchmark::utils::CheckNEON);
1387 }
Frank Barchard64ab1b72021-11-22 10:57:40 -08001388 static void qs8_gemm_2x8c4__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1389 GEMMEnd2EndBenchmark(state, model,
1390 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
1391 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r,
1392 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
1393 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
1394 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1395 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1396 benchmark::utils::CheckNEON);
1397 }
1398 static void qs8_gemm_2x16c4__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1399 GEMMEnd2EndBenchmark(state, model,
1400 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r,
1401 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld1r,
1402 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r,
1403 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r,
1404 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1405 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1406 benchmark::utils::CheckNEON);
1407 }
1408 static void qs8_gemm_3x8c4__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1409 GEMMEnd2EndBenchmark(state, model,
1410 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r,
1411 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld1r,
1412 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
1413 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
1414 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1415 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1416 benchmark::utils::CheckNEON);
1417 }
1418 static void qs8_gemm_3x16c4__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1419 GEMMEnd2EndBenchmark(state, model,
1420 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r,
1421 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r,
1422 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r,
1423 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r,
1424 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1425 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1426 benchmark::utils::CheckNEON);
1427 }
1428 static void qs8_gemm_4x8c4__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1429 GEMMEnd2EndBenchmark(state, model,
1430 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r,
1431 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r,
1432 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
1433 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld1r,
1434 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1435 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1436 benchmark::utils::CheckNEON);
1437 }
1438 static void qs8_gemm_4x16c4__neon_mull_ld1r(benchmark::State& state, models::ExecutionPlanFactory model) {
1439 GEMMEnd2EndBenchmark(state, model,
1440 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
1441 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld1r,
1442 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r,
1443 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld1r,
1444 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1445 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1446 benchmark::utils::CheckNEON);
1447 }
1448 static void qs8_gemm_2x8c4__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1449 GEMMEnd2EndBenchmark(state, model,
1450 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r,
1451 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld2r,
1452 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
1453 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
1454 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1455 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1456 benchmark::utils::CheckNEON);
1457 }
1458 static void qs8_gemm_2x16c4__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1459 GEMMEnd2EndBenchmark(state, model,
1460 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r,
1461 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mull_ld2r,
1462 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r,
1463 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r,
1464 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1465 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1466 benchmark::utils::CheckNEON);
1467 }
1468 static void qs8_gemm_3x8c4__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1469 GEMMEnd2EndBenchmark(state, model,
1470 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r,
1471 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r,
1472 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
1473 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
1474 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1475 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1476 benchmark::utils::CheckNEON);
1477 }
1478 static void qs8_gemm_3x16c4__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1479 GEMMEnd2EndBenchmark(state, model,
1480 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r,
1481 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld2r,
1482 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r,
1483 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r,
1484 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1485 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1486 benchmark::utils::CheckNEON);
1487 }
1488 static void qs8_gemm_4x8c4__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1489 GEMMEnd2EndBenchmark(state, model,
1490 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r,
1491 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld2r,
1492 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
1493 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r,
1494 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1495 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1496 benchmark::utils::CheckNEON);
1497 }
1498 static void qs8_gemm_4x16c4__neon_mull_ld2r(benchmark::State& state, models::ExecutionPlanFactory model) {
1499 GEMMEnd2EndBenchmark(state, model,
1500 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r,
1501 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neon_mull_ld2r,
1502 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r,
1503 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_ld2r,
1504 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1505 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1506 benchmark::utils::CheckNEON);
1507 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001508 static void qs8_gemm_2x8c4s2__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001509 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001510 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull,
1511 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4s2__neon_mull,
1512 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull,
1513 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull,
Frank Barchardeb704f72021-11-12 01:26:50 -08001514 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1515 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1516 benchmark::utils::CheckNEON);
1517 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001518 static void qs8_gemm_2x16c4s2__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001519 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001520 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
1521 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull,
1522 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
1523 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
Frank Barchardeb704f72021-11-12 01:26:50 -08001524 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1525 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1526 benchmark::utils::CheckNEON);
1527 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001528 static void qs8_gemm_3x8c4s2__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001529 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001530 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
1531 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull,
1532 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull,
1533 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull,
Frank Barchardeb704f72021-11-12 01:26:50 -08001534 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1535 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1536 benchmark::utils::CheckNEON);
1537 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001538 static void qs8_gemm_3x16c4s2__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001539 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001540 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull,
1541 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull,
1542 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
1543 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
Frank Barchardeb704f72021-11-12 01:26:50 -08001544 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1545 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1546 benchmark::utils::CheckNEON);
1547 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001548 static void qs8_gemm_4x8c4s2__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001549 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001550 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
1551 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull,
1552 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull,
1553 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull,
Frank Barchardeb704f72021-11-12 01:26:50 -08001554 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1555 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1556 benchmark::utils::CheckNEON);
1557 }
Frank Barcharde22685a2021-11-12 11:36:58 -08001558 static void qs8_gemm_4x16c4s2__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardeb704f72021-11-12 01:26:50 -08001559 GEMMEnd2EndBenchmark(state, model,
Frank Barcharde22685a2021-11-12 11:36:58 -08001560 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
1561 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4s2__neon_mull,
1562 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
1563 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4s2__neon_mull,
Frank Barchardeb704f72021-11-12 01:26:50 -08001564 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1565 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 1 /* log2_sr */,
1566 benchmark::utils::CheckNEON);
1567 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001568 static void qs8_gemm_4x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -08001569 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan4486f872021-08-07 15:22:50 -07001570 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neondot,
1571 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
1572 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
1573 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
1574 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -08001575 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1576 benchmark::utils::CheckNEONDOT);
1577 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001578 static void qs8_gemm_6x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -08001579 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan4486f872021-08-07 15:22:50 -07001580 xnn_qs8_gemm_minmax_rndnu_ukernel_6x8c4__neondot,
1581 xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
1582 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
1583 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
1584 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -08001585 6 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1586 benchmark::utils::CheckNEONDOT);
1587 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001588 static void qs8_gemm_8x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -08001589 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan4486f872021-08-07 15:22:50 -07001590 xnn_qs8_gemm_minmax_rndnu_ukernel_8x8c4__neondot,
1591 xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
1592 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
1593 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
1594 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -08001595 8 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1596 benchmark::utils::CheckNEONDOT);
1597 }
Marat Dukhan4486f872021-08-07 15:22:50 -07001598 static void qs8_gemm_4x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard15c00362021-02-08 23:21:43 -08001599 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan4486f872021-08-07 15:22:50 -07001600 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot,
1601 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__neondot,
1602 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
1603 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
1604 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1605 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1606 benchmark::utils::CheckNEONDOT);
1607 }
1608 static void qs8_gemm_6x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
1609 GEMMEnd2EndBenchmark(state, model,
1610 xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot,
1611 xnn_qs8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
1612 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
1613 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
1614 xnn_init_qs8_conv_minmax_rndnu_neon_params,
1615 6 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1616 benchmark::utils::CheckNEONDOT);
1617 }
1618 static void qs8_gemm_8x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
1619 GEMMEnd2EndBenchmark(state, model,
1620 xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot,
1621 xnn_qs8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
1622 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
1623 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
1624 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard15c00362021-02-08 23:21:43 -08001625 8 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
1626 benchmark::utils::CheckNEONDOT);
1627 }
Marat Dukhan89991902021-12-06 00:54:36 -08001628 static void qs8_gemm_2x8c8__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd79391d2021-02-15 14:22:32 -08001629 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001630 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull,
1631 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mull,
1632 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
1633 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
1634 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd79391d2021-02-15 14:22:32 -08001635 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1636 benchmark::utils::CheckNEON);
1637 }
Marat Dukhan89991902021-12-06 00:54:36 -08001638 static void qs8_gemm_2x16c8__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd79391d2021-02-15 14:22:32 -08001639 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001640 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mull,
1641 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mull,
1642 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
1643 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
1644 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd79391d2021-02-15 14:22:32 -08001645 2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1646 benchmark::utils::CheckNEON);
1647 }
Marat Dukhan89991902021-12-06 00:54:36 -08001648 static void qs8_gemm_3x8c8__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd79391d2021-02-15 14:22:32 -08001649 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001650 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull,
1651 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mull,
1652 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
1653 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
1654 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd79391d2021-02-15 14:22:32 -08001655 3 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1656 benchmark::utils::CheckNEON);
1657 }
Marat Dukhan89991902021-12-06 00:54:36 -08001658 static void qs8_gemm_3x16c8__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd79391d2021-02-15 14:22:32 -08001659 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001660 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
1661 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull,
1662 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
1663 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
1664 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd79391d2021-02-15 14:22:32 -08001665 3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1666 benchmark::utils::CheckNEON);
1667 }
Marat Dukhan89991902021-12-06 00:54:36 -08001668 static void qs8_gemm_4x8c8__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd79391d2021-02-15 14:22:32 -08001669 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001670 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull,
1671 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mull,
1672 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
1673 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull,
1674 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd79391d2021-02-15 14:22:32 -08001675 4 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1676 benchmark::utils::CheckNEON);
1677 }
Marat Dukhan89991902021-12-06 00:54:36 -08001678 static void qs8_gemm_4x16c8__neon_mull(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardd79391d2021-02-15 14:22:32 -08001679 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001680 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
1681 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mull,
1682 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
1683 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mull,
1684 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardd79391d2021-02-15 14:22:32 -08001685 4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1686 benchmark::utils::CheckNEON);
1687 }
Marat Dukhan89991902021-12-06 00:54:36 -08001688 static void qs8_gemm_2x8c16__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard36f95cf2021-02-23 03:12:52 -08001689 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001690 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c16__neon_mlal,
1691 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c16__neon_mlal,
1692 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal,
1693 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal,
1694 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard36f95cf2021-02-23 03:12:52 -08001695 2 /* mr */, 8 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
1696 benchmark::utils::CheckNEON);
1697 }
Marat Dukhan89991902021-12-06 00:54:36 -08001698 static void qs8_gemm_2x16c16__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard36f95cf2021-02-23 03:12:52 -08001699 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001700 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c16__neon_mlal,
1701 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c16__neon_mlal,
1702 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
1703 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
1704 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard36f95cf2021-02-23 03:12:52 -08001705 2 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
1706 benchmark::utils::CheckNEON);
1707 }
Marat Dukhan89991902021-12-06 00:54:36 -08001708 static void qs8_gemm_3x8c16__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard36f95cf2021-02-23 03:12:52 -08001709 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001710 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c16__neon_mlal,
1711 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c16__neon_mlal,
1712 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal,
1713 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal,
1714 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard36f95cf2021-02-23 03:12:52 -08001715 4 /* mr */, 8 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
1716 benchmark::utils::CheckNEON);
1717 }
Marat Dukhan89991902021-12-06 00:54:36 -08001718 static void qs8_gemm_3x16c16__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard36f95cf2021-02-23 03:12:52 -08001719 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001720 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c16__neon_mlal,
1721 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c16__neon_mlal,
1722 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
1723 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
1724 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard36f95cf2021-02-23 03:12:52 -08001725 4 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
1726 benchmark::utils::CheckNEON);
1727 }
Marat Dukhan89991902021-12-06 00:54:36 -08001728 static void qs8_gemm_4x8c16__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard36f95cf2021-02-23 03:12:52 -08001729 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001730 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c16__neon_mlal,
1731 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c16__neon_mlal,
1732 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c16__neon_mlal,
1733 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c16__neon_mlal,
1734 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard36f95cf2021-02-23 03:12:52 -08001735 4 /* mr */, 8 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
1736 benchmark::utils::CheckNEON);
1737 }
Marat Dukhan89991902021-12-06 00:54:36 -08001738 static void qs8_gemm_4x16c16__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard36f95cf2021-02-23 03:12:52 -08001739 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001740 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal,
1741 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c16__neon_mlal,
1742 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
1743 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal,
1744 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchard36f95cf2021-02-23 03:12:52 -08001745 4 /* mr */, 16 /* nr */, 4 /* log2_kr */, 0 /* log2_sr */,
1746 benchmark::utils::CheckNEON);
1747 }
Marat Dukhan89991902021-12-06 00:54:36 -08001748 static void qs8_gemm_2x8c8__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardda78da12021-03-02 14:28:00 -08001749 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001750 xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
1751 xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal,
1752 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal,
1753 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal,
1754 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardda78da12021-03-02 14:28:00 -08001755 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1756 benchmark::utils::CheckNEON);
1757 }
Marat Dukhan89991902021-12-06 00:54:36 -08001758 static void qs8_gemm_2x16c8__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardda78da12021-03-02 14:28:00 -08001759 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001760 xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c8__neon_mlal,
1761 xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c8__neon_mlal,
1762 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal,
1763 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal,
1764 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardda78da12021-03-02 14:28:00 -08001765 2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1766 benchmark::utils::CheckNEON);
1767 }
Marat Dukhan89991902021-12-06 00:54:36 -08001768 static void qs8_gemm_3x8c8__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardda78da12021-03-02 14:28:00 -08001769 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001770 xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mlal,
1771 xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c8__neon_mlal,
1772 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal,
1773 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal,
1774 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardda78da12021-03-02 14:28:00 -08001775 3 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1776 benchmark::utils::CheckNEON);
1777 }
Marat Dukhan89991902021-12-06 00:54:36 -08001778 static void qs8_gemm_3x16c8__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardda78da12021-03-02 14:28:00 -08001779 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001780 xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c8__neon_mlal,
1781 xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mlal,
1782 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal,
1783 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal,
1784 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardda78da12021-03-02 14:28:00 -08001785 3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1786 benchmark::utils::CheckNEON);
1787 }
Marat Dukhan89991902021-12-06 00:54:36 -08001788 static void qs8_gemm_4x8c8__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardda78da12021-03-02 14:28:00 -08001789 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001790 xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
1791 xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal,
1792 xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal,
1793 xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mlal,
1794 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardda78da12021-03-02 14:28:00 -08001795 4 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1796 benchmark::utils::CheckNEON);
1797 }
Marat Dukhan89991902021-12-06 00:54:36 -08001798 static void qs8_gemm_4x16c8__neon_mlal(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchardda78da12021-03-02 14:28:00 -08001799 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan89991902021-12-06 00:54:36 -08001800 xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal,
1801 xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c8__neon_mlal,
1802 xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c8__neon_mlal,
1803 xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c8__neon_mlal,
1804 xnn_init_qs8_conv_minmax_rndnu_neon_params,
Frank Barchardda78da12021-03-02 14:28:00 -08001805 4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1806 benchmark::utils::CheckNEON);
1807 }
1808
Marat Dukhan4486f872021-08-07 15:22:50 -07001809 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neondot);
1810 BENCHMARK_QS8_END2END(qs8_gemm_6x8c4__neondot);
1811 BENCHMARK_QS8_END2END(qs8_gemm_8x8c4__neondot);
1812 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neondot);
1813 BENCHMARK_QS8_END2END(qs8_gemm_6x16c4__neondot);
1814 BENCHMARK_QS8_END2END(qs8_gemm_8x16c4__neondot);
Frank Barchard4c032f32021-02-10 15:18:49 -08001815
Marat Dukhan89991902021-12-06 00:54:36 -08001816 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__neon_mlal);
1817 BENCHMARK_QS8_END2END(qs8_gemm_2x16c8__neon_mlal);
1818 BENCHMARK_QS8_END2END(qs8_gemm_3x8c8__neon_mlal);
1819 BENCHMARK_QS8_END2END(qs8_gemm_3x16c8__neon_mlal);
1820 BENCHMARK_QS8_END2END(qs8_gemm_4x8c8__neon_mlal);
1821 BENCHMARK_QS8_END2END(qs8_gemm_4x16c8__neon_mlal);
Frank Barchardda78da12021-03-02 14:28:00 -08001822
Marat Dukhan89991902021-12-06 00:54:36 -08001823 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__neon_mull);
1824 BENCHMARK_QS8_END2END(qs8_gemm_2x16c8__neon_mull);
1825 BENCHMARK_QS8_END2END(qs8_gemm_3x8c8__neon_mull);
1826 BENCHMARK_QS8_END2END(qs8_gemm_3x16c8__neon_mull);
1827 BENCHMARK_QS8_END2END(qs8_gemm_4x8c8__neon_mull);
1828 BENCHMARK_QS8_END2END(qs8_gemm_4x16c8__neon_mull);
Frank Barchardd79391d2021-02-15 14:22:32 -08001829
Marat Dukhan89991902021-12-06 00:54:36 -08001830 BENCHMARK_QS8_END2END(qs8_gemm_2x8c16__neon_mlal);
1831 BENCHMARK_QS8_END2END(qs8_gemm_2x16c16__neon_mlal);
1832 BENCHMARK_QS8_END2END(qs8_gemm_3x8c16__neon_mlal);
1833 BENCHMARK_QS8_END2END(qs8_gemm_3x16c16__neon_mlal);
1834 BENCHMARK_QS8_END2END(qs8_gemm_4x8c16__neon_mlal);
1835 BENCHMARK_QS8_END2END(qs8_gemm_4x16c16__neon_mlal);
Frank Barchard36f95cf2021-02-23 03:12:52 -08001836
Frank Barcharde22685a2021-11-12 11:36:58 -08001837 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4__neon_mlal_dup);
1838 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4__neon_mlal_dup);
1839 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4__neon_mlal_dup);
1840 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4__neon_mlal_dup);
1841 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neon_mlal_dup);
1842 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neon_mlal_dup);
Frank Barchard51320102021-11-05 16:01:30 -07001843
Frank Barcharde22685a2021-11-12 11:36:58 -08001844 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4__neon_mull_dup);
1845 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4__neon_mull_dup);
1846 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4__neon_mull_dup);
1847 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4__neon_mull_dup);
1848 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neon_mull_dup);
1849 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neon_mull_dup);
Frank Barchard51320102021-11-05 16:01:30 -07001850
Frank Barchard64ab1b72021-11-22 10:57:40 -08001851 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4__neon_mlal_ld1r);
1852 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4__neon_mlal_ld1r);
1853 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4__neon_mlal_ld1r);
1854 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4__neon_mlal_ld1r);
1855 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neon_mlal_ld1r);
1856 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neon_mlal_ld1r);
1857
1858 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4__neon_mull_ld1r);
1859 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4__neon_mull_ld1r);
1860 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4__neon_mull_ld1r);
1861 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4__neon_mull_ld1r);
1862 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neon_mull_ld1r);
1863 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neon_mull_ld1r);
1864
1865 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4__neon_mlal_ld2r);
1866 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4__neon_mlal_ld2r);
1867 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4__neon_mlal_ld2r);
1868 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4__neon_mlal_ld2r);
1869 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neon_mlal_ld2r);
1870 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neon_mlal_ld2r);
1871
1872 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4__neon_mull_ld2r);
1873 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4__neon_mull_ld2r);
1874 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4__neon_mull_ld2r);
1875 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4__neon_mull_ld2r);
1876 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4__neon_mull_ld2r);
1877 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4__neon_mull_ld2r);
1878
Frank Barcharde22685a2021-11-12 11:36:58 -08001879 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4s2__neon_mlal);
1880 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4s2__neon_mlal);
1881 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4s2__neon_mlal);
1882 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4s2__neon_mlal);
1883 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4s2__neon_mlal);
1884 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4s2__neon_mlal);
Frank Barchardeb704f72021-11-12 01:26:50 -08001885
Frank Barcharde22685a2021-11-12 11:36:58 -08001886 BENCHMARK_QS8_END2END(qs8_gemm_2x8c4s2__neon_mull);
1887 BENCHMARK_QS8_END2END(qs8_gemm_2x16c4s2__neon_mull);
1888 BENCHMARK_QS8_END2END(qs8_gemm_3x8c4s2__neon_mull);
1889 BENCHMARK_QS8_END2END(qs8_gemm_3x16c4s2__neon_mull);
1890 BENCHMARK_QS8_END2END(qs8_gemm_4x8c4s2__neon_mull);
1891 BENCHMARK_QS8_END2END(qs8_gemm_4x16c4s2__neon_mull);
Frank Barchardeb704f72021-11-12 01:26:50 -08001892
Frank Barcharde22685a2021-11-12 11:36:58 -08001893 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mlal_dup);
1894 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mlal_dup);
1895 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mlal_dup);
1896 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mlal_dup);
1897 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mlal_dup);
1898 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mlal_dup);
Frank Barchard32389c62021-02-16 11:04:36 -08001899
Frank Barcharde22685a2021-11-12 11:36:58 -08001900 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mull_dup);
1901 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mull_dup);
1902 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mull_dup);
1903 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mull_dup);
1904 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mull_dup);
1905 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mull_dup);
Frank Barchard32389c62021-02-16 11:04:36 -08001906
Frank Barchard15eec022021-11-17 13:26:20 -08001907 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mlal_ld1r);
1908 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mlal_ld1r);
1909 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mlal_ld1r);
1910 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mlal_ld1r);
1911 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mlal_ld1r);
1912 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mlal_ld1r);
1913
1914 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mull_ld1r);
1915 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mull_ld1r);
1916 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mull_ld1r);
1917 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mull_ld1r);
1918 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mull_ld1r);
1919 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mull_ld1r);
1920
1921 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mlal_ld2r);
1922 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mlal_ld2r);
1923 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mlal_ld2r);
1924 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mlal_ld2r);
1925 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mlal_ld2r);
1926 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mlal_ld2r);
1927
1928 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mull_ld2r);
1929 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mull_ld2r);
1930 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mull_ld2r);
1931 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mull_ld2r);
1932 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mull_ld2r);
1933 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mull_ld2r);
1934
Frank Barchard42f5c502021-11-16 10:04:21 -08001935 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mlal_ld4r);
1936 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mlal_ld4r);
1937 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mlal_ld4r);
1938 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mlal_ld4r);
1939 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mlal_ld4r);
1940 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mlal_ld4r);
1941
1942 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2__neon_mull_ld4r);
1943 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2__neon_mull_ld4r);
1944 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2__neon_mull_ld4r);
1945 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2__neon_mull_ld4r);
1946 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2__neon_mull_ld4r);
1947 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2__neon_mull_ld4r);
1948
Frank Barcharde22685a2021-11-12 11:36:58 -08001949 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2s4__neon_mlal);
1950 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2s4__neon_mlal);
1951 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2s4__neon_mlal);
1952 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2s4__neon_mlal);
1953 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2s4__neon_mlal);
1954 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2s4__neon_mlal);
Frank Barchardc7a032d2021-11-10 12:37:49 -08001955
Frank Barcharde22685a2021-11-12 11:36:58 -08001956 BENCHMARK_QS8_END2END(qs8_gemm_2x8c2s4__neon_mull);
1957 BENCHMARK_QS8_END2END(qs8_gemm_2x16c2s4__neon_mull);
1958 BENCHMARK_QS8_END2END(qs8_gemm_3x8c2s4__neon_mull);
1959 BENCHMARK_QS8_END2END(qs8_gemm_3x16c2s4__neon_mull);
1960 BENCHMARK_QS8_END2END(qs8_gemm_4x8c2s4__neon_mull);
1961 BENCHMARK_QS8_END2END(qs8_gemm_4x16c2s4__neon_mull);
Frank Barchardc7a032d2021-11-10 12:37:49 -08001962
Frank Barchard27bf92c2021-11-24 15:47:52 -08001963 BENCHMARK_QS8_END2END(qs8_gemm_2x8__neon_mlal_lane);
1964 BENCHMARK_QS8_END2END(qs8_gemm_2x16__neon_mlal_lane);
1965 BENCHMARK_QS8_END2END(qs8_gemm_3x8__neon_mlal_lane);
1966 BENCHMARK_QS8_END2END(qs8_gemm_3x16__neon_mlal_lane);
1967 BENCHMARK_QS8_END2END(qs8_gemm_4x8__neon_mlal_lane);
1968 BENCHMARK_QS8_END2END(qs8_gemm_4x16__neon_mlal_lane);
1969 BENCHMARK_QS8_END2END(qs8_gemm_6x8__neon_mlal_lane);
1970 BENCHMARK_QS8_END2END(qs8_gemm_6x16__neon_mlal_lane);
Frank Barchardf82ea822021-12-01 15:43:37 -08001971
1972 BENCHMARK_QS8_END2END(qs8_gemm_2x8__neon_mlal_lane_prfm);
1973 BENCHMARK_QS8_END2END(qs8_gemm_2x16__neon_mlal_lane_prfm);
1974 BENCHMARK_QS8_END2END(qs8_gemm_3x8__neon_mlal_lane_prfm);
1975 BENCHMARK_QS8_END2END(qs8_gemm_3x16__neon_mlal_lane_prfm);
1976 BENCHMARK_QS8_END2END(qs8_gemm_4x8__neon_mlal_lane_prfm);
1977 BENCHMARK_QS8_END2END(qs8_gemm_4x16__neon_mlal_lane_prfm);
1978 BENCHMARK_QS8_END2END(qs8_gemm_6x8__neon_mlal_lane_prfm);
1979 BENCHMARK_QS8_END2END(qs8_gemm_6x16__neon_mlal_lane_prfm);
Frank Barcharddc909cb2021-02-08 13:59:31 -08001980#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1981
Frank Barchardb8c9fa92021-02-10 11:30:51 -08001982#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan529d2c12021-08-06 15:37:03 -07001983 static void qs8_gemm_2x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan3c5892e2021-05-26 10:56:04 -07001984 GEMMEnd2EndBenchmark(state, model,
1985 xnn_qs8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
1986 xnn_qs8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
1987 xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
1988 xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
1989 xnn_init_qs8_conv_minmax_fp32_avx512_params,
1990 2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
1991 benchmark::utils::CheckAVX512F);
1992 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07001993 static void qs8_gemm_3x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan3c5892e2021-05-26 10:56:04 -07001994 GEMMEnd2EndBenchmark(state, model,
1995 xnn_qs8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
1996 xnn_qs8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
1997 xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
1998 xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
1999 xnn_init_qs8_conv_minmax_fp32_avx512_params,
2000 3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2001 benchmark::utils::CheckAVX512F);
2002 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002003 static void qs8_gemm_4x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan3c5892e2021-05-26 10:56:04 -07002004 GEMMEnd2EndBenchmark(state, model,
2005 xnn_qs8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
2006 xnn_qs8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
2007 xnn_qs8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
2008 xnn_qs8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
2009 xnn_init_qs8_conv_minmax_fp32_avx512_params,
2010 4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2011 benchmark::utils::CheckAVX512F);
2012 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002013 static void qs8_gemm_2x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan3c5892e2021-05-26 10:56:04 -07002014 GEMMEnd2EndBenchmark(state, model,
2015 xnn_qs8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
2016 xnn_qs8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
2017 xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2,
2018 xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
2019 xnn_init_qs8_conv_minmax_fp32_avx2_params,
2020 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2021 benchmark::utils::CheckAVX2);
2022 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002023 static void qs8_gemm_3x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan3c5892e2021-05-26 10:56:04 -07002024 GEMMEnd2EndBenchmark(state, model,
2025 xnn_qs8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
2026 xnn_qs8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
2027 xnn_qs8_gemm_minmax_fp32_ukernel_1x8c8__avx2,
2028 xnn_qs8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
2029 xnn_init_qs8_conv_minmax_fp32_avx2_params,
2030 3 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2031 benchmark::utils::CheckAVX2);
2032 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002033 static void qs8_gemm_2x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002034 GEMMEnd2EndBenchmark(state, model,
2035 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
2036 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
2037 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
2038 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
2039 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2040 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2041 benchmark::utils::CheckXOP);
2042 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002043 static void qs8_gemm_2x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002044 GEMMEnd2EndBenchmark(state, model,
2045 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
2046 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
2047 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
2048 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
2049 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2050 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2051 benchmark::utils::CheckXOP);
2052 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002053 static void qs8_gemm_3x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002054 GEMMEnd2EndBenchmark(state, model,
2055 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
2056 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
2057 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
2058 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
2059 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2060 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2061 benchmark::utils::CheckXOP);
2062 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002063 static void qs8_gemm_3x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002064 GEMMEnd2EndBenchmark(state, model,
2065 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
2066 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
2067 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
2068 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
2069 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2070 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2071 benchmark::utils::CheckXOP);
2072 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002073 static void qs8_gemm_4x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002074 GEMMEnd2EndBenchmark(state, model,
2075 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
2076 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
2077 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
2078 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
2079 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2080 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2081 benchmark::utils::CheckXOP);
2082 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002083 static void qs8_gemm_4x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002084 GEMMEnd2EndBenchmark(state, model,
2085 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
2086 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
2087 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
2088 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
2089 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2090 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2091 benchmark::utils::CheckXOP);
2092 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002093 static void qs8_gemm_2x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002094 GEMMEnd2EndBenchmark(state, model,
2095 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
2096 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
2097 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
2098 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
2099 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2100 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2101 benchmark::utils::CheckXOP);
2102 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002103 static void qs8_gemm_3x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002104 GEMMEnd2EndBenchmark(state, model,
2105 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
2106 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
2107 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
2108 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
2109 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2110 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2111 benchmark::utils::CheckXOP);
2112 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002113 static void qs8_gemm_2x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2114 GEMMEnd2EndBenchmark(state, model,
2115 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
2116 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
2117 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
2118 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
2119 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2120 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2121 benchmark::utils::CheckXOP);
2122 }
2123 static void qs8_gemm_3x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002124 GEMMEnd2EndBenchmark(state, model,
2125 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
2126 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
2127 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
2128 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
2129 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2130 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2131 benchmark::utils::CheckXOP);
2132 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002133 static void qs8_gemm_2x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002134 GEMMEnd2EndBenchmark(state, model,
2135 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
2136 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
2137 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
2138 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
2139 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2140 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2141 benchmark::utils::CheckAVX);
2142 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002143 static void qs8_gemm_2x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002144 GEMMEnd2EndBenchmark(state, model,
2145 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
2146 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
2147 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
2148 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
2149 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2150 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2151 benchmark::utils::CheckAVX);
2152 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002153 static void qs8_gemm_3x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002154 GEMMEnd2EndBenchmark(state, model,
2155 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
2156 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
2157 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
2158 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
2159 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2160 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2161 benchmark::utils::CheckAVX);
2162 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002163 static void qs8_gemm_3x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002164 GEMMEnd2EndBenchmark(state, model,
2165 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
2166 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
2167 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
2168 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
2169 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2170 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2171 benchmark::utils::CheckAVX);
2172 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002173 static void qs8_gemm_4x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002174 GEMMEnd2EndBenchmark(state, model,
2175 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
2176 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
2177 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
2178 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
2179 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2180 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2181 benchmark::utils::CheckAVX);
2182 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002183 static void qs8_gemm_4x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002184 GEMMEnd2EndBenchmark(state, model,
2185 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
2186 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
2187 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
2188 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
2189 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2190 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2191 benchmark::utils::CheckAVX);
2192 }
Marat Dukhanbe3d8fd2021-04-08 10:07:13 -07002193
2194
Marat Dukhan529d2c12021-08-06 15:37:03 -07002195 static void qs8_gemm_2x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002196 GEMMEnd2EndBenchmark(state, model,
2197 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
2198 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
2199 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
2200 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
2201 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2202 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2203 benchmark::utils::CheckAVX);
2204 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002205 static void qs8_gemm_2x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002206 GEMMEnd2EndBenchmark(state, model,
2207 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
2208 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
2209 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
2210 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
2211 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2212 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2213 benchmark::utils::CheckAVX);
2214 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002215 static void qs8_gemm_3x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002216 GEMMEnd2EndBenchmark(state, model,
2217 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
2218 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
2219 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
2220 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
2221 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2222 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2223 benchmark::utils::CheckAVX);
2224 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002225 static void qs8_gemm_3x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002226 GEMMEnd2EndBenchmark(state, model,
2227 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
2228 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
2229 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
2230 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
2231 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2232 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2233 benchmark::utils::CheckAVX);
2234 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002235 static void qs8_gemm_2x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002236 GEMMEnd2EndBenchmark(state, model,
2237 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
2238 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
2239 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
2240 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
2241 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2242 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2243 benchmark::utils::CheckSSE41);
2244 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002245 static void qs8_gemm_2x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002246 GEMMEnd2EndBenchmark(state, model,
2247 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
2248 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
2249 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
2250 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
2251 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2252 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2253 benchmark::utils::CheckSSE41);
2254 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002255 static void qs8_gemm_3x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002256 GEMMEnd2EndBenchmark(state, model,
2257 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
2258 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
2259 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
2260 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
2261 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2262 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2263 benchmark::utils::CheckSSE41);
2264 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002265 static void qs8_gemm_3x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002266 GEMMEnd2EndBenchmark(state, model,
2267 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
2268 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
2269 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
2270 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
2271 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2272 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2273 benchmark::utils::CheckSSE41);
2274 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002275 static void qs8_gemm_4x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002276 GEMMEnd2EndBenchmark(state, model,
2277 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
2278 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
2279 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
2280 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
2281 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2282 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2283 benchmark::utils::CheckSSE41);
2284 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002285 static void qs8_gemm_4x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002286 GEMMEnd2EndBenchmark(state, model,
2287 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
2288 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
2289 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
2290 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
2291 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2292 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
2293 benchmark::utils::CheckSSE41);
2294 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002295 static void qs8_gemm_2x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002296 GEMMEnd2EndBenchmark(state, model,
2297 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
2298 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
2299 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
2300 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
2301 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2302 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2303 benchmark::utils::CheckSSE41);
2304 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002305 static void qs8_gemm_2x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002306 GEMMEnd2EndBenchmark(state, model,
2307 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
2308 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
2309 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
2310 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
2311 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2312 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2313 benchmark::utils::CheckSSE41);
2314 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002315 static void qs8_gemm_3x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002316 GEMMEnd2EndBenchmark(state, model,
2317 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
2318 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
2319 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
2320 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
2321 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2322 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2323 benchmark::utils::CheckSSE41);
2324 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002325 static void qs8_gemm_3x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002326 GEMMEnd2EndBenchmark(state, model,
2327 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
2328 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
2329 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
2330 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
2331 xnn_init_qs8_conv_minmax_fp32_sse4_params,
2332 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2333 benchmark::utils::CheckSSE41);
2334 }
Marat Dukhand5cc5082021-04-08 01:23:33 -07002335
2336
Marat Dukhan529d2c12021-08-06 15:37:03 -07002337 static void qs8_gemm_2x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002338 GEMMEnd2EndBenchmark(state, model,
2339 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
2340 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld64,
2341 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
2342 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
2343 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2344 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2345 benchmark::utils::CheckSSSE3);
2346 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002347 static void qs8_gemm_2x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002348 GEMMEnd2EndBenchmark(state, model,
2349 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
2350 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__ssse3_ld128,
2351 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128,
2352 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128,
2353 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2354 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2355 benchmark::utils::CheckSSSE3);
2356 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002357 static void qs8_gemm_3x4c8__ssse3_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002358 GEMMEnd2EndBenchmark(state, model,
2359 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
2360 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld64,
2361 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
2362 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld64,
2363 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2364 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2365 benchmark::utils::CheckSSSE3);
2366 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002367 static void qs8_gemm_3x4c8__ssse3_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002368 GEMMEnd2EndBenchmark(state, model,
2369 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
2370 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__ssse3_ld128,
2371 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128,
2372 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__ssse3_ld128,
2373 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2374 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
2375 benchmark::utils::CheckSSSE3);
2376 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002377 static void qs8_gemm_2x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002378 GEMMEnd2EndBenchmark(state, model,
2379 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
2380 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
2381 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
2382 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
2383 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2384 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
2385 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002386 static void qs8_gemm_2x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002387 GEMMEnd2EndBenchmark(state, model,
2388 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
2389 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
2390 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
2391 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
2392 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2393 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
2394 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002395 static void qs8_gemm_3x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002396 GEMMEnd2EndBenchmark(state, model,
2397 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
2398 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
2399 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
2400 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
2401 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2402 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
2403 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002404 static void qs8_gemm_3x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002405 GEMMEnd2EndBenchmark(state, model,
2406 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
2407 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
2408 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
2409 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
2410 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2411 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
2412 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002413 static void qs8_gemm_4x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002414 GEMMEnd2EndBenchmark(state, model,
2415 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
2416 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
2417 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
2418 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
2419 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2420 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
2421 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002422 static void qs8_gemm_4x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002423 GEMMEnd2EndBenchmark(state, model,
2424 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
2425 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
2426 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
2427 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
2428 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2429 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
2430 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002431 static void qs8_gemm_2x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002432 GEMMEnd2EndBenchmark(state, model,
2433 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
2434 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
2435 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
2436 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
2437 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2438 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
2439 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002440 static void qs8_gemm_2x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002441 GEMMEnd2EndBenchmark(state, model,
2442 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
2443 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
2444 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
2445 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
2446 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2447 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
2448 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002449 static void qs8_gemm_3x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002450 GEMMEnd2EndBenchmark(state, model,
2451 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
2452 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
2453 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
2454 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
2455 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2456 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
2457 }
Marat Dukhan529d2c12021-08-06 15:37:03 -07002458 static void qs8_gemm_3x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanc46e6712021-06-01 19:00:16 -07002459 GEMMEnd2EndBenchmark(state, model,
2460 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
2461 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
2462 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
2463 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
2464 xnn_init_qs8_conv_minmax_fp32_sse2_params,
2465 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
2466 }
Marat Dukhand5cc5082021-04-08 01:23:33 -07002467
2468
Marat Dukhan529d2c12021-08-06 15:37:03 -07002469 BENCHMARK_QS8_END2END(qs8_gemm_2x16c8__avx512skx);
Marat Dukhan529d2c12021-08-06 15:37:03 -07002470 BENCHMARK_QS8_END2END(qs8_gemm_3x16c8__avx512skx);
2471 BENCHMARK_QS8_END2END(qs8_gemm_4x16c8__avx512skx);
Frank Barchardb8c9fa92021-02-10 11:30:51 -08002472
Marat Dukhan529d2c12021-08-06 15:37:03 -07002473 BENCHMARK_QS8_END2END(qs8_gemm_2x8c8__avx2);
2474 BENCHMARK_QS8_END2END(qs8_gemm_3x8c8__avx2);
Frank Barchardb8c9fa92021-02-10 11:30:51 -08002475
Marat Dukhan529d2c12021-08-06 15:37:03 -07002476 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__xop_ld64);
2477 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__xop_ld128);
2478 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__xop_ld64);
2479 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__xop_ld128);
2480 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__xop_ld64);
2481 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__xop_ld128);
Marat Dukhand5cc5082021-04-08 01:23:33 -07002482
Marat Dukhan529d2c12021-08-06 15:37:03 -07002483 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__xop_ld64);
2484 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__xop_ld128);
2485 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__xop_ld64);
2486 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__xop_ld128);
Marat Dukhand5cc5082021-04-08 01:23:33 -07002487
// BENCHMARK_QS8_END2END entries for the x86 wrappers, grouped by instruction set and kernel
// layout; each group lists the ld64 and ld128 variant of every row count.
// AVX, 4c2 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002488 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__avx_ld64);
2489 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__avx_ld128);
2490 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__avx_ld64);
2491 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__avx_ld128);
2492 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__avx_ld64);
2493 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__avx_ld128);
Marat Dukhanbe3d8fd2021-04-08 10:07:13 -07002494
// AVX, 4c8 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002495 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__avx_ld64);
2496 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__avx_ld128);
2497 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__avx_ld64);
2498 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__avx_ld128);
Frank Barchardb8c9fa92021-02-10 11:30:51 -08002499
// SSE4.1, 4c2 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002500 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__sse41_ld64);
2501 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__sse41_ld128);
2502 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__sse41_ld64);
2503 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__sse41_ld128);
2504 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__sse41_ld64);
2505 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__sse41_ld128);
Marat Dukhand5cc5082021-04-08 01:23:33 -07002506
// SSE4.1, 4c8 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002507 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__sse41_ld64);
2508 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__sse41_ld128);
2509 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__sse41_ld64);
2510 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__sse41_ld128);
Frank Barchardb8c9fa92021-02-10 11:30:51 -08002511
// SSSE3, 4c8 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002512 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__ssse3_ld64);
2513 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__ssse3_ld128);
2514 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__ssse3_ld64);
2515 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__ssse3_ld128);
Frank Barchardb8c9fa92021-02-10 11:30:51 -08002516
// SSE2, 4c2 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002517 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__sse2_ld64);
2518 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__sse2_ld128);
2519 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__sse2_ld64);
2520 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__sse2_ld128);
2521 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__sse2_ld64);
2522 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__sse2_ld128);
Marat Dukhand5cc5082021-04-08 01:23:33 -07002523
// SSE2, 4c8 kernels.
Marat Dukhan529d2c12021-08-06 15:37:03 -07002524 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__sse2_ld64);
2525 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__sse2_ld128);
2526 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__sse2_ld64);
2527 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__sse2_ld128);
Frank Barchardb8c9fa92021-02-10 11:30:51 -08002528#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2529
Marat Dukhan4c617792021-12-21 15:47:58 -08002530#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan8dc106e2021-08-31 15:23:02 -07002531 static void qs8_gemm_2x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2532 GEMMEnd2EndBenchmark(state, model,
2533 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
2534 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
2535 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
2536 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
2537 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2538 2 /* mr */, 4 /* nr */, 1 /* log2_kr */);
2539 }
2540 static void qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2541 GEMMEnd2EndBenchmark(state, model,
2542 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
2543 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
2544 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
2545 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
2546 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2547 2 /* mr */, 4 /* nr */, 1 /* log2_kr */);
2548 }
2549 static void qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2550 GEMMEnd2EndBenchmark(state, model,
2551 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
2552 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
2553 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
2554 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
2555 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2556 3 /* mr */, 4 /* nr */, 1 /* log2_kr */);
2557 }
2558 static void qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2559 GEMMEnd2EndBenchmark(state, model,
2560 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
2561 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
2562 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
2563 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
2564 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2565 3 /* mr */, 4 /* nr */, 1 /* log2_kr */);
2566 }
2567 static void qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2568 GEMMEnd2EndBenchmark(state, model,
2569 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
2570 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
2571 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
2572 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
2573 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2574 4 /* mr */, 4 /* nr */, 1 /* log2_kr */);
2575 }
2576 static void qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2577 GEMMEnd2EndBenchmark(state, model,
2578 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
2579 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
2580 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
2581 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
2582 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2583 4 /* mr */, 4 /* nr */, 1 /* log2_kr */);
2584 }
Marat Dukhan0f1ed942021-12-08 23:25:50 -08002585 static void qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2586 GEMMEnd2EndBenchmark(state, model,
2587 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
2588 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
2589 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
2590 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
2591 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2592 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
2593 }
2594 static void qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2595 GEMMEnd2EndBenchmark(state, model,
2596 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
2597 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
2598 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
2599 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
2600 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2601 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
2602 }
2603 static void qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2604 GEMMEnd2EndBenchmark(state, model,
2605 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
2606 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
2607 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
2608 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
2609 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2610 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
2611 }
2612 static void qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2613 GEMMEnd2EndBenchmark(state, model,
2614 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
2615 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
2616 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
2617 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
2618 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2619 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
2620 }
2621 static void qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2622 GEMMEnd2EndBenchmark(state, model,
2623 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
2624 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
2625 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
2626 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
2627 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2628 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
2629 }
2630 static void qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2631 GEMMEnd2EndBenchmark(state, model,
2632 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
2633 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
2634 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
2635 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
2636 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2637 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
2638 }
Marat Dukhan8dc106e2021-08-31 15:23:02 -07002639 static void qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2640 GEMMEnd2EndBenchmark(state, model,
2641 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
2642 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
2643 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
2644 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
2645 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2646 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2647 }
2648 static void qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2649 GEMMEnd2EndBenchmark(state, model,
2650 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
2651 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
2652 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
2653 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
2654 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2655 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2656 }
2657 static void qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2658 GEMMEnd2EndBenchmark(state, model,
2659 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
2660 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
2661 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
2662 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
2663 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2664 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2665 }
2666 static void qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2667 GEMMEnd2EndBenchmark(state, model,
2668 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
2669 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
2670 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
2671 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
2672 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2673 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2674 }
2675 static void qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
2676 GEMMEnd2EndBenchmark(state, model,
2677 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
2678 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
2679 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
2680 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
2681 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2682 4 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2683 }
2684 static void qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
2685 GEMMEnd2EndBenchmark(state, model,
2686 xnn_qs8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
2687 xnn_qs8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
2688 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
2689 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
2690 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
2691 4 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2692 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07002693 static void qs8_gemm_2x4c8__wasmsimd_mul16_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard19f47482021-02-09 23:26:33 -08002694 GEMMEnd2EndBenchmark(state, model,
Marat Dukhandfc2db02021-08-08 21:19:07 -07002695 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64,
2696 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld64,
2697 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64,
2698 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64,
Marat Dukhan86746292021-08-06 17:27:18 -07002699 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
Frank Barchard19f47482021-02-09 23:26:33 -08002700 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2701 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07002702 static void qs8_gemm_2x4c8__wasmsimd_mul16_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard19f47482021-02-09 23:26:33 -08002703 GEMMEnd2EndBenchmark(state, model,
Marat Dukhandfc2db02021-08-08 21:19:07 -07002704 xnn_qs8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128,
2705 xnn_qs8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul16_ld128,
2706 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128,
2707 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128,
Marat Dukhan86746292021-08-06 17:27:18 -07002708 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
Frank Barchard19f47482021-02-09 23:26:33 -08002709 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2710 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07002711 static void qs8_gemm_3x4c8__wasmsimd_mul16_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard19f47482021-02-09 23:26:33 -08002712 GEMMEnd2EndBenchmark(state, model,
Marat Dukhandfc2db02021-08-08 21:19:07 -07002713 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64,
2714 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld64,
2715 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64,
2716 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld64,
Marat Dukhan86746292021-08-06 17:27:18 -07002717 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
Frank Barchard19f47482021-02-09 23:26:33 -08002718 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2719 }
Marat Dukhandfc2db02021-08-08 21:19:07 -07002720 static void qs8_gemm_3x4c8__wasmsimd_mul16_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard19f47482021-02-09 23:26:33 -08002721 GEMMEnd2EndBenchmark(state, model,
Marat Dukhandfc2db02021-08-08 21:19:07 -07002722 xnn_qs8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128,
2723 xnn_qs8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul16_ld128,
2724 xnn_qs8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128,
2725 xnn_qs8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul16_ld128,
Marat Dukhan86746292021-08-06 17:27:18 -07002726 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
Frank Barchard19f47482021-02-09 23:26:33 -08002727 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
2728 }
2729
// BENCHMARK_QS8_END2END entries for the wasmsimd wrappers defined above, one per wrapper,
// grouped by kernel family in the same order as the definitions.
// dot16x2, 4c2 kernels.
Marat Dukhan8dc106e2021-08-31 15:23:02 -07002730 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__wasmsimd_dot16x2_ld64)
2731 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2__wasmsimd_dot16x2_ld128)
2732 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__wasmsimd_dot16x2_ld64)
2733 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2__wasmsimd_dot16x2_ld128)
2734 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__wasmsimd_dot16x2_ld64)
2735 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2__wasmsimd_dot16x2_ld128)
2736
// dot16x2, 4c2s4 kernels.
Marat Dukhan0f1ed942021-12-08 23:25:50 -08002737 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64)
2738 BENCHMARK_QS8_END2END(qs8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128)
2739 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64)
2740 BENCHMARK_QS8_END2END(qs8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128)
2741 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64)
2742 BENCHMARK_QS8_END2END(qs8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128)
2743
// dot16x2, 4c8 kernels.
Marat Dukhan8dc106e2021-08-31 15:23:02 -07002744 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__wasmsimd_dot16x2_ld64)
2745 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__wasmsimd_dot16x2_ld128)
2746 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__wasmsimd_dot16x2_ld64)
2747 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__wasmsimd_dot16x2_ld128)
2748 BENCHMARK_QS8_END2END(qs8_gemm_4x4c8__wasmsimd_dot16x2_ld64)
2749 BENCHMARK_QS8_END2END(qs8_gemm_4x4c8__wasmsimd_dot16x2_ld128)
2750
// mul16, 4c8 kernels.
Marat Dukhandfc2db02021-08-08 21:19:07 -07002751 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__wasmsimd_mul16_ld64)
2752 BENCHMARK_QS8_END2END(qs8_gemm_2x4c8__wasmsimd_mul16_ld128)
2753 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__wasmsimd_mul16_ld64)
2754 BENCHMARK_QS8_END2END(qs8_gemm_3x4c8__wasmsimd_mul16_ld128)
Marat Dukhan4c617792021-12-21 15:47:58 -08002755#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard19f47482021-02-09 23:26:33 -08002756
Marat Dukhan7c1115f2022-01-04 17:18:41 -08002757
2758#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2759 static void qs8_gemm_2x2__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2760 GEMMEnd2EndBenchmark(state, model,
2761 xnn_qs8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
2762 xnn_qs8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
2763 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
2764 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
2765 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
2766 2 /* mr */, 2 /* nr */);
2767 }
2768 static void qs8_gemm_3x2__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2769 GEMMEnd2EndBenchmark(state, model,
2770 xnn_qs8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
2771 xnn_qs8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
2772 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
2773 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
2774 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
2775 3 /* mr */, 2 /* nr */);
2776 }
2777 static void qs8_gemm_4x2__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2778 GEMMEnd2EndBenchmark(state, model,
2779 xnn_qs8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
2780 xnn_qs8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
2781 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
2782 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
2783 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
2784 4 /* mr */, 2 /* nr */);
2785 }
2786 static void qs8_gemm_2x4__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2787 GEMMEnd2EndBenchmark(state, model,
2788 xnn_qs8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
2789 xnn_qs8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
2790 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
2791 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
2792 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
2793 2 /* mr */, 4 /* nr */);
2794 }
2795 static void qs8_gemm_3x4__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2796 GEMMEnd2EndBenchmark(state, model,
2797 xnn_qs8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
2798 xnn_qs8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
2799 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
2800 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
2801 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
2802 3 /* mr */, 4 /* nr */);
2803 }
2804 static void qs8_gemm_4x4__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2805 GEMMEnd2EndBenchmark(state, model,
2806 xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
2807 xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
2808 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
2809 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
2810 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
2811 4 /* mr */, 4 /* nr */);
2812 }
2813
// BENCHMARK_QS8_END2END entries for the six wasm fmagic wrappers defined above
// (nr = 2 variants first, then nr = 4).
2814 BENCHMARK_QS8_END2END(qs8_gemm_2x2__wasm_fmagic)
2815 BENCHMARK_QS8_END2END(qs8_gemm_3x2__wasm_fmagic)
2816 BENCHMARK_QS8_END2END(qs8_gemm_4x2__wasm_fmagic)
2817 BENCHMARK_QS8_END2END(qs8_gemm_2x4__wasm_fmagic)
2818 BENCHMARK_QS8_END2END(qs8_gemm_3x4__wasm_fmagic)
2819 BENCHMARK_QS8_END2END(qs8_gemm_4x4__wasm_fmagic)
2820#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
2821
2822
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002823static void qs8_gemm_2x2__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhana1a4e782021-05-07 17:49:47 -07002824 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002825 xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
2826 xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
2827 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
2828 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
2829 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhana1a4e782021-05-07 17:49:47 -07002830 2 /* mr */, 2 /* nr */);
2831}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002832static void qs8_gemm_3x2__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhana1a4e782021-05-07 17:49:47 -07002833 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002834 xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
2835 xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
2836 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
2837 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
2838 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhana1a4e782021-05-07 17:49:47 -07002839 3 /* mr */, 2 /* nr */);
2840}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002841static void qs8_gemm_4x2__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhana1a4e782021-05-07 17:49:47 -07002842 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002843 xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
2844 xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
2845 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
2846 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
2847 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhana1a4e782021-05-07 17:49:47 -07002848 4 /* mr */, 2 /* nr */);
2849}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002850static void qs8_gemm_2x4__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhana1a4e782021-05-07 17:49:47 -07002851 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002852 xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
2853 xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
2854 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
2855 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
2856 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhana1a4e782021-05-07 17:49:47 -07002857 2 /* mr */, 4 /* nr */);
2858}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002859static void qs8_gemm_3x4__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhana1a4e782021-05-07 17:49:47 -07002860 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002861 xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
2862 xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
2863 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
2864 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
2865 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhana1a4e782021-05-07 17:49:47 -07002866 3 /* mr */, 4 /* nr */);
2867}
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002868static void qs8_gemm_4x4__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhana1a4e782021-05-07 17:49:47 -07002869 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002870 xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
2871 xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
2872 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
2873 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
2874 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhana1a4e782021-05-07 17:49:47 -07002875 4 /* mr */, 4 /* nr */);
2876}
2877
Marat Dukhan440e8ed2022-01-04 15:30:57 -08002878static void qs8_gemm_2x2__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2879 GEMMEnd2EndBenchmark(state, model,
2880 xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
2881 xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
2882 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
2883 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
2884 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
2885 2 /* mr */, 2 /* nr */);
2886}
2887static void qs8_gemm_3x2__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2888 GEMMEnd2EndBenchmark(state, model,
2889 xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
2890 xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
2891 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
2892 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
2893 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
2894 3 /* mr */, 2 /* nr */);
2895}
2896static void qs8_gemm_4x2__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2897 GEMMEnd2EndBenchmark(state, model,
2898 xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
2899 xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
2900 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
2901 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
2902 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
2903 4 /* mr */, 2 /* nr */);
2904}
2905static void qs8_gemm_2x4__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2906 GEMMEnd2EndBenchmark(state, model,
2907 xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
2908 xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
2909 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
2910 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
2911 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
2912 2 /* mr */, 4 /* nr */);
2913}
2914static void qs8_gemm_3x4__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2915 GEMMEnd2EndBenchmark(state, model,
2916 xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
2917 xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
2918 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
2919 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
2920 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
2921 3 /* mr */, 4 /* nr */);
2922}
2923static void qs8_gemm_4x4__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
2924 GEMMEnd2EndBenchmark(state, model,
2925 xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
2926 xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
2927 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
2928 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
2929 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
2930 4 /* mr */, 4 /* nr */);
2931}
2932
2933static void qs8_gemm_2x2__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
2934 GEMMEnd2EndBenchmark(state, model,
2935 xnn_qs8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
2936 xnn_qs8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
2937 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
2938 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
2939 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
2940 2 /* mr */, 2 /* nr */);
2941}
2942static void qs8_gemm_3x2__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
2943 GEMMEnd2EndBenchmark(state, model,
2944 xnn_qs8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
2945 xnn_qs8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
2946 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
2947 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
2948 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
2949 3 /* mr */, 2 /* nr */);
2950}
2951static void qs8_gemm_4x2__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
2952 GEMMEnd2EndBenchmark(state, model,
2953 xnn_qs8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
2954 xnn_qs8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
2955 xnn_qs8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
2956 xnn_qs8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
2957 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
2958 4 /* mr */, 2 /* nr */);
2959}
2960static void qs8_gemm_2x4__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
2961 GEMMEnd2EndBenchmark(state, model,
2962 xnn_qs8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
2963 xnn_qs8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
2964 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
2965 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
2966 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
2967 2 /* mr */, 4 /* nr */);
2968}
2969static void qs8_gemm_3x4__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
2970 GEMMEnd2EndBenchmark(state, model,
2971 xnn_qs8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
2972 xnn_qs8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
2973 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
2974 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
2975 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
2976 3 /* mr */, 4 /* nr */);
2977}
2978static void qs8_gemm_4x4__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
2979 GEMMEnd2EndBenchmark(state, model,
2980 xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
2981 xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
2982 xnn_qs8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
2983 xnn_qs8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
2984 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
2985 4 /* mr */, 4 /* nr */);
2986}
2987
// BENCHMARK_QS8_END2END entries for the scalar wrappers defined above. These sit outside any
// architecture #if block; they are grouped by variant suffix (fmagic, imagic, lrintf).
// fmagic wrappers.
Marat Dukhan2ac722e2022-01-04 01:54:20 -08002988BENCHMARK_QS8_END2END(qs8_gemm_2x2__scalar_fmagic)
2989BENCHMARK_QS8_END2END(qs8_gemm_3x2__scalar_fmagic)
2990BENCHMARK_QS8_END2END(qs8_gemm_4x2__scalar_fmagic)
2991BENCHMARK_QS8_END2END(qs8_gemm_2x4__scalar_fmagic)
2992BENCHMARK_QS8_END2END(qs8_gemm_3x4__scalar_fmagic)
2993BENCHMARK_QS8_END2END(qs8_gemm_4x4__scalar_fmagic)
Marat Dukhana1a4e782021-05-07 17:49:47 -07002994
// imagic wrappers.
Marat Dukhan440e8ed2022-01-04 15:30:57 -08002995BENCHMARK_QS8_END2END(qs8_gemm_2x2__scalar_imagic)
2996BENCHMARK_QS8_END2END(qs8_gemm_3x2__scalar_imagic)
2997BENCHMARK_QS8_END2END(qs8_gemm_4x2__scalar_imagic)
2998BENCHMARK_QS8_END2END(qs8_gemm_2x4__scalar_imagic)
2999BENCHMARK_QS8_END2END(qs8_gemm_3x4__scalar_imagic)
3000BENCHMARK_QS8_END2END(qs8_gemm_4x4__scalar_imagic)
3001
// lrintf wrappers.
3002BENCHMARK_QS8_END2END(qs8_gemm_2x2__scalar_lrintf)
3003BENCHMARK_QS8_END2END(qs8_gemm_3x2__scalar_lrintf)
3004BENCHMARK_QS8_END2END(qs8_gemm_4x2__scalar_lrintf)
3005BENCHMARK_QS8_END2END(qs8_gemm_2x4__scalar_lrintf)
3006BENCHMARK_QS8_END2END(qs8_gemm_3x4__scalar_lrintf)
3007BENCHMARK_QS8_END2END(qs8_gemm_4x4__scalar_lrintf)
3008
// Expand BENCHMARK_MAIN() (Google Benchmark's main() definition) only when
// XNNPACK_BENCHMARK_NO_MAIN is not defined.
Frank Barcharddc909cb2021-02-08 13:59:31 -08003009#ifndef XNNPACK_BENCHMARK_NO_MAIN
3010BENCHMARK_MAIN();
3011#endif