// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>

#include "bench/end2end.h"
#include "bench/utils.h"
#include "models/models.h"
#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/params.h>
#include <xnnpack/params-init.h>


Frank Barchard9098aba2021-08-12 12:20:03 -070025static void GEMMEnd2EndBenchmark(
26 benchmark::State& state,
27 models::ExecutionPlanFactory model_factory,
28 xnn_qu8_gemm_minmax_ukernel_function gemm,
29 xnn_qu8_igemm_minmax_ukernel_function igemm,
30 xnn_qu8_gemm_minmax_ukernel_function gemm1,
31 xnn_qu8_igemm_minmax_ukernel_function igemm1,
32 xnn_init_qu8_conv_minmax_params_fn init_params,
33 uint8_t mr, uint8_t nr, uint8_t log2_kr = 0, uint8_t log2_sr = 0,
34 benchmark::utils::IsaCheckFunction isa_check = nullptr)
35{
36 if (isa_check && !isa_check(state)) {
37 return;
38 }
39 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
40 state.SkipWithError("failed to initialize XNNPACK");
41 return;
42 }
43
44 // Override microkernels chosen in xnn_initialize
45 // Note: do not directly assign to xnn_params.qu8.gemm because it breaks older gcc.
46 xnn_params.qu8.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function(gemm));
47 xnn_params.qu8.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function(igemm));
48 xnn_params.qu8.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function(gemm1));
49 xnn_params.qu8.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function(igemm1));
50 xnn_params.qu8.gemm.init.qu8 = init_params;
51 xnn_params.qu8.gemm.mr = mr;
52 xnn_params.qu8.gemm.nr = nr;
53 xnn_params.qu8.gemm.log2_kr = log2_kr;
54 xnn_params.qu8.gemm.log2_sr = log2_sr;
55
56 auto execution_plan = model_factory(nullptr);
57 if (execution_plan.empty()) {
58 state.SkipWithError("failed to create a model");
59 return;
60 }
61
62 for (auto _ : state) {
63 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
64 xnn_status status = xnn_run_operator(op.get(), nullptr);
65 if (status != xnn_status_success) {
66 state.SkipWithError("failed to run a model");
67 return;
68 }
69 }
70 }
71
72 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
73 if (cpu_frequency != 0) {
74 state.counters["cpufreq"] = cpu_frequency;
75 }
76}

#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
Frank Barchardf82410d2022-02-02 01:01:38 -080079 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
80 GEMMEnd2EndBenchmark(state, model,
81 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a53,
82 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
83 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
84 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
85 xnn_init_qu8_conv_minmax_rndnu_neon_params,
86 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
87 benchmark::utils::CheckNEON);
88 }
89 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
90 GEMMEnd2EndBenchmark(state, model,
91 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53,
92 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
93 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
94 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
95 xnn_init_qu8_conv_minmax_rndnu_neon_params,
96 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
97 benchmark::utils::CheckNEON);
98 }
Frank Barchard34251d82022-02-02 11:57:11 -080099 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7(benchmark::State& state, models::ExecutionPlanFactory model) {
100 GEMMEnd2EndBenchmark(state, model,
101 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_cortex_a7,
102 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
103 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
104 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
105 xnn_init_qu8_conv_minmax_rndnu_neon_params,
106 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
107 benchmark::utils::CheckNEON);
108 }
109 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7(benchmark::State& state, models::ExecutionPlanFactory model) {
110 GEMMEnd2EndBenchmark(state, model,
111 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7,
112 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
113 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
114 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
115 xnn_init_qu8_conv_minmax_rndnu_neon_params,
116 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
117 benchmark::utils::CheckNEON);
118 }
Frank Barchard901845c2022-01-19 01:45:22 -0800119 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
120 GEMMEnd2EndBenchmark(state, model,
121 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
122 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64,
123 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
124 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
125 xnn_init_qu8_conv_minmax_rndnu_neon_params,
126 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
127 benchmark::utils::CheckNEON);
128 }
129 static void qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
130 GEMMEnd2EndBenchmark(state, model,
131 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
132 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64,
133 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
134 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
135 xnn_init_qu8_conv_minmax_rndnu_neon_params,
136 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
137 benchmark::utils::CheckNEON);
138 }
Frank Barchardf82410d2022-02-02 01:01:38 -0800139 BENCHMARK_QU8_END2END(qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53)
140 BENCHMARK_QU8_END2END(qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a53)
Frank Barchard34251d82022-02-02 11:57:11 -0800141 BENCHMARK_QU8_END2END(qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_cortex_a7)
142 BENCHMARK_QU8_END2END(qu8_gemm_4x8__aarch32_neon_mlal_lane_cortex_a7)
Frank Barchard901845c2022-01-19 01:45:22 -0800143 BENCHMARK_QU8_END2END(qu8_gemm_4x8__aarch32_neon_mlal_lane_prfm_ld64)
144 BENCHMARK_QU8_END2END(qu8_gemm_4x8__aarch32_neon_mlal_lane_ld64)
#endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY

#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchardefc3ccf2021-08-31 23:20:00 -0700148 static void qu8_gemm_4x16c4__aarch64_neondot_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
149 GEMMEnd2EndBenchmark(state, model,
150 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
151 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55,
152 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
153 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
154 xnn_init_qu8_conv_minmax_rndnu_neon_params,
155 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
156 benchmark::utils::CheckNEONDOT);
157 }
Frank Barchard40668982021-08-24 11:12:04 -0700158 static void qu8_gemm_4x16c4__aarch64_neondot_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
159 GEMMEnd2EndBenchmark(state, model,
160 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
161 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_ld128,
162 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
163 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
164 xnn_init_qu8_conv_minmax_rndnu_neon_params,
165 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
166 benchmark::utils::CheckNEONDOT);
167 }
Frank Barcharddf8e6042021-09-03 13:56:29 -0700168 static void qu8_gemm_4x8c4__aarch64_neondot_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard0049e892021-08-22 09:37:21 -0700169 GEMMEnd2EndBenchmark(state, model,
Frank Barcharddf8e6042021-09-03 13:56:29 -0700170 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128,
171 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128,
Frank Barchard0049e892021-08-22 09:37:21 -0700172 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
173 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
174 xnn_init_qu8_conv_minmax_rndnu_neon_params,
175 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
176 benchmark::utils::CheckNEONDOT);
177 }
Frank Barchardca4c68e2021-08-25 19:06:40 -0700178 static void qu8_gemm_4x8c4__aarch64_neondot_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
179 GEMMEnd2EndBenchmark(state, model,
180 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55,
181 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55,
182 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
183 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
184 xnn_init_qu8_conv_minmax_rndnu_neon_params,
185 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
186 benchmark::utils::CheckNEONDOT);
187 }
Frank Barchard23662902021-09-13 15:07:13 -0700188
189 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
190 GEMMEnd2EndBenchmark(state, model,
191 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
192 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75,
193 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
194 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
195 xnn_init_qu8_conv_minmax_rndnu_neon_params,
196 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
197 benchmark::utils::CheckNEON);
198 }
199
200 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
201 GEMMEnd2EndBenchmark(state, model,
202 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
203 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75,
204 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
205 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
206 xnn_init_qu8_conv_minmax_rndnu_neon_params,
207 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
208 benchmark::utils::CheckNEON);
209 }
210
211 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
212 GEMMEnd2EndBenchmark(state, model,
213 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
214 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53,
215 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
216 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
217 xnn_init_qu8_conv_minmax_rndnu_neon_params,
218 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
219 benchmark::utils::CheckNEON);
220 }
221
222 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
223 GEMMEnd2EndBenchmark(state, model,
224 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
225 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53,
226 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
227 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
228 xnn_init_qu8_conv_minmax_rndnu_neon_params,
229 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
230 benchmark::utils::CheckNEON);
231 }
Frank Barchard9cdc10d2021-11-22 19:03:54 -0800232 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
233 GEMMEnd2EndBenchmark(state, model,
234 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
235 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64,
236 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
237 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
238 xnn_init_qu8_conv_minmax_rndnu_neon_params,
239 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
240 benchmark::utils::CheckNEON);
241 }
Frank Barchard23662902021-09-13 15:07:13 -0700242
Frank Barchard9cdc10d2021-11-22 19:03:54 -0800243 static void qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
244 GEMMEnd2EndBenchmark(state, model,
245 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
246 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64,
247 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
248 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
249 xnn_init_qu8_conv_minmax_rndnu_neon_params,
250 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
251 benchmark::utils::CheckNEON);
252 }
Frank Barchardca4c68e2021-08-25 19:06:40 -0700253 BENCHMARK_QU8_END2END(qu8_gemm_4x8c4__aarch64_neondot_cortex_a55);
Frank Barcharddf8e6042021-09-03 13:56:29 -0700254 BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__aarch64_neondot_cortex_a55);
255 BENCHMARK_QU8_END2END(qu8_gemm_4x8c4__aarch64_neondot_ld128);
256 BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__aarch64_neondot_ld128);
Frank Barchard23662902021-09-13 15:07:13 -0700257 BENCHMARK_QU8_END2END(qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a75);
258 BENCHMARK_QU8_END2END(qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75);
259 BENCHMARK_QU8_END2END(qu8_gemm_4x16__aarch64_neon_mlal_lane_cortex_a53);
260 BENCHMARK_QU8_END2END(qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53);
Frank Barchard9cdc10d2021-11-22 19:03:54 -0800261 BENCHMARK_QU8_END2END(qu8_gemm_4x16__aarch64_neon_mlal_lane_ld64);
262 BENCHMARK_QU8_END2END(qu8_gemm_4x16__aarch64_neon_mlal_lane_prfm_ld64);
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchardd5a53332022-01-10 03:44:40 -0800266 static void qu8_gemm_2x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
267 GEMMEnd2EndBenchmark(state, model,
268 xnn_qu8_gemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane,
269 xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane,
270 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
271 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
272 xnn_init_qu8_conv_minmax_rndnu_neon_params,
Frank Barcharda30e2df2022-01-10 13:44:16 -0800273 2 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
Frank Barchardd5a53332022-01-10 03:44:40 -0800274 benchmark::utils::CheckNEON);
275 }
276
277 static void qu8_gemm_3x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
278 GEMMEnd2EndBenchmark(state, model,
279 xnn_qu8_gemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
280 xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane,
281 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
282 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
283 xnn_init_qu8_conv_minmax_rndnu_neon_params,
Frank Barcharda30e2df2022-01-10 13:44:16 -0800284 3 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
Frank Barchardd5a53332022-01-10 03:44:40 -0800285 benchmark::utils::CheckNEON);
286 }
287
Frank Barchard2df75422021-09-08 19:02:40 -0700288 static void qu8_gemm_4x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
289 GEMMEnd2EndBenchmark(state, model,
290 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
291 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane,
292 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
293 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
294 xnn_init_qu8_conv_minmax_rndnu_neon_params,
295 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
296 benchmark::utils::CheckNEON);
297 }
298
Frank Barchardd5a53332022-01-10 03:44:40 -0800299 static void qu8_gemm_6x8__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
300 GEMMEnd2EndBenchmark(state, model,
301 xnn_qu8_gemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
302 xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane,
303 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
304 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
305 xnn_init_qu8_conv_minmax_rndnu_neon_params,
Frank Barcharda30e2df2022-01-10 13:44:16 -0800306 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
Frank Barchardd5a53332022-01-10 03:44:40 -0800307 benchmark::utils::CheckNEON);
308 }
309
310 static void qu8_gemm_2x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
311 GEMMEnd2EndBenchmark(state, model,
312 xnn_qu8_gemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
313 xnn_qu8_igemm_minmax_rndnu_ukernel_2x16__neon_mlal_lane,
314 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
315 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
316 xnn_init_qu8_conv_minmax_rndnu_neon_params,
Frank Barcharda30e2df2022-01-10 13:44:16 -0800317 2 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
Frank Barchardd5a53332022-01-10 03:44:40 -0800318 benchmark::utils::CheckNEON);
319 }
320
321 static void qu8_gemm_3x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
322 GEMMEnd2EndBenchmark(state, model,
323 xnn_qu8_gemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
324 xnn_qu8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane,
325 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
326 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
327 xnn_init_qu8_conv_minmax_rndnu_neon_params,
Frank Barcharda30e2df2022-01-10 13:44:16 -0800328 3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
Frank Barchardd5a53332022-01-10 03:44:40 -0800329 benchmark::utils::CheckNEON);
330 }
331
Frank Barcharda29b57e2021-08-20 19:06:28 -0700332 static void qu8_gemm_4x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -0700333 GEMMEnd2EndBenchmark(state, model,
334 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
335 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane,
336 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
337 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
338 xnn_init_qu8_conv_minmax_rndnu_neon_params,
339 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
340 benchmark::utils::CheckNEON);
341 }
342
Frank Barchardd5a53332022-01-10 03:44:40 -0800343 static void qu8_gemm_6x16__neon_mlal_lane(benchmark::State& state, models::ExecutionPlanFactory model) {
344 GEMMEnd2EndBenchmark(state, model,
345 xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
346 xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane,
347 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
348 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane,
349 xnn_init_qu8_conv_minmax_rndnu_neon_params,
Frank Barcharda30e2df2022-01-10 13:44:16 -0800350 6 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
Frank Barchardd5a53332022-01-10 03:44:40 -0800351 benchmark::utils::CheckNEON);
352 }
353
Frank Barchard9098aba2021-08-12 12:20:03 -0700354 static void qu8_gemm_1x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
355 GEMMEnd2EndBenchmark(state, model,
356 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
357 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
358 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
359 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
360 xnn_init_qu8_conv_minmax_rndnu_neon_params,
361 1 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
362 benchmark::utils::CheckNEONDOT);
363 }
364 static void qu8_gemm_2x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
365 GEMMEnd2EndBenchmark(state, model,
366 xnn_qu8_gemm_minmax_rndnu_ukernel_2x8c4__neondot,
367 xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot,
368 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
369 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
370 xnn_init_qu8_conv_minmax_rndnu_neon_params,
371 2 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
372 benchmark::utils::CheckNEONDOT);
373 }
374 static void qu8_gemm_3x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
375 GEMMEnd2EndBenchmark(state, model,
376 xnn_qu8_gemm_minmax_rndnu_ukernel_3x8c4__neondot,
377 xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot,
378 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
379 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
380 xnn_init_qu8_conv_minmax_rndnu_neon_params,
381 3 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
382 benchmark::utils::CheckNEONDOT);
383 }
384 static void qu8_gemm_4x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
385 GEMMEnd2EndBenchmark(state, model,
386 xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__neondot,
387 xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot,
388 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
389 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
390 xnn_init_qu8_conv_minmax_rndnu_neon_params,
391 4 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
392 benchmark::utils::CheckNEONDOT);
393 }
394 static void qu8_gemm_5x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
395 GEMMEnd2EndBenchmark(state, model,
396 xnn_qu8_gemm_minmax_rndnu_ukernel_5x8c4__neondot,
397 xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot,
398 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
399 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
400 xnn_init_qu8_conv_minmax_rndnu_neon_params,
401 5 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
402 benchmark::utils::CheckNEONDOT);
403 }
404 static void qu8_gemm_6x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
405 GEMMEnd2EndBenchmark(state, model,
406 xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot,
407 xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot,
408 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
409 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
410 xnn_init_qu8_conv_minmax_rndnu_neon_params,
411 6 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
412 benchmark::utils::CheckNEONDOT);
413 }
414 static void qu8_gemm_8x8c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
415 GEMMEnd2EndBenchmark(state, model,
416 xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot,
417 xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
418 xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot,
419 xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot,
420 xnn_init_qu8_conv_minmax_rndnu_neon_params,
421 8 /* mr */, 8 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
422 benchmark::utils::CheckNEONDOT);
423 }
424 static void qu8_gemm_1x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
425 GEMMEnd2EndBenchmark(state, model,
426 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
427 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
428 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
429 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
430 xnn_init_qu8_conv_minmax_rndnu_neon_params,
431 1 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
432 benchmark::utils::CheckNEONDOT);
433 }
434 static void qu8_gemm_2x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
435 GEMMEnd2EndBenchmark(state, model,
436 xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot,
437 xnn_qu8_igemm_minmax_rndnu_ukernel_2x16c4__neondot,
438 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
439 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
440 xnn_init_qu8_conv_minmax_rndnu_neon_params,
441 2 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
442 benchmark::utils::CheckNEONDOT);
443 }
444 static void qu8_gemm_3x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
445 GEMMEnd2EndBenchmark(state, model,
446 xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot,
447 xnn_qu8_igemm_minmax_rndnu_ukernel_3x16c4__neondot,
448 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
449 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
450 xnn_init_qu8_conv_minmax_rndnu_neon_params,
451 3 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
452 benchmark::utils::CheckNEONDOT);
453 }
454 static void qu8_gemm_4x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
455 GEMMEnd2EndBenchmark(state, model,
456 xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot,
457 xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot,
458 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
459 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
460 xnn_init_qu8_conv_minmax_rndnu_neon_params,
461 4 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
462 benchmark::utils::CheckNEONDOT);
463 }
464 static void qu8_gemm_5x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
465 GEMMEnd2EndBenchmark(state, model,
466 xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot,
467 xnn_qu8_igemm_minmax_rndnu_ukernel_5x16c4__neondot,
468 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
469 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
470 xnn_init_qu8_conv_minmax_rndnu_neon_params,
471 5 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
472 benchmark::utils::CheckNEONDOT);
473 }
474 static void qu8_gemm_6x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
475 GEMMEnd2EndBenchmark(state, model,
476 xnn_qu8_gemm_minmax_rndnu_ukernel_6x16c4__neondot,
477 xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot,
478 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
479 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
480 xnn_init_qu8_conv_minmax_rndnu_neon_params,
481 6 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
482 benchmark::utils::CheckNEONDOT);
483 }
484 static void qu8_gemm_8x16c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
485 GEMMEnd2EndBenchmark(state, model,
486 xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot,
487 xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot,
488 xnn_qu8_gemm_minmax_rndnu_ukernel_1x16c4__neondot,
489 xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot,
490 xnn_init_qu8_conv_minmax_rndnu_neon_params,
491 8 /* mr */, 16 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
492 benchmark::utils::CheckNEONDOT);
493 }
Frank Barchardcdf59a52021-09-08 13:55:24 -0700494 static void qu8_gemm_2x32c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
495 GEMMEnd2EndBenchmark(state, model,
496 xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot,
497 xnn_qu8_igemm_minmax_rndnu_ukernel_2x32c4__neondot,
498 xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot,
499 xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
500 xnn_init_qu8_conv_minmax_rndnu_neon_params,
501 2 /* mr */, 32 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
502 benchmark::utils::CheckNEONDOT);
503 }
504 static void qu8_gemm_3x32c4__neondot(benchmark::State& state, models::ExecutionPlanFactory model) {
505 GEMMEnd2EndBenchmark(state, model,
506 xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot,
507 xnn_qu8_igemm_minmax_rndnu_ukernel_3x32c4__neondot,
508 xnn_qu8_gemm_minmax_rndnu_ukernel_1x32c4__neondot,
509 xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot,
510 xnn_init_qu8_conv_minmax_rndnu_neon_params,
511 3 /* mr */, 32 /* nr */, 2 /* log2_kr */, 0 /* log2_sr */,
512 benchmark::utils::CheckNEONDOT);
513 }
Frank Barchard9098aba2021-08-12 12:20:03 -0700514 BENCHMARK_QU8_END2END(qu8_gemm_1x8c4__neondot);
515 BENCHMARK_QU8_END2END(qu8_gemm_2x8c4__neondot);
516 BENCHMARK_QU8_END2END(qu8_gemm_3x8c4__neondot);
517 BENCHMARK_QU8_END2END(qu8_gemm_4x8c4__neondot);
518 BENCHMARK_QU8_END2END(qu8_gemm_5x8c4__neondot);
519 BENCHMARK_QU8_END2END(qu8_gemm_6x8c4__neondot);
520 BENCHMARK_QU8_END2END(qu8_gemm_8x8c4__neondot);
521 BENCHMARK_QU8_END2END(qu8_gemm_1x16c4__neondot);
522 BENCHMARK_QU8_END2END(qu8_gemm_2x16c4__neondot);
523 BENCHMARK_QU8_END2END(qu8_gemm_3x16c4__neondot);
524 BENCHMARK_QU8_END2END(qu8_gemm_4x16c4__neondot);
525 BENCHMARK_QU8_END2END(qu8_gemm_5x16c4__neondot);
526 BENCHMARK_QU8_END2END(qu8_gemm_6x16c4__neondot);
527 BENCHMARK_QU8_END2END(qu8_gemm_8x16c4__neondot);
Frank Barchardcdf59a52021-09-08 13:55:24 -0700528 BENCHMARK_QU8_END2END(qu8_gemm_2x32c4__neondot);
529 BENCHMARK_QU8_END2END(qu8_gemm_3x32c4__neondot);
Frank Barchard9098aba2021-08-12 12:20:03 -0700530
Frank Barchardd5a53332022-01-10 03:44:40 -0800531 BENCHMARK_QU8_END2END(qu8_gemm_2x8__neon_mlal_lane);
532 BENCHMARK_QU8_END2END(qu8_gemm_3x8__neon_mlal_lane);
Frank Barchard2df75422021-09-08 19:02:40 -0700533 BENCHMARK_QU8_END2END(qu8_gemm_4x8__neon_mlal_lane);
Frank Barchardd5a53332022-01-10 03:44:40 -0800534 BENCHMARK_QU8_END2END(qu8_gemm_6x8__neon_mlal_lane);
535 BENCHMARK_QU8_END2END(qu8_gemm_2x16__neon_mlal_lane);
536 BENCHMARK_QU8_END2END(qu8_gemm_3x16__neon_mlal_lane);
Frank Barcharda29b57e2021-08-20 19:06:28 -0700537 BENCHMARK_QU8_END2END(qu8_gemm_4x16__neon_mlal_lane);
Frank Barchardd5a53332022-01-10 03:44:40 -0800538 BENCHMARK_QU8_END2END(qu8_gemm_6x16__neon_mlal_lane);
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Frank Barchard9098aba2021-08-12 12:20:03 -0700542 static void qu8_gemm_2x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
543 GEMMEnd2EndBenchmark(state, model,
544 xnn_qu8_gemm_minmax_fp32_ukernel_2x16c8__avx512skx,
545 xnn_qu8_igemm_minmax_fp32_ukernel_2x16c8__avx512skx,
546 xnn_qu8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
547 xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
548 xnn_init_qu8_conv_minmax_fp32_avx512_params,
549 2 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
550 benchmark::utils::CheckAVX512F);
551 }
Frank Barchard9098aba2021-08-12 12:20:03 -0700552
553 static void qu8_gemm_3x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
554 GEMMEnd2EndBenchmark(state, model,
555 xnn_qu8_gemm_minmax_fp32_ukernel_3x16c8__avx512skx,
556 xnn_qu8_igemm_minmax_fp32_ukernel_3x16c8__avx512skx,
557 xnn_qu8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
558 xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
559 xnn_init_qu8_conv_minmax_fp32_avx512_params,
560 3 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
561 benchmark::utils::CheckAVX512F);
562 }
Frank Barchard07228a32021-11-10 14:23:59 -0800563
Frank Barchard9098aba2021-08-12 12:20:03 -0700564 static void qu8_gemm_4x16c8__avx512skx(benchmark::State& state, models::ExecutionPlanFactory model) {
565 GEMMEnd2EndBenchmark(state, model,
566 xnn_qu8_gemm_minmax_fp32_ukernel_4x16c8__avx512skx,
567 xnn_qu8_igemm_minmax_fp32_ukernel_4x16c8__avx512skx,
568 xnn_qu8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx,
569 xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx,
570 xnn_init_qu8_conv_minmax_fp32_avx512_params,
571 4 /* mr */, 16 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
572 benchmark::utils::CheckAVX512F);
573 }
574
Frank Barchard9098aba2021-08-12 12:20:03 -0700575 static void qu8_gemm_2x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
576 GEMMEnd2EndBenchmark(state, model,
577 xnn_qu8_gemm_minmax_fp32_ukernel_2x8c8__avx2,
578 xnn_qu8_igemm_minmax_fp32_ukernel_2x8c8__avx2,
579 xnn_qu8_gemm_minmax_fp32_ukernel_1x8c8__avx2,
580 xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
581 xnn_init_qu8_conv_minmax_fp32_avx2_params,
582 2 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
583 benchmark::utils::CheckAVX2);
584 }
585 static void qu8_gemm_3x8c8__avx2(benchmark::State& state, models::ExecutionPlanFactory model) {
586 GEMMEnd2EndBenchmark(state, model,
587 xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2,
588 xnn_qu8_igemm_minmax_fp32_ukernel_3x8c8__avx2,
589 xnn_qu8_gemm_minmax_fp32_ukernel_1x8c8__avx2,
590 xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2,
591 xnn_init_qu8_conv_minmax_fp32_avx2_params,
592 3 /* mr */, 8 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
593 benchmark::utils::CheckAVX2);
594 }
595
596 static void qu8_gemm_2x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
597 GEMMEnd2EndBenchmark(state, model,
598 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
599 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld64,
600 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
601 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
602 xnn_init_qu8_conv_minmax_fp32_sse2_params,
603 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
604 benchmark::utils::CheckXOP);
605 }
606 static void qu8_gemm_2x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
607 GEMMEnd2EndBenchmark(state, model,
608 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
609 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__xop_ld128,
610 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
611 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
612 xnn_init_qu8_conv_minmax_fp32_sse2_params,
613 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
614 benchmark::utils::CheckXOP);
615 }
616 static void qu8_gemm_3x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
617 GEMMEnd2EndBenchmark(state, model,
618 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
619 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld64,
620 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
621 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
622 xnn_init_qu8_conv_minmax_fp32_sse2_params,
623 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
624 benchmark::utils::CheckXOP);
625 }
626 static void qu8_gemm_3x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
627 GEMMEnd2EndBenchmark(state, model,
628 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
629 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__xop_ld128,
630 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
631 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
632 xnn_init_qu8_conv_minmax_fp32_sse2_params,
633 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
634 benchmark::utils::CheckXOP);
635 }
636 static void qu8_gemm_4x4c2__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
637 GEMMEnd2EndBenchmark(state, model,
638 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
639 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld64,
640 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
641 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld64,
642 xnn_init_qu8_conv_minmax_fp32_sse2_params,
643 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
644 benchmark::utils::CheckXOP);
645 }
646 static void qu8_gemm_4x4c2__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
647 GEMMEnd2EndBenchmark(state, model,
648 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
649 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__xop_ld128,
650 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
651 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__xop_ld128,
652 xnn_init_qu8_conv_minmax_fp32_sse2_params,
653 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
654 benchmark::utils::CheckXOP);
655 }
656
657 static void qu8_gemm_2x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
658 GEMMEnd2EndBenchmark(state, model,
659 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
660 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld64,
661 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
662 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
663 xnn_init_qu8_conv_minmax_fp32_sse2_params,
664 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
665 benchmark::utils::CheckXOP);
666 }
667 static void qu8_gemm_3x4c8__xop_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
668 GEMMEnd2EndBenchmark(state, model,
669 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
670 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld64,
671 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
672 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld64,
673 xnn_init_qu8_conv_minmax_fp32_sse2_params,
674 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
675 benchmark::utils::CheckXOP);
676 }
677
678 static void qu8_gemm_2x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
679 GEMMEnd2EndBenchmark(state, model,
680 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
681 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__xop_ld128,
682 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
683 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
684 xnn_init_qu8_conv_minmax_fp32_sse2_params,
685 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
686 benchmark::utils::CheckXOP);
687 }
688 static void qu8_gemm_3x4c8__xop_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
689 GEMMEnd2EndBenchmark(state, model,
690 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
691 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__xop_ld128,
692 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
693 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__xop_ld128,
694 xnn_init_qu8_conv_minmax_fp32_sse2_params,
695 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
696 benchmark::utils::CheckXOP);
697 }
698
699 static void qu8_gemm_2x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
700 GEMMEnd2EndBenchmark(state, model,
701 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
702 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld64,
703 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
704 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
705 xnn_init_qu8_conv_minmax_fp32_sse2_params,
706 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
707 benchmark::utils::CheckAVX);
708 }
709 static void qu8_gemm_2x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
710 GEMMEnd2EndBenchmark(state, model,
711 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
712 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__avx_ld128,
713 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
714 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
715 xnn_init_qu8_conv_minmax_fp32_sse2_params,
716 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
717 benchmark::utils::CheckAVX);
718 }
719 static void qu8_gemm_3x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
720 GEMMEnd2EndBenchmark(state, model,
721 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
722 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld64,
723 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
724 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
725 xnn_init_qu8_conv_minmax_fp32_sse2_params,
726 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
727 benchmark::utils::CheckAVX);
728 }
729 static void qu8_gemm_3x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
730 GEMMEnd2EndBenchmark(state, model,
731 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
732 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__avx_ld128,
733 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
734 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
735 xnn_init_qu8_conv_minmax_fp32_sse2_params,
736 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
737 benchmark::utils::CheckAVX);
738 }
739 static void qu8_gemm_4x4c2__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
740 GEMMEnd2EndBenchmark(state, model,
741 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
742 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld64,
743 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
744 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld64,
745 xnn_init_qu8_conv_minmax_fp32_sse2_params,
746 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
747 benchmark::utils::CheckAVX);
748 }
749 static void qu8_gemm_4x4c2__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
750 GEMMEnd2EndBenchmark(state, model,
751 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
752 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__avx_ld128,
753 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
754 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__avx_ld128,
755 xnn_init_qu8_conv_minmax_fp32_sse2_params,
756 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
757 benchmark::utils::CheckAVX);
758 }
759
760
761 static void qu8_gemm_2x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
762 GEMMEnd2EndBenchmark(state, model,
763 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
764 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld64,
765 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
766 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
767 xnn_init_qu8_conv_minmax_fp32_sse2_params,
768 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
769 benchmark::utils::CheckAVX);
770 }
771 static void qu8_gemm_2x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
772 GEMMEnd2EndBenchmark(state, model,
773 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
774 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__avx_ld128,
775 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
776 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
777 xnn_init_qu8_conv_minmax_fp32_sse2_params,
778 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
779 benchmark::utils::CheckAVX);
780 }
781 static void qu8_gemm_3x4c8__avx_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
782 GEMMEnd2EndBenchmark(state, model,
783 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
784 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld64,
785 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
786 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld64,
787 xnn_init_qu8_conv_minmax_fp32_sse2_params,
788 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
789 benchmark::utils::CheckAVX);
790 }
791 static void qu8_gemm_3x4c8__avx_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
792 GEMMEnd2EndBenchmark(state, model,
793 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
794 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__avx_ld128,
795 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
796 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__avx_ld128,
797 xnn_init_qu8_conv_minmax_fp32_sse2_params,
798 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
799 benchmark::utils::CheckAVX);
800 }
801
802 static void qu8_gemm_2x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
803 GEMMEnd2EndBenchmark(state, model,
804 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
805 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld64,
806 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
807 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
808 xnn_init_qu8_conv_minmax_fp32_sse2_params,
809 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
810 benchmark::utils::CheckSSE41);
811 }
812 static void qu8_gemm_2x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
813 GEMMEnd2EndBenchmark(state, model,
814 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
815 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse41_ld128,
816 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
817 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
818 xnn_init_qu8_conv_minmax_fp32_sse2_params,
819 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
820 benchmark::utils::CheckSSE41);
821 }
822 static void qu8_gemm_3x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
823 GEMMEnd2EndBenchmark(state, model,
824 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
825 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld64,
826 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
827 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
828 xnn_init_qu8_conv_minmax_fp32_sse2_params,
829 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
830 benchmark::utils::CheckSSE41);
831 }
832 static void qu8_gemm_3x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
833 GEMMEnd2EndBenchmark(state, model,
834 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
835 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse41_ld128,
836 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
837 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
838 xnn_init_qu8_conv_minmax_fp32_sse2_params,
839 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
840 benchmark::utils::CheckSSE41);
841 }
842 static void qu8_gemm_4x4c2__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
843 GEMMEnd2EndBenchmark(state, model,
844 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
845 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld64,
846 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
847 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld64,
848 xnn_init_qu8_conv_minmax_fp32_sse2_params,
849 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
850 benchmark::utils::CheckSSE41);
851 }
852 static void qu8_gemm_4x4c2__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
853 GEMMEnd2EndBenchmark(state, model,
854 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
855 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse41_ld128,
856 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
857 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse41_ld128,
858 xnn_init_qu8_conv_minmax_fp32_sse2_params,
859 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */,
860 benchmark::utils::CheckSSE41);
861 }
862
863 static void qu8_gemm_2x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
864 GEMMEnd2EndBenchmark(state, model,
865 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
866 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld64,
867 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
868 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
869 xnn_init_qu8_conv_minmax_fp32_sse2_params,
870 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
871 benchmark::utils::CheckSSE41);
872 }
873 static void qu8_gemm_2x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
874 GEMMEnd2EndBenchmark(state, model,
875 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
876 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse41_ld128,
877 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
878 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
879 xnn_init_qu8_conv_minmax_fp32_sse2_params,
880 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
881 benchmark::utils::CheckSSE41);
882 }
883 static void qu8_gemm_3x4c8__sse41_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
884 GEMMEnd2EndBenchmark(state, model,
885 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
886 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld64,
887 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
888 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld64,
889 xnn_init_qu8_conv_minmax_fp32_sse2_params,
890 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
891 benchmark::utils::CheckSSE41);
892 }
893 static void qu8_gemm_3x4c8__sse41_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
894 GEMMEnd2EndBenchmark(state, model,
895 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
896 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse41_ld128,
897 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
898 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse41_ld128,
899 xnn_init_qu8_conv_minmax_fp32_sse2_params,
900 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */,
901 benchmark::utils::CheckSSE41);
902 }
903
904 static void qu8_gemm_2x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
905 GEMMEnd2EndBenchmark(state, model,
906 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
907 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld64,
908 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
909 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
910 xnn_init_qu8_conv_minmax_fp32_sse2_params,
911 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
912 }
913 static void qu8_gemm_2x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
914 GEMMEnd2EndBenchmark(state, model,
915 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
916 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__sse2_ld128,
917 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
918 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
919 xnn_init_qu8_conv_minmax_fp32_sse2_params,
920 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
921 }
922 static void qu8_gemm_3x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
923 GEMMEnd2EndBenchmark(state, model,
924 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
925 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld64,
926 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
927 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
928 xnn_init_qu8_conv_minmax_fp32_sse2_params,
929 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
930 }
931 static void qu8_gemm_3x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
932 GEMMEnd2EndBenchmark(state, model,
933 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
934 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__sse2_ld128,
935 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
936 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
937 xnn_init_qu8_conv_minmax_fp32_sse2_params,
938 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
939 }
940 static void qu8_gemm_4x4c2__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
941 GEMMEnd2EndBenchmark(state, model,
942 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
943 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld64,
944 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
945 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld64,
946 xnn_init_qu8_conv_minmax_fp32_sse2_params,
947 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
948 }
949 static void qu8_gemm_4x4c2__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
950 GEMMEnd2EndBenchmark(state, model,
951 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
952 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__sse2_ld128,
953 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
954 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__sse2_ld128,
955 xnn_init_qu8_conv_minmax_fp32_sse2_params,
956 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 0 /* log2_sr */);
957 }
958
959 static void qu8_gemm_2x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
960 GEMMEnd2EndBenchmark(state, model,
961 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
962 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld64,
963 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
964 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
965 xnn_init_qu8_conv_minmax_fp32_sse2_params,
966 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
967 }
968 static void qu8_gemm_2x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
969 GEMMEnd2EndBenchmark(state, model,
970 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
971 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__sse2_ld128,
972 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
973 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
974 xnn_init_qu8_conv_minmax_fp32_sse2_params,
975 2 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
976 }
977 static void qu8_gemm_3x4c8__sse2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
978 GEMMEnd2EndBenchmark(state, model,
979 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
980 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld64,
981 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
982 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld64,
983 xnn_init_qu8_conv_minmax_fp32_sse2_params,
984 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
985 }
986 static void qu8_gemm_3x4c8__sse2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
987 GEMMEnd2EndBenchmark(state, model,
988 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
989 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__sse2_ld128,
990 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
991 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__sse2_ld128,
992 xnn_init_qu8_conv_minmax_fp32_sse2_params,
993 3 /* mr */, 4 /* nr */, 3 /* log2_kr */, 0 /* log2_sr */);
994 }
995
996
// x86 / x86-64 benchmark registrations, one per wrapper defined above, grouped
// by kernel family in the same order as the wrapper definitions.
// NOTE(review): BENCHMARK_QU8_END2END is defined outside this chunk; presumably
// it registers the wrapper with Google Benchmark for each QU8 model — confirm.

// avx512skx, 16-column c8 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x16c8__avx512skx);
BENCHMARK_QU8_END2END(qu8_gemm_3x16c8__avx512skx);
BENCHMARK_QU8_END2END(qu8_gemm_4x16c8__avx512skx);

// avx2, 8-column c8 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x8c8__avx2);
BENCHMARK_QU8_END2END(qu8_gemm_3x8c8__avx2);

// xop, c2 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__xop_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__xop_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__xop_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__xop_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__xop_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__xop_ld128);

// xop, c8 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__xop_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__xop_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__xop_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__xop_ld128);

// avx, c2 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__avx_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__avx_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__avx_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__avx_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__avx_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__avx_ld128);

// avx, c8 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__avx_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__avx_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__avx_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__avx_ld128);

// sse41, c2 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__sse41_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__sse41_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__sse41_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__sse41_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__sse41_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__sse41_ld128);

// sse41, c8 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__sse41_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__sse41_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__sse41_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__sse41_ld128);

// sse2, c2 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__sse2_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__sse2_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__sse2_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__sse2_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__sse2_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__sse2_ld128);

// sse2, c8 kernels
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__sse2_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__sse2_ld128);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__sse2_ld64);
BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__sse2_ld128);
1051#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
1052
Marat Dukhan4c617792021-12-21 15:47:58 -08001053#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan58cdcf22022-02-01 02:05:00 -08001054 static void qu8_gemm_2x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1055 GEMMEnd2EndBenchmark(state, model,
1056 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
1057 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld64,
1058 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
1059 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
1060 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1061 2 /* mr */, 4 /* nr */, 1 /* log2_kr */);
1062 }
1063 static void qu8_gemm_2x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1064 GEMMEnd2EndBenchmark(state, model,
1065 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
1066 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2__wasmsimd_dot16x2_ld128,
1067 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
1068 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
1069 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1070 2 /* mr */, 4 /* nr */, 1 /* log2_kr */);
1071 }
1072 static void qu8_gemm_3x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1073 GEMMEnd2EndBenchmark(state, model,
1074 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
1075 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld64,
1076 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
1077 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
1078 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1079 3 /* mr */, 4 /* nr */, 1 /* log2_kr */);
1080 }
1081 static void qu8_gemm_3x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1082 GEMMEnd2EndBenchmark(state, model,
1083 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
1084 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2__wasmsimd_dot16x2_ld128,
1085 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
1086 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
1087 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1088 3 /* mr */, 4 /* nr */, 1 /* log2_kr */);
1089 }
1090 static void qu8_gemm_4x4c2__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1091 GEMMEnd2EndBenchmark(state, model,
1092 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
1093 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld64,
1094 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
1095 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld64,
1096 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1097 4 /* mr */, 4 /* nr */, 1 /* log2_kr */);
1098 }
1099 static void qu8_gemm_4x4c2__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1100 GEMMEnd2EndBenchmark(state, model,
1101 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
1102 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2__wasmsimd_dot16x2_ld128,
1103 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
1104 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2__wasmsimd_dot16x2_ld128,
1105 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1106 4 /* mr */, 4 /* nr */, 1 /* log2_kr */);
1107 }
1108
1109 static void qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1110 GEMMEnd2EndBenchmark(state, model,
1111 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
1112 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld64,
1113 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
1114 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
1115 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1116 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
1117 }
1118 static void qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1119 GEMMEnd2EndBenchmark(state, model,
1120 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
1121 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c2s4__wasmsimd_dot16x2_ld128,
1122 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
1123 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
1124 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1125 2 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
1126 }
1127 static void qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1128 GEMMEnd2EndBenchmark(state, model,
1129 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
1130 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld64,
1131 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
1132 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
1133 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1134 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
1135 }
1136 static void qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1137 GEMMEnd2EndBenchmark(state, model,
1138 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
1139 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c2s4__wasmsimd_dot16x2_ld128,
1140 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
1141 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
1142 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1143 3 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
1144 }
1145 static void qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1146 GEMMEnd2EndBenchmark(state, model,
1147 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
1148 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld64,
1149 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
1150 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld64,
1151 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1152 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
1153 }
1154 static void qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1155 GEMMEnd2EndBenchmark(state, model,
1156 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
1157 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c2s4__wasmsimd_dot16x2_ld128,
1158 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
1159 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c2s4__wasmsimd_dot16x2_ld128,
1160 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1161 4 /* mr */, 4 /* nr */, 1 /* log2_kr */, 2 /* log2_sr */);
1162 }
1163
1164 static void qu8_gemm_2x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1165 GEMMEnd2EndBenchmark(state, model,
1166 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
1167 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld64,
1168 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
1169 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
1170 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1171 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1172 }
1173 static void qu8_gemm_2x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1174 GEMMEnd2EndBenchmark(state, model,
1175 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
1176 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_dot16x2_ld128,
1177 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
1178 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
1179 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1180 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1181 }
1182 static void qu8_gemm_3x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1183 GEMMEnd2EndBenchmark(state, model,
1184 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
1185 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld64,
1186 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
1187 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
1188 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1189 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1190 }
1191 static void qu8_gemm_3x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1192 GEMMEnd2EndBenchmark(state, model,
1193 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
1194 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_dot16x2_ld128,
1195 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
1196 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
1197 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1198 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1199 }
1200 static void qu8_gemm_4x4c8__wasmsimd_dot16x2_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1201 GEMMEnd2EndBenchmark(state, model,
1202 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
1203 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld64,
1204 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
1205 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld64,
1206 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1207 4 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1208 }
1209 static void qu8_gemm_4x4c8__wasmsimd_dot16x2_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1210 GEMMEnd2EndBenchmark(state, model,
1211 xnn_qu8_gemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
1212 xnn_qu8_igemm_minmax_fp32_ukernel_4x4c8__wasmsimd_dot16x2_ld128,
1213 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
1214 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_dot16x2_ld128,
1215 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1216 4 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1217 }
1218
Frank Barchard9098aba2021-08-12 12:20:03 -07001219 static void qu8_gemm_2x4c8__wasmsimd_mul32_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1220 GEMMEnd2EndBenchmark(state, model,
1221 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul32_ld64,
1222 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul32_ld64,
1223 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld64,
1224 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld64,
1225 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1226 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1227 }
Frank Barchard9098aba2021-08-12 12:20:03 -07001228 static void qu8_gemm_2x4c8__wasmsimd_mul32_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1229 GEMMEnd2EndBenchmark(state, model,
1230 xnn_qu8_gemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul32_ld128,
1231 xnn_qu8_igemm_minmax_fp32_ukernel_2x4c8__wasmsimd_mul32_ld128,
1232 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128,
1233 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128,
1234 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1235 2 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1236 }
Frank Barchard9098aba2021-08-12 12:20:03 -07001237 static void qu8_gemm_3x4c8__wasmsimd_mul32_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
1238 GEMMEnd2EndBenchmark(state, model,
1239 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64,
1240 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld64,
1241 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld64,
1242 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld64,
1243 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1244 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1245 }
Frank Barchard9098aba2021-08-12 12:20:03 -07001246 static void qu8_gemm_3x4c8__wasmsimd_mul32_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
1247 GEMMEnd2EndBenchmark(state, model,
1248 xnn_qu8_gemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128,
1249 xnn_qu8_igemm_minmax_fp32_ukernel_3x4c8__wasmsimd_mul32_ld128,
1250 xnn_qu8_gemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128,
1251 xnn_qu8_igemm_minmax_fp32_ukernel_1x4c8__wasmsimd_mul32_ld128,
1252 xnn_init_qu8_conv_minmax_fp32_wasmsimd_params,
1253 3 /* mr */, 4 /* nr */, 3 /* log2_kr */);
1254 }
1255
// Hands each WAsm SIMD benchmark wrapper to BENCHMARK_QU8_END2END, grouped as
// c2 dot16x2, c2s4 dot16x2, c8 dot16x2 and c8 mul32 variants.
// NOTE(review): BENCHMARK_QU8_END2END is defined outside this view; presumably
// it registers each wrapper against the set of QU8 models -- confirm there.
Marat Dukhan58cdcf22022-02-01 02:05:00 -08001256 BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__wasmsimd_dot16x2_ld64)
1257 BENCHMARK_QU8_END2END(qu8_gemm_2x4c2__wasmsimd_dot16x2_ld128)
1258 BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__wasmsimd_dot16x2_ld64)
1259 BENCHMARK_QU8_END2END(qu8_gemm_3x4c2__wasmsimd_dot16x2_ld128)
1260 BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__wasmsimd_dot16x2_ld64)
1261 BENCHMARK_QU8_END2END(qu8_gemm_4x4c2__wasmsimd_dot16x2_ld128)
1262
1263 BENCHMARK_QU8_END2END(qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld64)
1264 BENCHMARK_QU8_END2END(qu8_gemm_2x4c2s4__wasmsimd_dot16x2_ld128)
1265 BENCHMARK_QU8_END2END(qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld64)
1266 BENCHMARK_QU8_END2END(qu8_gemm_3x4c2s4__wasmsimd_dot16x2_ld128)
1267 BENCHMARK_QU8_END2END(qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld64)
1268 BENCHMARK_QU8_END2END(qu8_gemm_4x4c2s4__wasmsimd_dot16x2_ld128)
1269
1270 BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__wasmsimd_dot16x2_ld64)
1271 BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__wasmsimd_dot16x2_ld128)
1272 BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__wasmsimd_dot16x2_ld64)
1273 BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__wasmsimd_dot16x2_ld128)
1274 BENCHMARK_QU8_END2END(qu8_gemm_4x4c8__wasmsimd_dot16x2_ld64)
1275 BENCHMARK_QU8_END2END(qu8_gemm_4x4c8__wasmsimd_dot16x2_ld128)
1276
Frank Barchard9098aba2021-08-12 12:20:03 -07001277 BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__wasmsimd_mul32_ld64)
1278 BENCHMARK_QU8_END2END(qu8_gemm_2x4c8__wasmsimd_mul32_ld128)
1279 BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__wasmsimd_mul32_ld64)
1280 BENCHMARK_QU8_END2END(qu8_gemm_3x4c8__wasmsimd_mul32_ld128)
Marat Dukhan4c617792021-12-21 15:47:58 -08001281#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Frank Barchard9098aba2021-08-12 12:20:03 -07001282
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001283
1284#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1285 static void qu8_gemm_2x2__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
1286 GEMMEnd2EndBenchmark(state, model,
1287 xnn_qu8_gemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
1288 xnn_qu8_igemm_minmax_fp32_ukernel_2x2__wasm_fmagic,
1289 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
1290 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
1291 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
1292 2 /* mr */, 2 /* nr */);
1293 }
1294 static void qu8_gemm_3x2__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
1295 GEMMEnd2EndBenchmark(state, model,
1296 xnn_qu8_gemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
1297 xnn_qu8_igemm_minmax_fp32_ukernel_3x2__wasm_fmagic,
1298 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
1299 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
1300 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
1301 3 /* mr */, 2 /* nr */);
1302 }
1303 static void qu8_gemm_4x2__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
1304 GEMMEnd2EndBenchmark(state, model,
1305 xnn_qu8_gemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
1306 xnn_qu8_igemm_minmax_fp32_ukernel_4x2__wasm_fmagic,
1307 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
1308 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__wasm_fmagic,
1309 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
1310 4 /* mr */, 2 /* nr */);
1311 }
1312 static void qu8_gemm_2x4__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
1313 GEMMEnd2EndBenchmark(state, model,
1314 xnn_qu8_gemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
1315 xnn_qu8_igemm_minmax_fp32_ukernel_2x4__wasm_fmagic,
1316 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
1317 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
1318 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
1319 2 /* mr */, 4 /* nr */);
1320 }
1321 static void qu8_gemm_3x4__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
1322 GEMMEnd2EndBenchmark(state, model,
1323 xnn_qu8_gemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
1324 xnn_qu8_igemm_minmax_fp32_ukernel_3x4__wasm_fmagic,
1325 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
1326 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
1327 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
1328 3 /* mr */, 4 /* nr */);
1329 }
1330 static void qu8_gemm_4x4__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
1331 GEMMEnd2EndBenchmark(state, model,
1332 xnn_qu8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
1333 xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic,
1334 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
1335 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__wasm_fmagic,
1336 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
1337 4 /* mr */, 4 /* nr */);
1338 }
1339
// Hands each wasm_fmagic benchmark wrapper (2x2 through 4x4) to
// BENCHMARK_QU8_END2END.
// NOTE(review): BENCHMARK_QU8_END2END is defined outside this view; presumably
// it registers each wrapper against the set of QU8 models -- confirm there.
1340 BENCHMARK_QU8_END2END(qu8_gemm_2x2__wasm_fmagic)
1341 BENCHMARK_QU8_END2END(qu8_gemm_3x2__wasm_fmagic)
1342 BENCHMARK_QU8_END2END(qu8_gemm_4x2__wasm_fmagic)
1343 BENCHMARK_QU8_END2END(qu8_gemm_2x4__wasm_fmagic)
1344 BENCHMARK_QU8_END2END(qu8_gemm_3x4__wasm_fmagic)
1345 BENCHMARK_QU8_END2END(qu8_gemm_4x4__wasm_fmagic)
1346#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1347
1348
1349static void qu8_gemm_2x2__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -07001350 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001351 xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
1352 xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_fmagic,
1353 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
1354 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
1355 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
Frank Barchard9098aba2021-08-12 12:20:03 -07001356 2 /* mr */, 2 /* nr */);
1357}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001358static void qu8_gemm_3x2__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -07001359 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001360 xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
1361 xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_fmagic,
1362 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
1363 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
1364 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
Frank Barchard9098aba2021-08-12 12:20:03 -07001365 3 /* mr */, 2 /* nr */);
1366}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001367static void qu8_gemm_4x2__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -07001368 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001369 xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
1370 xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_fmagic,
1371 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
1372 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_fmagic,
1373 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
Frank Barchard9098aba2021-08-12 12:20:03 -07001374 4 /* mr */, 2 /* nr */);
1375}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001376static void qu8_gemm_2x4__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -07001377 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001378 xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
1379 xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_fmagic,
1380 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
1381 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
1382 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
Frank Barchard9098aba2021-08-12 12:20:03 -07001383 2 /* mr */, 4 /* nr */);
1384}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001385static void qu8_gemm_3x4__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -07001386 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001387 xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
1388 xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_fmagic,
1389 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
1390 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
1391 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
Frank Barchard9098aba2021-08-12 12:20:03 -07001392 3 /* mr */, 4 /* nr */);
1393}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001394static void qu8_gemm_4x4__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard9098aba2021-08-12 12:20:03 -07001395 GEMMEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -08001396 xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
1397 xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic,
1398 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
1399 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_fmagic,
1400 xnn_init_qu8_conv_minmax_fp32_scalar_fmagic_params,
Frank Barchard9098aba2021-08-12 12:20:03 -07001401 4 /* mr */, 4 /* nr */);
1402}
1403
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001404static void qu8_gemm_2x2__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001405 GEMMEnd2EndBenchmark(state, model,
1406 xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_imagic,
1407 xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_imagic,
1408 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
1409 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
1410 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
1411 2 /* mr */, 2 /* nr */);
1412}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001413static void qu8_gemm_3x2__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001414 GEMMEnd2EndBenchmark(state, model,
1415 xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_imagic,
1416 xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_imagic,
1417 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
1418 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
1419 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
1420 3 /* mr */, 2 /* nr */);
1421}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001422static void qu8_gemm_4x2__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001423 GEMMEnd2EndBenchmark(state, model,
1424 xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_imagic,
1425 xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_imagic,
1426 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_imagic,
1427 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_imagic,
1428 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
1429 4 /* mr */, 2 /* nr */);
1430}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001431static void qu8_gemm_2x4__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001432 GEMMEnd2EndBenchmark(state, model,
1433 xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_imagic,
1434 xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_imagic,
1435 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
1436 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
1437 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
1438 2 /* mr */, 4 /* nr */);
1439}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001440static void qu8_gemm_3x4__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001441 GEMMEnd2EndBenchmark(state, model,
1442 xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_imagic,
1443 xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_imagic,
1444 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
1445 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
1446 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
1447 3 /* mr */, 4 /* nr */);
1448}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001449static void qu8_gemm_4x4__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001450 GEMMEnd2EndBenchmark(state, model,
1451 xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_imagic,
1452 xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_imagic,
1453 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_imagic,
1454 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_imagic,
1455 xnn_init_qu8_conv_minmax_fp32_scalar_imagic_params,
1456 4 /* mr */, 4 /* nr */);
1457}
1458
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001459static void qu8_gemm_2x2__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001460 GEMMEnd2EndBenchmark(state, model,
1461 xnn_qu8_gemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
1462 xnn_qu8_igemm_minmax_fp32_ukernel_2x2__scalar_lrintf,
1463 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
1464 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
1465 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
1466 2 /* mr */, 2 /* nr */);
1467}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001468static void qu8_gemm_3x2__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001469 GEMMEnd2EndBenchmark(state, model,
1470 xnn_qu8_gemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
1471 xnn_qu8_igemm_minmax_fp32_ukernel_3x2__scalar_lrintf,
1472 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
1473 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
1474 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
1475 3 /* mr */, 2 /* nr */);
1476}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001477static void qu8_gemm_4x2__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001478 GEMMEnd2EndBenchmark(state, model,
1479 xnn_qu8_gemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
1480 xnn_qu8_igemm_minmax_fp32_ukernel_4x2__scalar_lrintf,
1481 xnn_qu8_gemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
1482 xnn_qu8_igemm_minmax_fp32_ukernel_1x2__scalar_lrintf,
1483 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
1484 4 /* mr */, 2 /* nr */);
1485}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001486static void qu8_gemm_2x4__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001487 GEMMEnd2EndBenchmark(state, model,
1488 xnn_qu8_gemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
1489 xnn_qu8_igemm_minmax_fp32_ukernel_2x4__scalar_lrintf,
1490 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
1491 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
1492 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
1493 2 /* mr */, 4 /* nr */);
1494}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001495static void qu8_gemm_3x4__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001496 GEMMEnd2EndBenchmark(state, model,
1497 xnn_qu8_gemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
1498 xnn_qu8_igemm_minmax_fp32_ukernel_3x4__scalar_lrintf,
1499 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
1500 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
1501 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
1502 3 /* mr */, 4 /* nr */);
1503}
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001504static void qu8_gemm_4x4__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001505 GEMMEnd2EndBenchmark(state, model,
1506 xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
1507 xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf,
1508 xnn_qu8_gemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
1509 xnn_qu8_igemm_minmax_fp32_ukernel_1x4__scalar_lrintf,
1510 xnn_init_qu8_conv_minmax_fp32_scalar_lrintf_params,
1511 4 /* mr */, 4 /* nr */);
1512}
1513
// Hands each scalar benchmark wrapper to BENCHMARK_QU8_END2END, grouped by
// variant: fmagic, then imagic, then lrintf (2x2 through 4x4 in each group).
// NOTE(review): BENCHMARK_QU8_END2END is defined outside this view; presumably
// it registers each wrapper against the set of QU8 models -- confirm there.
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001514BENCHMARK_QU8_END2END(qu8_gemm_2x2__scalar_fmagic)
1515BENCHMARK_QU8_END2END(qu8_gemm_3x2__scalar_fmagic)
1516BENCHMARK_QU8_END2END(qu8_gemm_4x2__scalar_fmagic)
1517BENCHMARK_QU8_END2END(qu8_gemm_2x4__scalar_fmagic)
1518BENCHMARK_QU8_END2END(qu8_gemm_3x4__scalar_fmagic)
1519BENCHMARK_QU8_END2END(qu8_gemm_4x4__scalar_fmagic)
Frank Barchard9098aba2021-08-12 12:20:03 -07001520
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001521BENCHMARK_QU8_END2END(qu8_gemm_2x2__scalar_imagic)
1522BENCHMARK_QU8_END2END(qu8_gemm_3x2__scalar_imagic)
1523BENCHMARK_QU8_END2END(qu8_gemm_4x2__scalar_imagic)
1524BENCHMARK_QU8_END2END(qu8_gemm_2x4__scalar_imagic)
1525BENCHMARK_QU8_END2END(qu8_gemm_3x4__scalar_imagic)
1526BENCHMARK_QU8_END2END(qu8_gemm_4x4__scalar_imagic)
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001527
Marat Dukhan7c1115f2022-01-04 17:18:41 -08001528BENCHMARK_QU8_END2END(qu8_gemm_2x2__scalar_lrintf)
1529BENCHMARK_QU8_END2END(qu8_gemm_3x2__scalar_lrintf)
1530BENCHMARK_QU8_END2END(qu8_gemm_4x2__scalar_lrintf)
1531BENCHMARK_QU8_END2END(qu8_gemm_2x4__scalar_lrintf)
1532BENCHMARK_QU8_END2END(qu8_gemm_3x4__scalar_lrintf)
1533BENCHMARK_QU8_END2END(qu8_gemm_4x4__scalar_lrintf)
Marat Dukhan440e8ed2022-01-04 15:30:57 -08001534
Frank Barchard9098aba2021-08-12 12:20:03 -07001535#ifndef XNNPACK_BENCHMARK_NO_MAIN
1536BENCHMARK_MAIN();
1537#endif