blob: 8843f4e7c23ff84ae115828060f6b20cddd99a28 [file] [log] [blame]
// Copyright 2021 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <xnnpack.h>
13
14#include <benchmark/benchmark.h>
15
16#include "bench/end2end.h"
17#include "bench/utils.h"
18#include "models/models.h"
19#include <xnnpack/dwconv.h>
20#include <xnnpack/params.h>
21#include <xnnpack/params-init.h>
22
23
24static void DWConvEnd2EndBenchmark(
25 benchmark::State& state,
26 models::ExecutionPlanFactory model_factory,
27 xnn_qs8_dwconv_minmax_unipass_ukernel_function dwconv,
28 xnn_init_qs8_conv_minmax_params_fn init_params,
29 uint8_t channel_tile, uint8_t primary_tile,
30 benchmark::utils::IsaCheckFunction isa_check = nullptr)
31{
32 if (isa_check && !isa_check(state)) {
33 return;
34 }
35 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
36 state.SkipWithError("failed to initialize XNNPACK");
37 return;
38 }
39
40 // Override microkernels chosen in xnn_initialize
41 for (size_t i = 0; i < XNN_MAX_QS8_DWCONV_UKERNELS; i++) {
42 // Replace only the microkernel the matching kernel size.
43 if (xnn_params.qs8.dwconv[i].primary_tile == primary_tile) {
44 // Note: do not directly assign to xnn_params.qs8.dwconv[i] because it breaks older gcc.
45 xnn_params.qs8.dwconv[i].minmax.unipass = xnn_dwconv_unipass_ukernel_function(dwconv);
46 xnn_params.qs8.dwconv[i].channel_tile = channel_tile;
47 xnn_params.qs8.dwconv[i].primary_tile = primary_tile;
48 xnn_params.qs8.dwconv[i].incremental_tile = 0;
49 xnn_params.qs8.dwconv[i].init.qs8 = init_params;
50 break;
51 }
52 }
53
54 auto execution_plan = model_factory(nullptr);
55 if (execution_plan.empty()) {
56 state.SkipWithError("failed to create a model");
57 return;
58 }
59
60 for (auto _ : state) {
61 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
62 xnn_status status = xnn_run_operator(op.get(), nullptr);
63 if (status != xnn_status_success) {
64 state.SkipWithError("failed to run a model");
65 return;
66 }
67 }
68 }
69
70 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
71 if (cpu_frequency != 0) {
72 state.counters["cpufreq"] = cpu_frequency;
73 }
74}
75
Marat Dukhanbbfc6d32021-07-26 18:31:02 -070076
77#if XNN_ARCH_ARM || XNN_ARCH_ARM64
78 static void qs8_dwconv_up8x9__neon_mul8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
79 DWConvEnd2EndBenchmark(state, model,
80 xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64,
81 xnn_init_qs8_conv_minmax_rndnu_neon_params,
82 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
83 }
84 static void qs8_dwconv_up16x9__neon_mul8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
85 DWConvEnd2EndBenchmark(state, model,
86 xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64,
87 xnn_init_qs8_conv_minmax_rndnu_neon_params,
88 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
89 }
90 static void qs8_dwconv_up16x9__neon_mul8_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
91 DWConvEnd2EndBenchmark(state, model,
92 xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128,
93 xnn_init_qs8_conv_minmax_rndnu_neon_params,
94 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
95 }
96 static void qs8_dwconv_up8x9__neon_mla8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
97 DWConvEnd2EndBenchmark(state, model,
98 xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64,
99 xnn_init_qs8_conv_minmax_rndnu_neon_params,
100 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
101 }
102 static void qs8_dwconv_up16x9__neon_mla8_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
103 DWConvEnd2EndBenchmark(state, model,
104 xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64,
105 xnn_init_qs8_conv_minmax_rndnu_neon_params,
106 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
107 }
108 static void qs8_dwconv_up16x9__neon_mla8_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
109 DWConvEnd2EndBenchmark(state, model,
110 xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128,
111 xnn_init_qs8_conv_minmax_rndnu_neon_params,
112 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
113 }
114 static void qs8_dwconv_up8x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
115 DWConvEnd2EndBenchmark(state, model,
116 xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
117 xnn_init_qs8_conv_minmax_rndnu_neon_params,
118 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
119 }
120 static void qs8_dwconv_up16x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
121 DWConvEnd2EndBenchmark(state, model,
122 xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16,
123 xnn_init_qs8_conv_minmax_rndnu_neon_params,
124 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
125 }
Frank Barchard2aa2e2a2021-09-16 14:59:13 -0700126 static void qs8_dwconv_up24x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
127 DWConvEnd2EndBenchmark(state, model,
128 xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16,
129 xnn_init_qs8_conv_minmax_rndnu_neon_params,
130 24 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
131 }
132 static void qs8_dwconv_up32x9__neon_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
133 DWConvEnd2EndBenchmark(state, model,
134 xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16,
135 xnn_init_qs8_conv_minmax_rndnu_neon_params,
136 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckNEON);
137 }
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700138
139 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__neon_mul8_ld64);
140 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mul8_ld64);
141 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mul8_ld128);
142 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__neon_mla8_ld64);
143 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mla8_ld64);
144 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mla8_ld128);
145 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__neon_mul16);
146 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__neon_mul16);
Frank Barchard2aa2e2a2021-09-16 14:59:13 -0700147 BENCHMARK_QS8_END2END(qs8_dwconv_up24x9__neon_mul16);
148 BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__neon_mul16);
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700149#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
150
151
152#if XNN_ARCH_X86 || XNN_ARCH_X86_64
153 static void qs8_dwconv_up16x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
154 DWConvEnd2EndBenchmark(state, model,
155 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx512skx_mul32,
156 xnn_init_qs8_conv_minmax_fp32_avx512_params,
157 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
158 }
159 static void qs8_dwconv_up32x9__avx512skx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
160 DWConvEnd2EndBenchmark(state, model,
161 xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx512skx_mul32,
162 xnn_init_qs8_conv_minmax_fp32_avx512_params,
163 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX512SKX);
164 }
Marat Dukhan881ab022021-07-28 13:49:26 -0700165 static void qs8_dwconv_up16x9__avx2_mul16_vpmovsx(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700166 DWConvEnd2EndBenchmark(state, model,
Marat Dukhan881ab022021-07-28 13:49:26 -0700167 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpmovsx,
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700168 xnn_init_qs8_conv_minmax_fp32_avx2_params,
169 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
170 }
Marat Dukhan881ab022021-07-28 13:49:26 -0700171 static void qs8_dwconv_up32x9__avx2_mul16_vpmovsx(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700172 DWConvEnd2EndBenchmark(state, model,
Marat Dukhan881ab022021-07-28 13:49:26 -0700173 xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx,
174 xnn_init_qs8_conv_minmax_fp32_avx2_params,
175 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
176 }
177 static void qs8_dwconv_up16x9__avx2_mul16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
178 DWConvEnd2EndBenchmark(state, model,
179 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_vpunpck,
180 xnn_init_qs8_conv_minmax_fp32_avx2_params,
181 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
182 }
183 static void qs8_dwconv_up32x9__avx2_mul16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
184 DWConvEnd2EndBenchmark(state, model,
185 xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpunpck,
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700186 xnn_init_qs8_conv_minmax_fp32_avx2_params,
187 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
188 }
Marat Dukhan60bb7ec2021-07-28 18:51:28 -0700189 static void qs8_dwconv_up16x9__avx2_mul16_add16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
190 DWConvEnd2EndBenchmark(state, model,
191 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul16_add16_vpunpck,
192 xnn_init_qs8_conv_minmax_fp32_avx2_params,
193 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
194 }
195 static void qs8_dwconv_up32x9__avx2_mul16_add16_vpunpck(benchmark::State& state, models::ExecutionPlanFactory model) {
196 DWConvEnd2EndBenchmark(state, model,
197 xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_add16_vpunpck,
198 xnn_init_qs8_conv_minmax_fp32_avx2_params,
199 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
200 }
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700201 static void qs8_dwconv_up8x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
202 DWConvEnd2EndBenchmark(state, model,
203 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx2_mul32,
204 xnn_init_qs8_conv_minmax_fp32_avx2_params,
205 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
206 }
207 static void qs8_dwconv_up16x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
208 DWConvEnd2EndBenchmark(state, model,
209 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx2_mul32,
210 xnn_init_qs8_conv_minmax_fp32_avx2_params,
211 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
212 }
213 static void qs8_dwconv_up32x9__avx2_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
214 DWConvEnd2EndBenchmark(state, model,
215 xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32,
216 xnn_init_qs8_conv_minmax_fp32_avx2_params,
217 32 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX2);
218 }
219 static void qs8_dwconv_up8x9__xop_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
220 DWConvEnd2EndBenchmark(state, model,
221 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul16_add16,
222 xnn_init_qs8_conv_minmax_fp32_sse4_params,
223 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
224 }
225 static void qs8_dwconv_up16x9__xop_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
226 DWConvEnd2EndBenchmark(state, model,
227 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul16_add16,
228 xnn_init_qs8_conv_minmax_fp32_sse4_params,
229 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
230 }
Marat Dukhancc967702021-07-27 22:38:59 -0700231 static void qs8_dwconv_up8x9__xop_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
232 DWConvEnd2EndBenchmark(state, model,
233 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__xop_mul32,
234 xnn_init_qs8_conv_minmax_fp32_sse4_params,
235 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
236 }
237 static void qs8_dwconv_up16x9__xop_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
238 DWConvEnd2EndBenchmark(state, model,
239 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__xop_mul32,
240 xnn_init_qs8_conv_minmax_fp32_sse4_params,
241 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckXOP);
242 }
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700243 static void qs8_dwconv_up8x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
244 DWConvEnd2EndBenchmark(state, model,
245 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16,
246 xnn_init_qs8_conv_minmax_fp32_sse4_params,
247 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
248 }
249 static void qs8_dwconv_up16x9__avx_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
250 DWConvEnd2EndBenchmark(state, model,
251 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16,
252 xnn_init_qs8_conv_minmax_fp32_sse4_params,
253 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
254 }
255 static void qs8_dwconv_up8x9__avx_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
256 DWConvEnd2EndBenchmark(state, model,
257 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul16_add16,
258 xnn_init_qs8_conv_minmax_fp32_sse4_params,
259 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
260 }
261 static void qs8_dwconv_up16x9__avx_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
262 DWConvEnd2EndBenchmark(state, model,
263 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul16_add16,
264 xnn_init_qs8_conv_minmax_fp32_sse4_params,
265 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
266 }
267 static void qs8_dwconv_up8x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
268 DWConvEnd2EndBenchmark(state, model,
269 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__avx_mul32,
270 xnn_init_qs8_conv_minmax_fp32_sse4_params,
271 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
272 }
273 static void qs8_dwconv_up16x9__avx_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
274 DWConvEnd2EndBenchmark(state, model,
275 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__avx_mul32,
276 xnn_init_qs8_conv_minmax_fp32_sse4_params,
277 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckAVX);
278 }
279 static void qs8_dwconv_up8x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
280 DWConvEnd2EndBenchmark(state, model,
281 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16,
282 xnn_init_qs8_conv_minmax_fp32_sse4_params,
283 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
284 }
285 static void qs8_dwconv_up16x9__sse41_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
286 DWConvEnd2EndBenchmark(state, model,
287 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16,
288 xnn_init_qs8_conv_minmax_fp32_sse4_params,
289 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
290 }
291 static void qs8_dwconv_up8x9__sse41_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
292 DWConvEnd2EndBenchmark(state, model,
293 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul16_add16,
294 xnn_init_qs8_conv_minmax_fp32_sse4_params,
295 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
296 }
297 static void qs8_dwconv_up16x9__sse41_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
298 DWConvEnd2EndBenchmark(state, model,
299 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul16_add16,
300 xnn_init_qs8_conv_minmax_fp32_sse4_params,
301 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
302 }
303 static void qs8_dwconv_up8x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
304 DWConvEnd2EndBenchmark(state, model,
305 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse41_mul32,
306 xnn_init_qs8_conv_minmax_fp32_sse4_params,
307 8 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
308 }
309 static void qs8_dwconv_up16x9__sse41_mul32(benchmark::State& state, models::ExecutionPlanFactory model) {
310 DWConvEnd2EndBenchmark(state, model,
311 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse41_mul32,
312 xnn_init_qs8_conv_minmax_fp32_sse4_params,
313 16 /* channel tile */, 9 /* primary tile */, benchmark::utils::CheckSSE41);
314 }
315 static void qs8_dwconv_up8x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
316 DWConvEnd2EndBenchmark(state, model,
317 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16,
318 xnn_init_qs8_conv_minmax_fp32_sse2_params,
319 8 /* channel tile */, 9 /* primary tile */);
320 }
321 static void qs8_dwconv_up16x9__sse2_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
322 DWConvEnd2EndBenchmark(state, model,
323 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16,
324 xnn_init_qs8_conv_minmax_fp32_sse2_params,
325 16 /* channel tile */, 9 /* primary tile */);
326 }
327 static void qs8_dwconv_up8x9__sse2_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
328 DWConvEnd2EndBenchmark(state, model,
329 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__sse2_mul16_add16,
330 xnn_init_qs8_conv_minmax_fp32_sse2_params,
331 8 /* channel tile */, 9 /* primary tile */);
332 }
333 static void qs8_dwconv_up16x9__sse2_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
334 DWConvEnd2EndBenchmark(state, model,
335 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__sse2_mul16_add16,
336 xnn_init_qs8_conv_minmax_fp32_sse2_params,
337 16 /* channel tile */, 9 /* primary tile */);
338 }
339
340 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx512skx_mul32);
341 BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx512skx_mul32);
342
Marat Dukhan881ab022021-07-28 13:49:26 -0700343 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul16_vpmovsx);
344 BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul16_vpmovsx);
345 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul16_vpunpck);
346 BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul16_vpunpck);
Marat Dukhanbbe88242021-07-28 19:17:31 -0700347 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul16_add16_vpunpck);
348 BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul16_add16_vpunpck);
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700349 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx2_mul32);
350 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx2_mul32);
351 BENCHMARK_QS8_END2END(qs8_dwconv_up32x9__avx2_mul32);
352
353 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__xop_mul16_add16);
354 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__xop_mul16_add16);
Marat Dukhancc967702021-07-27 22:38:59 -0700355 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__xop_mul32);
356 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__xop_mul32);
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700357
358 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx_mul16);
359 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx_mul16);
360 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx_mul16_add16);
361 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx_mul16_add16);
362 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__avx_mul32);
363 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__avx_mul32);
364
365 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse41_mul16);
366 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse41_mul16);
367 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse41_mul16_add16);
368 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse41_mul16_add16);
369 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse41_mul32);
370 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse41_mul32);
371
372 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse2_mul16);
373 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse2_mul16);
374 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__sse2_mul16_add16);
375 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__sse2_mul16_add16);
376#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
377
378
Marat Dukhan4c617792021-12-21 15:47:58 -0800379#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700380 static void qs8_dwconv_up8x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
381 DWConvEnd2EndBenchmark(state, model,
382 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16,
383 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
384 8 /* channel tile */, 9 /* primary tile */);
385 }
386 static void qs8_dwconv_up16x9__wasmsimd_mul16(benchmark::State& state, models::ExecutionPlanFactory model) {
387 DWConvEnd2EndBenchmark(state, model,
388 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16,
389 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
390 16 /* channel tile */, 9 /* primary tile */);
391 }
392
Marat Dukhan9cedb592021-08-17 17:25:24 -0700393 static void qs8_dwconv_up8x9__wasmsimd_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
394 DWConvEnd2EndBenchmark(state, model,
395 xnn_qs8_dwconv_minmax_fp32_ukernel_up8x9__wasmsimd_mul16_add16,
396 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
397 8 /* channel tile */, 9 /* primary tile */);
398 }
399 static void qs8_dwconv_up16x9__wasmsimd_mul16_add16(benchmark::State& state, models::ExecutionPlanFactory model) {
400 DWConvEnd2EndBenchmark(state, model,
401 xnn_qs8_dwconv_minmax_fp32_ukernel_up16x9__wasmsimd_mul16_add16,
402 xnn_init_qs8_conv_minmax_fp32_wasmsimd_params,
403 16 /* channel tile */, 9 /* primary tile */);
404 }
405
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700406 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__wasmsimd_mul16);
407 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__wasmsimd_mul16);
Marat Dukhan9cedb592021-08-17 17:25:24 -0700408
409 BENCHMARK_QS8_END2END(qs8_dwconv_up8x9__wasmsimd_mul16_add16);
410 BENCHMARK_QS8_END2END(qs8_dwconv_up16x9__wasmsimd_mul16_add16);
Marat Dukhan4c617792021-12-21 15:47:58 -0800411#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700412
Marat Dukhan7c1115f2022-01-04 17:18:41 -0800413
414#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
415 static void qs8_dwconv_up1x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
416 DWConvEnd2EndBenchmark(state, model,
417 xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__wasm_fmagic,
418 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
419 1 /* channel tile */, 9 /* primary tile */);
420 }
421 static void qs8_dwconv_up2x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
422 DWConvEnd2EndBenchmark(state, model,
423 xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__wasm_fmagic,
424 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
425 2 /* channel tile */, 9 /* primary tile */);
426 }
427 static void qs8_dwconv_up4x9__wasm_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
428 DWConvEnd2EndBenchmark(state, model,
429 xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__wasm_fmagic,
430 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
431 4 /* channel tile */, 9 /* primary tile */);
432 }
433
434 BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__wasm_fmagic);
435 BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__wasm_fmagic);
436 BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__wasm_fmagic);
437#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
438
439
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800440static void qs8_dwconv_up1x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700441 DWConvEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800442 xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_fmagic,
443 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700444 1 /* channel tile */, 9 /* primary tile */);
445}
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800446static void qs8_dwconv_up2x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700447 DWConvEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800448 xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_fmagic,
449 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700450 2 /* channel tile */, 9 /* primary tile */);
451}
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800452static void qs8_dwconv_up4x9__scalar_fmagic(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700453 DWConvEnd2EndBenchmark(state, model,
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800454 xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_fmagic,
455 xnn_init_qs8_conv_minmax_fp32_scalar_fmagic_params,
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700456 4 /* channel tile */, 9 /* primary tile */);
457}
458
Marat Dukhan440e8ed2022-01-04 15:30:57 -0800459static void qs8_dwconv_up1x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
460 DWConvEnd2EndBenchmark(state, model,
461 xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_imagic,
462 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
463 1 /* channel tile */, 9 /* primary tile */);
464}
465static void qs8_dwconv_up2x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
466 DWConvEnd2EndBenchmark(state, model,
467 xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_imagic,
468 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
469 2 /* channel tile */, 9 /* primary tile */);
470}
471static void qs8_dwconv_up4x9__scalar_imagic(benchmark::State& state, models::ExecutionPlanFactory model) {
472 DWConvEnd2EndBenchmark(state, model,
473 xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_imagic,
474 xnn_init_qs8_conv_minmax_fp32_scalar_imagic_params,
475 4 /* channel tile */, 9 /* primary tile */);
476}
477
478static void qs8_dwconv_up1x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
479 DWConvEnd2EndBenchmark(state, model,
480 xnn_qs8_dwconv_minmax_fp32_ukernel_up1x9__scalar_lrintf,
481 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
482 1 /* channel tile */, 9 /* primary tile */);
483}
484static void qs8_dwconv_up2x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
485 DWConvEnd2EndBenchmark(state, model,
486 xnn_qs8_dwconv_minmax_fp32_ukernel_up2x9__scalar_lrintf,
487 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
488 2 /* channel tile */, 9 /* primary tile */);
489}
490static void qs8_dwconv_up4x9__scalar_lrintf(benchmark::State& state, models::ExecutionPlanFactory model) {
491 DWConvEnd2EndBenchmark(state, model,
492 xnn_qs8_dwconv_minmax_fp32_ukernel_up4x9__scalar_lrintf,
493 xnn_init_qs8_conv_minmax_fp32_scalar_lrintf_params,
494 4 /* channel tile */, 9 /* primary tile */);
495}
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700496
Marat Dukhan2ac722e2022-01-04 01:54:20 -0800497BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__scalar_fmagic);
498BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__scalar_fmagic);
499BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__scalar_fmagic);
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700500
Marat Dukhan440e8ed2022-01-04 15:30:57 -0800501BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__scalar_imagic);
502BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__scalar_imagic);
503BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__scalar_imagic);
504
505BENCHMARK_QS8_END2END(qs8_dwconv_up1x9__scalar_lrintf);
506BENCHMARK_QS8_END2END(qs8_dwconv_up2x9__scalar_lrintf);
507BENCHMARK_QS8_END2END(qs8_dwconv_up4x9__scalar_lrintf);
508
Marat Dukhanbbfc6d32021-07-26 18:31:02 -0700509
510#ifndef XNNPACK_BENCHMARK_NO_MAIN
511BENCHMARK_MAIN();
512#endif