blob: cb1e58e5ce6667b64563d1886e9d391368f2379b [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-igemm-minmax-rndnu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
Frank Barchard447aa7b2021-12-28 14:11:40 -080016#include <xnnpack/allocator.h>
Marat Dukhane903dff2021-07-16 19:43:41 -070017#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barchard870108c2022-01-26 11:21:46 -080026#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
27 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8) {
28 TEST_REQUIRES_ARM_NEON_DOT;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(8)
32 .kr(4)
33 .sr(1)
34 .m(4)
35 .n(8)
36 .k(8)
37 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
38 }
39
40 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cn) {
41 TEST_REQUIRES_ARM_NEON_DOT;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(8)
45 .kr(4)
46 .sr(1)
47 .m(4)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
51 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
52 }
53
54 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
55 TEST_REQUIRES_ARM_NEON_DOT;
56 for (uint32_t n = 1; n <= 8; n++) {
57 for (uint32_t m = 1; m <= 4; m++) {
58 GemmMicrokernelTester()
59 .mr(4)
60 .nr(8)
61 .kr(4)
62 .sr(1)
63 .m(m)
64 .n(n)
65 .k(8)
66 .iterations(1)
67 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
68 }
69 }
70 }
71
72 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
73 TEST_REQUIRES_ARM_NEON_DOT;
74 for (uint32_t m = 1; m <= 4; m++) {
75 GemmMicrokernelTester()
76 .mr(4)
77 .nr(8)
78 .kr(4)
79 .sr(1)
80 .m(m)
81 .n(8)
82 .k(8)
83 .iterations(1)
84 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
85 }
86 }
87
88 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
89 TEST_REQUIRES_ARM_NEON_DOT;
90 for (uint32_t n = 1; n <= 8; n++) {
91 GemmMicrokernelTester()
92 .mr(4)
93 .nr(8)
94 .kr(4)
95 .sr(1)
96 .m(4)
97 .n(n)
98 .k(8)
99 .iterations(1)
100 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
101 }
102 }
103
104 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8) {
105 TEST_REQUIRES_ARM_NEON_DOT;
106 for (size_t k = 1; k < 8; k++) {
107 GemmMicrokernelTester()
108 .mr(4)
109 .nr(8)
110 .kr(4)
111 .sr(1)
112 .m(4)
113 .n(8)
114 .k(k)
115 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
116 }
117 }
118
119 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
120 TEST_REQUIRES_ARM_NEON_DOT;
121 for (size_t k = 1; k < 8; k++) {
122 for (uint32_t n = 1; n <= 8; n++) {
123 for (uint32_t m = 1; m <= 4; m++) {
124 GemmMicrokernelTester()
125 .mr(4)
126 .nr(8)
127 .kr(4)
128 .sr(1)
129 .m(m)
130 .n(n)
131 .k(k)
132 .iterations(1)
133 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
134 }
135 }
136 }
137 }
138
139 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8) {
140 TEST_REQUIRES_ARM_NEON_DOT;
141 for (size_t k = 9; k < 16; k++) {
142 GemmMicrokernelTester()
143 .mr(4)
144 .nr(8)
145 .kr(4)
146 .sr(1)
147 .m(4)
148 .n(8)
149 .k(k)
150 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
151 }
152 }
153
154 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
155 TEST_REQUIRES_ARM_NEON_DOT;
156 for (size_t k = 9; k < 16; k++) {
157 for (uint32_t n = 1; n <= 8; n++) {
158 for (uint32_t m = 1; m <= 4; m++) {
159 GemmMicrokernelTester()
160 .mr(4)
161 .nr(8)
162 .kr(4)
163 .sr(1)
164 .m(m)
165 .n(n)
166 .k(k)
167 .iterations(1)
168 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
169 }
170 }
171 }
172 }
173
174 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8) {
175 TEST_REQUIRES_ARM_NEON_DOT;
176 for (size_t k = 16; k <= 80; k += 8) {
177 GemmMicrokernelTester()
178 .mr(4)
179 .nr(8)
180 .kr(4)
181 .sr(1)
182 .m(4)
183 .n(8)
184 .k(k)
185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
186 }
187 }
188
189 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, k_div_8_subtile) {
190 TEST_REQUIRES_ARM_NEON_DOT;
191 for (size_t k = 16; k <= 80; k += 8) {
192 for (uint32_t n = 1; n <= 8; n++) {
193 for (uint32_t m = 1; m <= 4; m++) {
194 GemmMicrokernelTester()
195 .mr(4)
196 .nr(8)
197 .kr(4)
198 .sr(1)
199 .m(m)
200 .n(n)
201 .k(k)
202 .iterations(1)
203 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
204 }
205 }
206 }
207 }
208
209 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8) {
210 TEST_REQUIRES_ARM_NEON_DOT;
211 for (uint32_t n = 9; n < 16; n++) {
212 for (size_t k = 1; k <= 40; k += 9) {
213 GemmMicrokernelTester()
214 .mr(4)
215 .nr(8)
216 .kr(4)
217 .sr(1)
218 .m(4)
219 .n(n)
220 .k(k)
221 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
222 }
223 }
224 }
225
226 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
227 TEST_REQUIRES_ARM_NEON_DOT;
228 for (uint32_t n = 9; n < 16; n++) {
229 for (size_t k = 1; k <= 40; k += 9) {
230 GemmMicrokernelTester()
231 .mr(4)
232 .nr(8)
233 .kr(4)
234 .sr(1)
235 .m(4)
236 .n(n)
237 .k(k)
238 .cn_stride(11)
239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
240 }
241 }
242 }
243
244 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
245 TEST_REQUIRES_ARM_NEON_DOT;
246 for (uint32_t n = 9; n < 16; n++) {
247 for (size_t k = 1; k <= 40; k += 9) {
248 for (uint32_t m = 1; m <= 4; m++) {
249 GemmMicrokernelTester()
250 .mr(4)
251 .nr(8)
252 .kr(4)
253 .sr(1)
254 .m(m)
255 .n(n)
256 .k(k)
257 .iterations(1)
258 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
259 }
260 }
261 }
262 }
263
264 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8) {
265 TEST_REQUIRES_ARM_NEON_DOT;
266 for (uint32_t n = 16; n <= 24; n += 8) {
267 for (size_t k = 1; k <= 40; k += 9) {
268 GemmMicrokernelTester()
269 .mr(4)
270 .nr(8)
271 .kr(4)
272 .sr(1)
273 .m(4)
274 .n(n)
275 .k(k)
276 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
277 }
278 }
279 }
280
281 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
282 TEST_REQUIRES_ARM_NEON_DOT;
283 for (uint32_t n = 16; n <= 24; n += 8) {
284 for (size_t k = 1; k <= 40; k += 9) {
285 GemmMicrokernelTester()
286 .mr(4)
287 .nr(8)
288 .kr(4)
289 .sr(1)
290 .m(4)
291 .n(n)
292 .k(k)
293 .cn_stride(11)
294 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
295 }
296 }
297 }
298
299 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_subtile) {
300 TEST_REQUIRES_ARM_NEON_DOT;
301 for (uint32_t n = 16; n <= 24; n += 8) {
302 for (size_t k = 1; k <= 40; k += 9) {
303 for (uint32_t m = 1; m <= 4; m++) {
304 GemmMicrokernelTester()
305 .mr(4)
306 .nr(8)
307 .kr(4)
308 .sr(1)
309 .m(m)
310 .n(n)
311 .k(k)
312 .iterations(1)
313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
314 }
315 }
316 }
317 }
318
319 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel) {
320 TEST_REQUIRES_ARM_NEON_DOT;
321 for (size_t k = 1; k <= 40; k += 9) {
322 GemmMicrokernelTester()
323 .mr(4)
324 .nr(8)
325 .kr(4)
326 .sr(1)
327 .m(4)
328 .n(8)
329 .k(k)
330 .ks(3)
331 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
332 }
333 }
334
335 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, small_kernel_subtile) {
336 TEST_REQUIRES_ARM_NEON_DOT;
337 for (size_t k = 1; k <= 40; k += 9) {
338 for (uint32_t n = 1; n <= 8; n++) {
339 for (uint32_t m = 1; m <= 4; m++) {
340 GemmMicrokernelTester()
341 .mr(4)
342 .nr(8)
343 .kr(4)
344 .sr(1)
345 .m(m)
346 .n(n)
347 .k(k)
348 .ks(3)
349 .iterations(1)
350 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
351 }
352 }
353 }
354 }
355
356 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) {
357 TEST_REQUIRES_ARM_NEON_DOT;
358 for (uint32_t n = 9; n < 16; n++) {
359 for (size_t k = 1; k <= 40; k += 9) {
360 GemmMicrokernelTester()
361 .mr(4)
362 .nr(8)
363 .kr(4)
364 .sr(1)
365 .m(4)
366 .n(n)
367 .k(k)
368 .ks(3)
369 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
370 }
371 }
372 }
373
374 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, n_div_8_small_kernel) {
375 TEST_REQUIRES_ARM_NEON_DOT;
376 for (uint32_t n = 16; n <= 24; n += 8) {
377 for (size_t k = 1; k <= 40; k += 9) {
378 GemmMicrokernelTester()
379 .mr(4)
380 .nr(8)
381 .kr(4)
382 .sr(1)
383 .m(4)
384 .n(n)
385 .k(k)
386 .ks(3)
387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
388 }
389 }
390 }
391
392 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm_subtile) {
393 TEST_REQUIRES_ARM_NEON_DOT;
394 for (size_t k = 1; k <= 40; k += 9) {
395 for (uint32_t n = 1; n <= 8; n++) {
396 for (uint32_t m = 1; m <= 4; m++) {
397 GemmMicrokernelTester()
398 .mr(4)
399 .nr(8)
400 .kr(4)
401 .sr(1)
402 .m(m)
403 .n(n)
404 .k(k)
405 .cm_stride(11)
406 .iterations(1)
407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
408 }
409 }
410 }
411 }
412
413 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, a_offset) {
414 TEST_REQUIRES_ARM_NEON_DOT;
415 for (size_t k = 1; k <= 40; k += 9) {
416 GemmMicrokernelTester()
417 .mr(4)
418 .nr(8)
419 .kr(4)
420 .sr(1)
421 .m(4)
422 .n(8)
423 .k(k)
424 .ks(3)
425 .a_offset(163)
426 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
427 }
428 }
429
430 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, zero) {
431 TEST_REQUIRES_ARM_NEON_DOT;
432 for (size_t k = 1; k <= 40; k += 9) {
433 for (uint32_t mz = 0; mz < 4; mz++) {
434 GemmMicrokernelTester()
435 .mr(4)
436 .nr(8)
437 .kr(4)
438 .sr(1)
439 .m(4)
440 .n(8)
441 .k(k)
442 .ks(3)
443 .a_offset(163)
444 .zero_index(mz)
445 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
446 }
447 }
448 }
449
450 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmin) {
451 TEST_REQUIRES_ARM_NEON_DOT;
452 GemmMicrokernelTester()
453 .mr(4)
454 .nr(8)
455 .kr(4)
456 .sr(1)
457 .m(4)
458 .n(8)
459 .k(8)
460 .qmin(128)
461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
462 }
463
464 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, qmax) {
465 TEST_REQUIRES_ARM_NEON_DOT;
466 GemmMicrokernelTester()
467 .mr(4)
468 .nr(8)
469 .kr(4)
470 .sr(1)
471 .m(4)
472 .n(8)
473 .k(8)
474 .qmax(128)
475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
476 }
477
478 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH32_NEONDOT_CORTEX_A55, strided_cm) {
479 TEST_REQUIRES_ARM_NEON_DOT;
480 GemmMicrokernelTester()
481 .mr(4)
482 .nr(8)
483 .kr(4)
484 .sr(1)
485 .m(4)
486 .n(8)
487 .k(8)
488 .cm_stride(11)
489 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__aarch32_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
490 }
491#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS && XNN_ENABLE_ASSEMBLY
492
493
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800494#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
495 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800496 TEST_REQUIRES_ARM_NEON;
497 GemmMicrokernelTester()
498 .mr(4)
499 .nr(8)
500 .kr(1)
501 .sr(1)
502 .m(4)
503 .n(8)
504 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800505 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800506 }
507
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800508 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800509 TEST_REQUIRES_ARM_NEON;
510 GemmMicrokernelTester()
511 .mr(4)
512 .nr(8)
513 .kr(1)
514 .sr(1)
515 .m(4)
516 .n(8)
517 .k(8)
518 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800520 }
521
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800522 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800523 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800524 for (uint32_t n = 1; n <= 8; n++) {
525 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800526 GemmMicrokernelTester()
527 .mr(4)
528 .nr(8)
529 .kr(1)
530 .sr(1)
531 .m(m)
532 .n(n)
533 .k(8)
534 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800535 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800536 }
537 }
538 }
539
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800540 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800541 TEST_REQUIRES_ARM_NEON;
542 for (uint32_t m = 1; m <= 4; m++) {
543 GemmMicrokernelTester()
544 .mr(4)
545 .nr(8)
546 .kr(1)
547 .sr(1)
548 .m(m)
549 .n(8)
550 .k(8)
551 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800552 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800553 }
554 }
555
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800556 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800557 TEST_REQUIRES_ARM_NEON;
558 for (uint32_t n = 1; n <= 8; n++) {
559 GemmMicrokernelTester()
560 .mr(4)
561 .nr(8)
562 .kr(1)
563 .sr(1)
564 .m(4)
565 .n(n)
566 .k(8)
567 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800568 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800569 }
570 }
571
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800572 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800573 TEST_REQUIRES_ARM_NEON;
574 for (size_t k = 1; k < 8; k++) {
575 GemmMicrokernelTester()
576 .mr(4)
577 .nr(8)
578 .kr(1)
579 .sr(1)
580 .m(4)
581 .n(8)
582 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800583 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800584 }
585 }
586
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800587 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800588 TEST_REQUIRES_ARM_NEON;
589 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800590 for (uint32_t n = 1; n <= 8; n++) {
591 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(8)
595 .kr(1)
596 .sr(1)
597 .m(m)
598 .n(n)
599 .k(k)
600 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800601 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800602 }
603 }
604 }
605 }
606
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800607 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800608 TEST_REQUIRES_ARM_NEON;
609 for (size_t k = 9; k < 16; k++) {
610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(8)
613 .kr(1)
614 .sr(1)
615 .m(4)
616 .n(8)
617 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800618 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800619 }
620 }
621
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800622 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800623 TEST_REQUIRES_ARM_NEON;
624 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800625 for (uint32_t n = 1; n <= 8; n++) {
626 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800627 GemmMicrokernelTester()
628 .mr(4)
629 .nr(8)
630 .kr(1)
631 .sr(1)
632 .m(m)
633 .n(n)
634 .k(k)
635 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800636 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800637 }
638 }
639 }
640 }
641
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800642 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800643 TEST_REQUIRES_ARM_NEON;
644 for (size_t k = 16; k <= 80; k += 8) {
645 GemmMicrokernelTester()
646 .mr(4)
647 .nr(8)
648 .kr(1)
649 .sr(1)
650 .m(4)
651 .n(8)
652 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800653 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800654 }
655 }
656
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800657 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800658 TEST_REQUIRES_ARM_NEON;
659 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800660 for (uint32_t n = 1; n <= 8; n++) {
661 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800662 GemmMicrokernelTester()
663 .mr(4)
664 .nr(8)
665 .kr(1)
666 .sr(1)
667 .m(m)
668 .n(n)
669 .k(k)
670 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800671 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800672 }
673 }
674 }
675 }
676
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800677 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800678 TEST_REQUIRES_ARM_NEON;
679 for (uint32_t n = 9; n < 16; n++) {
680 for (size_t k = 1; k <= 40; k += 9) {
681 GemmMicrokernelTester()
682 .mr(4)
683 .nr(8)
684 .kr(1)
685 .sr(1)
686 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800687 .n(n)
Frank Barchard914f57b2021-12-13 12:31:42 -0800688 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800689 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800690 }
691 }
692 }
693
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800694 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800695 TEST_REQUIRES_ARM_NEON;
696 for (uint32_t n = 9; n < 16; n++) {
697 for (size_t k = 1; k <= 40; k += 9) {
698 GemmMicrokernelTester()
699 .mr(4)
700 .nr(8)
701 .kr(1)
702 .sr(1)
703 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800704 .n(n)
Frank Barchard914f57b2021-12-13 12:31:42 -0800705 .k(k)
706 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800708 }
709 }
710 }
711
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800712 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800713 TEST_REQUIRES_ARM_NEON;
714 for (uint32_t n = 9; n < 16; n++) {
715 for (size_t k = 1; k <= 40; k += 9) {
716 for (uint32_t m = 1; m <= 4; m++) {
717 GemmMicrokernelTester()
718 .mr(4)
719 .nr(8)
720 .kr(1)
721 .sr(1)
722 .m(m)
723 .n(n)
724 .k(k)
725 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800726 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800727 }
728 }
729 }
730 }
731
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800732 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800733 TEST_REQUIRES_ARM_NEON;
734 for (uint32_t n = 16; n <= 24; n += 8) {
735 for (size_t k = 1; k <= 40; k += 9) {
736 GemmMicrokernelTester()
737 .mr(4)
738 .nr(8)
739 .kr(1)
740 .sr(1)
741 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800742 .n(n)
Frank Barchard914f57b2021-12-13 12:31:42 -0800743 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800744 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800745 }
746 }
747 }
748
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800749 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800750 TEST_REQUIRES_ARM_NEON;
751 for (uint32_t n = 16; n <= 24; n += 8) {
752 for (size_t k = 1; k <= 40; k += 9) {
753 GemmMicrokernelTester()
754 .mr(4)
755 .nr(8)
756 .kr(1)
757 .sr(1)
758 .m(4)
759 .n(n)
760 .k(k)
761 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800762 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800763 }
764 }
765 }
766
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800767 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800768 TEST_REQUIRES_ARM_NEON;
769 for (uint32_t n = 16; n <= 24; n += 8) {
770 for (size_t k = 1; k <= 40; k += 9) {
771 for (uint32_t m = 1; m <= 4; m++) {
772 GemmMicrokernelTester()
773 .mr(4)
774 .nr(8)
775 .kr(1)
776 .sr(1)
777 .m(m)
778 .n(n)
779 .k(k)
780 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800781 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800782 }
783 }
784 }
785 }
786
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800787 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800788 TEST_REQUIRES_ARM_NEON;
789 for (size_t k = 1; k <= 40; k += 9) {
790 GemmMicrokernelTester()
791 .mr(4)
792 .nr(8)
793 .kr(1)
794 .sr(1)
795 .m(4)
796 .n(8)
797 .k(k)
798 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800799 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800800 }
801 }
802
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800803 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800804 TEST_REQUIRES_ARM_NEON;
805 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800806 for (uint32_t n = 1; n <= 8; n++) {
807 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800808 GemmMicrokernelTester()
809 .mr(4)
810 .nr(8)
811 .kr(1)
812 .sr(1)
813 .m(m)
814 .n(n)
815 .k(k)
816 .ks(3)
817 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800818 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800819 }
820 }
821 }
822 }
823
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800824 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800825 TEST_REQUIRES_ARM_NEON;
826 for (uint32_t n = 9; n < 16; n++) {
827 for (size_t k = 1; k <= 40; k += 9) {
828 GemmMicrokernelTester()
829 .mr(4)
830 .nr(8)
831 .kr(1)
832 .sr(1)
833 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800834 .n(n)
Frank Barchard914f57b2021-12-13 12:31:42 -0800835 .k(k)
836 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800837 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800838 }
839 }
840 }
841
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800842 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800843 TEST_REQUIRES_ARM_NEON;
844 for (uint32_t n = 16; n <= 24; n += 8) {
845 for (size_t k = 1; k <= 40; k += 9) {
846 GemmMicrokernelTester()
847 .mr(4)
848 .nr(8)
849 .kr(1)
850 .sr(1)
851 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800852 .n(n)
Frank Barchard914f57b2021-12-13 12:31:42 -0800853 .k(k)
854 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800855 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800856 }
857 }
858 }
859
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800860 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800861 TEST_REQUIRES_ARM_NEON;
862 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800863 for (uint32_t n = 1; n <= 8; n++) {
864 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800865 GemmMicrokernelTester()
866 .mr(4)
867 .nr(8)
868 .kr(1)
869 .sr(1)
870 .m(m)
871 .n(n)
872 .k(k)
873 .cm_stride(11)
874 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800876 }
877 }
878 }
879 }
880
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800881 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800882 TEST_REQUIRES_ARM_NEON;
883 for (size_t k = 1; k <= 40; k += 9) {
884 GemmMicrokernelTester()
885 .mr(4)
886 .nr(8)
887 .kr(1)
888 .sr(1)
889 .m(4)
890 .n(8)
891 .k(k)
892 .ks(3)
893 .a_offset(163)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800894 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800895 }
896 }
897
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800898 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800899 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800900 for (size_t k = 1; k <= 40; k += 9) {
901 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800902 GemmMicrokernelTester()
903 .mr(4)
904 .nr(8)
905 .kr(1)
906 .sr(1)
907 .m(4)
908 .n(8)
909 .k(k)
910 .ks(3)
911 .a_offset(163)
912 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800914 }
915 }
916 }
917
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800918 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800919 TEST_REQUIRES_ARM_NEON;
920 GemmMicrokernelTester()
921 .mr(4)
922 .nr(8)
923 .kr(1)
924 .sr(1)
925 .m(4)
926 .n(8)
927 .k(8)
928 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800930 }
931
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800932 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800933 TEST_REQUIRES_ARM_NEON;
934 GemmMicrokernelTester()
935 .mr(4)
936 .nr(8)
937 .kr(1)
938 .sr(1)
939 .m(4)
940 .n(8)
941 .k(8)
942 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800943 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800944 }
945
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800946 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
Frank Barchard914f57b2021-12-13 12:31:42 -0800947 TEST_REQUIRES_ARM_NEON;
948 GemmMicrokernelTester()
949 .mr(4)
950 .nr(8)
951 .kr(1)
952 .sr(1)
953 .m(4)
954 .n(8)
955 .k(8)
956 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800957 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard914f57b2021-12-13 12:31:42 -0800958 }
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800959#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800960
961
962#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800963 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800964 TEST_REQUIRES_ARM_NEON;
965 GemmMicrokernelTester()
966 .mr(2)
967 .nr(8)
968 .kr(2)
969 .sr(1)
970 .m(2)
971 .n(8)
972 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800973 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800974 }
975
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800976 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cn) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800977 TEST_REQUIRES_ARM_NEON;
978 GemmMicrokernelTester()
979 .mr(2)
980 .nr(8)
981 .kr(2)
982 .sr(1)
983 .m(2)
984 .n(8)
985 .k(8)
986 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800988 }
989
Zhi An Nge96b6bc2022-02-03 10:49:46 -0800990 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800991 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800992 for (uint32_t n = 1; n <= 8; n++) {
993 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -0800994 GemmMicrokernelTester()
995 .mr(2)
996 .nr(8)
997 .kr(2)
998 .sr(1)
999 .m(m)
1000 .n(n)
1001 .k(8)
1002 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001004 }
1005 }
1006 }
1007
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001008 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001009 TEST_REQUIRES_ARM_NEON;
1010 for (uint32_t m = 1; m <= 2; m++) {
1011 GemmMicrokernelTester()
1012 .mr(2)
1013 .nr(8)
1014 .kr(2)
1015 .sr(1)
1016 .m(m)
1017 .n(8)
1018 .k(8)
1019 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001020 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001021 }
1022 }
1023
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001024 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001025 TEST_REQUIRES_ARM_NEON;
1026 for (uint32_t n = 1; n <= 8; n++) {
1027 GemmMicrokernelTester()
1028 .mr(2)
1029 .nr(8)
1030 .kr(2)
1031 .sr(1)
1032 .m(2)
1033 .n(n)
1034 .k(8)
1035 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001036 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001037 }
1038 }
1039
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001040 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001041 TEST_REQUIRES_ARM_NEON;
1042 for (size_t k = 1; k < 8; k++) {
1043 GemmMicrokernelTester()
1044 .mr(2)
1045 .nr(8)
1046 .kr(2)
1047 .sr(1)
1048 .m(2)
1049 .n(8)
1050 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001051 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001052 }
1053 }
1054
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001055 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001056 TEST_REQUIRES_ARM_NEON;
1057 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001058 for (uint32_t n = 1; n <= 8; n++) {
1059 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001060 GemmMicrokernelTester()
1061 .mr(2)
1062 .nr(8)
1063 .kr(2)
1064 .sr(1)
1065 .m(m)
1066 .n(n)
1067 .k(k)
1068 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001069 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001070 }
1071 }
1072 }
1073 }
1074
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001075 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001076 TEST_REQUIRES_ARM_NEON;
1077 for (size_t k = 9; k < 16; k++) {
1078 GemmMicrokernelTester()
1079 .mr(2)
1080 .nr(8)
1081 .kr(2)
1082 .sr(1)
1083 .m(2)
1084 .n(8)
1085 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001086 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001087 }
1088 }
1089
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001090 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001091 TEST_REQUIRES_ARM_NEON;
1092 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001093 for (uint32_t n = 1; n <= 8; n++) {
1094 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001095 GemmMicrokernelTester()
1096 .mr(2)
1097 .nr(8)
1098 .kr(2)
1099 .sr(1)
1100 .m(m)
1101 .n(n)
1102 .k(k)
1103 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001104 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001105 }
1106 }
1107 }
1108 }
1109
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001110 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001111 TEST_REQUIRES_ARM_NEON;
1112 for (size_t k = 16; k <= 80; k += 8) {
1113 GemmMicrokernelTester()
1114 .mr(2)
1115 .nr(8)
1116 .kr(2)
1117 .sr(1)
1118 .m(2)
1119 .n(8)
1120 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001121 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001122 }
1123 }
1124
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001125 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001126 TEST_REQUIRES_ARM_NEON;
1127 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001128 for (uint32_t n = 1; n <= 8; n++) {
1129 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001130 GemmMicrokernelTester()
1131 .mr(2)
1132 .nr(8)
1133 .kr(2)
1134 .sr(1)
1135 .m(m)
1136 .n(n)
1137 .k(k)
1138 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001139 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001140 }
1141 }
1142 }
1143 }
1144
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001145 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001146 TEST_REQUIRES_ARM_NEON;
1147 for (uint32_t n = 9; n < 16; n++) {
1148 for (size_t k = 1; k <= 40; k += 9) {
1149 GemmMicrokernelTester()
1150 .mr(2)
1151 .nr(8)
1152 .kr(2)
1153 .sr(1)
1154 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001155 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001156 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001157 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001158 }
1159 }
1160 }
1161
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001162 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001163 TEST_REQUIRES_ARM_NEON;
1164 for (uint32_t n = 9; n < 16; n++) {
1165 for (size_t k = 1; k <= 40; k += 9) {
1166 GemmMicrokernelTester()
1167 .mr(2)
1168 .nr(8)
1169 .kr(2)
1170 .sr(1)
1171 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001172 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001173 .k(k)
1174 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001175 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001176 }
1177 }
1178 }
1179
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001180 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001181 TEST_REQUIRES_ARM_NEON;
1182 for (uint32_t n = 9; n < 16; n++) {
1183 for (size_t k = 1; k <= 40; k += 9) {
1184 for (uint32_t m = 1; m <= 2; m++) {
1185 GemmMicrokernelTester()
1186 .mr(2)
1187 .nr(8)
1188 .kr(2)
1189 .sr(1)
1190 .m(m)
1191 .n(n)
1192 .k(k)
1193 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001194 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001195 }
1196 }
1197 }
1198 }
1199
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001200 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001201 TEST_REQUIRES_ARM_NEON;
1202 for (uint32_t n = 16; n <= 24; n += 8) {
1203 for (size_t k = 1; k <= 40; k += 9) {
1204 GemmMicrokernelTester()
1205 .mr(2)
1206 .nr(8)
1207 .kr(2)
1208 .sr(1)
1209 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001211 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001212 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001213 }
1214 }
1215 }
1216
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001217 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001218 TEST_REQUIRES_ARM_NEON;
1219 for (uint32_t n = 16; n <= 24; n += 8) {
1220 for (size_t k = 1; k <= 40; k += 9) {
1221 GemmMicrokernelTester()
1222 .mr(2)
1223 .nr(8)
1224 .kr(2)
1225 .sr(1)
1226 .m(2)
1227 .n(n)
1228 .k(k)
1229 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001230 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001231 }
1232 }
1233 }
1234
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001235 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001236 TEST_REQUIRES_ARM_NEON;
1237 for (uint32_t n = 16; n <= 24; n += 8) {
1238 for (size_t k = 1; k <= 40; k += 9) {
1239 for (uint32_t m = 1; m <= 2; m++) {
1240 GemmMicrokernelTester()
1241 .mr(2)
1242 .nr(8)
1243 .kr(2)
1244 .sr(1)
1245 .m(m)
1246 .n(n)
1247 .k(k)
1248 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001249 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001250 }
1251 }
1252 }
1253 }
1254
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001255 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001256 TEST_REQUIRES_ARM_NEON;
1257 for (size_t k = 1; k <= 40; k += 9) {
1258 GemmMicrokernelTester()
1259 .mr(2)
1260 .nr(8)
1261 .kr(2)
1262 .sr(1)
1263 .m(2)
1264 .n(8)
1265 .k(k)
1266 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001267 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001268 }
1269 }
1270
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001271 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, small_kernel_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001272 TEST_REQUIRES_ARM_NEON;
1273 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001274 for (uint32_t n = 1; n <= 8; n++) {
1275 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001276 GemmMicrokernelTester()
1277 .mr(2)
1278 .nr(8)
1279 .kr(2)
1280 .sr(1)
1281 .m(m)
1282 .n(n)
1283 .k(k)
1284 .ks(3)
1285 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001286 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001287 }
1288 }
1289 }
1290 }
1291
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001292 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_gt_8_small_kernel) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001293 TEST_REQUIRES_ARM_NEON;
1294 for (uint32_t n = 9; n < 16; n++) {
1295 for (size_t k = 1; k <= 40; k += 9) {
1296 GemmMicrokernelTester()
1297 .mr(2)
1298 .nr(8)
1299 .kr(2)
1300 .sr(1)
1301 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001302 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001303 .k(k)
1304 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001305 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001306 }
1307 }
1308 }
1309
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001310 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, n_div_8_small_kernel) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001311 TEST_REQUIRES_ARM_NEON;
1312 for (uint32_t n = 16; n <= 24; n += 8) {
1313 for (size_t k = 1; k <= 40; k += 9) {
1314 GemmMicrokernelTester()
1315 .mr(2)
1316 .nr(8)
1317 .kr(2)
1318 .sr(1)
1319 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001320 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001321 .k(k)
1322 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001323 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001324 }
1325 }
1326 }
1327
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001328 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001329 TEST_REQUIRES_ARM_NEON;
1330 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001331 for (uint32_t n = 1; n <= 8; n++) {
1332 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001333 GemmMicrokernelTester()
1334 .mr(2)
1335 .nr(8)
1336 .kr(2)
1337 .sr(1)
1338 .m(m)
1339 .n(n)
1340 .k(k)
1341 .cm_stride(11)
1342 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001344 }
1345 }
1346 }
1347 }
1348
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001349 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, a_offset) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001350 TEST_REQUIRES_ARM_NEON;
1351 for (size_t k = 1; k <= 40; k += 9) {
1352 GemmMicrokernelTester()
1353 .mr(2)
1354 .nr(8)
1355 .kr(2)
1356 .sr(1)
1357 .m(2)
1358 .n(8)
1359 .k(k)
1360 .ks(3)
1361 .a_offset(83)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001362 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001363 }
1364 }
1365
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001366 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, zero) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001367 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001368 for (size_t k = 1; k <= 40; k += 9) {
1369 for (uint32_t mz = 0; mz < 2; mz++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001370 GemmMicrokernelTester()
1371 .mr(2)
1372 .nr(8)
1373 .kr(2)
1374 .sr(1)
1375 .m(2)
1376 .n(8)
1377 .k(k)
1378 .ks(3)
1379 .a_offset(83)
1380 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001381 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001382 }
1383 }
1384 }
1385
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001386 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmin) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001387 TEST_REQUIRES_ARM_NEON;
1388 GemmMicrokernelTester()
1389 .mr(2)
1390 .nr(8)
1391 .kr(2)
1392 .sr(1)
1393 .m(2)
1394 .n(8)
1395 .k(8)
1396 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001398 }
1399
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001400 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, qmax) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001401 TEST_REQUIRES_ARM_NEON;
1402 GemmMicrokernelTester()
1403 .mr(2)
1404 .nr(8)
1405 .kr(2)
1406 .sr(1)
1407 .m(2)
1408 .n(8)
1409 .k(8)
1410 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001411 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001412 }
1413
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001414 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD1R, strided_cm) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -08001415 TEST_REQUIRES_ARM_NEON;
1416 GemmMicrokernelTester()
1417 .mr(2)
1418 .nr(8)
1419 .kr(2)
1420 .sr(1)
1421 .m(2)
1422 .n(8)
1423 .k(8)
1424 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08001425 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1426 }
1427#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1428
1429
1430#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1431 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16) {
1432 TEST_REQUIRES_ARM_NEON;
1433 GemmMicrokernelTester()
1434 .mr(2)
1435 .nr(8)
1436 .kr(2)
1437 .sr(1)
1438 .m(2)
1439 .n(8)
1440 .k(16)
1441 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1442 }
1443
1444 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cn) {
1445 TEST_REQUIRES_ARM_NEON;
1446 GemmMicrokernelTester()
1447 .mr(2)
1448 .nr(8)
1449 .kr(2)
1450 .sr(1)
1451 .m(2)
1452 .n(8)
1453 .k(16)
1454 .cn_stride(11)
1455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1456 }
1457
1458 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
1459 TEST_REQUIRES_ARM_NEON;
1460 for (uint32_t n = 1; n <= 8; n++) {
1461 for (uint32_t m = 1; m <= 2; m++) {
1462 GemmMicrokernelTester()
1463 .mr(2)
1464 .nr(8)
1465 .kr(2)
1466 .sr(1)
1467 .m(m)
1468 .n(n)
1469 .k(16)
1470 .iterations(1)
1471 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1472 }
1473 }
1474 }
1475
1476 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
1477 TEST_REQUIRES_ARM_NEON;
1478 for (uint32_t m = 1; m <= 2; m++) {
1479 GemmMicrokernelTester()
1480 .mr(2)
1481 .nr(8)
1482 .kr(2)
1483 .sr(1)
1484 .m(m)
1485 .n(8)
1486 .k(16)
1487 .iterations(1)
1488 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1489 }
1490 }
1491
1492 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
1493 TEST_REQUIRES_ARM_NEON;
1494 for (uint32_t n = 1; n <= 8; n++) {
1495 GemmMicrokernelTester()
1496 .mr(2)
1497 .nr(8)
1498 .kr(2)
1499 .sr(1)
1500 .m(2)
1501 .n(n)
1502 .k(16)
1503 .iterations(1)
1504 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1505 }
1506 }
1507
1508 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16) {
1509 TEST_REQUIRES_ARM_NEON;
1510 for (size_t k = 1; k < 16; k++) {
1511 GemmMicrokernelTester()
1512 .mr(2)
1513 .nr(8)
1514 .kr(2)
1515 .sr(1)
1516 .m(2)
1517 .n(8)
1518 .k(k)
1519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1520 }
1521 }
1522
1523 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
1524 TEST_REQUIRES_ARM_NEON;
1525 for (size_t k = 1; k < 16; k++) {
1526 for (uint32_t n = 1; n <= 8; n++) {
1527 for (uint32_t m = 1; m <= 2; m++) {
1528 GemmMicrokernelTester()
1529 .mr(2)
1530 .nr(8)
1531 .kr(2)
1532 .sr(1)
1533 .m(m)
1534 .n(n)
1535 .k(k)
1536 .iterations(1)
1537 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1538 }
1539 }
1540 }
1541 }
1542
1543 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16) {
1544 TEST_REQUIRES_ARM_NEON;
1545 for (size_t k = 17; k < 32; k++) {
1546 GemmMicrokernelTester()
1547 .mr(2)
1548 .nr(8)
1549 .kr(2)
1550 .sr(1)
1551 .m(2)
1552 .n(8)
1553 .k(k)
1554 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1555 }
1556 }
1557
1558 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
1559 TEST_REQUIRES_ARM_NEON;
1560 for (size_t k = 17; k < 32; k++) {
1561 for (uint32_t n = 1; n <= 8; n++) {
1562 for (uint32_t m = 1; m <= 2; m++) {
1563 GemmMicrokernelTester()
1564 .mr(2)
1565 .nr(8)
1566 .kr(2)
1567 .sr(1)
1568 .m(m)
1569 .n(n)
1570 .k(k)
1571 .iterations(1)
1572 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1573 }
1574 }
1575 }
1576 }
1577
1578 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16) {
1579 TEST_REQUIRES_ARM_NEON;
1580 for (size_t k = 32; k <= 160; k += 16) {
1581 GemmMicrokernelTester()
1582 .mr(2)
1583 .nr(8)
1584 .kr(2)
1585 .sr(1)
1586 .m(2)
1587 .n(8)
1588 .k(k)
1589 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1590 }
1591 }
1592
1593 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, k_div_16_subtile) {
1594 TEST_REQUIRES_ARM_NEON;
1595 for (size_t k = 32; k <= 160; k += 16) {
1596 for (uint32_t n = 1; n <= 8; n++) {
1597 for (uint32_t m = 1; m <= 2; m++) {
1598 GemmMicrokernelTester()
1599 .mr(2)
1600 .nr(8)
1601 .kr(2)
1602 .sr(1)
1603 .m(m)
1604 .n(n)
1605 .k(k)
1606 .iterations(1)
1607 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1608 }
1609 }
1610 }
1611 }
1612
1613 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8) {
1614 TEST_REQUIRES_ARM_NEON;
1615 for (uint32_t n = 9; n < 16; n++) {
1616 for (size_t k = 1; k <= 80; k += 17) {
1617 GemmMicrokernelTester()
1618 .mr(2)
1619 .nr(8)
1620 .kr(2)
1621 .sr(1)
1622 .m(2)
1623 .n(n)
1624 .k(k)
1625 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1626 }
1627 }
1628 }
1629
1630 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
1631 TEST_REQUIRES_ARM_NEON;
1632 for (uint32_t n = 9; n < 16; n++) {
1633 for (size_t k = 1; k <= 80; k += 17) {
1634 GemmMicrokernelTester()
1635 .mr(2)
1636 .nr(8)
1637 .kr(2)
1638 .sr(1)
1639 .m(2)
1640 .n(n)
1641 .k(k)
1642 .cn_stride(11)
1643 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1644 }
1645 }
1646 }
1647
1648 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_subtile) {
1649 TEST_REQUIRES_ARM_NEON;
1650 for (uint32_t n = 9; n < 16; n++) {
1651 for (size_t k = 1; k <= 80; k += 17) {
1652 for (uint32_t m = 1; m <= 2; m++) {
1653 GemmMicrokernelTester()
1654 .mr(2)
1655 .nr(8)
1656 .kr(2)
1657 .sr(1)
1658 .m(m)
1659 .n(n)
1660 .k(k)
1661 .iterations(1)
1662 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1663 }
1664 }
1665 }
1666 }
1667
1668 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8) {
1669 TEST_REQUIRES_ARM_NEON;
1670 for (uint32_t n = 16; n <= 24; n += 8) {
1671 for (size_t k = 1; k <= 80; k += 17) {
1672 GemmMicrokernelTester()
1673 .mr(2)
1674 .nr(8)
1675 .kr(2)
1676 .sr(1)
1677 .m(2)
1678 .n(n)
1679 .k(k)
1680 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1681 }
1682 }
1683 }
1684
1685 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_strided_cn) {
1686 TEST_REQUIRES_ARM_NEON;
1687 for (uint32_t n = 16; n <= 24; n += 8) {
1688 for (size_t k = 1; k <= 80; k += 17) {
1689 GemmMicrokernelTester()
1690 .mr(2)
1691 .nr(8)
1692 .kr(2)
1693 .sr(1)
1694 .m(2)
1695 .n(n)
1696 .k(k)
1697 .cn_stride(11)
1698 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1699 }
1700 }
1701 }
1702
1703 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_subtile) {
1704 TEST_REQUIRES_ARM_NEON;
1705 for (uint32_t n = 16; n <= 24; n += 8) {
1706 for (size_t k = 1; k <= 80; k += 17) {
1707 for (uint32_t m = 1; m <= 2; m++) {
1708 GemmMicrokernelTester()
1709 .mr(2)
1710 .nr(8)
1711 .kr(2)
1712 .sr(1)
1713 .m(m)
1714 .n(n)
1715 .k(k)
1716 .iterations(1)
1717 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1718 }
1719 }
1720 }
1721 }
1722
1723 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel) {
1724 TEST_REQUIRES_ARM_NEON;
1725 for (size_t k = 1; k <= 80; k += 17) {
1726 GemmMicrokernelTester()
1727 .mr(2)
1728 .nr(8)
1729 .kr(2)
1730 .sr(1)
1731 .m(2)
1732 .n(8)
1733 .k(k)
1734 .ks(3)
1735 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1736 }
1737 }
1738
1739 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, small_kernel_subtile) {
1740 TEST_REQUIRES_ARM_NEON;
1741 for (size_t k = 1; k <= 80; k += 17) {
1742 for (uint32_t n = 1; n <= 8; n++) {
1743 for (uint32_t m = 1; m <= 2; m++) {
1744 GemmMicrokernelTester()
1745 .mr(2)
1746 .nr(8)
1747 .kr(2)
1748 .sr(1)
1749 .m(m)
1750 .n(n)
1751 .k(k)
1752 .ks(3)
1753 .iterations(1)
1754 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1755 }
1756 }
1757 }
1758 }
1759
1760 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
1761 TEST_REQUIRES_ARM_NEON;
1762 for (uint32_t n = 9; n < 16; n++) {
1763 for (size_t k = 1; k <= 80; k += 17) {
1764 GemmMicrokernelTester()
1765 .mr(2)
1766 .nr(8)
1767 .kr(2)
1768 .sr(1)
1769 .m(2)
1770 .n(n)
1771 .k(k)
1772 .ks(3)
1773 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1774 }
1775 }
1776 }
1777
1778 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, n_div_8_small_kernel) {
1779 TEST_REQUIRES_ARM_NEON;
1780 for (uint32_t n = 16; n <= 24; n += 8) {
1781 for (size_t k = 1; k <= 80; k += 17) {
1782 GemmMicrokernelTester()
1783 .mr(2)
1784 .nr(8)
1785 .kr(2)
1786 .sr(1)
1787 .m(2)
1788 .n(n)
1789 .k(k)
1790 .ks(3)
1791 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1792 }
1793 }
1794 }
1795
1796 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm_subtile) {
1797 TEST_REQUIRES_ARM_NEON;
1798 for (size_t k = 1; k <= 80; k += 17) {
1799 for (uint32_t n = 1; n <= 8; n++) {
1800 for (uint32_t m = 1; m <= 2; m++) {
1801 GemmMicrokernelTester()
1802 .mr(2)
1803 .nr(8)
1804 .kr(2)
1805 .sr(1)
1806 .m(m)
1807 .n(n)
1808 .k(k)
1809 .cm_stride(11)
1810 .iterations(1)
1811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1812 }
1813 }
1814 }
1815 }
1816
1817 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, a_offset) {
1818 TEST_REQUIRES_ARM_NEON;
1819 for (size_t k = 1; k <= 80; k += 17) {
1820 GemmMicrokernelTester()
1821 .mr(2)
1822 .nr(8)
1823 .kr(2)
1824 .sr(1)
1825 .m(2)
1826 .n(8)
1827 .k(k)
1828 .ks(3)
1829 .a_offset(163)
1830 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1831 }
1832 }
1833
1834 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, zero) {
1835 TEST_REQUIRES_ARM_NEON;
1836 for (size_t k = 1; k <= 80; k += 17) {
1837 for (uint32_t mz = 0; mz < 2; mz++) {
1838 GemmMicrokernelTester()
1839 .mr(2)
1840 .nr(8)
1841 .kr(2)
1842 .sr(1)
1843 .m(2)
1844 .n(8)
1845 .k(k)
1846 .ks(3)
1847 .a_offset(163)
1848 .zero_index(mz)
1849 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1850 }
1851 }
1852 }
1853
1854 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmin) {
1855 TEST_REQUIRES_ARM_NEON;
1856 GemmMicrokernelTester()
1857 .mr(2)
1858 .nr(8)
1859 .kr(2)
1860 .sr(1)
1861 .m(2)
1862 .n(8)
1863 .k(16)
1864 .qmin(128)
1865 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1866 }
1867
1868 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, qmax) {
1869 TEST_REQUIRES_ARM_NEON;
1870 GemmMicrokernelTester()
1871 .mr(2)
1872 .nr(8)
1873 .kr(2)
1874 .sr(1)
1875 .m(2)
1876 .n(8)
1877 .k(16)
1878 .qmax(128)
1879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1880 }
1881
1882 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_LD1R, strided_cm) {
1883 TEST_REQUIRES_ARM_NEON;
1884 GemmMicrokernelTester()
1885 .mr(2)
1886 .nr(8)
1887 .kr(2)
1888 .sr(1)
1889 .m(2)
1890 .n(8)
1891 .k(16)
1892 .cm_stride(11)
1893 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1894 }
1895#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1896
1897
1898#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1899 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8) {
1900 TEST_REQUIRES_ARM_NEON;
1901 GemmMicrokernelTester()
1902 .mr(1)
1903 .nr(8)
1904 .kr(2)
1905 .sr(1)
1906 .m(1)
1907 .n(8)
1908 .k(8)
1909 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1910 }
1911
1912 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cn) {
1913 TEST_REQUIRES_ARM_NEON;
1914 GemmMicrokernelTester()
1915 .mr(1)
1916 .nr(8)
1917 .kr(2)
1918 .sr(1)
1919 .m(1)
1920 .n(8)
1921 .k(8)
1922 .cn_stride(11)
1923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1924 }
1925
// k = 8 for every n in [1, 8]; the m loop covers only m = 1 because its bound is 1.
// Each configuration is run with iterations set to 1.
1926 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
1927 TEST_REQUIRES_ARM_NEON;
1928 for (uint32_t n = 1; n <= 8; n++) {
1929 for (uint32_t m = 1; m <= 1; m++) {
1930 GemmMicrokernelTester()
1931 .mr(1)
1932 .nr(8)
1933 .kr(2)
1934 .sr(1)
1935 .m(m)
1936 .n(n)
1937 .k(8)
1938 .iterations(1)
1939 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1940 }
1941 }
1942 }
1943
// n = 8 and k = 8 while m loops over [1, 1] (a single value); iterations set to 1.
1944 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
1945 TEST_REQUIRES_ARM_NEON;
1946 for (uint32_t m = 1; m <= 1; m++) {
1947 GemmMicrokernelTester()
1948 .mr(1)
1949 .nr(8)
1950 .kr(2)
1951 .sr(1)
1952 .m(m)
1953 .n(8)
1954 .k(8)
1955 .iterations(1)
1956 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1957 }
1958 }
1959
// m = 1 and k = 8 for every n in [1, 8]; iterations set to 1.
1960 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
1961 TEST_REQUIRES_ARM_NEON;
1962 for (uint32_t n = 1; n <= 8; n++) {
1963 GemmMicrokernelTester()
1964 .mr(1)
1965 .nr(8)
1966 .kr(2)
1967 .sr(1)
1968 .m(1)
1969 .n(n)
1970 .k(8)
1971 .iterations(1)
1972 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1973 }
1974 }
1975
// Full 1x8 tile for every k in [1, 7].
1976 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8) {
1977 TEST_REQUIRES_ARM_NEON;
1978 for (size_t k = 1; k < 8; k++) {
1979 GemmMicrokernelTester()
1980 .mr(1)
1981 .nr(8)
1982 .kr(2)
1983 .sr(1)
1984 .m(1)
1985 .n(8)
1986 .k(k)
1987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
1988 }
1989 }
1990
// Every k in [1, 7] and n in [1, 8]; the m loop covers only m = 1. iterations set to 1.
1991 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {
1992 TEST_REQUIRES_ARM_NEON;
1993 for (size_t k = 1; k < 8; k++) {
1994 for (uint32_t n = 1; n <= 8; n++) {
1995 for (uint32_t m = 1; m <= 1; m++) {
1996 GemmMicrokernelTester()
1997 .mr(1)
1998 .nr(8)
1999 .kr(2)
2000 .sr(1)
2001 .m(m)
2002 .n(n)
2003 .k(k)
2004 .iterations(1)
2005 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2006 }
2007 }
2008 }
2009 }
2010
// Full 1x8 tile for every k in [9, 15].
2011 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8) {
2012 TEST_REQUIRES_ARM_NEON;
2013 for (size_t k = 9; k < 16; k++) {
2014 GemmMicrokernelTester()
2015 .mr(1)
2016 .nr(8)
2017 .kr(2)
2018 .sr(1)
2019 .m(1)
2020 .n(8)
2021 .k(k)
2022 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2023 }
2024 }
2025
// Every k in [9, 15] and n in [1, 8]; the m loop covers only m = 1. iterations set to 1.
2026 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {
2027 TEST_REQUIRES_ARM_NEON;
2028 for (size_t k = 9; k < 16; k++) {
2029 for (uint32_t n = 1; n <= 8; n++) {
2030 for (uint32_t m = 1; m <= 1; m++) {
2031 GemmMicrokernelTester()
2032 .mr(1)
2033 .nr(8)
2034 .kr(2)
2035 .sr(1)
2036 .m(m)
2037 .n(n)
2038 .k(k)
2039 .iterations(1)
2040 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2041 }
2042 }
2043 }
2044 }
2045
// Full 1x8 tile for k = 16, 24, ..., 80 (multiples of 8).
2046 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8) {
2047 TEST_REQUIRES_ARM_NEON;
2048 for (size_t k = 16; k <= 80; k += 8) {
2049 GemmMicrokernelTester()
2050 .mr(1)
2051 .nr(8)
2052 .kr(2)
2053 .sr(1)
2054 .m(1)
2055 .n(8)
2056 .k(k)
2057 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2058 }
2059 }
2060
// k = 16, 24, ..., 80 and every n in [1, 8]; the m loop covers only m = 1. iterations set to 1.
2061 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, k_div_8_subtile) {
2062 TEST_REQUIRES_ARM_NEON;
2063 for (size_t k = 16; k <= 80; k += 8) {
2064 for (uint32_t n = 1; n <= 8; n++) {
2065 for (uint32_t m = 1; m <= 1; m++) {
2066 GemmMicrokernelTester()
2067 .mr(1)
2068 .nr(8)
2069 .kr(2)
2070 .sr(1)
2071 .m(m)
2072 .n(n)
2073 .k(k)
2074 .iterations(1)
2075 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2076 }
2077 }
2078 }
2079 }
2080
// m = 1 with every n in [9, 15] (above nr = 8) and k in {1, 10, 19, 28, 37}.
2081 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8) {
2082 TEST_REQUIRES_ARM_NEON;
2083 for (uint32_t n = 9; n < 16; n++) {
2084 for (size_t k = 1; k <= 40; k += 9) {
2085 GemmMicrokernelTester()
2086 .mr(1)
2087 .nr(8)
2088 .kr(2)
2089 .sr(1)
2090 .m(1)
2091 .n(n)
2092 .k(k)
2093 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2094 }
2095 }
2096 }
2097
// m = 1 with every n in [9, 15], k in {1, 10, 19, 28, 37}, and cn_stride set to 11
// (larger than nr = 8).
2098 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {
2099 TEST_REQUIRES_ARM_NEON;
2100 for (uint32_t n = 9; n < 16; n++) {
2101 for (size_t k = 1; k <= 40; k += 9) {
2102 GemmMicrokernelTester()
2103 .mr(1)
2104 .nr(8)
2105 .kr(2)
2106 .sr(1)
2107 .m(1)
2108 .n(n)
2109 .k(k)
2110 .cn_stride(11)
2111 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2112 }
2113 }
2114 }
2115
// Every n in [9, 15] and k in {1, 10, 19, 28, 37}; the m loop covers only m = 1.
// iterations set to 1.
2116 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {
2117 TEST_REQUIRES_ARM_NEON;
2118 for (uint32_t n = 9; n < 16; n++) {
2119 for (size_t k = 1; k <= 40; k += 9) {
2120 for (uint32_t m = 1; m <= 1; m++) {
2121 GemmMicrokernelTester()
2122 .mr(1)
2123 .nr(8)
2124 .kr(2)
2125 .sr(1)
2126 .m(m)
2127 .n(n)
2128 .k(k)
2129 .iterations(1)
2130 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2131 }
2132 }
2133 }
2134 }
2135
// m = 1 with n in {16, 24} (multiples of nr = 8) and k in {1, 10, 19, 28, 37}.
2136 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8) {
2137 TEST_REQUIRES_ARM_NEON;
2138 for (uint32_t n = 16; n <= 24; n += 8) {
2139 for (size_t k = 1; k <= 40; k += 9) {
2140 GemmMicrokernelTester()
2141 .mr(1)
2142 .nr(8)
2143 .kr(2)
2144 .sr(1)
2145 .m(1)
2146 .n(n)
2147 .k(k)
2148 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2149 }
2150 }
2151 }
2152
// m = 1 with n in {16, 24}, k in {1, 10, 19, 28, 37}, and cn_stride set to 11
// (larger than nr = 8).
2153 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {
2154 TEST_REQUIRES_ARM_NEON;
2155 for (uint32_t n = 16; n <= 24; n += 8) {
2156 for (size_t k = 1; k <= 40; k += 9) {
2157 GemmMicrokernelTester()
2158 .mr(1)
2159 .nr(8)
2160 .kr(2)
2161 .sr(1)
2162 .m(1)
2163 .n(n)
2164 .k(k)
2165 .cn_stride(11)
2166 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2167 }
2168 }
2169 }
2170
// n in {16, 24} and k in {1, 10, 19, 28, 37}; the m loop covers only m = 1.
// iterations set to 1.
2171 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_subtile) {
2172 TEST_REQUIRES_ARM_NEON;
2173 for (uint32_t n = 16; n <= 24; n += 8) {
2174 for (size_t k = 1; k <= 40; k += 9) {
2175 for (uint32_t m = 1; m <= 1; m++) {
2176 GemmMicrokernelTester()
2177 .mr(1)
2178 .nr(8)
2179 .kr(2)
2180 .sr(1)
2181 .m(m)
2182 .n(n)
2183 .k(k)
2184 .iterations(1)
2185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2186 }
2187 }
2188 }
2189 }
2190
// Full 1x8 tile with ks set to 3, for k in {1, 10, 19, 28, 37}.
2191 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel) {
2192 TEST_REQUIRES_ARM_NEON;
2193 for (size_t k = 1; k <= 40; k += 9) {
2194 GemmMicrokernelTester()
2195 .mr(1)
2196 .nr(8)
2197 .kr(2)
2198 .sr(1)
2199 .m(1)
2200 .n(8)
2201 .k(k)
2202 .ks(3)
2203 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2204 }
2205 }
2206
// ks set to 3 for k in {1, 10, 19, 28, 37} and every n in [1, 8]; the m loop covers
// only m = 1. iterations set to 1.
2207 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, small_kernel_subtile) {
2208 TEST_REQUIRES_ARM_NEON;
2209 for (size_t k = 1; k <= 40; k += 9) {
2210 for (uint32_t n = 1; n <= 8; n++) {
2211 for (uint32_t m = 1; m <= 1; m++) {
2212 GemmMicrokernelTester()
2213 .mr(1)
2214 .nr(8)
2215 .kr(2)
2216 .sr(1)
2217 .m(m)
2218 .n(n)
2219 .k(k)
2220 .ks(3)
2221 .iterations(1)
2222 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2223 }
2224 }
2225 }
2226 }
2227
// m = 1 with ks set to 3, every n in [9, 15] and k in {1, 10, 19, 28, 37}.
2228 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_gt_8_small_kernel) {
2229 TEST_REQUIRES_ARM_NEON;
2230 for (uint32_t n = 9; n < 16; n++) {
2231 for (size_t k = 1; k <= 40; k += 9) {
2232 GemmMicrokernelTester()
2233 .mr(1)
2234 .nr(8)
2235 .kr(2)
2236 .sr(1)
2237 .m(1)
2238 .n(n)
2239 .k(k)
2240 .ks(3)
2241 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2242 }
2243 }
2244 }
2245
// m = 1 with ks set to 3, n in {16, 24} and k in {1, 10, 19, 28, 37}.
2246 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, n_div_8_small_kernel) {
2247 TEST_REQUIRES_ARM_NEON;
2248 for (uint32_t n = 16; n <= 24; n += 8) {
2249 for (size_t k = 1; k <= 40; k += 9) {
2250 GemmMicrokernelTester()
2251 .mr(1)
2252 .nr(8)
2253 .kr(2)
2254 .sr(1)
2255 .m(1)
2256 .n(n)
2257 .k(k)
2258 .ks(3)
2259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2260 }
2261 }
2262 }
2263
// cm_stride set to 11 for k in {1, 10, 19, 28, 37} and every n in [1, 8]; the m loop
// covers only m = 1. iterations set to 1.
2264 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm_subtile) {
2265 TEST_REQUIRES_ARM_NEON;
2266 for (size_t k = 1; k <= 40; k += 9) {
2267 for (uint32_t n = 1; n <= 8; n++) {
2268 for (uint32_t m = 1; m <= 1; m++) {
2269 GemmMicrokernelTester()
2270 .mr(1)
2271 .nr(8)
2272 .kr(2)
2273 .sr(1)
2274 .m(m)
2275 .n(n)
2276 .k(k)
2277 .cm_stride(11)
2278 .iterations(1)
2279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2280 }
2281 }
2282 }
2283 }
2284
// Full 1x8 tile with ks set to 3 and a_offset set to 43, for k in {1, 10, 19, 28, 37}.
2285 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, a_offset) {
2286 TEST_REQUIRES_ARM_NEON;
2287 for (size_t k = 1; k <= 40; k += 9) {
2288 GemmMicrokernelTester()
2289 .mr(1)
2290 .nr(8)
2291 .kr(2)
2292 .sr(1)
2293 .m(1)
2294 .n(8)
2295 .k(k)
2296 .ks(3)
2297 .a_offset(43)
2298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2299 }
2300 }
2301
// Full 1x8 tile with ks set to 3, a_offset set to 43 and zero_index set to mz, for
// k in {1, 10, 19, 28, 37}. The mz loop covers only mz = 0 because its bound is 1.
2302 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, zero) {
2303 TEST_REQUIRES_ARM_NEON;
2304 for (size_t k = 1; k <= 40; k += 9) {
2305 for (uint32_t mz = 0; mz < 1; mz++) {
2306 GemmMicrokernelTester()
2307 .mr(1)
2308 .nr(8)
2309 .kr(2)
2310 .sr(1)
2311 .m(1)
2312 .n(8)
2313 .k(k)
2314 .ks(3)
2315 .a_offset(43)
2316 .zero_index(mz)
2317 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2318 }
2319 }
2320 }
2321
// Full 1x8 tile with k = 8 and qmin set to 128.
2322 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmin) {
2323 TEST_REQUIRES_ARM_NEON;
2324 GemmMicrokernelTester()
2325 .mr(1)
2326 .nr(8)
2327 .kr(2)
2328 .sr(1)
2329 .m(1)
2330 .n(8)
2331 .k(8)
2332 .qmin(128)
2333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2334 }
2335
// Full 1x8 tile with k = 8 and qmax set to 128.
2336 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, qmax) {
2337 TEST_REQUIRES_ARM_NEON;
2338 GemmMicrokernelTester()
2339 .mr(1)
2340 .nr(8)
2341 .kr(2)
2342 .sr(1)
2343 .m(1)
2344 .n(8)
2345 .k(8)
2346 .qmax(128)
2347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2348 }
2349
// Full 1x8 tile with k = 8 and cm_stride set to 11 (larger than n = 8).
2350 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD2R, strided_cm) {
2351 TEST_REQUIRES_ARM_NEON;
2352 GemmMicrokernelTester()
2353 .mr(1)
2354 .nr(8)
2355 .kr(2)
2356 .sr(1)
2357 .m(1)
2358 .n(8)
2359 .k(8)
2360 .cm_stride(11)
2361 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2362 }
2363#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2364
2365
2366#if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x8 tile (m == mr, n == nr) with k = 16.
2367 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
2368 TEST_REQUIRES_ARM_NEON;
2369 GemmMicrokernelTester()
2370 .mr(1)
2371 .nr(8)
2372 .kr(2)
2373 .sr(1)
2374 .m(1)
2375 .n(8)
2376 .k(16)
2377 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2378 }
2379
// Full 1x8 tile with k = 16 and cn_stride set to 11 (larger than nr = 8).
2380 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cn) {
2381 TEST_REQUIRES_ARM_NEON;
2382 GemmMicrokernelTester()
2383 .mr(1)
2384 .nr(8)
2385 .kr(2)
2386 .sr(1)
2387 .m(1)
2388 .n(8)
2389 .k(16)
2390 .cn_stride(11)
2391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2392 }
2393
// k = 16 for every n in [1, 8]; the m loop covers only m = 1 because its bound is 1.
// Each configuration is run with iterations set to 1.
2394 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
2395 TEST_REQUIRES_ARM_NEON;
2396 for (uint32_t n = 1; n <= 8; n++) {
2397 for (uint32_t m = 1; m <= 1; m++) {
2398 GemmMicrokernelTester()
2399 .mr(1)
2400 .nr(8)
2401 .kr(2)
2402 .sr(1)
2403 .m(m)
2404 .n(n)
2405 .k(16)
2406 .iterations(1)
2407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2408 }
2409 }
2410 }
2411
// n = 8 and k = 16 while m loops over [1, 1] (a single value); iterations set to 1.
2412 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
2413 TEST_REQUIRES_ARM_NEON;
2414 for (uint32_t m = 1; m <= 1; m++) {
2415 GemmMicrokernelTester()
2416 .mr(1)
2417 .nr(8)
2418 .kr(2)
2419 .sr(1)
2420 .m(m)
2421 .n(8)
2422 .k(16)
2423 .iterations(1)
2424 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2425 }
2426 }
2427
// m = 1 and k = 16 for every n in [1, 8]; iterations set to 1.
2428 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
2429 TEST_REQUIRES_ARM_NEON;
2430 for (uint32_t n = 1; n <= 8; n++) {
2431 GemmMicrokernelTester()
2432 .mr(1)
2433 .nr(8)
2434 .kr(2)
2435 .sr(1)
2436 .m(1)
2437 .n(n)
2438 .k(16)
2439 .iterations(1)
2440 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2441 }
2442 }
2443
// Full 1x8 tile for every k in [1, 15].
2444 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
2445 TEST_REQUIRES_ARM_NEON;
2446 for (size_t k = 1; k < 16; k++) {
2447 GemmMicrokernelTester()
2448 .mr(1)
2449 .nr(8)
2450 .kr(2)
2451 .sr(1)
2452 .m(1)
2453 .n(8)
2454 .k(k)
2455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2456 }
2457 }
2458
// Every k in [1, 15] and n in [1, 8]; the m loop covers only m = 1. iterations set to 1.
2459 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
2460 TEST_REQUIRES_ARM_NEON;
2461 for (size_t k = 1; k < 16; k++) {
2462 for (uint32_t n = 1; n <= 8; n++) {
2463 for (uint32_t m = 1; m <= 1; m++) {
2464 GemmMicrokernelTester()
2465 .mr(1)
2466 .nr(8)
2467 .kr(2)
2468 .sr(1)
2469 .m(m)
2470 .n(n)
2471 .k(k)
2472 .iterations(1)
2473 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2474 }
2475 }
2476 }
2477 }
2478
// Full 1x8 tile for every k in [17, 31].
2479 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
2480 TEST_REQUIRES_ARM_NEON;
2481 for (size_t k = 17; k < 32; k++) {
2482 GemmMicrokernelTester()
2483 .mr(1)
2484 .nr(8)
2485 .kr(2)
2486 .sr(1)
2487 .m(1)
2488 .n(8)
2489 .k(k)
2490 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2491 }
2492 }
2493
// Every k in [17, 31] and n in [1, 8]; the m loop covers only m = 1. iterations set to 1.
2494 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
2495 TEST_REQUIRES_ARM_NEON;
2496 for (size_t k = 17; k < 32; k++) {
2497 for (uint32_t n = 1; n <= 8; n++) {
2498 for (uint32_t m = 1; m <= 1; m++) {
2499 GemmMicrokernelTester()
2500 .mr(1)
2501 .nr(8)
2502 .kr(2)
2503 .sr(1)
2504 .m(m)
2505 .n(n)
2506 .k(k)
2507 .iterations(1)
2508 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2509 }
2510 }
2511 }
2512 }
2513
// Full 1x8 tile for k = 32, 48, ..., 160 (multiples of 16).
2514 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16) {
2515 TEST_REQUIRES_ARM_NEON;
2516 for (size_t k = 32; k <= 160; k += 16) {
2517 GemmMicrokernelTester()
2518 .mr(1)
2519 .nr(8)
2520 .kr(2)
2521 .sr(1)
2522 .m(1)
2523 .n(8)
2524 .k(k)
2525 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2526 }
2527 }
2528
// k = 32, 48, ..., 160 and every n in [1, 8]; the m loop covers only m = 1.
// iterations set to 1.
2529 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
2530 TEST_REQUIRES_ARM_NEON;
2531 for (size_t k = 32; k <= 160; k += 16) {
2532 for (uint32_t n = 1; n <= 8; n++) {
2533 for (uint32_t m = 1; m <= 1; m++) {
2534 GemmMicrokernelTester()
2535 .mr(1)
2536 .nr(8)
2537 .kr(2)
2538 .sr(1)
2539 .m(m)
2540 .n(n)
2541 .k(k)
2542 .iterations(1)
2543 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2544 }
2545 }
2546 }
2547 }
2548
// m = 1 with every n in [9, 15] (above nr = 8) and k in {1, 18, 35, 52, 69}.
2549 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
2550 TEST_REQUIRES_ARM_NEON;
2551 for (uint32_t n = 9; n < 16; n++) {
2552 for (size_t k = 1; k <= 80; k += 17) {
2553 GemmMicrokernelTester()
2554 .mr(1)
2555 .nr(8)
2556 .kr(2)
2557 .sr(1)
2558 .m(1)
2559 .n(n)
2560 .k(k)
2561 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2562 }
2563 }
2564 }
2565
// m = 1 with every n in [9, 15], k in {1, 18, 35, 52, 69}, and cn_stride set to 11
// (larger than nr = 8).
2566 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
2567 TEST_REQUIRES_ARM_NEON;
2568 for (uint32_t n = 9; n < 16; n++) {
2569 for (size_t k = 1; k <= 80; k += 17) {
2570 GemmMicrokernelTester()
2571 .mr(1)
2572 .nr(8)
2573 .kr(2)
2574 .sr(1)
2575 .m(1)
2576 .n(n)
2577 .k(k)
2578 .cn_stride(11)
2579 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2580 }
2581 }
2582 }
2583
// Every n in [9, 15] and k in {1, 18, 35, 52, 69}; the m loop covers only m = 1.
// iterations set to 1.
2584 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
2585 TEST_REQUIRES_ARM_NEON;
2586 for (uint32_t n = 9; n < 16; n++) {
2587 for (size_t k = 1; k <= 80; k += 17) {
2588 for (uint32_t m = 1; m <= 1; m++) {
2589 GemmMicrokernelTester()
2590 .mr(1)
2591 .nr(8)
2592 .kr(2)
2593 .sr(1)
2594 .m(m)
2595 .n(n)
2596 .k(k)
2597 .iterations(1)
2598 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2599 }
2600 }
2601 }
2602 }
2603
// m = 1 with n in {16, 24} (multiples of nr = 8) and k in {1, 18, 35, 52, 69}.
2604 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8) {
2605 TEST_REQUIRES_ARM_NEON;
2606 for (uint32_t n = 16; n <= 24; n += 8) {
2607 for (size_t k = 1; k <= 80; k += 17) {
2608 GemmMicrokernelTester()
2609 .mr(1)
2610 .nr(8)
2611 .kr(2)
2612 .sr(1)
2613 .m(1)
2614 .n(n)
2615 .k(k)
2616 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2617 }
2618 }
2619 }
2620
// m = 1 with n in {16, 24}, k in {1, 18, 35, 52, 69}, and cn_stride set to 11
// (larger than nr = 8).
2621 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
2622 TEST_REQUIRES_ARM_NEON;
2623 for (uint32_t n = 16; n <= 24; n += 8) {
2624 for (size_t k = 1; k <= 80; k += 17) {
2625 GemmMicrokernelTester()
2626 .mr(1)
2627 .nr(8)
2628 .kr(2)
2629 .sr(1)
2630 .m(1)
2631 .n(n)
2632 .k(k)
2633 .cn_stride(11)
2634 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2635 }
2636 }
2637 }
2638
// n in {16, 24} and k in {1, 18, 35, 52, 69}; the m loop covers only m = 1.
// iterations set to 1.
2639 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
2640 TEST_REQUIRES_ARM_NEON;
2641 for (uint32_t n = 16; n <= 24; n += 8) {
2642 for (size_t k = 1; k <= 80; k += 17) {
2643 for (uint32_t m = 1; m <= 1; m++) {
2644 GemmMicrokernelTester()
2645 .mr(1)
2646 .nr(8)
2647 .kr(2)
2648 .sr(1)
2649 .m(m)
2650 .n(n)
2651 .k(k)
2652 .iterations(1)
2653 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2654 }
2655 }
2656 }
2657 }
2658
// Full 1x8 tile with ks set to 3, for k in {1, 18, 35, 52, 69}.
2659 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel) {
2660 TEST_REQUIRES_ARM_NEON;
2661 for (size_t k = 1; k <= 80; k += 17) {
2662 GemmMicrokernelTester()
2663 .mr(1)
2664 .nr(8)
2665 .kr(2)
2666 .sr(1)
2667 .m(1)
2668 .n(8)
2669 .k(k)
2670 .ks(3)
2671 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2672 }
2673 }
2674
// ks set to 3 for k in {1, 18, 35, 52, 69} and every n in [1, 8]; the m loop covers
// only m = 1. iterations set to 1.
2675 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, small_kernel_subtile) {
2676 TEST_REQUIRES_ARM_NEON;
2677 for (size_t k = 1; k <= 80; k += 17) {
2678 for (uint32_t n = 1; n <= 8; n++) {
2679 for (uint32_t m = 1; m <= 1; m++) {
2680 GemmMicrokernelTester()
2681 .mr(1)
2682 .nr(8)
2683 .kr(2)
2684 .sr(1)
2685 .m(m)
2686 .n(n)
2687 .k(k)
2688 .ks(3)
2689 .iterations(1)
2690 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2691 }
2692 }
2693 }
2694 }
2695
// m = 1 with ks set to 3, every n in [9, 15] and k in {1, 18, 35, 52, 69}.
2696 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
2697 TEST_REQUIRES_ARM_NEON;
2698 for (uint32_t n = 9; n < 16; n++) {
2699 for (size_t k = 1; k <= 80; k += 17) {
2700 GemmMicrokernelTester()
2701 .mr(1)
2702 .nr(8)
2703 .kr(2)
2704 .sr(1)
2705 .m(1)
2706 .n(n)
2707 .k(k)
2708 .ks(3)
2709 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2710 }
2711 }
2712 }
2713
// m = 1 with ks set to 3, n in {16, 24} and k in {1, 18, 35, 52, 69}.
2714 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_small_kernel) {
2715 TEST_REQUIRES_ARM_NEON;
2716 for (uint32_t n = 16; n <= 24; n += 8) {
2717 for (size_t k = 1; k <= 80; k += 17) {
2718 GemmMicrokernelTester()
2719 .mr(1)
2720 .nr(8)
2721 .kr(2)
2722 .sr(1)
2723 .m(1)
2724 .n(n)
2725 .k(k)
2726 .ks(3)
2727 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2728 }
2729 }
2730 }
2731
// cm_stride set to 11 for k in {1, 18, 35, 52, 69} and every n in [1, 8]; the m loop
// covers only m = 1. iterations set to 1.
2732 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
2733 TEST_REQUIRES_ARM_NEON;
2734 for (size_t k = 1; k <= 80; k += 17) {
2735 for (uint32_t n = 1; n <= 8; n++) {
2736 for (uint32_t m = 1; m <= 1; m++) {
2737 GemmMicrokernelTester()
2738 .mr(1)
2739 .nr(8)
2740 .kr(2)
2741 .sr(1)
2742 .m(m)
2743 .n(n)
2744 .k(k)
2745 .cm_stride(11)
2746 .iterations(1)
2747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2748 }
2749 }
2750 }
2751 }
2752
// Full 1x8 tile with ks set to 3 and a_offset set to 83, for k in {1, 18, 35, 52, 69}.
2753 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, a_offset) {
2754 TEST_REQUIRES_ARM_NEON;
2755 for (size_t k = 1; k <= 80; k += 17) {
2756 GemmMicrokernelTester()
2757 .mr(1)
2758 .nr(8)
2759 .kr(2)
2760 .sr(1)
2761 .m(1)
2762 .n(8)
2763 .k(k)
2764 .ks(3)
2765 .a_offset(83)
2766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2767 }
2768 }
2769
// Full 1x8 tile with ks set to 3, a_offset set to 83 and zero_index set to mz, for
// k in {1, 18, 35, 52, 69}. The mz loop covers only mz = 0 because its bound is 1.
2770 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, zero) {
2771 TEST_REQUIRES_ARM_NEON;
2772 for (size_t k = 1; k <= 80; k += 17) {
2773 for (uint32_t mz = 0; mz < 1; mz++) {
2774 GemmMicrokernelTester()
2775 .mr(1)
2776 .nr(8)
2777 .kr(2)
2778 .sr(1)
2779 .m(1)
2780 .n(8)
2781 .k(k)
2782 .ks(3)
2783 .a_offset(83)
2784 .zero_index(mz)
2785 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2786 }
2787 }
2788 }
2789
// Full 1x8 tile with k = 16 and qmin set to 128.
2790 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmin) {
2791 TEST_REQUIRES_ARM_NEON;
2792 GemmMicrokernelTester()
2793 .mr(1)
2794 .nr(8)
2795 .kr(2)
2796 .sr(1)
2797 .m(1)
2798 .n(8)
2799 .k(16)
2800 .qmin(128)
2801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2802 }
2803
// Full 1x8 tile with k = 16 and qmax set to 128.
2804 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmax) {
2805 TEST_REQUIRES_ARM_NEON;
2806 GemmMicrokernelTester()
2807 .mr(1)
2808 .nr(8)
2809 .kr(2)
2810 .sr(1)
2811 .m(1)
2812 .n(8)
2813 .k(16)
2814 .qmax(128)
2815 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
2816 }
2817
// Full 1x8 tile with k = 16 and cm_stride set to 11 (larger than n = 8).
2818 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm) {
2819 TEST_REQUIRES_ARM_NEON;
2820 GemmMicrokernelTester()
2821 .mr(1)
2822 .nr(8)
2823 .kr(2)
2824 .sr(1)
2825 .m(1)
2826 .n(8)
2827 .k(16)
2828 .cm_stride(11)
2829 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -08002830 }
2831#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2832
2833
2834#if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Full 1x16 tile (m == mr, n == nr) with k = 8.
Frank Barchard42f5c502021-11-16 10:04:21 -08002835 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8) {
2836 TEST_REQUIRES_ARM_NEON;
2837 GemmMicrokernelTester()
2838 .mr(1)
2839 .nr(16)
2840 .kr(2)
2841 .sr(1)
2842 .m(1)
2843 .n(16)
2844 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002845 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002846 }
2847
// Full 1x16 tile with k = 8 and cn_stride set to 19 (larger than nr = 16).
2848 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, strided_cn) {
2849 TEST_REQUIRES_ARM_NEON;
2850 GemmMicrokernelTester()
2851 .mr(1)
2852 .nr(16)
2853 .kr(2)
2854 .sr(1)
2855 .m(1)
2856 .n(16)
2857 .k(8)
2858 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002860 }
2861
// k = 8 for every n in [1, 16]; the m loop covers only m = 1 because its bound is 1.
// Each configuration is run with iterations set to 1.
2862 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
2863 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002864 for (uint32_t n = 1; n <= 16; n++) {
2865 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002866 GemmMicrokernelTester()
2867 .mr(1)
2868 .nr(16)
2869 .kr(2)
2870 .sr(1)
2871 .m(m)
2872 .n(n)
2873 .k(8)
2874 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002876 }
2877 }
2878 }
2879
// n = 16 and k = 8 while m loops over [1, 1] (a single value); iterations set to 1.
2880 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
2881 TEST_REQUIRES_ARM_NEON;
2882 for (uint32_t m = 1; m <= 1; m++) {
2883 GemmMicrokernelTester()
2884 .mr(1)
2885 .nr(16)
2886 .kr(2)
2887 .sr(1)
2888 .m(m)
2889 .n(16)
2890 .k(8)
2891 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002892 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002893 }
2894 }
2895
// m = 1 and k = 8 for every n in [1, 16]; iterations set to 1.
2896 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
2897 TEST_REQUIRES_ARM_NEON;
2898 for (uint32_t n = 1; n <= 16; n++) {
2899 GemmMicrokernelTester()
2900 .mr(1)
2901 .nr(16)
2902 .kr(2)
2903 .sr(1)
2904 .m(1)
2905 .n(n)
2906 .k(8)
2907 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002908 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002909 }
2910 }
2911
// Full 1x16 tile for every k in [1, 7].
2912 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_lt_8) {
2913 TEST_REQUIRES_ARM_NEON;
2914 for (size_t k = 1; k < 8; k++) {
2915 GemmMicrokernelTester()
2916 .mr(1)
2917 .nr(16)
2918 .kr(2)
2919 .sr(1)
2920 .m(1)
2921 .n(16)
2922 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002924 }
2925 }
2926
// Every k in [1, 7] and n in [1, 16]; the m loop covers only m = 1. iterations set to 1.
2927 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
2928 TEST_REQUIRES_ARM_NEON;
2929 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002930 for (uint32_t n = 1; n <= 16; n++) {
2931 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002932 GemmMicrokernelTester()
2933 .mr(1)
2934 .nr(16)
2935 .kr(2)
2936 .sr(1)
2937 .m(m)
2938 .n(n)
2939 .k(k)
2940 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002941 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002942 }
2943 }
2944 }
2945 }
2946
// Full 1x16 tile for every k in [9, 15].
2947 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_gt_8) {
2948 TEST_REQUIRES_ARM_NEON;
2949 for (size_t k = 9; k < 16; k++) {
2950 GemmMicrokernelTester()
2951 .mr(1)
2952 .nr(16)
2953 .kr(2)
2954 .sr(1)
2955 .m(1)
2956 .n(16)
2957 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002958 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002959 }
2960 }
2961
// Every k in [9, 15] and n in [1, 16]; the m loop covers only m = 1. iterations set to 1.
2962 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
2963 TEST_REQUIRES_ARM_NEON;
2964 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002965 for (uint32_t n = 1; n <= 16; n++) {
2966 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08002967 GemmMicrokernelTester()
2968 .mr(1)
2969 .nr(16)
2970 .kr(2)
2971 .sr(1)
2972 .m(m)
2973 .n(n)
2974 .k(k)
2975 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002976 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002977 }
2978 }
2979 }
2980 }
2981
// Full 1x16 tile for k = 16, 24, ..., 80 (multiples of 8).
2982 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_div_8) {
2983 TEST_REQUIRES_ARM_NEON;
2984 for (size_t k = 16; k <= 80; k += 8) {
2985 GemmMicrokernelTester()
2986 .mr(1)
2987 .nr(16)
2988 .kr(2)
2989 .sr(1)
2990 .m(1)
2991 .n(16)
2992 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002993 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08002994 }
2995 }
2996
// k = 16, 24, ..., 80 and every n in [1, 16]; the m loop covers only m = 1.
// iterations set to 1.
2997 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
2998 TEST_REQUIRES_ARM_NEON;
2999 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003000 for (uint32_t n = 1; n <= 16; n++) {
3001 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003002 GemmMicrokernelTester()
3003 .mr(1)
3004 .nr(16)
3005 .kr(2)
3006 .sr(1)
3007 .m(m)
3008 .n(n)
3009 .k(k)
3010 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003011 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003012 }
3013 }
3014 }
3015 }
3016
// m = 1 with every n in [17, 31] (above nr = 16) and k in {1, 10, 19, 28, 37}.
3017 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16) {
3018 TEST_REQUIRES_ARM_NEON;
3019 for (uint32_t n = 17; n < 32; n++) {
3020 for (size_t k = 1; k <= 40; k += 9) {
3021 GemmMicrokernelTester()
3022 .mr(1)
3023 .nr(16)
3024 .kr(2)
3025 .sr(1)
3026 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003027 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003028 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003029 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003030 }
3031 }
3032 }
3033
// m = 1 with every n in [17, 31], k in {1, 10, 19, 28, 37}, and cn_stride set to 19
// (larger than nr = 16).
3034 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
3035 TEST_REQUIRES_ARM_NEON;
3036 for (uint32_t n = 17; n < 32; n++) {
3037 for (size_t k = 1; k <= 40; k += 9) {
3038 GemmMicrokernelTester()
3039 .mr(1)
3040 .nr(16)
3041 .kr(2)
3042 .sr(1)
3043 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003044 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003045 .k(k)
3046 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003047 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003048 }
3049 }
3050 }
3051
// Every n in [17, 31] and k in {1, 10, 19, 28, 37}; the m loop covers only m = 1.
// iterations set to 1.
3052 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
3053 TEST_REQUIRES_ARM_NEON;
3054 for (uint32_t n = 17; n < 32; n++) {
3055 for (size_t k = 1; k <= 40; k += 9) {
3056 for (uint32_t m = 1; m <= 1; m++) {
3057 GemmMicrokernelTester()
3058 .mr(1)
3059 .nr(16)
3060 .kr(2)
3061 .sr(1)
3062 .m(m)
3063 .n(n)
3064 .k(k)
3065 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003066 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003067 }
3068 }
3069 }
3070 }
3071
3072 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16) {
3073 TEST_REQUIRES_ARM_NEON;
3074 for (uint32_t n = 32; n <= 48; n += 16) {
3075 for (size_t k = 1; k <= 40; k += 9) {
3076 GemmMicrokernelTester()
3077 .mr(1)
3078 .nr(16)
3079 .kr(2)
3080 .sr(1)
3081 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003082 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003083 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003084 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003085 }
3086 }
3087 }
3088
3089 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
3090 TEST_REQUIRES_ARM_NEON;
3091 for (uint32_t n = 32; n <= 48; n += 16) {
3092 for (size_t k = 1; k <= 40; k += 9) {
3093 GemmMicrokernelTester()
3094 .mr(1)
3095 .nr(16)
3096 .kr(2)
3097 .sr(1)
3098 .m(1)
3099 .n(n)
3100 .k(k)
3101 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003102 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003103 }
3104 }
3105 }
3106
3107 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
3108 TEST_REQUIRES_ARM_NEON;
3109 for (uint32_t n = 32; n <= 48; n += 16) {
3110 for (size_t k = 1; k <= 40; k += 9) {
3111 for (uint32_t m = 1; m <= 1; m++) {
3112 GemmMicrokernelTester()
3113 .mr(1)
3114 .nr(16)
3115 .kr(2)
3116 .sr(1)
3117 .m(m)
3118 .n(n)
3119 .k(k)
3120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003121 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003122 }
3123 }
3124 }
3125 }
3126
3127 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, small_kernel) {
3128 TEST_REQUIRES_ARM_NEON;
3129 for (size_t k = 1; k <= 40; k += 9) {
3130 GemmMicrokernelTester()
3131 .mr(1)
3132 .nr(16)
3133 .kr(2)
3134 .sr(1)
3135 .m(1)
3136 .n(16)
3137 .k(k)
3138 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003139 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003140 }
3141 }
3142
3143 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
3144 TEST_REQUIRES_ARM_NEON;
3145 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003146 for (uint32_t n = 1; n <= 16; n++) {
3147 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003148 GemmMicrokernelTester()
3149 .mr(1)
3150 .nr(16)
3151 .kr(2)
3152 .sr(1)
3153 .m(m)
3154 .n(n)
3155 .k(k)
3156 .ks(3)
3157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003158 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003159 }
3160 }
3161 }
3162 }
3163
3164 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
3165 TEST_REQUIRES_ARM_NEON;
3166 for (uint32_t n = 17; n < 32; n++) {
3167 for (size_t k = 1; k <= 40; k += 9) {
3168 GemmMicrokernelTester()
3169 .mr(1)
3170 .nr(16)
3171 .kr(2)
3172 .sr(1)
3173 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003174 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003175 .k(k)
3176 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003177 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003178 }
3179 }
3180 }
3181
3182 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
3183 TEST_REQUIRES_ARM_NEON;
3184 for (uint32_t n = 32; n <= 48; n += 16) {
3185 for (size_t k = 1; k <= 40; k += 9) {
3186 GemmMicrokernelTester()
3187 .mr(1)
3188 .nr(16)
3189 .kr(2)
3190 .sr(1)
3191 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003192 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003193 .k(k)
3194 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003195 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003196 }
3197 }
3198 }
3199
3200 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
3201 TEST_REQUIRES_ARM_NEON;
3202 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003203 for (uint32_t n = 1; n <= 16; n++) {
3204 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003205 GemmMicrokernelTester()
3206 .mr(1)
3207 .nr(16)
3208 .kr(2)
3209 .sr(1)
3210 .m(m)
3211 .n(n)
3212 .k(k)
3213 .cm_stride(19)
3214 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003216 }
3217 }
3218 }
3219 }
3220
3221 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, a_offset) {
3222 TEST_REQUIRES_ARM_NEON;
3223 for (size_t k = 1; k <= 40; k += 9) {
3224 GemmMicrokernelTester()
3225 .mr(1)
3226 .nr(16)
3227 .kr(2)
3228 .sr(1)
3229 .m(1)
3230 .n(16)
3231 .k(k)
3232 .ks(3)
3233 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003234 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003235 }
3236 }
3237
3238 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, zero) {
3239 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003240 for (size_t k = 1; k <= 40; k += 9) {
3241 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003242 GemmMicrokernelTester()
3243 .mr(1)
3244 .nr(16)
3245 .kr(2)
3246 .sr(1)
3247 .m(1)
3248 .n(16)
3249 .k(k)
3250 .ks(3)
3251 .a_offset(43)
3252 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003253 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003254 }
3255 }
3256 }
3257
3258 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, qmin) {
3259 TEST_REQUIRES_ARM_NEON;
3260 GemmMicrokernelTester()
3261 .mr(1)
3262 .nr(16)
3263 .kr(2)
3264 .sr(1)
3265 .m(1)
3266 .n(16)
3267 .k(8)
3268 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003270 }
3271
3272 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, qmax) {
3273 TEST_REQUIRES_ARM_NEON;
3274 GemmMicrokernelTester()
3275 .mr(1)
3276 .nr(16)
3277 .kr(2)
3278 .sr(1)
3279 .m(1)
3280 .n(16)
3281 .k(8)
3282 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003284 }
3285
3286 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD4R, strided_cm) {
3287 TEST_REQUIRES_ARM_NEON;
3288 GemmMicrokernelTester()
3289 .mr(1)
3290 .nr(16)
3291 .kr(2)
3292 .sr(1)
3293 .m(1)
3294 .n(16)
3295 .k(8)
3296 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003297 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003298 }
3299#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3300
3301
3302#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3303 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8) {
3304 TEST_REQUIRES_ARM_NEON;
3305 GemmMicrokernelTester()
3306 .mr(2)
3307 .nr(16)
3308 .kr(2)
3309 .sr(1)
3310 .m(2)
3311 .n(16)
3312 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003314 }
3315
3316 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, strided_cn) {
3317 TEST_REQUIRES_ARM_NEON;
3318 GemmMicrokernelTester()
3319 .mr(2)
3320 .nr(16)
3321 .kr(2)
3322 .sr(1)
3323 .m(2)
3324 .n(16)
3325 .k(8)
3326 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003327 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003328 }
3329
3330 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
3331 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003332 for (uint32_t n = 1; n <= 16; n++) {
3333 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003334 GemmMicrokernelTester()
3335 .mr(2)
3336 .nr(16)
3337 .kr(2)
3338 .sr(1)
3339 .m(m)
3340 .n(n)
3341 .k(8)
3342 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003344 }
3345 }
3346 }
3347
3348 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
3349 TEST_REQUIRES_ARM_NEON;
3350 for (uint32_t m = 1; m <= 2; m++) {
3351 GemmMicrokernelTester()
3352 .mr(2)
3353 .nr(16)
3354 .kr(2)
3355 .sr(1)
3356 .m(m)
3357 .n(16)
3358 .k(8)
3359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003360 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003361 }
3362 }
3363
3364 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
3365 TEST_REQUIRES_ARM_NEON;
3366 for (uint32_t n = 1; n <= 16; n++) {
3367 GemmMicrokernelTester()
3368 .mr(2)
3369 .nr(16)
3370 .kr(2)
3371 .sr(1)
3372 .m(2)
3373 .n(n)
3374 .k(8)
3375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003376 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003377 }
3378 }
3379
3380 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_lt_8) {
3381 TEST_REQUIRES_ARM_NEON;
3382 for (size_t k = 1; k < 8; k++) {
3383 GemmMicrokernelTester()
3384 .mr(2)
3385 .nr(16)
3386 .kr(2)
3387 .sr(1)
3388 .m(2)
3389 .n(16)
3390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003392 }
3393 }
3394
3395 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
3396 TEST_REQUIRES_ARM_NEON;
3397 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003398 for (uint32_t n = 1; n <= 16; n++) {
3399 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003400 GemmMicrokernelTester()
3401 .mr(2)
3402 .nr(16)
3403 .kr(2)
3404 .sr(1)
3405 .m(m)
3406 .n(n)
3407 .k(k)
3408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003409 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003410 }
3411 }
3412 }
3413 }
3414
3415 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_gt_8) {
3416 TEST_REQUIRES_ARM_NEON;
3417 for (size_t k = 9; k < 16; k++) {
3418 GemmMicrokernelTester()
3419 .mr(2)
3420 .nr(16)
3421 .kr(2)
3422 .sr(1)
3423 .m(2)
3424 .n(16)
3425 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003426 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003427 }
3428 }
3429
3430 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
3431 TEST_REQUIRES_ARM_NEON;
3432 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003433 for (uint32_t n = 1; n <= 16; n++) {
3434 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003435 GemmMicrokernelTester()
3436 .mr(2)
3437 .nr(16)
3438 .kr(2)
3439 .sr(1)
3440 .m(m)
3441 .n(n)
3442 .k(k)
3443 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003444 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003445 }
3446 }
3447 }
3448 }
3449
3450 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_div_8) {
3451 TEST_REQUIRES_ARM_NEON;
3452 for (size_t k = 16; k <= 80; k += 8) {
3453 GemmMicrokernelTester()
3454 .mr(2)
3455 .nr(16)
3456 .kr(2)
3457 .sr(1)
3458 .m(2)
3459 .n(16)
3460 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003462 }
3463 }
3464
3465 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
3466 TEST_REQUIRES_ARM_NEON;
3467 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003468 for (uint32_t n = 1; n <= 16; n++) {
3469 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003470 GemmMicrokernelTester()
3471 .mr(2)
3472 .nr(16)
3473 .kr(2)
3474 .sr(1)
3475 .m(m)
3476 .n(n)
3477 .k(k)
3478 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003479 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003480 }
3481 }
3482 }
3483 }
3484
3485 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16) {
3486 TEST_REQUIRES_ARM_NEON;
3487 for (uint32_t n = 17; n < 32; n++) {
3488 for (size_t k = 1; k <= 40; k += 9) {
3489 GemmMicrokernelTester()
3490 .mr(2)
3491 .nr(16)
3492 .kr(2)
3493 .sr(1)
3494 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003495 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003496 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003497 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003498 }
3499 }
3500 }
3501
3502 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
3503 TEST_REQUIRES_ARM_NEON;
3504 for (uint32_t n = 17; n < 32; n++) {
3505 for (size_t k = 1; k <= 40; k += 9) {
3506 GemmMicrokernelTester()
3507 .mr(2)
3508 .nr(16)
3509 .kr(2)
3510 .sr(1)
3511 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003512 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003513 .k(k)
3514 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003515 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003516 }
3517 }
3518 }
3519
3520 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
3521 TEST_REQUIRES_ARM_NEON;
3522 for (uint32_t n = 17; n < 32; n++) {
3523 for (size_t k = 1; k <= 40; k += 9) {
3524 for (uint32_t m = 1; m <= 2; m++) {
3525 GemmMicrokernelTester()
3526 .mr(2)
3527 .nr(16)
3528 .kr(2)
3529 .sr(1)
3530 .m(m)
3531 .n(n)
3532 .k(k)
3533 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003534 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003535 }
3536 }
3537 }
3538 }
3539
3540 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16) {
3541 TEST_REQUIRES_ARM_NEON;
3542 for (uint32_t n = 32; n <= 48; n += 16) {
3543 for (size_t k = 1; k <= 40; k += 9) {
3544 GemmMicrokernelTester()
3545 .mr(2)
3546 .nr(16)
3547 .kr(2)
3548 .sr(1)
3549 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003550 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003551 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003552 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003553 }
3554 }
3555 }
3556
3557 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
3558 TEST_REQUIRES_ARM_NEON;
3559 for (uint32_t n = 32; n <= 48; n += 16) {
3560 for (size_t k = 1; k <= 40; k += 9) {
3561 GemmMicrokernelTester()
3562 .mr(2)
3563 .nr(16)
3564 .kr(2)
3565 .sr(1)
3566 .m(2)
3567 .n(n)
3568 .k(k)
3569 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003570 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003571 }
3572 }
3573 }
3574
3575 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
3576 TEST_REQUIRES_ARM_NEON;
3577 for (uint32_t n = 32; n <= 48; n += 16) {
3578 for (size_t k = 1; k <= 40; k += 9) {
3579 for (uint32_t m = 1; m <= 2; m++) {
3580 GemmMicrokernelTester()
3581 .mr(2)
3582 .nr(16)
3583 .kr(2)
3584 .sr(1)
3585 .m(m)
3586 .n(n)
3587 .k(k)
3588 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003589 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003590 }
3591 }
3592 }
3593 }
3594
3595 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, small_kernel) {
3596 TEST_REQUIRES_ARM_NEON;
3597 for (size_t k = 1; k <= 40; k += 9) {
3598 GemmMicrokernelTester()
3599 .mr(2)
3600 .nr(16)
3601 .kr(2)
3602 .sr(1)
3603 .m(2)
3604 .n(16)
3605 .k(k)
3606 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003607 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003608 }
3609 }
3610
3611 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
3612 TEST_REQUIRES_ARM_NEON;
3613 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003614 for (uint32_t n = 1; n <= 16; n++) {
3615 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003616 GemmMicrokernelTester()
3617 .mr(2)
3618 .nr(16)
3619 .kr(2)
3620 .sr(1)
3621 .m(m)
3622 .n(n)
3623 .k(k)
3624 .ks(3)
3625 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003626 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003627 }
3628 }
3629 }
3630 }
3631
3632 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
3633 TEST_REQUIRES_ARM_NEON;
3634 for (uint32_t n = 17; n < 32; n++) {
3635 for (size_t k = 1; k <= 40; k += 9) {
3636 GemmMicrokernelTester()
3637 .mr(2)
3638 .nr(16)
3639 .kr(2)
3640 .sr(1)
3641 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003642 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003643 .k(k)
3644 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003645 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003646 }
3647 }
3648 }
3649
3650 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
3651 TEST_REQUIRES_ARM_NEON;
3652 for (uint32_t n = 32; n <= 48; n += 16) {
3653 for (size_t k = 1; k <= 40; k += 9) {
3654 GemmMicrokernelTester()
3655 .mr(2)
3656 .nr(16)
3657 .kr(2)
3658 .sr(1)
3659 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003660 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003661 .k(k)
3662 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003663 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003664 }
3665 }
3666 }
3667
3668 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
3669 TEST_REQUIRES_ARM_NEON;
3670 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003671 for (uint32_t n = 1; n <= 16; n++) {
3672 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003673 GemmMicrokernelTester()
3674 .mr(2)
3675 .nr(16)
3676 .kr(2)
3677 .sr(1)
3678 .m(m)
3679 .n(n)
3680 .k(k)
3681 .cm_stride(19)
3682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003684 }
3685 }
3686 }
3687 }
3688
3689 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, a_offset) {
3690 TEST_REQUIRES_ARM_NEON;
3691 for (size_t k = 1; k <= 40; k += 9) {
3692 GemmMicrokernelTester()
3693 .mr(2)
3694 .nr(16)
3695 .kr(2)
3696 .sr(1)
3697 .m(2)
3698 .n(16)
3699 .k(k)
3700 .ks(3)
3701 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003702 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003703 }
3704 }
3705
3706 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, zero) {
3707 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003708 for (size_t k = 1; k <= 40; k += 9) {
3709 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003710 GemmMicrokernelTester()
3711 .mr(2)
3712 .nr(16)
3713 .kr(2)
3714 .sr(1)
3715 .m(2)
3716 .n(16)
3717 .k(k)
3718 .ks(3)
3719 .a_offset(83)
3720 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003721 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003722 }
3723 }
3724 }
3725
3726 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, qmin) {
3727 TEST_REQUIRES_ARM_NEON;
3728 GemmMicrokernelTester()
3729 .mr(2)
3730 .nr(16)
3731 .kr(2)
3732 .sr(1)
3733 .m(2)
3734 .n(16)
3735 .k(8)
3736 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003737 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003738 }
3739
3740 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, qmax) {
3741 TEST_REQUIRES_ARM_NEON;
3742 GemmMicrokernelTester()
3743 .mr(2)
3744 .nr(16)
3745 .kr(2)
3746 .sr(1)
3747 .m(2)
3748 .n(16)
3749 .k(8)
3750 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003751 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003752 }
3753
3754 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MULL_LD4R, strided_cm) {
3755 TEST_REQUIRES_ARM_NEON;
3756 GemmMicrokernelTester()
3757 .mr(2)
3758 .nr(16)
3759 .kr(2)
3760 .sr(1)
3761 .m(2)
3762 .n(16)
3763 .k(8)
3764 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003765 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003766 }
3767#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3768
3769
3770#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3771 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8) {
3772 TEST_REQUIRES_ARM_NEON;
3773 GemmMicrokernelTester()
3774 .mr(3)
3775 .nr(16)
3776 .kr(2)
3777 .sr(1)
3778 .m(3)
3779 .n(16)
3780 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003781 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003782 }
3783
3784 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cn) {
3785 TEST_REQUIRES_ARM_NEON;
3786 GemmMicrokernelTester()
3787 .mr(3)
3788 .nr(16)
3789 .kr(2)
3790 .sr(1)
3791 .m(3)
3792 .n(16)
3793 .k(8)
3794 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003795 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003796 }
3797
3798 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
3799 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 16; n++) {
3801 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003802 GemmMicrokernelTester()
3803 .mr(3)
3804 .nr(16)
3805 .kr(2)
3806 .sr(1)
3807 .m(m)
3808 .n(n)
3809 .k(8)
3810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003812 }
3813 }
3814 }
3815
3816 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
3817 TEST_REQUIRES_ARM_NEON;
3818 for (uint32_t m = 1; m <= 3; m++) {
3819 GemmMicrokernelTester()
3820 .mr(3)
3821 .nr(16)
3822 .kr(2)
3823 .sr(1)
3824 .m(m)
3825 .n(16)
3826 .k(8)
3827 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003828 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003829 }
3830 }
3831
3832 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
3833 TEST_REQUIRES_ARM_NEON;
3834 for (uint32_t n = 1; n <= 16; n++) {
3835 GemmMicrokernelTester()
3836 .mr(3)
3837 .nr(16)
3838 .kr(2)
3839 .sr(1)
3840 .m(3)
3841 .n(n)
3842 .k(8)
3843 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003844 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003845 }
3846 }
3847
3848 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8) {
3849 TEST_REQUIRES_ARM_NEON;
3850 for (size_t k = 1; k < 8; k++) {
3851 GemmMicrokernelTester()
3852 .mr(3)
3853 .nr(16)
3854 .kr(2)
3855 .sr(1)
3856 .m(3)
3857 .n(16)
3858 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003860 }
3861 }
3862
3863 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
3864 TEST_REQUIRES_ARM_NEON;
3865 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003866 for (uint32_t n = 1; n <= 16; n++) {
3867 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003868 GemmMicrokernelTester()
3869 .mr(3)
3870 .nr(16)
3871 .kr(2)
3872 .sr(1)
3873 .m(m)
3874 .n(n)
3875 .k(k)
3876 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003877 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003878 }
3879 }
3880 }
3881 }
3882
3883 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8) {
3884 TEST_REQUIRES_ARM_NEON;
3885 for (size_t k = 9; k < 16; k++) {
3886 GemmMicrokernelTester()
3887 .mr(3)
3888 .nr(16)
3889 .kr(2)
3890 .sr(1)
3891 .m(3)
3892 .n(16)
3893 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003894 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003895 }
3896 }
3897
3898 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
3899 TEST_REQUIRES_ARM_NEON;
3900 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003901 for (uint32_t n = 1; n <= 16; n++) {
3902 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003903 GemmMicrokernelTester()
3904 .mr(3)
3905 .nr(16)
3906 .kr(2)
3907 .sr(1)
3908 .m(m)
3909 .n(n)
3910 .k(k)
3911 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003912 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003913 }
3914 }
3915 }
3916 }
3917
3918 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8) {
3919 TEST_REQUIRES_ARM_NEON;
3920 for (size_t k = 16; k <= 80; k += 8) {
3921 GemmMicrokernelTester()
3922 .mr(3)
3923 .nr(16)
3924 .kr(2)
3925 .sr(1)
3926 .m(3)
3927 .n(16)
3928 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003930 }
3931 }
3932
3933 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
3934 TEST_REQUIRES_ARM_NEON;
3935 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003936 for (uint32_t n = 1; n <= 16; n++) {
3937 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08003938 GemmMicrokernelTester()
3939 .mr(3)
3940 .nr(16)
3941 .kr(2)
3942 .sr(1)
3943 .m(m)
3944 .n(n)
3945 .k(k)
3946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003947 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003948 }
3949 }
3950 }
3951 }
3952
3953 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16) {
3954 TEST_REQUIRES_ARM_NEON;
3955 for (uint32_t n = 17; n < 32; n++) {
3956 for (size_t k = 1; k <= 40; k += 9) {
3957 GemmMicrokernelTester()
3958 .mr(3)
3959 .nr(16)
3960 .kr(2)
3961 .sr(1)
3962 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003963 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003964 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003965 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003966 }
3967 }
3968 }
3969
3970 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
3971 TEST_REQUIRES_ARM_NEON;
3972 for (uint32_t n = 17; n < 32; n++) {
3973 for (size_t k = 1; k <= 40; k += 9) {
3974 GemmMicrokernelTester()
3975 .mr(3)
3976 .nr(16)
3977 .kr(2)
3978 .sr(1)
3979 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003980 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08003981 .k(k)
3982 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003983 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08003984 }
3985 }
3986 }
3987
3988 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
3989 TEST_REQUIRES_ARM_NEON;
3990 for (uint32_t n = 17; n < 32; n++) {
3991 for (size_t k = 1; k <= 40; k += 9) {
3992 for (uint32_t m = 1; m <= 3; m++) {
3993 GemmMicrokernelTester()
3994 .mr(3)
3995 .nr(16)
3996 .kr(2)
3997 .sr(1)
3998 .m(m)
3999 .n(n)
4000 .k(k)
4001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004002 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004003 }
4004 }
4005 }
4006 }
4007
4008 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16) {
4009 TEST_REQUIRES_ARM_NEON;
4010 for (uint32_t n = 32; n <= 48; n += 16) {
4011 for (size_t k = 1; k <= 40; k += 9) {
4012 GemmMicrokernelTester()
4013 .mr(3)
4014 .nr(16)
4015 .kr(2)
4016 .sr(1)
4017 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004018 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004020 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004021 }
4022 }
4023 }
4024
4025 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
4026 TEST_REQUIRES_ARM_NEON;
4027 for (uint32_t n = 32; n <= 48; n += 16) {
4028 for (size_t k = 1; k <= 40; k += 9) {
4029 GemmMicrokernelTester()
4030 .mr(3)
4031 .nr(16)
4032 .kr(2)
4033 .sr(1)
4034 .m(3)
4035 .n(n)
4036 .k(k)
4037 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004038 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004039 }
4040 }
4041 }
4042
4043 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
4044 TEST_REQUIRES_ARM_NEON;
4045 for (uint32_t n = 32; n <= 48; n += 16) {
4046 for (size_t k = 1; k <= 40; k += 9) {
4047 for (uint32_t m = 1; m <= 3; m++) {
4048 GemmMicrokernelTester()
4049 .mr(3)
4050 .nr(16)
4051 .kr(2)
4052 .sr(1)
4053 .m(m)
4054 .n(n)
4055 .k(k)
4056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004057 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004058 }
4059 }
4060 }
4061 }
4062
4063 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, small_kernel) {
4064 TEST_REQUIRES_ARM_NEON;
4065 for (size_t k = 1; k <= 40; k += 9) {
4066 GemmMicrokernelTester()
4067 .mr(3)
4068 .nr(16)
4069 .kr(2)
4070 .sr(1)
4071 .m(3)
4072 .n(16)
4073 .k(k)
4074 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004075 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004076 }
4077 }
4078
4079 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
4080 TEST_REQUIRES_ARM_NEON;
4081 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004082 for (uint32_t n = 1; n <= 16; n++) {
4083 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004084 GemmMicrokernelTester()
4085 .mr(3)
4086 .nr(16)
4087 .kr(2)
4088 .sr(1)
4089 .m(m)
4090 .n(n)
4091 .k(k)
4092 .ks(3)
4093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004094 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004095 }
4096 }
4097 }
4098 }
4099
4100 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
4101 TEST_REQUIRES_ARM_NEON;
4102 for (uint32_t n = 17; n < 32; n++) {
4103 for (size_t k = 1; k <= 40; k += 9) {
4104 GemmMicrokernelTester()
4105 .mr(3)
4106 .nr(16)
4107 .kr(2)
4108 .sr(1)
4109 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004110 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004111 .k(k)
4112 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004113 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004114 }
4115 }
4116 }
4117
4118 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
4119 TEST_REQUIRES_ARM_NEON;
4120 for (uint32_t n = 32; n <= 48; n += 16) {
4121 for (size_t k = 1; k <= 40; k += 9) {
4122 GemmMicrokernelTester()
4123 .mr(3)
4124 .nr(16)
4125 .kr(2)
4126 .sr(1)
4127 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004128 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004129 .k(k)
4130 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004132 }
4133 }
4134 }
4135
4136 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
4137 TEST_REQUIRES_ARM_NEON;
4138 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004139 for (uint32_t n = 1; n <= 16; n++) {
4140 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004141 GemmMicrokernelTester()
4142 .mr(3)
4143 .nr(16)
4144 .kr(2)
4145 .sr(1)
4146 .m(m)
4147 .n(n)
4148 .k(k)
4149 .cm_stride(19)
4150 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004152 }
4153 }
4154 }
4155 }
4156
4157 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, a_offset) {
4158 TEST_REQUIRES_ARM_NEON;
4159 for (size_t k = 1; k <= 40; k += 9) {
4160 GemmMicrokernelTester()
4161 .mr(3)
4162 .nr(16)
4163 .kr(2)
4164 .sr(1)
4165 .m(3)
4166 .n(16)
4167 .k(k)
4168 .ks(3)
4169 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08004170 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004171 }
4172 }
4173
4174 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, zero) {
4175 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004176 for (size_t k = 1; k <= 40; k += 9) {
4177 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004178 GemmMicrokernelTester()
4179 .mr(3)
4180 .nr(16)
4181 .kr(2)
4182 .sr(1)
4183 .m(3)
4184 .n(16)
4185 .k(k)
4186 .ks(3)
4187 .a_offset(127)
4188 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004190 }
4191 }
4192 }
4193
4194 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, qmin) {
4195 TEST_REQUIRES_ARM_NEON;
4196 GemmMicrokernelTester()
4197 .mr(3)
4198 .nr(16)
4199 .kr(2)
4200 .sr(1)
4201 .m(3)
4202 .n(16)
4203 .k(8)
4204 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004206 }
4207
4208 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, qmax) {
4209 TEST_REQUIRES_ARM_NEON;
4210 GemmMicrokernelTester()
4211 .mr(3)
4212 .nr(16)
4213 .kr(2)
4214 .sr(1)
4215 .m(3)
4216 .n(16)
4217 .k(8)
4218 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004220 }
4221
4222 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD4R, strided_cm) {
4223 TEST_REQUIRES_ARM_NEON;
4224 GemmMicrokernelTester()
4225 .mr(3)
4226 .nr(16)
4227 .kr(2)
4228 .sr(1)
4229 .m(3)
4230 .n(16)
4231 .k(8)
4232 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004233 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004234 }
4235#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4236
4237
4238#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4239 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8) {
4240 TEST_REQUIRES_ARM_NEON;
4241 GemmMicrokernelTester()
4242 .mr(4)
4243 .nr(16)
4244 .kr(2)
4245 .sr(1)
4246 .m(4)
4247 .n(16)
4248 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004249 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004250 }
4251
4252 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, strided_cn) {
4253 TEST_REQUIRES_ARM_NEON;
4254 GemmMicrokernelTester()
4255 .mr(4)
4256 .nr(16)
4257 .kr(2)
4258 .sr(1)
4259 .m(4)
4260 .n(16)
4261 .k(8)
4262 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004263 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004264 }
4265
4266 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8_subtile) {
4267 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004268 for (uint32_t n = 1; n <= 16; n++) {
4269 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004270 GemmMicrokernelTester()
4271 .mr(4)
4272 .nr(16)
4273 .kr(2)
4274 .sr(1)
4275 .m(m)
4276 .n(n)
4277 .k(8)
4278 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004280 }
4281 }
4282 }
4283
4284 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8_subtile_m) {
4285 TEST_REQUIRES_ARM_NEON;
4286 for (uint32_t m = 1; m <= 4; m++) {
4287 GemmMicrokernelTester()
4288 .mr(4)
4289 .nr(16)
4290 .kr(2)
4291 .sr(1)
4292 .m(m)
4293 .n(16)
4294 .k(8)
4295 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004296 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004297 }
4298 }
4299
4300 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_eq_8_subtile_n) {
4301 TEST_REQUIRES_ARM_NEON;
4302 for (uint32_t n = 1; n <= 16; n++) {
4303 GemmMicrokernelTester()
4304 .mr(4)
4305 .nr(16)
4306 .kr(2)
4307 .sr(1)
4308 .m(4)
4309 .n(n)
4310 .k(8)
4311 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004312 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004313 }
4314 }
4315
4316 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_lt_8) {
4317 TEST_REQUIRES_ARM_NEON;
4318 for (size_t k = 1; k < 8; k++) {
4319 GemmMicrokernelTester()
4320 .mr(4)
4321 .nr(16)
4322 .kr(2)
4323 .sr(1)
4324 .m(4)
4325 .n(16)
4326 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004327 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004328 }
4329 }
4330
4331 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_lt_8_subtile) {
4332 TEST_REQUIRES_ARM_NEON;
4333 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004334 for (uint32_t n = 1; n <= 16; n++) {
4335 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004336 GemmMicrokernelTester()
4337 .mr(4)
4338 .nr(16)
4339 .kr(2)
4340 .sr(1)
4341 .m(m)
4342 .n(n)
4343 .k(k)
4344 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004345 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004346 }
4347 }
4348 }
4349 }
4350
4351 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_gt_8) {
4352 TEST_REQUIRES_ARM_NEON;
4353 for (size_t k = 9; k < 16; k++) {
4354 GemmMicrokernelTester()
4355 .mr(4)
4356 .nr(16)
4357 .kr(2)
4358 .sr(1)
4359 .m(4)
4360 .n(16)
4361 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004362 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004363 }
4364 }
4365
4366 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_gt_8_subtile) {
4367 TEST_REQUIRES_ARM_NEON;
4368 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004369 for (uint32_t n = 1; n <= 16; n++) {
4370 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004371 GemmMicrokernelTester()
4372 .mr(4)
4373 .nr(16)
4374 .kr(2)
4375 .sr(1)
4376 .m(m)
4377 .n(n)
4378 .k(k)
4379 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004380 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004381 }
4382 }
4383 }
4384 }
4385
4386 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_div_8) {
4387 TEST_REQUIRES_ARM_NEON;
4388 for (size_t k = 16; k <= 80; k += 8) {
4389 GemmMicrokernelTester()
4390 .mr(4)
4391 .nr(16)
4392 .kr(2)
4393 .sr(1)
4394 .m(4)
4395 .n(16)
4396 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004398 }
4399 }
4400
4401 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, k_div_8_subtile) {
4402 TEST_REQUIRES_ARM_NEON;
4403 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004404 for (uint32_t n = 1; n <= 16; n++) {
4405 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004406 GemmMicrokernelTester()
4407 .mr(4)
4408 .nr(16)
4409 .kr(2)
4410 .sr(1)
4411 .m(m)
4412 .n(n)
4413 .k(k)
4414 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004415 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004416 }
4417 }
4418 }
4419 }
4420
4421 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16) {
4422 TEST_REQUIRES_ARM_NEON;
4423 for (uint32_t n = 17; n < 32; n++) {
4424 for (size_t k = 1; k <= 40; k += 9) {
4425 GemmMicrokernelTester()
4426 .mr(4)
4427 .nr(16)
4428 .kr(2)
4429 .sr(1)
4430 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004431 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004432 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004433 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004434 }
4435 }
4436 }
4437
4438 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16_strided_cn) {
4439 TEST_REQUIRES_ARM_NEON;
4440 for (uint32_t n = 17; n < 32; n++) {
4441 for (size_t k = 1; k <= 40; k += 9) {
4442 GemmMicrokernelTester()
4443 .mr(4)
4444 .nr(16)
4445 .kr(2)
4446 .sr(1)
4447 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004448 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004449 .k(k)
4450 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004451 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004452 }
4453 }
4454 }
4455
4456 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16_subtile) {
4457 TEST_REQUIRES_ARM_NEON;
4458 for (uint32_t n = 17; n < 32; n++) {
4459 for (size_t k = 1; k <= 40; k += 9) {
4460 for (uint32_t m = 1; m <= 4; m++) {
4461 GemmMicrokernelTester()
4462 .mr(4)
4463 .nr(16)
4464 .kr(2)
4465 .sr(1)
4466 .m(m)
4467 .n(n)
4468 .k(k)
4469 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004470 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004471 }
4472 }
4473 }
4474 }
4475
4476 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16) {
4477 TEST_REQUIRES_ARM_NEON;
4478 for (uint32_t n = 32; n <= 48; n += 16) {
4479 for (size_t k = 1; k <= 40; k += 9) {
4480 GemmMicrokernelTester()
4481 .mr(4)
4482 .nr(16)
4483 .kr(2)
4484 .sr(1)
4485 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004486 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004487 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004488 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004489 }
4490 }
4491 }
4492
4493 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16_strided_cn) {
4494 TEST_REQUIRES_ARM_NEON;
4495 for (uint32_t n = 32; n <= 48; n += 16) {
4496 for (size_t k = 1; k <= 40; k += 9) {
4497 GemmMicrokernelTester()
4498 .mr(4)
4499 .nr(16)
4500 .kr(2)
4501 .sr(1)
4502 .m(4)
4503 .n(n)
4504 .k(k)
4505 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004506 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004507 }
4508 }
4509 }
4510
4511 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16_subtile) {
4512 TEST_REQUIRES_ARM_NEON;
4513 for (uint32_t n = 32; n <= 48; n += 16) {
4514 for (size_t k = 1; k <= 40; k += 9) {
4515 for (uint32_t m = 1; m <= 4; m++) {
4516 GemmMicrokernelTester()
4517 .mr(4)
4518 .nr(16)
4519 .kr(2)
4520 .sr(1)
4521 .m(m)
4522 .n(n)
4523 .k(k)
4524 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004525 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004526 }
4527 }
4528 }
4529 }
4530
4531 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, small_kernel) {
4532 TEST_REQUIRES_ARM_NEON;
4533 for (size_t k = 1; k <= 40; k += 9) {
4534 GemmMicrokernelTester()
4535 .mr(4)
4536 .nr(16)
4537 .kr(2)
4538 .sr(1)
4539 .m(4)
4540 .n(16)
4541 .k(k)
4542 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004543 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004544 }
4545 }
4546
4547 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, small_kernel_subtile) {
4548 TEST_REQUIRES_ARM_NEON;
4549 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004550 for (uint32_t n = 1; n <= 16; n++) {
4551 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004552 GemmMicrokernelTester()
4553 .mr(4)
4554 .nr(16)
4555 .kr(2)
4556 .sr(1)
4557 .m(m)
4558 .n(n)
4559 .k(k)
4560 .ks(3)
4561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004562 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004563 }
4564 }
4565 }
4566 }
4567
4568 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_gt_16_small_kernel) {
4569 TEST_REQUIRES_ARM_NEON;
4570 for (uint32_t n = 17; n < 32; n++) {
4571 for (size_t k = 1; k <= 40; k += 9) {
4572 GemmMicrokernelTester()
4573 .mr(4)
4574 .nr(16)
4575 .kr(2)
4576 .sr(1)
4577 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004578 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004579 .k(k)
4580 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004582 }
4583 }
4584 }
4585
4586 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, n_div_16_small_kernel) {
4587 TEST_REQUIRES_ARM_NEON;
4588 for (uint32_t n = 32; n <= 48; n += 16) {
4589 for (size_t k = 1; k <= 40; k += 9) {
4590 GemmMicrokernelTester()
4591 .mr(4)
4592 .nr(16)
4593 .kr(2)
4594 .sr(1)
4595 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004596 .n(n)
Frank Barchard42f5c502021-11-16 10:04:21 -08004597 .k(k)
4598 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004599 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004600 }
4601 }
4602 }
4603
4604 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, strided_cm_subtile) {
4605 TEST_REQUIRES_ARM_NEON;
4606 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004607 for (uint32_t n = 1; n <= 16; n++) {
4608 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004609 GemmMicrokernelTester()
4610 .mr(4)
4611 .nr(16)
4612 .kr(2)
4613 .sr(1)
4614 .m(m)
4615 .n(n)
4616 .k(k)
4617 .cm_stride(19)
4618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004620 }
4621 }
4622 }
4623 }
4624
4625 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, a_offset) {
4626 TEST_REQUIRES_ARM_NEON;
4627 for (size_t k = 1; k <= 40; k += 9) {
4628 GemmMicrokernelTester()
4629 .mr(4)
4630 .nr(16)
4631 .kr(2)
4632 .sr(1)
4633 .m(4)
4634 .n(16)
4635 .k(k)
4636 .ks(3)
4637 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004639 }
4640 }
4641
4642 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, zero) {
4643 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004644 for (size_t k = 1; k <= 40; k += 9) {
4645 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard42f5c502021-11-16 10:04:21 -08004646 GemmMicrokernelTester()
4647 .mr(4)
4648 .nr(16)
4649 .kr(2)
4650 .sr(1)
4651 .m(4)
4652 .n(16)
4653 .k(k)
4654 .ks(3)
4655 .a_offset(163)
4656 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004657 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004658 }
4659 }
4660 }
4661
4662 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, qmin) {
4663 TEST_REQUIRES_ARM_NEON;
4664 GemmMicrokernelTester()
4665 .mr(4)
4666 .nr(16)
4667 .kr(2)
4668 .sr(1)
4669 .m(4)
4670 .n(16)
4671 .k(8)
4672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004674 }
4675
4676 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, qmax) {
4677 TEST_REQUIRES_ARM_NEON;
4678 GemmMicrokernelTester()
4679 .mr(4)
4680 .nr(16)
4681 .kr(2)
4682 .sr(1)
4683 .m(4)
4684 .n(16)
4685 .k(8)
4686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004687 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004688 }
4689
4690 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_LD4R, strided_cm) {
4691 TEST_REQUIRES_ARM_NEON;
4692 GemmMicrokernelTester()
4693 .mr(4)
4694 .nr(16)
4695 .kr(2)
4696 .sr(1)
4697 .m(4)
4698 .n(16)
4699 .k(8)
4700 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004701 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mull_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard42f5c502021-11-16 10:04:21 -08004702 }
4703#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4704
4705
4706#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -08004707 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16) {
4708 TEST_REQUIRES_ARM_NEON;
4709 GemmMicrokernelTester()
4710 .mr(1)
4711 .nr(16)
4712 .kr(2)
4713 .sr(1)
4714 .m(1)
4715 .n(16)
4716 .k(16)
4717 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4718 }
4719
4720 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cn) {
4721 TEST_REQUIRES_ARM_NEON;
4722 GemmMicrokernelTester()
4723 .mr(1)
4724 .nr(16)
4725 .kr(2)
4726 .sr(1)
4727 .m(1)
4728 .n(16)
4729 .k(16)
4730 .cn_stride(19)
4731 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4732 }
4733
4734 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
4735 TEST_REQUIRES_ARM_NEON;
4736 for (uint32_t n = 1; n <= 16; n++) {
4737 for (uint32_t m = 1; m <= 1; m++) {
4738 GemmMicrokernelTester()
4739 .mr(1)
4740 .nr(16)
4741 .kr(2)
4742 .sr(1)
4743 .m(m)
4744 .n(n)
4745 .k(16)
4746 .iterations(1)
4747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4748 }
4749 }
4750 }
4751
4752 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
4753 TEST_REQUIRES_ARM_NEON;
4754 for (uint32_t m = 1; m <= 1; m++) {
4755 GemmMicrokernelTester()
4756 .mr(1)
4757 .nr(16)
4758 .kr(2)
4759 .sr(1)
4760 .m(m)
4761 .n(16)
4762 .k(16)
4763 .iterations(1)
4764 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4765 }
4766 }
4767
4768 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
4769 TEST_REQUIRES_ARM_NEON;
4770 for (uint32_t n = 1; n <= 16; n++) {
4771 GemmMicrokernelTester()
4772 .mr(1)
4773 .nr(16)
4774 .kr(2)
4775 .sr(1)
4776 .m(1)
4777 .n(n)
4778 .k(16)
4779 .iterations(1)
4780 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4781 }
4782 }
4783
4784 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16) {
4785 TEST_REQUIRES_ARM_NEON;
4786 for (size_t k = 1; k < 16; k++) {
4787 GemmMicrokernelTester()
4788 .mr(1)
4789 .nr(16)
4790 .kr(2)
4791 .sr(1)
4792 .m(1)
4793 .n(16)
4794 .k(k)
4795 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4796 }
4797 }
4798
4799 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
4800 TEST_REQUIRES_ARM_NEON;
4801 for (size_t k = 1; k < 16; k++) {
4802 for (uint32_t n = 1; n <= 16; n++) {
4803 for (uint32_t m = 1; m <= 1; m++) {
4804 GemmMicrokernelTester()
4805 .mr(1)
4806 .nr(16)
4807 .kr(2)
4808 .sr(1)
4809 .m(m)
4810 .n(n)
4811 .k(k)
4812 .iterations(1)
4813 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4814 }
4815 }
4816 }
4817 }
4818
4819 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16) {
4820 TEST_REQUIRES_ARM_NEON;
4821 for (size_t k = 17; k < 32; k++) {
4822 GemmMicrokernelTester()
4823 .mr(1)
4824 .nr(16)
4825 .kr(2)
4826 .sr(1)
4827 .m(1)
4828 .n(16)
4829 .k(k)
4830 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4831 }
4832 }
4833
4834 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
4835 TEST_REQUIRES_ARM_NEON;
4836 for (size_t k = 17; k < 32; k++) {
4837 for (uint32_t n = 1; n <= 16; n++) {
4838 for (uint32_t m = 1; m <= 1; m++) {
4839 GemmMicrokernelTester()
4840 .mr(1)
4841 .nr(16)
4842 .kr(2)
4843 .sr(1)
4844 .m(m)
4845 .n(n)
4846 .k(k)
4847 .iterations(1)
4848 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4849 }
4850 }
4851 }
4852 }
4853
4854 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16) {
4855 TEST_REQUIRES_ARM_NEON;
4856 for (size_t k = 32; k <= 160; k += 16) {
4857 GemmMicrokernelTester()
4858 .mr(1)
4859 .nr(16)
4860 .kr(2)
4861 .sr(1)
4862 .m(1)
4863 .n(16)
4864 .k(k)
4865 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4866 }
4867 }
4868
4869 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
4870 TEST_REQUIRES_ARM_NEON;
4871 for (size_t k = 32; k <= 160; k += 16) {
4872 for (uint32_t n = 1; n <= 16; n++) {
4873 for (uint32_t m = 1; m <= 1; m++) {
4874 GemmMicrokernelTester()
4875 .mr(1)
4876 .nr(16)
4877 .kr(2)
4878 .sr(1)
4879 .m(m)
4880 .n(n)
4881 .k(k)
4882 .iterations(1)
4883 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4884 }
4885 }
4886 }
4887 }
4888
4889 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16) {
4890 TEST_REQUIRES_ARM_NEON;
4891 for (uint32_t n = 17; n < 32; n++) {
4892 for (size_t k = 1; k <= 80; k += 17) {
4893 GemmMicrokernelTester()
4894 .mr(1)
4895 .nr(16)
4896 .kr(2)
4897 .sr(1)
4898 .m(1)
4899 .n(n)
4900 .k(k)
4901 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4902 }
4903 }
4904 }
4905
4906 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
4907 TEST_REQUIRES_ARM_NEON;
4908 for (uint32_t n = 17; n < 32; n++) {
4909 for (size_t k = 1; k <= 80; k += 17) {
4910 GemmMicrokernelTester()
4911 .mr(1)
4912 .nr(16)
4913 .kr(2)
4914 .sr(1)
4915 .m(1)
4916 .n(n)
4917 .k(k)
4918 .cn_stride(19)
4919 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4920 }
4921 }
4922 }
4923
4924 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
4925 TEST_REQUIRES_ARM_NEON;
4926 for (uint32_t n = 17; n < 32; n++) {
4927 for (size_t k = 1; k <= 80; k += 17) {
4928 for (uint32_t m = 1; m <= 1; m++) {
4929 GemmMicrokernelTester()
4930 .mr(1)
4931 .nr(16)
4932 .kr(2)
4933 .sr(1)
4934 .m(m)
4935 .n(n)
4936 .k(k)
4937 .iterations(1)
4938 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4939 }
4940 }
4941 }
4942 }
4943
4944 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16) {
4945 TEST_REQUIRES_ARM_NEON;
4946 for (uint32_t n = 32; n <= 48; n += 16) {
4947 for (size_t k = 1; k <= 80; k += 17) {
4948 GemmMicrokernelTester()
4949 .mr(1)
4950 .nr(16)
4951 .kr(2)
4952 .sr(1)
4953 .m(1)
4954 .n(n)
4955 .k(k)
4956 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4957 }
4958 }
4959 }
4960
4961 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
4962 TEST_REQUIRES_ARM_NEON;
4963 for (uint32_t n = 32; n <= 48; n += 16) {
4964 for (size_t k = 1; k <= 80; k += 17) {
4965 GemmMicrokernelTester()
4966 .mr(1)
4967 .nr(16)
4968 .kr(2)
4969 .sr(1)
4970 .m(1)
4971 .n(n)
4972 .k(k)
4973 .cn_stride(19)
4974 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4975 }
4976 }
4977 }
4978
4979 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
4980 TEST_REQUIRES_ARM_NEON;
4981 for (uint32_t n = 32; n <= 48; n += 16) {
4982 for (size_t k = 1; k <= 80; k += 17) {
4983 for (uint32_t m = 1; m <= 1; m++) {
4984 GemmMicrokernelTester()
4985 .mr(1)
4986 .nr(16)
4987 .kr(2)
4988 .sr(1)
4989 .m(m)
4990 .n(n)
4991 .k(k)
4992 .iterations(1)
4993 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
4994 }
4995 }
4996 }
4997 }
4998
4999 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel) {
5000 TEST_REQUIRES_ARM_NEON;
5001 for (size_t k = 1; k <= 80; k += 17) {
5002 GemmMicrokernelTester()
5003 .mr(1)
5004 .nr(16)
5005 .kr(2)
5006 .sr(1)
5007 .m(1)
5008 .n(16)
5009 .k(k)
5010 .ks(3)
5011 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5012 }
5013 }
5014
5015 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
5016 TEST_REQUIRES_ARM_NEON;
5017 for (size_t k = 1; k <= 80; k += 17) {
5018 for (uint32_t n = 1; n <= 16; n++) {
5019 for (uint32_t m = 1; m <= 1; m++) {
5020 GemmMicrokernelTester()
5021 .mr(1)
5022 .nr(16)
5023 .kr(2)
5024 .sr(1)
5025 .m(m)
5026 .n(n)
5027 .k(k)
5028 .ks(3)
5029 .iterations(1)
5030 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5031 }
5032 }
5033 }
5034 }
5035
5036 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
5037 TEST_REQUIRES_ARM_NEON;
5038 for (uint32_t n = 17; n < 32; n++) {
5039 for (size_t k = 1; k <= 80; k += 17) {
5040 GemmMicrokernelTester()
5041 .mr(1)
5042 .nr(16)
5043 .kr(2)
5044 .sr(1)
5045 .m(1)
5046 .n(n)
5047 .k(k)
5048 .ks(3)
5049 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5050 }
5051 }
5052 }
5053
5054 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
5055 TEST_REQUIRES_ARM_NEON;
5056 for (uint32_t n = 32; n <= 48; n += 16) {
5057 for (size_t k = 1; k <= 80; k += 17) {
5058 GemmMicrokernelTester()
5059 .mr(1)
5060 .nr(16)
5061 .kr(2)
5062 .sr(1)
5063 .m(1)
5064 .n(n)
5065 .k(k)
5066 .ks(3)
5067 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5068 }
5069 }
5070 }
5071
5072 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
5073 TEST_REQUIRES_ARM_NEON;
5074 for (size_t k = 1; k <= 80; k += 17) {
5075 for (uint32_t n = 1; n <= 16; n++) {
5076 for (uint32_t m = 1; m <= 1; m++) {
5077 GemmMicrokernelTester()
5078 .mr(1)
5079 .nr(16)
5080 .kr(2)
5081 .sr(1)
5082 .m(m)
5083 .n(n)
5084 .k(k)
5085 .cm_stride(19)
5086 .iterations(1)
5087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5088 }
5089 }
5090 }
5091 }
5092
5093 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, a_offset) {
5094 TEST_REQUIRES_ARM_NEON;
5095 for (size_t k = 1; k <= 80; k += 17) {
5096 GemmMicrokernelTester()
5097 .mr(1)
5098 .nr(16)
5099 .kr(2)
5100 .sr(1)
5101 .m(1)
5102 .n(16)
5103 .k(k)
5104 .ks(3)
5105 .a_offset(83)
5106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5107 }
5108 }
5109
5110 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, zero) {
5111 TEST_REQUIRES_ARM_NEON;
5112 for (size_t k = 1; k <= 80; k += 17) {
5113 for (uint32_t mz = 0; mz < 1; mz++) {
5114 GemmMicrokernelTester()
5115 .mr(1)
5116 .nr(16)
5117 .kr(2)
5118 .sr(1)
5119 .m(1)
5120 .n(16)
5121 .k(k)
5122 .ks(3)
5123 .a_offset(83)
5124 .zero_index(mz)
5125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5126 }
5127 }
5128 }
5129
5130 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmin) {
5131 TEST_REQUIRES_ARM_NEON;
5132 GemmMicrokernelTester()
5133 .mr(1)
5134 .nr(16)
5135 .kr(2)
5136 .sr(1)
5137 .m(1)
5138 .n(16)
5139 .k(16)
5140 .qmin(128)
5141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5142 }
5143
5144 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, qmax) {
5145 TEST_REQUIRES_ARM_NEON;
5146 GemmMicrokernelTester()
5147 .mr(1)
5148 .nr(16)
5149 .kr(2)
5150 .sr(1)
5151 .m(1)
5152 .n(16)
5153 .k(16)
5154 .qmax(128)
5155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5156 }
5157
5158 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_LD4R, strided_cm) {
5159 TEST_REQUIRES_ARM_NEON;
5160 GemmMicrokernelTester()
5161 .mr(1)
5162 .nr(16)
5163 .kr(2)
5164 .sr(1)
5165 .m(1)
5166 .n(16)
5167 .k(16)
5168 .cm_stride(19)
5169 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5170 }
5171#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5172
5173
5174#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5175 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16) {
5176 TEST_REQUIRES_ARM_NEON;
5177 GemmMicrokernelTester()
5178 .mr(2)
5179 .nr(16)
5180 .kr(2)
5181 .sr(1)
5182 .m(2)
5183 .n(16)
5184 .k(16)
5185 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5186 }
5187
5188 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cn) {
5189 TEST_REQUIRES_ARM_NEON;
5190 GemmMicrokernelTester()
5191 .mr(2)
5192 .nr(16)
5193 .kr(2)
5194 .sr(1)
5195 .m(2)
5196 .n(16)
5197 .k(16)
5198 .cn_stride(19)
5199 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5200 }
5201
5202 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
5203 TEST_REQUIRES_ARM_NEON;
5204 for (uint32_t n = 1; n <= 16; n++) {
5205 for (uint32_t m = 1; m <= 2; m++) {
5206 GemmMicrokernelTester()
5207 .mr(2)
5208 .nr(16)
5209 .kr(2)
5210 .sr(1)
5211 .m(m)
5212 .n(n)
5213 .k(16)
5214 .iterations(1)
5215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5216 }
5217 }
5218 }
5219
5220 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
5221 TEST_REQUIRES_ARM_NEON;
5222 for (uint32_t m = 1; m <= 2; m++) {
5223 GemmMicrokernelTester()
5224 .mr(2)
5225 .nr(16)
5226 .kr(2)
5227 .sr(1)
5228 .m(m)
5229 .n(16)
5230 .k(16)
5231 .iterations(1)
5232 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5233 }
5234 }
5235
5236 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
5237 TEST_REQUIRES_ARM_NEON;
5238 for (uint32_t n = 1; n <= 16; n++) {
5239 GemmMicrokernelTester()
5240 .mr(2)
5241 .nr(16)
5242 .kr(2)
5243 .sr(1)
5244 .m(2)
5245 .n(n)
5246 .k(16)
5247 .iterations(1)
5248 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5249 }
5250 }
5251
5252 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16) {
5253 TEST_REQUIRES_ARM_NEON;
5254 for (size_t k = 1; k < 16; k++) {
5255 GemmMicrokernelTester()
5256 .mr(2)
5257 .nr(16)
5258 .kr(2)
5259 .sr(1)
5260 .m(2)
5261 .n(16)
5262 .k(k)
5263 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5264 }
5265 }
5266
5267 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
5268 TEST_REQUIRES_ARM_NEON;
5269 for (size_t k = 1; k < 16; k++) {
5270 for (uint32_t n = 1; n <= 16; n++) {
5271 for (uint32_t m = 1; m <= 2; m++) {
5272 GemmMicrokernelTester()
5273 .mr(2)
5274 .nr(16)
5275 .kr(2)
5276 .sr(1)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .iterations(1)
5281 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5282 }
5283 }
5284 }
5285 }
5286
5287 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16) {
5288 TEST_REQUIRES_ARM_NEON;
5289 for (size_t k = 17; k < 32; k++) {
5290 GemmMicrokernelTester()
5291 .mr(2)
5292 .nr(16)
5293 .kr(2)
5294 .sr(1)
5295 .m(2)
5296 .n(16)
5297 .k(k)
5298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5299 }
5300 }
5301
5302 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
5303 TEST_REQUIRES_ARM_NEON;
5304 for (size_t k = 17; k < 32; k++) {
5305 for (uint32_t n = 1; n <= 16; n++) {
5306 for (uint32_t m = 1; m <= 2; m++) {
5307 GemmMicrokernelTester()
5308 .mr(2)
5309 .nr(16)
5310 .kr(2)
5311 .sr(1)
5312 .m(m)
5313 .n(n)
5314 .k(k)
5315 .iterations(1)
5316 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5317 }
5318 }
5319 }
5320 }
5321
5322 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16) {
5323 TEST_REQUIRES_ARM_NEON;
5324 for (size_t k = 32; k <= 160; k += 16) {
5325 GemmMicrokernelTester()
5326 .mr(2)
5327 .nr(16)
5328 .kr(2)
5329 .sr(1)
5330 .m(2)
5331 .n(16)
5332 .k(k)
5333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5334 }
5335 }
5336
5337 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
5338 TEST_REQUIRES_ARM_NEON;
5339 for (size_t k = 32; k <= 160; k += 16) {
5340 for (uint32_t n = 1; n <= 16; n++) {
5341 for (uint32_t m = 1; m <= 2; m++) {
5342 GemmMicrokernelTester()
5343 .mr(2)
5344 .nr(16)
5345 .kr(2)
5346 .sr(1)
5347 .m(m)
5348 .n(n)
5349 .k(k)
5350 .iterations(1)
5351 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5352 }
5353 }
5354 }
5355 }
5356
5357 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16) {
5358 TEST_REQUIRES_ARM_NEON;
5359 for (uint32_t n = 17; n < 32; n++) {
5360 for (size_t k = 1; k <= 80; k += 17) {
5361 GemmMicrokernelTester()
5362 .mr(2)
5363 .nr(16)
5364 .kr(2)
5365 .sr(1)
5366 .m(2)
5367 .n(n)
5368 .k(k)
5369 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5370 }
5371 }
5372 }
5373
5374 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
5375 TEST_REQUIRES_ARM_NEON;
5376 for (uint32_t n = 17; n < 32; n++) {
5377 for (size_t k = 1; k <= 80; k += 17) {
5378 GemmMicrokernelTester()
5379 .mr(2)
5380 .nr(16)
5381 .kr(2)
5382 .sr(1)
5383 .m(2)
5384 .n(n)
5385 .k(k)
5386 .cn_stride(19)
5387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5388 }
5389 }
5390 }
5391
5392 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
5393 TEST_REQUIRES_ARM_NEON;
5394 for (uint32_t n = 17; n < 32; n++) {
5395 for (size_t k = 1; k <= 80; k += 17) {
5396 for (uint32_t m = 1; m <= 2; m++) {
5397 GemmMicrokernelTester()
5398 .mr(2)
5399 .nr(16)
5400 .kr(2)
5401 .sr(1)
5402 .m(m)
5403 .n(n)
5404 .k(k)
5405 .iterations(1)
5406 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5407 }
5408 }
5409 }
5410 }
5411
5412 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16) {
5413 TEST_REQUIRES_ARM_NEON;
5414 for (uint32_t n = 32; n <= 48; n += 16) {
5415 for (size_t k = 1; k <= 80; k += 17) {
5416 GemmMicrokernelTester()
5417 .mr(2)
5418 .nr(16)
5419 .kr(2)
5420 .sr(1)
5421 .m(2)
5422 .n(n)
5423 .k(k)
5424 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5425 }
5426 }
5427 }
5428
5429 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
5430 TEST_REQUIRES_ARM_NEON;
5431 for (uint32_t n = 32; n <= 48; n += 16) {
5432 for (size_t k = 1; k <= 80; k += 17) {
5433 GemmMicrokernelTester()
5434 .mr(2)
5435 .nr(16)
5436 .kr(2)
5437 .sr(1)
5438 .m(2)
5439 .n(n)
5440 .k(k)
5441 .cn_stride(19)
5442 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5443 }
5444 }
5445 }
5446
5447 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
5448 TEST_REQUIRES_ARM_NEON;
5449 for (uint32_t n = 32; n <= 48; n += 16) {
5450 for (size_t k = 1; k <= 80; k += 17) {
5451 for (uint32_t m = 1; m <= 2; m++) {
5452 GemmMicrokernelTester()
5453 .mr(2)
5454 .nr(16)
5455 .kr(2)
5456 .sr(1)
5457 .m(m)
5458 .n(n)
5459 .k(k)
5460 .iterations(1)
5461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5462 }
5463 }
5464 }
5465 }
5466
5467 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel) {
5468 TEST_REQUIRES_ARM_NEON;
5469 for (size_t k = 1; k <= 80; k += 17) {
5470 GemmMicrokernelTester()
5471 .mr(2)
5472 .nr(16)
5473 .kr(2)
5474 .sr(1)
5475 .m(2)
5476 .n(16)
5477 .k(k)
5478 .ks(3)
5479 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5480 }
5481 }
5482
5483 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
5484 TEST_REQUIRES_ARM_NEON;
5485 for (size_t k = 1; k <= 80; k += 17) {
5486 for (uint32_t n = 1; n <= 16; n++) {
5487 for (uint32_t m = 1; m <= 2; m++) {
5488 GemmMicrokernelTester()
5489 .mr(2)
5490 .nr(16)
5491 .kr(2)
5492 .sr(1)
5493 .m(m)
5494 .n(n)
5495 .k(k)
5496 .ks(3)
5497 .iterations(1)
5498 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5499 }
5500 }
5501 }
5502 }
5503
5504 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
5505 TEST_REQUIRES_ARM_NEON;
5506 for (uint32_t n = 17; n < 32; n++) {
5507 for (size_t k = 1; k <= 80; k += 17) {
5508 GemmMicrokernelTester()
5509 .mr(2)
5510 .nr(16)
5511 .kr(2)
5512 .sr(1)
5513 .m(2)
5514 .n(n)
5515 .k(k)
5516 .ks(3)
5517 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5518 }
5519 }
5520 }
5521
5522 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
5523 TEST_REQUIRES_ARM_NEON;
5524 for (uint32_t n = 32; n <= 48; n += 16) {
5525 for (size_t k = 1; k <= 80; k += 17) {
5526 GemmMicrokernelTester()
5527 .mr(2)
5528 .nr(16)
5529 .kr(2)
5530 .sr(1)
5531 .m(2)
5532 .n(n)
5533 .k(k)
5534 .ks(3)
5535 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5536 }
5537 }
5538 }
5539
5540 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
5541 TEST_REQUIRES_ARM_NEON;
5542 for (size_t k = 1; k <= 80; k += 17) {
5543 for (uint32_t n = 1; n <= 16; n++) {
5544 for (uint32_t m = 1; m <= 2; m++) {
5545 GemmMicrokernelTester()
5546 .mr(2)
5547 .nr(16)
5548 .kr(2)
5549 .sr(1)
5550 .m(m)
5551 .n(n)
5552 .k(k)
5553 .cm_stride(19)
5554 .iterations(1)
5555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5556 }
5557 }
5558 }
5559 }
5560
5561 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, a_offset) {
5562 TEST_REQUIRES_ARM_NEON;
5563 for (size_t k = 1; k <= 80; k += 17) {
5564 GemmMicrokernelTester()
5565 .mr(2)
5566 .nr(16)
5567 .kr(2)
5568 .sr(1)
5569 .m(2)
5570 .n(16)
5571 .k(k)
5572 .ks(3)
5573 .a_offset(163)
5574 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5575 }
5576 }
5577
5578 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, zero) {
5579 TEST_REQUIRES_ARM_NEON;
5580 for (size_t k = 1; k <= 80; k += 17) {
5581 for (uint32_t mz = 0; mz < 2; mz++) {
5582 GemmMicrokernelTester()
5583 .mr(2)
5584 .nr(16)
5585 .kr(2)
5586 .sr(1)
5587 .m(2)
5588 .n(16)
5589 .k(k)
5590 .ks(3)
5591 .a_offset(163)
5592 .zero_index(mz)
5593 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5594 }
5595 }
5596 }
5597
5598 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmin) {
5599 TEST_REQUIRES_ARM_NEON;
5600 GemmMicrokernelTester()
5601 .mr(2)
5602 .nr(16)
5603 .kr(2)
5604 .sr(1)
5605 .m(2)
5606 .n(16)
5607 .k(16)
5608 .qmin(128)
5609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5610 }
5611
5612 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, qmax) {
5613 TEST_REQUIRES_ARM_NEON;
5614 GemmMicrokernelTester()
5615 .mr(2)
5616 .nr(16)
5617 .kr(2)
5618 .sr(1)
5619 .m(2)
5620 .n(16)
5621 .k(16)
5622 .qmax(128)
5623 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5624 }
5625
5626 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_LD4R, strided_cm) {
5627 TEST_REQUIRES_ARM_NEON;
5628 GemmMicrokernelTester()
5629 .mr(2)
5630 .nr(16)
5631 .kr(2)
5632 .sr(1)
5633 .m(2)
5634 .n(16)
5635 .k(16)
5636 .cm_stride(19)
5637 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5638 }
5639#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5640
5641
5642#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5643 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16) {
5644 TEST_REQUIRES_ARM_NEON;
5645 GemmMicrokernelTester()
5646 .mr(3)
5647 .nr(16)
5648 .kr(2)
5649 .sr(1)
5650 .m(3)
5651 .n(16)
5652 .k(16)
5653 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5654 }
5655
5656 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cn) {
5657 TEST_REQUIRES_ARM_NEON;
5658 GemmMicrokernelTester()
5659 .mr(3)
5660 .nr(16)
5661 .kr(2)
5662 .sr(1)
5663 .m(3)
5664 .n(16)
5665 .k(16)
5666 .cn_stride(19)
5667 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5668 }
5669
5670 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
5671 TEST_REQUIRES_ARM_NEON;
5672 for (uint32_t n = 1; n <= 16; n++) {
5673 for (uint32_t m = 1; m <= 3; m++) {
5674 GemmMicrokernelTester()
5675 .mr(3)
5676 .nr(16)
5677 .kr(2)
5678 .sr(1)
5679 .m(m)
5680 .n(n)
5681 .k(16)
5682 .iterations(1)
5683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5684 }
5685 }
5686 }
5687
5688 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
5689 TEST_REQUIRES_ARM_NEON;
5690 for (uint32_t m = 1; m <= 3; m++) {
5691 GemmMicrokernelTester()
5692 .mr(3)
5693 .nr(16)
5694 .kr(2)
5695 .sr(1)
5696 .m(m)
5697 .n(16)
5698 .k(16)
5699 .iterations(1)
5700 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5701 }
5702 }
5703
5704 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
5705 TEST_REQUIRES_ARM_NEON;
5706 for (uint32_t n = 1; n <= 16; n++) {
5707 GemmMicrokernelTester()
5708 .mr(3)
5709 .nr(16)
5710 .kr(2)
5711 .sr(1)
5712 .m(3)
5713 .n(n)
5714 .k(16)
5715 .iterations(1)
5716 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5717 }
5718 }
5719
5720 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16) {
5721 TEST_REQUIRES_ARM_NEON;
5722 for (size_t k = 1; k < 16; k++) {
5723 GemmMicrokernelTester()
5724 .mr(3)
5725 .nr(16)
5726 .kr(2)
5727 .sr(1)
5728 .m(3)
5729 .n(16)
5730 .k(k)
5731 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5732 }
5733 }
5734
5735 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
5736 TEST_REQUIRES_ARM_NEON;
5737 for (size_t k = 1; k < 16; k++) {
5738 for (uint32_t n = 1; n <= 16; n++) {
5739 for (uint32_t m = 1; m <= 3; m++) {
5740 GemmMicrokernelTester()
5741 .mr(3)
5742 .nr(16)
5743 .kr(2)
5744 .sr(1)
5745 .m(m)
5746 .n(n)
5747 .k(k)
5748 .iterations(1)
5749 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5750 }
5751 }
5752 }
5753 }
5754
5755 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16) {
5756 TEST_REQUIRES_ARM_NEON;
5757 for (size_t k = 17; k < 32; k++) {
5758 GemmMicrokernelTester()
5759 .mr(3)
5760 .nr(16)
5761 .kr(2)
5762 .sr(1)
5763 .m(3)
5764 .n(16)
5765 .k(k)
5766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5767 }
5768 }
5769
5770 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
5771 TEST_REQUIRES_ARM_NEON;
5772 for (size_t k = 17; k < 32; k++) {
5773 for (uint32_t n = 1; n <= 16; n++) {
5774 for (uint32_t m = 1; m <= 3; m++) {
5775 GemmMicrokernelTester()
5776 .mr(3)
5777 .nr(16)
5778 .kr(2)
5779 .sr(1)
5780 .m(m)
5781 .n(n)
5782 .k(k)
5783 .iterations(1)
5784 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5785 }
5786 }
5787 }
5788 }
5789
5790 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16) {
5791 TEST_REQUIRES_ARM_NEON;
5792 for (size_t k = 32; k <= 160; k += 16) {
5793 GemmMicrokernelTester()
5794 .mr(3)
5795 .nr(16)
5796 .kr(2)
5797 .sr(1)
5798 .m(3)
5799 .n(16)
5800 .k(k)
5801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5802 }
5803 }
5804
5805 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
5806 TEST_REQUIRES_ARM_NEON;
5807 for (size_t k = 32; k <= 160; k += 16) {
5808 for (uint32_t n = 1; n <= 16; n++) {
5809 for (uint32_t m = 1; m <= 3; m++) {
5810 GemmMicrokernelTester()
5811 .mr(3)
5812 .nr(16)
5813 .kr(2)
5814 .sr(1)
5815 .m(m)
5816 .n(n)
5817 .k(k)
5818 .iterations(1)
5819 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5820 }
5821 }
5822 }
5823 }
5824
5825 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16) {
5826 TEST_REQUIRES_ARM_NEON;
5827 for (uint32_t n = 17; n < 32; n++) {
5828 for (size_t k = 1; k <= 80; k += 17) {
5829 GemmMicrokernelTester()
5830 .mr(3)
5831 .nr(16)
5832 .kr(2)
5833 .sr(1)
5834 .m(3)
5835 .n(n)
5836 .k(k)
5837 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5838 }
5839 }
5840 }
5841
5842 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
5843 TEST_REQUIRES_ARM_NEON;
5844 for (uint32_t n = 17; n < 32; n++) {
5845 for (size_t k = 1; k <= 80; k += 17) {
5846 GemmMicrokernelTester()
5847 .mr(3)
5848 .nr(16)
5849 .kr(2)
5850 .sr(1)
5851 .m(3)
5852 .n(n)
5853 .k(k)
5854 .cn_stride(19)
5855 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5856 }
5857 }
5858 }
5859
5860 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
5861 TEST_REQUIRES_ARM_NEON;
5862 for (uint32_t n = 17; n < 32; n++) {
5863 for (size_t k = 1; k <= 80; k += 17) {
5864 for (uint32_t m = 1; m <= 3; m++) {
5865 GemmMicrokernelTester()
5866 .mr(3)
5867 .nr(16)
5868 .kr(2)
5869 .sr(1)
5870 .m(m)
5871 .n(n)
5872 .k(k)
5873 .iterations(1)
5874 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5875 }
5876 }
5877 }
5878 }
5879
5880 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16) {
5881 TEST_REQUIRES_ARM_NEON;
5882 for (uint32_t n = 32; n <= 48; n += 16) {
5883 for (size_t k = 1; k <= 80; k += 17) {
5884 GemmMicrokernelTester()
5885 .mr(3)
5886 .nr(16)
5887 .kr(2)
5888 .sr(1)
5889 .m(3)
5890 .n(n)
5891 .k(k)
5892 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5893 }
5894 }
5895 }
5896
5897 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
5898 TEST_REQUIRES_ARM_NEON;
5899 for (uint32_t n = 32; n <= 48; n += 16) {
5900 for (size_t k = 1; k <= 80; k += 17) {
5901 GemmMicrokernelTester()
5902 .mr(3)
5903 .nr(16)
5904 .kr(2)
5905 .sr(1)
5906 .m(3)
5907 .n(n)
5908 .k(k)
5909 .cn_stride(19)
5910 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5911 }
5912 }
5913 }
5914
5915 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
5916 TEST_REQUIRES_ARM_NEON;
5917 for (uint32_t n = 32; n <= 48; n += 16) {
5918 for (size_t k = 1; k <= 80; k += 17) {
5919 for (uint32_t m = 1; m <= 3; m++) {
5920 GemmMicrokernelTester()
5921 .mr(3)
5922 .nr(16)
5923 .kr(2)
5924 .sr(1)
5925 .m(m)
5926 .n(n)
5927 .k(k)
5928 .iterations(1)
5929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5930 }
5931 }
5932 }
5933 }
5934
5935 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel) {
5936 TEST_REQUIRES_ARM_NEON;
5937 for (size_t k = 1; k <= 80; k += 17) {
5938 GemmMicrokernelTester()
5939 .mr(3)
5940 .nr(16)
5941 .kr(2)
5942 .sr(1)
5943 .m(3)
5944 .n(16)
5945 .k(k)
5946 .ks(3)
5947 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5948 }
5949 }
5950
5951 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
5952 TEST_REQUIRES_ARM_NEON;
5953 for (size_t k = 1; k <= 80; k += 17) {
5954 for (uint32_t n = 1; n <= 16; n++) {
5955 for (uint32_t m = 1; m <= 3; m++) {
5956 GemmMicrokernelTester()
5957 .mr(3)
5958 .nr(16)
5959 .kr(2)
5960 .sr(1)
5961 .m(m)
5962 .n(n)
5963 .k(k)
5964 .ks(3)
5965 .iterations(1)
5966 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5967 }
5968 }
5969 }
5970 }
5971
5972 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
5973 TEST_REQUIRES_ARM_NEON;
5974 for (uint32_t n = 17; n < 32; n++) {
5975 for (size_t k = 1; k <= 80; k += 17) {
5976 GemmMicrokernelTester()
5977 .mr(3)
5978 .nr(16)
5979 .kr(2)
5980 .sr(1)
5981 .m(3)
5982 .n(n)
5983 .k(k)
5984 .ks(3)
5985 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
5986 }
5987 }
5988 }
5989
5990 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
5991 TEST_REQUIRES_ARM_NEON;
5992 for (uint32_t n = 32; n <= 48; n += 16) {
5993 for (size_t k = 1; k <= 80; k += 17) {
5994 GemmMicrokernelTester()
5995 .mr(3)
5996 .nr(16)
5997 .kr(2)
5998 .sr(1)
5999 .m(3)
6000 .n(n)
6001 .k(k)
6002 .ks(3)
6003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6004 }
6005 }
6006 }
6007
6008 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
6009 TEST_REQUIRES_ARM_NEON;
6010 for (size_t k = 1; k <= 80; k += 17) {
6011 for (uint32_t n = 1; n <= 16; n++) {
6012 for (uint32_t m = 1; m <= 3; m++) {
6013 GemmMicrokernelTester()
6014 .mr(3)
6015 .nr(16)
6016 .kr(2)
6017 .sr(1)
6018 .m(m)
6019 .n(n)
6020 .k(k)
6021 .cm_stride(19)
6022 .iterations(1)
6023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6024 }
6025 }
6026 }
6027 }
6028
6029 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, a_offset) {
6030 TEST_REQUIRES_ARM_NEON;
6031 for (size_t k = 1; k <= 80; k += 17) {
6032 GemmMicrokernelTester()
6033 .mr(3)
6034 .nr(16)
6035 .kr(2)
6036 .sr(1)
6037 .m(3)
6038 .n(16)
6039 .k(k)
6040 .ks(3)
6041 .a_offset(251)
6042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6043 }
6044 }
6045
6046 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, zero) {
6047 TEST_REQUIRES_ARM_NEON;
6048 for (size_t k = 1; k <= 80; k += 17) {
6049 for (uint32_t mz = 0; mz < 3; mz++) {
6050 GemmMicrokernelTester()
6051 .mr(3)
6052 .nr(16)
6053 .kr(2)
6054 .sr(1)
6055 .m(3)
6056 .n(16)
6057 .k(k)
6058 .ks(3)
6059 .a_offset(251)
6060 .zero_index(mz)
6061 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6062 }
6063 }
6064 }
6065
6066 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmin) {
6067 TEST_REQUIRES_ARM_NEON;
6068 GemmMicrokernelTester()
6069 .mr(3)
6070 .nr(16)
6071 .kr(2)
6072 .sr(1)
6073 .m(3)
6074 .n(16)
6075 .k(16)
6076 .qmin(128)
6077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6078 }
6079
6080 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, qmax) {
6081 TEST_REQUIRES_ARM_NEON;
6082 GemmMicrokernelTester()
6083 .mr(3)
6084 .nr(16)
6085 .kr(2)
6086 .sr(1)
6087 .m(3)
6088 .n(16)
6089 .k(16)
6090 .qmax(128)
6091 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6092 }
6093
6094 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2__NEON_MLAL_LD4R, strided_cm) {
6095 TEST_REQUIRES_ARM_NEON;
6096 GemmMicrokernelTester()
6097 .mr(3)
6098 .nr(16)
6099 .kr(2)
6100 .sr(1)
6101 .m(3)
6102 .n(16)
6103 .k(16)
6104 .cm_stride(19)
6105 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
6106 }
6107#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6108
6109
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r.
  // Every case configures GemmMicrokernelTester with mr=4, nr=16, kr=2, sr=1
  // and starts with TEST_REQUIRES_ARM_NEON (presumably a runtime NEON guard
  // from xnnpack/isa-checks.h -- confirm there).

  // Full tile (m=4, n=16) with k fixed at 16.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(16)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 16, with cn_stride set to 19 (larger than nr).
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(16)
      .cn_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // k = 16, every (m, n) with m in [1, 4] and n in [1, 16]; iterations(1) per combination.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 16; n++) {
      for (uint32_t m = 1; m <= 4; m++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(m)
          .n(n)
          .k(16)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // k = 16, n = 16, m swept over [1, 4].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_m) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 4; m++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(m)
        .n(16)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k = 16, m = 4, n swept over [1, 16].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_eq_16_subtile_n) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 16; n++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(n)
        .k(16)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // Full tile, k swept over [1, 15].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k in [1, 15] combined with every (m, n) subtile.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_lt_16_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 16; k++) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile, k swept over [17, 31].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 17; k < 32; k++) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k in [17, 31] combined with every (m, n) subtile.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_gt_16_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 17; k < 32; k++) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile, k = 32, 48, ..., 160 (multiples of 16).
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 32; k <= 160; k += 16) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k = 32, 48, ..., 160 combined with every (m, n) subtile.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, k_div_16_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 32; k <= 160; k += 16) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // m = 4, n in [17, 31] (more than nr), k = 1, 18, 35, 52, 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 80; k += 17) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_gt_16, with cn_stride set to 19.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 80; k += 17) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n in [17, 31], k = 1, 18, ..., 69, m swept over [1, 4].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 80; k += 17) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // m = 4, n = 32 and 48 (multiples of 16), k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 80; k += 17) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_div_16, with cn_stride set to 19.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 80; k += 17) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .cn_stride(19)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n = 32 and 48, k = 1, 18, ..., 69, m swept over [1, 4].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 80; k += 17) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks(3), k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // ks(3) combined with every (m, n) subtile, k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, small_kernel_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // ks(3) with n in [17, 31], k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_gt_16_small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 17; n < 32; n++) {
      for (size_t k = 1; k <= 80; k += 17) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // ks(3) with n = 32 and 48, k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, n_div_16_small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 32; n <= 48; n += 16) {
      for (size_t k = 1; k <= 80; k += 17) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // cm_stride set to 19, combined with every (m, n) subtile, k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t n = 1; n <= 16; n++) {
        for (uint32_t m = 1; m <= 4; m++) {
          GemmMicrokernelTester()
            .mr(4)
            .nr(16)
            .kr(2)
            .sr(1)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(19)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks(3) and a_offset(331), k = 1, 18, ..., 69.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, a_offset) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 80; k += 17) {
      GemmMicrokernelTester()
        .mr(4)
        .nr(16)
        .kr(2)
        .sr(1)
        .m(4)
        .n(16)
        .k(k)
        .ks(3)
        .a_offset(331)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // As a_offset, additionally sweeping zero_index(mz) for mz in [0, 3].
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, zero) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 80; k += 17) {
      for (uint32_t mz = 0; mz < 4; mz++) {
        GemmMicrokernelTester()
          .mr(4)
          .nr(16)
          .kr(2)
          .sr(1)
          .m(4)
          .n(16)
          .k(k)
          .ks(3)
          .a_offset(331)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Full tile, k = 16, with qmin(128).
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, qmin) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(16)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 16, with qmax(128).
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, qmax) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(16)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 16, with cm_stride set to 19.
  TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD4R, strided_cm) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(4)
      .nr(16)
      .kr(2)
      .sr(1)
      .m(4)
      .n(16)
      .k(16)
      .cm_stride(19)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld4r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
6576
6577
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull.
  // Every case configures GemmMicrokernelTester with mr=3, nr=8, kr=4, sr=2
  // and starts with TEST_REQUIRES_ARM_NEON (presumably a runtime NEON guard
  // from xnnpack/isa-checks.h -- confirm there).

  // Full tile (m=3, n=8) with k fixed at 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(3)
      .n(8)
      .k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with cn_stride set to 11 (larger than nr).
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(3)
      .n(8)
      .k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // k = 8, every (m, n) with m in [1, 3] and n in [1, 8]; iterations(1) per combination.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 3; m++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(m)
          .n(n)
          .k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // k = 8, n = 8, m swept over [1, 3].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 3; m++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(m)
        .n(8)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k = 8, m = 3, n swept over [1, 8].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(3)
        .n(n)
        .k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // Full tile, k swept over [1, 7].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(3)
        .n(8)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k in [1, 7] combined with every (m, n) subtile.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile, k swept over [9, 15].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(3)
        .n(8)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k in [9, 15] combined with every (m, n) subtile.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile, k = 16, 24, ..., 80 (multiples of 8).
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(3)
        .n(8)
        .k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k = 16, 24, ..., 80 combined with every (m, n) subtile.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // m = 3, n in [9, 15] (more than nr), k = 1, 10, 19, 28, 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(n)
          .k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_gt_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n in [9, 15], k = 1, 10, ..., 37, m swept over [1, 3].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // m = 3, n = 16 and 24 (multiples of 8), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(n)
          .k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_div_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(n)
          .k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n = 16 and 24, k = 1, 10, ..., 37, m swept over [1, 3].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks(3), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(3)
        .n(8)
        .k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // ks(3) combined with every (m, n) subtile, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, small_kernel_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // ks(3) with n in [9, 15], k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // ks(3) with n = 16 and 24, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, n_div_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(n)
          .k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // cm_stride set to 11, combined with every (m, n) subtile, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 3; m++) {
          GemmMicrokernelTester()
            .mr(3)
            .nr(8)
            .kr(4)
            .sr(2)
            .m(m)
            .n(n)
            .k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks(3) and a_offset(127), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, a_offset) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(3)
        .nr(8)
        .kr(4)
        .sr(2)
        .m(3)
        .n(8)
        .k(k)
        .ks(3)
        .a_offset(127)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // As a_offset, additionally sweeping zero_index(mz) for mz in [0, 2].
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, zero) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t mz = 0; mz < 3; mz++) {
        GemmMicrokernelTester()
          .mr(3)
          .nr(8)
          .kr(4)
          .sr(2)
          .m(3)
          .n(8)
          .k(k)
          .ks(3)
          .a_offset(127)
          .zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Full tile, k = 8, with qmin(128).
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, qmin) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(3)
      .n(8)
      .k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with qmax(128).
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, qmax) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(3)
      .n(8)
      .k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with cm_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4S2__NEON_MULL, strided_cm) {
    TEST_REQUIRES_ARM_NEON;
    GemmMicrokernelTester()
      .mr(3)
      .nr(8)
      .kr(4)
      .sr(2)
      .m(3)
      .n(8)
      .k(8)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
7044
7045
7046#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007047 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007048 TEST_REQUIRES_ARM_NEON;
7049 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007050 .mr(4)
7051 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007052 .kr(4)
7053 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007054 .m(4)
7055 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007056 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007057 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007058 }
7059
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007060 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007061 TEST_REQUIRES_ARM_NEON;
7062 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007063 .mr(4)
7064 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007065 .kr(4)
7066 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007067 .m(4)
7068 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007069 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007070 .cn_stride(11)
7071 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007072 }
7073
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007074 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007075 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007076 for (uint32_t n = 1; n <= 8; n++) {
7077 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007078 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007079 .mr(4)
7080 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007081 .kr(4)
7082 .sr(2)
7083 .m(m)
7084 .n(n)
7085 .k(8)
7086 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007088 }
7089 }
7090 }
7091
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007092 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007093 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007094 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007095 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007096 .mr(4)
7097 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007098 .kr(4)
7099 .sr(2)
7100 .m(m)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007101 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007102 .k(8)
7103 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007104 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007105 }
7106 }
7107
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007108 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007109 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007110 for (uint32_t n = 1; n <= 8; n++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007111 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007112 .mr(4)
7113 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007114 .kr(4)
7115 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007116 .m(4)
Frank Barchardeb704f72021-11-12 01:26:50 -08007117 .n(n)
7118 .k(8)
7119 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007120 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007121 }
7122 }
7123
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007124 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007125 TEST_REQUIRES_ARM_NEON;
7126 for (size_t k = 1; k < 8; k++) {
7127 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007128 .mr(4)
7129 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007130 .kr(4)
7131 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007132 .m(4)
7133 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007134 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007135 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007136 }
7137 }
7138
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007139 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_lt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007140 TEST_REQUIRES_ARM_NEON;
7141 for (size_t k = 1; k < 8; k++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007142 for (uint32_t n = 1; n <= 8; n++) {
7143 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007144 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007145 .mr(4)
7146 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007147 .kr(4)
7148 .sr(2)
7149 .m(m)
7150 .n(n)
7151 .k(k)
7152 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007153 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007154 }
7155 }
7156 }
7157 }
7158
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007159 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007160 TEST_REQUIRES_ARM_NEON;
7161 for (size_t k = 9; k < 16; k++) {
7162 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007163 .mr(4)
7164 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007165 .kr(4)
7166 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007167 .m(4)
7168 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007169 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007170 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007171 }
7172 }
7173
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007174 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007175 TEST_REQUIRES_ARM_NEON;
7176 for (size_t k = 9; k < 16; k++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007177 for (uint32_t n = 1; n <= 8; n++) {
7178 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007179 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007180 .mr(4)
7181 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007182 .kr(4)
7183 .sr(2)
7184 .m(m)
7185 .n(n)
7186 .k(k)
7187 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007188 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007189 }
7190 }
7191 }
7192 }
7193
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007194 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007195 TEST_REQUIRES_ARM_NEON;
7196 for (size_t k = 16; k <= 80; k += 8) {
7197 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007198 .mr(4)
7199 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007200 .kr(4)
7201 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007202 .m(4)
7203 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007204 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007206 }
7207 }
7208
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007209 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, k_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007210 TEST_REQUIRES_ARM_NEON;
7211 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007212 for (uint32_t n = 1; n <= 8; n++) {
7213 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007214 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007215 .mr(4)
7216 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007217 .kr(4)
7218 .sr(2)
7219 .m(m)
7220 .n(n)
7221 .k(k)
7222 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007223 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007224 }
7225 }
7226 }
7227 }
7228
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007229 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007230 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007231 for (uint32_t n = 9; n < 16; n++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007232 for (size_t k = 1; k <= 40; k += 9) {
7233 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007234 .mr(4)
7235 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007236 .kr(4)
7237 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007238 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007239 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007240 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007241 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007242 }
7243 }
7244 }
7245
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007246 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007247 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007248 for (uint32_t n = 9; n < 16; n++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007249 for (size_t k = 1; k <= 40; k += 9) {
7250 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007251 .mr(4)
7252 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007253 .kr(4)
7254 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007255 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007256 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007257 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007258 .cn_stride(11)
7259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007260 }
7261 }
7262 }
7263
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007264 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007265 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007266 for (uint32_t n = 9; n < 16; n++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007267 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007268 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007269 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007270 .mr(4)
7271 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007272 .kr(4)
7273 .sr(2)
7274 .m(m)
7275 .n(n)
7276 .k(k)
7277 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007278 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007279 }
7280 }
7281 }
7282 }
7283
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007284 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007285 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007286 for (uint32_t n = 16; n <= 24; n += 8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007287 for (size_t k = 1; k <= 40; k += 9) {
7288 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007289 .mr(4)
7290 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007291 .kr(4)
7292 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007293 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007294 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007295 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007296 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007297 }
7298 }
7299 }
7300
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007301 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007302 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007303 for (uint32_t n = 16; n <= 24; n += 8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007304 for (size_t k = 1; k <= 40; k += 9) {
7305 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007306 .mr(4)
7307 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007308 .kr(4)
7309 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007310 .m(4)
Frank Barchardeb704f72021-11-12 01:26:50 -08007311 .n(n)
7312 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007313 .cn_stride(11)
7314 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007315 }
7316 }
7317 }
7318
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007319 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007320 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007321 for (uint32_t n = 16; n <= 24; n += 8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007322 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007323 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007324 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007325 .mr(4)
7326 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007327 .kr(4)
7328 .sr(2)
7329 .m(m)
7330 .n(n)
7331 .k(k)
7332 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007334 }
7335 }
7336 }
7337 }
7338
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007339 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007340 TEST_REQUIRES_ARM_NEON;
7341 for (size_t k = 1; k <= 40; k += 9) {
7342 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007343 .mr(4)
7344 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007345 .kr(4)
7346 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007347 .m(4)
7348 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007349 .k(k)
7350 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007351 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007352 }
7353 }
7354
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007355 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007356 TEST_REQUIRES_ARM_NEON;
7357 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007358 for (uint32_t n = 1; n <= 8; n++) {
7359 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007360 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007361 .mr(4)
7362 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007363 .kr(4)
7364 .sr(2)
7365 .m(m)
7366 .n(n)
7367 .k(k)
7368 .ks(3)
7369 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007370 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007371 }
7372 }
7373 }
7374 }
7375
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007376 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_gt_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007377 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007378 for (uint32_t n = 9; n < 16; n++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007379 for (size_t k = 1; k <= 40; k += 9) {
7380 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007381 .mr(4)
7382 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007383 .kr(4)
7384 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007385 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007386 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007387 .k(k)
7388 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007389 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007390 }
7391 }
7392 }
7393
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007394 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, n_div_8_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007395 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007396 for (uint32_t n = 16; n <= 24; n += 8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007397 for (size_t k = 1; k <= 40; k += 9) {
7398 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007399 .mr(4)
7400 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007401 .kr(4)
7402 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007403 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007404 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007405 .k(k)
7406 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007408 }
7409 }
7410 }
7411
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007412 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007413 TEST_REQUIRES_ARM_NEON;
7414 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007415 for (uint32_t n = 1; n <= 8; n++) {
7416 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007417 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007418 .mr(4)
7419 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007420 .kr(4)
7421 .sr(2)
7422 .m(m)
7423 .n(n)
7424 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007425 .cm_stride(11)
Frank Barchardeb704f72021-11-12 01:26:50 -08007426 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007428 }
7429 }
7430 }
7431 }
7432
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007433 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007434 TEST_REQUIRES_ARM_NEON;
7435 for (size_t k = 1; k <= 40; k += 9) {
7436 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007437 .mr(4)
7438 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007439 .kr(4)
7440 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007441 .m(4)
7442 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007443 .k(k)
7444 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007445 .a_offset(163)
7446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007447 }
7448 }
7449
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007450 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007451 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007452 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007453 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007454 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007455 .mr(4)
7456 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007457 .kr(4)
7458 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007459 .m(4)
7460 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007461 .k(k)
7462 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007463 .a_offset(163)
Frank Barchardeb704f72021-11-12 01:26:50 -08007464 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007465 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007466 }
7467 }
7468 }
7469
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007470 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007471 TEST_REQUIRES_ARM_NEON;
7472 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007473 .mr(4)
7474 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007475 .kr(4)
7476 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007477 .m(4)
7478 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007479 .k(8)
7480 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007482 }
7483
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007484 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007485 TEST_REQUIRES_ARM_NEON;
7486 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007487 .mr(4)
7488 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007489 .kr(4)
7490 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007491 .m(4)
7492 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007493 .k(8)
7494 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007495 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007496 }
7497
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007498 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4S2__NEON_MULL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007499 TEST_REQUIRES_ARM_NEON;
7500 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007501 .mr(4)
7502 .nr(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007503 .kr(4)
7504 .sr(2)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007505 .m(4)
7506 .n(8)
Frank Barchardeb704f72021-11-12 01:26:50 -08007507 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08007508 .cm_stride(11)
7509 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007510 }
7511#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7512
7513
7514#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08007515 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007516 TEST_REQUIRES_ARM_NEON;
7517 GemmMicrokernelTester()
7518 .mr(2)
7519 .nr(16)
7520 .kr(4)
7521 .sr(2)
7522 .m(2)
7523 .n(16)
7524 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007525 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007526 }
7527
Frank Barcharde22685a2021-11-12 11:36:58 -08007528 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007529 TEST_REQUIRES_ARM_NEON;
7530 GemmMicrokernelTester()
7531 .mr(2)
7532 .nr(16)
7533 .kr(4)
7534 .sr(2)
7535 .m(2)
7536 .n(16)
7537 .k(8)
7538 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007539 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007540 }
7541
Frank Barcharde22685a2021-11-12 11:36:58 -08007542 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007543 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007544 for (uint32_t n = 1; n <= 16; n++) {
7545 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007546 GemmMicrokernelTester()
7547 .mr(2)
7548 .nr(16)
7549 .kr(4)
7550 .sr(2)
7551 .m(m)
7552 .n(n)
7553 .k(8)
7554 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007556 }
7557 }
7558 }
7559
Frank Barcharde22685a2021-11-12 11:36:58 -08007560 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007561 TEST_REQUIRES_ARM_NEON;
7562 for (uint32_t m = 1; m <= 2; m++) {
7563 GemmMicrokernelTester()
7564 .mr(2)
7565 .nr(16)
7566 .kr(4)
7567 .sr(2)
7568 .m(m)
7569 .n(16)
7570 .k(8)
7571 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007572 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007573 }
7574 }
7575
Frank Barcharde22685a2021-11-12 11:36:58 -08007576 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007577 TEST_REQUIRES_ARM_NEON;
7578 for (uint32_t n = 1; n <= 16; n++) {
7579 GemmMicrokernelTester()
7580 .mr(2)
7581 .nr(16)
7582 .kr(4)
7583 .sr(2)
7584 .m(2)
7585 .n(n)
7586 .k(8)
7587 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007588 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007589 }
7590 }
7591
Frank Barcharde22685a2021-11-12 11:36:58 -08007592 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007593 TEST_REQUIRES_ARM_NEON;
7594 for (size_t k = 1; k < 8; k++) {
7595 GemmMicrokernelTester()
7596 .mr(2)
7597 .nr(16)
7598 .kr(4)
7599 .sr(2)
7600 .m(2)
7601 .n(16)
7602 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007603 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007604 }
7605 }
7606
Frank Barcharde22685a2021-11-12 11:36:58 -08007607 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_lt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007608 TEST_REQUIRES_ARM_NEON;
7609 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007610 for (uint32_t n = 1; n <= 16; n++) {
7611 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007612 GemmMicrokernelTester()
7613 .mr(2)
7614 .nr(16)
7615 .kr(4)
7616 .sr(2)
7617 .m(m)
7618 .n(n)
7619 .k(k)
7620 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007621 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007622 }
7623 }
7624 }
7625 }
7626
Frank Barcharde22685a2021-11-12 11:36:58 -08007627 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007628 TEST_REQUIRES_ARM_NEON;
7629 for (size_t k = 9; k < 16; k++) {
7630 GemmMicrokernelTester()
7631 .mr(2)
7632 .nr(16)
7633 .kr(4)
7634 .sr(2)
7635 .m(2)
7636 .n(16)
7637 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007639 }
7640 }
7641
Frank Barcharde22685a2021-11-12 11:36:58 -08007642 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007643 TEST_REQUIRES_ARM_NEON;
7644 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007645 for (uint32_t n = 1; n <= 16; n++) {
7646 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007647 GemmMicrokernelTester()
7648 .mr(2)
7649 .nr(16)
7650 .kr(4)
7651 .sr(2)
7652 .m(m)
7653 .n(n)
7654 .k(k)
7655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007656 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007657 }
7658 }
7659 }
7660 }
7661
Frank Barcharde22685a2021-11-12 11:36:58 -08007662 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007663 TEST_REQUIRES_ARM_NEON;
7664 for (size_t k = 16; k <= 80; k += 8) {
7665 GemmMicrokernelTester()
7666 .mr(2)
7667 .nr(16)
7668 .kr(4)
7669 .sr(2)
7670 .m(2)
7671 .n(16)
7672 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007674 }
7675 }
7676
Frank Barcharde22685a2021-11-12 11:36:58 -08007677 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, k_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007678 TEST_REQUIRES_ARM_NEON;
7679 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007680 for (uint32_t n = 1; n <= 16; n++) {
7681 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007682 GemmMicrokernelTester()
7683 .mr(2)
7684 .nr(16)
7685 .kr(4)
7686 .sr(2)
7687 .m(m)
7688 .n(n)
7689 .k(k)
7690 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007691 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007692 }
7693 }
7694 }
7695 }
7696
Frank Barcharde22685a2021-11-12 11:36:58 -08007697 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007698 TEST_REQUIRES_ARM_NEON;
7699 for (uint32_t n = 17; n < 32; n++) {
7700 for (size_t k = 1; k <= 40; k += 9) {
7701 GemmMicrokernelTester()
7702 .mr(2)
7703 .nr(16)
7704 .kr(4)
7705 .sr(2)
7706 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007707 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007708 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007709 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007710 }
7711 }
7712 }
7713
Frank Barcharde22685a2021-11-12 11:36:58 -08007714 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007715 TEST_REQUIRES_ARM_NEON;
7716 for (uint32_t n = 17; n < 32; n++) {
7717 for (size_t k = 1; k <= 40; k += 9) {
7718 GemmMicrokernelTester()
7719 .mr(2)
7720 .nr(16)
7721 .kr(4)
7722 .sr(2)
7723 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007724 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007725 .k(k)
7726 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007727 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007728 }
7729 }
7730 }
7731
Frank Barcharde22685a2021-11-12 11:36:58 -08007732 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007733 TEST_REQUIRES_ARM_NEON;
7734 for (uint32_t n = 17; n < 32; n++) {
7735 for (size_t k = 1; k <= 40; k += 9) {
7736 for (uint32_t m = 1; m <= 2; m++) {
7737 GemmMicrokernelTester()
7738 .mr(2)
7739 .nr(16)
7740 .kr(4)
7741 .sr(2)
7742 .m(m)
7743 .n(n)
7744 .k(k)
7745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007746 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007747 }
7748 }
7749 }
7750 }
7751
Frank Barcharde22685a2021-11-12 11:36:58 -08007752 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007753 TEST_REQUIRES_ARM_NEON;
7754 for (uint32_t n = 32; n <= 48; n += 16) {
7755 for (size_t k = 1; k <= 40; k += 9) {
7756 GemmMicrokernelTester()
7757 .mr(2)
7758 .nr(16)
7759 .kr(4)
7760 .sr(2)
7761 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007762 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007763 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007764 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007765 }
7766 }
7767 }
7768
Frank Barcharde22685a2021-11-12 11:36:58 -08007769 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007770 TEST_REQUIRES_ARM_NEON;
7771 for (uint32_t n = 32; n <= 48; n += 16) {
7772 for (size_t k = 1; k <= 40; k += 9) {
7773 GemmMicrokernelTester()
7774 .mr(2)
7775 .nr(16)
7776 .kr(4)
7777 .sr(2)
7778 .m(2)
7779 .n(n)
7780 .k(k)
7781 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007782 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007783 }
7784 }
7785 }
7786
Frank Barcharde22685a2021-11-12 11:36:58 -08007787 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007788 TEST_REQUIRES_ARM_NEON;
7789 for (uint32_t n = 32; n <= 48; n += 16) {
7790 for (size_t k = 1; k <= 40; k += 9) {
7791 for (uint32_t m = 1; m <= 2; m++) {
7792 GemmMicrokernelTester()
7793 .mr(2)
7794 .nr(16)
7795 .kr(4)
7796 .sr(2)
7797 .m(m)
7798 .n(n)
7799 .k(k)
7800 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007802 }
7803 }
7804 }
7805 }
7806
Frank Barcharde22685a2021-11-12 11:36:58 -08007807 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007808 TEST_REQUIRES_ARM_NEON;
7809 for (size_t k = 1; k <= 40; k += 9) {
7810 GemmMicrokernelTester()
7811 .mr(2)
7812 .nr(16)
7813 .kr(4)
7814 .sr(2)
7815 .m(2)
7816 .n(16)
7817 .k(k)
7818 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007819 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007820 }
7821 }
7822
Frank Barcharde22685a2021-11-12 11:36:58 -08007823 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007824 TEST_REQUIRES_ARM_NEON;
7825 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007826 for (uint32_t n = 1; n <= 16; n++) {
7827 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007828 GemmMicrokernelTester()
7829 .mr(2)
7830 .nr(16)
7831 .kr(4)
7832 .sr(2)
7833 .m(m)
7834 .n(n)
7835 .k(k)
7836 .ks(3)
7837 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007838 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007839 }
7840 }
7841 }
7842 }
7843
Frank Barcharde22685a2021-11-12 11:36:58 -08007844 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_gt_16_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007845 TEST_REQUIRES_ARM_NEON;
7846 for (uint32_t n = 17; n < 32; n++) {
7847 for (size_t k = 1; k <= 40; k += 9) {
7848 GemmMicrokernelTester()
7849 .mr(2)
7850 .nr(16)
7851 .kr(4)
7852 .sr(2)
7853 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007854 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007855 .k(k)
7856 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007857 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007858 }
7859 }
7860 }
7861
Frank Barcharde22685a2021-11-12 11:36:58 -08007862 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, n_div_16_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007863 TEST_REQUIRES_ARM_NEON;
7864 for (uint32_t n = 32; n <= 48; n += 16) {
7865 for (size_t k = 1; k <= 40; k += 9) {
7866 GemmMicrokernelTester()
7867 .mr(2)
7868 .nr(16)
7869 .kr(4)
7870 .sr(2)
7871 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007872 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08007873 .k(k)
7874 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007876 }
7877 }
7878 }
7879
Frank Barcharde22685a2021-11-12 11:36:58 -08007880 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007881 TEST_REQUIRES_ARM_NEON;
7882 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007883 for (uint32_t n = 1; n <= 16; n++) {
7884 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007885 GemmMicrokernelTester()
7886 .mr(2)
7887 .nr(16)
7888 .kr(4)
7889 .sr(2)
7890 .m(m)
7891 .n(n)
7892 .k(k)
7893 .cm_stride(19)
7894 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007896 }
7897 }
7898 }
7899 }
7900
Frank Barcharde22685a2021-11-12 11:36:58 -08007901 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007902 TEST_REQUIRES_ARM_NEON;
7903 for (size_t k = 1; k <= 40; k += 9) {
7904 GemmMicrokernelTester()
7905 .mr(2)
7906 .nr(16)
7907 .kr(4)
7908 .sr(2)
7909 .m(2)
7910 .n(16)
7911 .k(k)
7912 .ks(3)
7913 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007915 }
7916 }
7917
Frank Barcharde22685a2021-11-12 11:36:58 -08007918 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007919 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007920 for (size_t k = 1; k <= 40; k += 9) {
7921 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007922 GemmMicrokernelTester()
7923 .mr(2)
7924 .nr(16)
7925 .kr(4)
7926 .sr(2)
7927 .m(2)
7928 .n(16)
7929 .k(k)
7930 .ks(3)
7931 .a_offset(83)
7932 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007933 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007934 }
7935 }
7936 }
7937
Frank Barcharde22685a2021-11-12 11:36:58 -08007938 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007939 TEST_REQUIRES_ARM_NEON;
7940 GemmMicrokernelTester()
7941 .mr(2)
7942 .nr(16)
7943 .kr(4)
7944 .sr(2)
7945 .m(2)
7946 .n(16)
7947 .k(8)
7948 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007950 }
7951
Frank Barcharde22685a2021-11-12 11:36:58 -08007952 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007953 TEST_REQUIRES_ARM_NEON;
7954 GemmMicrokernelTester()
7955 .mr(2)
7956 .nr(16)
7957 .kr(4)
7958 .sr(2)
7959 .m(2)
7960 .n(16)
7961 .k(8)
7962 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007963 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007964 }
7965
Frank Barcharde22685a2021-11-12 11:36:58 -08007966 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4S2__NEON_MULL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007967 TEST_REQUIRES_ARM_NEON;
7968 GemmMicrokernelTester()
7969 .mr(2)
7970 .nr(16)
7971 .kr(4)
7972 .sr(2)
7973 .m(2)
7974 .n(16)
7975 .k(8)
7976 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007977 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007978 }
7979#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7980
7981
7982#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -08007983 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007984 TEST_REQUIRES_ARM_NEON;
7985 GemmMicrokernelTester()
7986 .mr(3)
7987 .nr(16)
7988 .kr(4)
7989 .sr(2)
7990 .m(3)
7991 .n(16)
7992 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007993 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08007994 }
7995
Frank Barcharde22685a2021-11-12 11:36:58 -08007996 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08007997 TEST_REQUIRES_ARM_NEON;
7998 GemmMicrokernelTester()
7999 .mr(3)
8000 .nr(16)
8001 .kr(4)
8002 .sr(2)
8003 .m(3)
8004 .n(16)
8005 .k(8)
8006 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008007 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008008 }
8009
Frank Barcharde22685a2021-11-12 11:36:58 -08008010 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008011 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008012 for (uint32_t n = 1; n <= 16; n++) {
8013 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008014 GemmMicrokernelTester()
8015 .mr(3)
8016 .nr(16)
8017 .kr(4)
8018 .sr(2)
8019 .m(m)
8020 .n(n)
8021 .k(8)
8022 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008024 }
8025 }
8026 }
8027
Frank Barcharde22685a2021-11-12 11:36:58 -08008028 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8_subtile_m) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008029 TEST_REQUIRES_ARM_NEON;
8030 for (uint32_t m = 1; m <= 3; m++) {
8031 GemmMicrokernelTester()
8032 .mr(3)
8033 .nr(16)
8034 .kr(4)
8035 .sr(2)
8036 .m(m)
8037 .n(16)
8038 .k(8)
8039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008040 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008041 }
8042 }
8043
Frank Barcharde22685a2021-11-12 11:36:58 -08008044 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_eq_8_subtile_n) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008045 TEST_REQUIRES_ARM_NEON;
8046 for (uint32_t n = 1; n <= 16; n++) {
8047 GemmMicrokernelTester()
8048 .mr(3)
8049 .nr(16)
8050 .kr(4)
8051 .sr(2)
8052 .m(3)
8053 .n(n)
8054 .k(8)
8055 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008056 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008057 }
8058 }
8059
Frank Barcharde22685a2021-11-12 11:36:58 -08008060 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_lt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008061 TEST_REQUIRES_ARM_NEON;
8062 for (size_t k = 1; k < 8; k++) {
8063 GemmMicrokernelTester()
8064 .mr(3)
8065 .nr(16)
8066 .kr(4)
8067 .sr(2)
8068 .m(3)
8069 .n(16)
8070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008071 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008072 }
8073 }
8074
Frank Barcharde22685a2021-11-12 11:36:58 -08008075 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_lt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008076 TEST_REQUIRES_ARM_NEON;
8077 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008078 for (uint32_t n = 1; n <= 16; n++) {
8079 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008080 GemmMicrokernelTester()
8081 .mr(3)
8082 .nr(16)
8083 .kr(4)
8084 .sr(2)
8085 .m(m)
8086 .n(n)
8087 .k(k)
8088 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008089 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008090 }
8091 }
8092 }
8093 }
8094
Frank Barcharde22685a2021-11-12 11:36:58 -08008095 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_gt_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008096 TEST_REQUIRES_ARM_NEON;
8097 for (size_t k = 9; k < 16; k++) {
8098 GemmMicrokernelTester()
8099 .mr(3)
8100 .nr(16)
8101 .kr(4)
8102 .sr(2)
8103 .m(3)
8104 .n(16)
8105 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008107 }
8108 }
8109
Frank Barcharde22685a2021-11-12 11:36:58 -08008110 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_gt_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008111 TEST_REQUIRES_ARM_NEON;
8112 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008113 for (uint32_t n = 1; n <= 16; n++) {
8114 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008115 GemmMicrokernelTester()
8116 .mr(3)
8117 .nr(16)
8118 .kr(4)
8119 .sr(2)
8120 .m(m)
8121 .n(n)
8122 .k(k)
8123 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008124 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008125 }
8126 }
8127 }
8128 }
8129
Frank Barcharde22685a2021-11-12 11:36:58 -08008130 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_div_8) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008131 TEST_REQUIRES_ARM_NEON;
8132 for (size_t k = 16; k <= 80; k += 8) {
8133 GemmMicrokernelTester()
8134 .mr(3)
8135 .nr(16)
8136 .kr(4)
8137 .sr(2)
8138 .m(3)
8139 .n(16)
8140 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008142 }
8143 }
8144
Frank Barcharde22685a2021-11-12 11:36:58 -08008145 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, k_div_8_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008146 TEST_REQUIRES_ARM_NEON;
8147 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008148 for (uint32_t n = 1; n <= 16; n++) {
8149 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008150 GemmMicrokernelTester()
8151 .mr(3)
8152 .nr(16)
8153 .kr(4)
8154 .sr(2)
8155 .m(m)
8156 .n(n)
8157 .k(k)
8158 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008159 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008160 }
8161 }
8162 }
8163 }
8164
Frank Barcharde22685a2021-11-12 11:36:58 -08008165 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008166 TEST_REQUIRES_ARM_NEON;
8167 for (uint32_t n = 17; n < 32; n++) {
8168 for (size_t k = 1; k <= 40; k += 9) {
8169 GemmMicrokernelTester()
8170 .mr(3)
8171 .nr(16)
8172 .kr(4)
8173 .sr(2)
8174 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008175 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08008176 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008177 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008178 }
8179 }
8180 }
8181
Frank Barcharde22685a2021-11-12 11:36:58 -08008182 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008183 TEST_REQUIRES_ARM_NEON;
8184 for (uint32_t n = 17; n < 32; n++) {
8185 for (size_t k = 1; k <= 40; k += 9) {
8186 GemmMicrokernelTester()
8187 .mr(3)
8188 .nr(16)
8189 .kr(4)
8190 .sr(2)
8191 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008192 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08008193 .k(k)
8194 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008195 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008196 }
8197 }
8198 }
8199
Frank Barcharde22685a2021-11-12 11:36:58 -08008200 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008201 TEST_REQUIRES_ARM_NEON;
8202 for (uint32_t n = 17; n < 32; n++) {
8203 for (size_t k = 1; k <= 40; k += 9) {
8204 for (uint32_t m = 1; m <= 3; m++) {
8205 GemmMicrokernelTester()
8206 .mr(3)
8207 .nr(16)
8208 .kr(4)
8209 .sr(2)
8210 .m(m)
8211 .n(n)
8212 .k(k)
8213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008214 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008215 }
8216 }
8217 }
8218 }
8219
Frank Barcharde22685a2021-11-12 11:36:58 -08008220 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008221 TEST_REQUIRES_ARM_NEON;
8222 for (uint32_t n = 32; n <= 48; n += 16) {
8223 for (size_t k = 1; k <= 40; k += 9) {
8224 GemmMicrokernelTester()
8225 .mr(3)
8226 .nr(16)
8227 .kr(4)
8228 .sr(2)
8229 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008230 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08008231 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008232 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008233 }
8234 }
8235 }
8236
Frank Barcharde22685a2021-11-12 11:36:58 -08008237 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16_strided_cn) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008238 TEST_REQUIRES_ARM_NEON;
8239 for (uint32_t n = 32; n <= 48; n += 16) {
8240 for (size_t k = 1; k <= 40; k += 9) {
8241 GemmMicrokernelTester()
8242 .mr(3)
8243 .nr(16)
8244 .kr(4)
8245 .sr(2)
8246 .m(3)
8247 .n(n)
8248 .k(k)
8249 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008250 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008251 }
8252 }
8253 }
8254
Frank Barcharde22685a2021-11-12 11:36:58 -08008255 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008256 TEST_REQUIRES_ARM_NEON;
8257 for (uint32_t n = 32; n <= 48; n += 16) {
8258 for (size_t k = 1; k <= 40; k += 9) {
8259 for (uint32_t m = 1; m <= 3; m++) {
8260 GemmMicrokernelTester()
8261 .mr(3)
8262 .nr(16)
8263 .kr(4)
8264 .sr(2)
8265 .m(m)
8266 .n(n)
8267 .k(k)
8268 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008270 }
8271 }
8272 }
8273 }
8274
Frank Barcharde22685a2021-11-12 11:36:58 -08008275 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008276 TEST_REQUIRES_ARM_NEON;
8277 for (size_t k = 1; k <= 40; k += 9) {
8278 GemmMicrokernelTester()
8279 .mr(3)
8280 .nr(16)
8281 .kr(4)
8282 .sr(2)
8283 .m(3)
8284 .n(16)
8285 .k(k)
8286 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008287 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008288 }
8289 }
8290
Frank Barcharde22685a2021-11-12 11:36:58 -08008291 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, small_kernel_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008292 TEST_REQUIRES_ARM_NEON;
8293 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008294 for (uint32_t n = 1; n <= 16; n++) {
8295 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008296 GemmMicrokernelTester()
8297 .mr(3)
8298 .nr(16)
8299 .kr(4)
8300 .sr(2)
8301 .m(m)
8302 .n(n)
8303 .k(k)
8304 .ks(3)
8305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008307 }
8308 }
8309 }
8310 }
8311
Frank Barcharde22685a2021-11-12 11:36:58 -08008312 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_gt_16_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008313 TEST_REQUIRES_ARM_NEON;
8314 for (uint32_t n = 17; n < 32; n++) {
8315 for (size_t k = 1; k <= 40; k += 9) {
8316 GemmMicrokernelTester()
8317 .mr(3)
8318 .nr(16)
8319 .kr(4)
8320 .sr(2)
8321 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008322 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08008323 .k(k)
8324 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008325 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008326 }
8327 }
8328 }
8329
Frank Barcharde22685a2021-11-12 11:36:58 -08008330 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, n_div_16_small_kernel) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008331 TEST_REQUIRES_ARM_NEON;
8332 for (uint32_t n = 32; n <= 48; n += 16) {
8333 for (size_t k = 1; k <= 40; k += 9) {
8334 GemmMicrokernelTester()
8335 .mr(3)
8336 .nr(16)
8337 .kr(4)
8338 .sr(2)
8339 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008340 .n(n)
Frank Barchardeb704f72021-11-12 01:26:50 -08008341 .k(k)
8342 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008344 }
8345 }
8346 }
8347
Frank Barcharde22685a2021-11-12 11:36:58 -08008348 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, strided_cm_subtile) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008349 TEST_REQUIRES_ARM_NEON;
8350 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008351 for (uint32_t n = 1; n <= 16; n++) {
8352 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008353 GemmMicrokernelTester()
8354 .mr(3)
8355 .nr(16)
8356 .kr(4)
8357 .sr(2)
8358 .m(m)
8359 .n(n)
8360 .k(k)
8361 .cm_stride(19)
8362 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008364 }
8365 }
8366 }
8367 }
8368
Frank Barcharde22685a2021-11-12 11:36:58 -08008369 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, a_offset) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008370 TEST_REQUIRES_ARM_NEON;
8371 for (size_t k = 1; k <= 40; k += 9) {
8372 GemmMicrokernelTester()
8373 .mr(3)
8374 .nr(16)
8375 .kr(4)
8376 .sr(2)
8377 .m(3)
8378 .n(16)
8379 .k(k)
8380 .ks(3)
8381 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08008382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008383 }
8384 }
8385
Frank Barcharde22685a2021-11-12 11:36:58 -08008386 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, zero) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008387 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008388 for (size_t k = 1; k <= 40; k += 9) {
8389 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008390 GemmMicrokernelTester()
8391 .mr(3)
8392 .nr(16)
8393 .kr(4)
8394 .sr(2)
8395 .m(3)
8396 .n(16)
8397 .k(k)
8398 .ks(3)
8399 .a_offset(127)
8400 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008401 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008402 }
8403 }
8404 }
8405
Frank Barcharde22685a2021-11-12 11:36:58 -08008406 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, qmin) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008407 TEST_REQUIRES_ARM_NEON;
8408 GemmMicrokernelTester()
8409 .mr(3)
8410 .nr(16)
8411 .kr(4)
8412 .sr(2)
8413 .m(3)
8414 .n(16)
8415 .k(8)
8416 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008418 }
8419
Frank Barcharde22685a2021-11-12 11:36:58 -08008420 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, qmax) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008421 TEST_REQUIRES_ARM_NEON;
8422 GemmMicrokernelTester()
8423 .mr(3)
8424 .nr(16)
8425 .kr(4)
8426 .sr(2)
8427 .m(3)
8428 .n(16)
8429 .k(8)
8430 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008431 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008432 }
8433
Frank Barcharde22685a2021-11-12 11:36:58 -08008434 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C4S2__NEON_MULL, strided_cm) {
Frank Barchardeb704f72021-11-12 01:26:50 -08008435 TEST_REQUIRES_ARM_NEON;
8436 GemmMicrokernelTester()
8437 .mr(3)
8438 .nr(16)
8439 .kr(4)
8440 .sr(2)
8441 .m(3)
8442 .n(16)
8443 .k(8)
8444 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008445 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardeb704f72021-11-12 01:26:50 -08008446 }
8447#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8448
8449
8450#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008451 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008452 TEST_REQUIRES_ARM_NEON;
8453 GemmMicrokernelTester()
8454 .mr(3)
8455 .nr(8)
8456 .kr(2)
8457 .sr(4)
8458 .m(3)
8459 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008460 .k(16)
8461 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008462 }
8463
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008464 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008465 TEST_REQUIRES_ARM_NEON;
8466 GemmMicrokernelTester()
8467 .mr(3)
8468 .nr(8)
8469 .kr(2)
8470 .sr(4)
8471 .m(3)
8472 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008473 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008474 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008476 }
8477
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008478 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008479 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008480 for (uint32_t n = 1; n <= 8; n++) {
8481 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008482 GemmMicrokernelTester()
8483 .mr(3)
8484 .nr(8)
8485 .kr(2)
8486 .sr(4)
8487 .m(m)
8488 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008489 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008490 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008491 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008492 }
8493 }
8494 }
8495
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008496 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008497 TEST_REQUIRES_ARM_NEON;
8498 for (uint32_t m = 1; m <= 3; m++) {
8499 GemmMicrokernelTester()
8500 .mr(3)
8501 .nr(8)
8502 .kr(2)
8503 .sr(4)
8504 .m(m)
8505 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008506 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008507 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008508 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008509 }
8510 }
8511
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008512 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008513 TEST_REQUIRES_ARM_NEON;
8514 for (uint32_t n = 1; n <= 8; n++) {
8515 GemmMicrokernelTester()
8516 .mr(3)
8517 .nr(8)
8518 .kr(2)
8519 .sr(4)
8520 .m(3)
8521 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008522 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008523 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008524 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008525 }
8526 }
8527
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008528 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008529 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008530 for (size_t k = 1; k < 16; k++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008531 GemmMicrokernelTester()
8532 .mr(3)
8533 .nr(8)
8534 .kr(2)
8535 .sr(4)
8536 .m(3)
8537 .n(8)
8538 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008539 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008540 }
8541 }
8542
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008543 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008544 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008545 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008546 for (uint32_t n = 1; n <= 8; n++) {
8547 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008548 GemmMicrokernelTester()
8549 .mr(3)
8550 .nr(8)
8551 .kr(2)
8552 .sr(4)
8553 .m(m)
8554 .n(n)
8555 .k(k)
8556 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008557 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008558 }
8559 }
8560 }
8561 }
8562
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008563 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008564 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008565 for (size_t k = 17; k < 32; k++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008566 GemmMicrokernelTester()
8567 .mr(3)
8568 .nr(8)
8569 .kr(2)
8570 .sr(4)
8571 .m(3)
8572 .n(8)
8573 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008574 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008575 }
8576 }
8577
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008578 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008579 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008580 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008581 for (uint32_t n = 1; n <= 8; n++) {
8582 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008583 GemmMicrokernelTester()
8584 .mr(3)
8585 .nr(8)
8586 .kr(2)
8587 .sr(4)
8588 .m(m)
8589 .n(n)
8590 .k(k)
8591 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008592 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008593 }
8594 }
8595 }
8596 }
8597
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008598 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008599 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008600 for (size_t k = 32; k <= 160; k += 16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008601 GemmMicrokernelTester()
8602 .mr(3)
8603 .nr(8)
8604 .kr(2)
8605 .sr(4)
8606 .m(3)
8607 .n(8)
8608 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008610 }
8611 }
8612
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008613 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008614 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008615 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008616 for (uint32_t n = 1; n <= 8; n++) {
8617 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008618 GemmMicrokernelTester()
8619 .mr(3)
8620 .nr(8)
8621 .kr(2)
8622 .sr(4)
8623 .m(m)
8624 .n(n)
8625 .k(k)
8626 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008627 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008628 }
8629 }
8630 }
8631 }
8632
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008633 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008634 TEST_REQUIRES_ARM_NEON;
8635 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008636 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008637 GemmMicrokernelTester()
8638 .mr(3)
8639 .nr(8)
8640 .kr(2)
8641 .sr(4)
8642 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008643 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008644 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008645 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008646 }
8647 }
8648 }
8649
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008650 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008651 TEST_REQUIRES_ARM_NEON;
8652 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008653 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008654 GemmMicrokernelTester()
8655 .mr(3)
8656 .nr(8)
8657 .kr(2)
8658 .sr(4)
8659 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008660 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008661 .k(k)
8662 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008663 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008664 }
8665 }
8666 }
8667
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008668 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008669 TEST_REQUIRES_ARM_NEON;
8670 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008671 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008672 for (uint32_t m = 1; m <= 3; m++) {
8673 GemmMicrokernelTester()
8674 .mr(3)
8675 .nr(8)
8676 .kr(2)
8677 .sr(4)
8678 .m(m)
8679 .n(n)
8680 .k(k)
8681 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008682 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008683 }
8684 }
8685 }
8686 }
8687
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008688 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008689 TEST_REQUIRES_ARM_NEON;
8690 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008691 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008692 GemmMicrokernelTester()
8693 .mr(3)
8694 .nr(8)
8695 .kr(2)
8696 .sr(4)
8697 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008698 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008699 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008700 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008701 }
8702 }
8703 }
8704
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008705 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008706 TEST_REQUIRES_ARM_NEON;
8707 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008708 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008709 GemmMicrokernelTester()
8710 .mr(3)
8711 .nr(8)
8712 .kr(2)
8713 .sr(4)
8714 .m(3)
8715 .n(n)
8716 .k(k)
8717 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008718 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008719 }
8720 }
8721 }
8722
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008723 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008724 TEST_REQUIRES_ARM_NEON;
8725 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008726 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008727 for (uint32_t m = 1; m <= 3; m++) {
8728 GemmMicrokernelTester()
8729 .mr(3)
8730 .nr(8)
8731 .kr(2)
8732 .sr(4)
8733 .m(m)
8734 .n(n)
8735 .k(k)
8736 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008737 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008738 }
8739 }
8740 }
8741 }
8742
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008743 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008744 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008745 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008746 GemmMicrokernelTester()
8747 .mr(3)
8748 .nr(8)
8749 .kr(2)
8750 .sr(4)
8751 .m(3)
8752 .n(8)
8753 .k(k)
8754 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008755 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008756 }
8757 }
8758
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008759 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008760 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008761 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008762 for (uint32_t n = 1; n <= 8; n++) {
8763 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008764 GemmMicrokernelTester()
8765 .mr(3)
8766 .nr(8)
8767 .kr(2)
8768 .sr(4)
8769 .m(m)
8770 .n(n)
8771 .k(k)
8772 .ks(3)
8773 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008774 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008775 }
8776 }
8777 }
8778 }
8779
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008780 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008781 TEST_REQUIRES_ARM_NEON;
8782 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008783 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008784 GemmMicrokernelTester()
8785 .mr(3)
8786 .nr(8)
8787 .kr(2)
8788 .sr(4)
8789 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008790 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008791 .k(k)
8792 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008793 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008794 }
8795 }
8796 }
8797
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008798 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008799 TEST_REQUIRES_ARM_NEON;
8800 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008801 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008802 GemmMicrokernelTester()
8803 .mr(3)
8804 .nr(8)
8805 .kr(2)
8806 .sr(4)
8807 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008808 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008809 .k(k)
8810 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008812 }
8813 }
8814 }
8815
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008816 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008817 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008818 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008819 for (uint32_t n = 1; n <= 8; n++) {
8820 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008821 GemmMicrokernelTester()
8822 .mr(3)
8823 .nr(8)
8824 .kr(2)
8825 .sr(4)
8826 .m(m)
8827 .n(n)
8828 .k(k)
8829 .cm_stride(11)
8830 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008832 }
8833 }
8834 }
8835 }
8836
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008837 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008838 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008839 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008840 GemmMicrokernelTester()
8841 .mr(3)
8842 .nr(8)
8843 .kr(2)
8844 .sr(4)
8845 .m(3)
8846 .n(8)
8847 .k(k)
8848 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008849 .a_offset(251)
8850 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008851 }
8852 }
8853
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008854 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008855 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008856 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008857 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008858 GemmMicrokernelTester()
8859 .mr(3)
8860 .nr(8)
8861 .kr(2)
8862 .sr(4)
8863 .m(3)
8864 .n(8)
8865 .k(k)
8866 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008867 .a_offset(251)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008868 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008869 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008870 }
8871 }
8872 }
8873
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008874 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008875 TEST_REQUIRES_ARM_NEON;
8876 GemmMicrokernelTester()
8877 .mr(3)
8878 .nr(8)
8879 .kr(2)
8880 .sr(4)
8881 .m(3)
8882 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008883 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008884 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008885 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008886 }
8887
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008888 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008889 TEST_REQUIRES_ARM_NEON;
8890 GemmMicrokernelTester()
8891 .mr(3)
8892 .nr(8)
8893 .kr(2)
8894 .sr(4)
8895 .m(3)
8896 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008897 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008898 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008899 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008900 }
8901
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008902 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2S4__NEON_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008903 TEST_REQUIRES_ARM_NEON;
8904 GemmMicrokernelTester()
8905 .mr(3)
8906 .nr(8)
8907 .kr(2)
8908 .sr(4)
8909 .m(3)
8910 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008911 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008912 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008913 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008914 }
8915#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8916
8917
8918#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008919 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008920 TEST_REQUIRES_ARM_NEON;
8921 GemmMicrokernelTester()
8922 .mr(4)
8923 .nr(8)
8924 .kr(2)
8925 .sr(4)
8926 .m(4)
8927 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008928 .k(16)
8929 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008930 }
8931
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008932 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008933 TEST_REQUIRES_ARM_NEON;
8934 GemmMicrokernelTester()
8935 .mr(4)
8936 .nr(8)
8937 .kr(2)
8938 .sr(4)
8939 .m(4)
8940 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008941 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008942 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008943 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008944 }
8945
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008946 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008947 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008948 for (uint32_t n = 1; n <= 8; n++) {
8949 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008950 GemmMicrokernelTester()
8951 .mr(4)
8952 .nr(8)
8953 .kr(2)
8954 .sr(4)
8955 .m(m)
8956 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008957 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008958 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008960 }
8961 }
8962 }
8963
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008964 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008965 TEST_REQUIRES_ARM_NEON;
8966 for (uint32_t m = 1; m <= 4; m++) {
8967 GemmMicrokernelTester()
8968 .mr(4)
8969 .nr(8)
8970 .kr(2)
8971 .sr(4)
8972 .m(m)
8973 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008974 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008975 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008976 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008977 }
8978 }
8979
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008980 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008981 TEST_REQUIRES_ARM_NEON;
8982 for (uint32_t n = 1; n <= 8; n++) {
8983 GemmMicrokernelTester()
8984 .mr(4)
8985 .nr(8)
8986 .kr(2)
8987 .sr(4)
8988 .m(4)
8989 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008990 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08008991 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008992 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08008993 }
8994 }
8995
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008996 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008997 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08008998 for (size_t k = 1; k < 16; k++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08008999 GemmMicrokernelTester()
9000 .mr(4)
9001 .nr(8)
9002 .kr(2)
9003 .sr(4)
9004 .m(4)
9005 .n(8)
9006 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009007 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009008 }
9009 }
9010
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009011 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009012 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009013 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009014 for (uint32_t n = 1; n <= 8; n++) {
9015 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009016 GemmMicrokernelTester()
9017 .mr(4)
9018 .nr(8)
9019 .kr(2)
9020 .sr(4)
9021 .m(m)
9022 .n(n)
9023 .k(k)
9024 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009025 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009026 }
9027 }
9028 }
9029 }
9030
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009031 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009032 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009033 for (size_t k = 17; k < 32; k++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009034 GemmMicrokernelTester()
9035 .mr(4)
9036 .nr(8)
9037 .kr(2)
9038 .sr(4)
9039 .m(4)
9040 .n(8)
9041 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009043 }
9044 }
9045
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009046 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009047 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009048 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009049 for (uint32_t n = 1; n <= 8; n++) {
9050 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009051 GemmMicrokernelTester()
9052 .mr(4)
9053 .nr(8)
9054 .kr(2)
9055 .sr(4)
9056 .m(m)
9057 .n(n)
9058 .k(k)
9059 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009060 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009061 }
9062 }
9063 }
9064 }
9065
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009066 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009067 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009068 for (size_t k = 32; k <= 160; k += 16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009069 GemmMicrokernelTester()
9070 .mr(4)
9071 .nr(8)
9072 .kr(2)
9073 .sr(4)
9074 .m(4)
9075 .n(8)
9076 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009078 }
9079 }
9080
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009081 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009082 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009083 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009084 for (uint32_t n = 1; n <= 8; n++) {
9085 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009086 GemmMicrokernelTester()
9087 .mr(4)
9088 .nr(8)
9089 .kr(2)
9090 .sr(4)
9091 .m(m)
9092 .n(n)
9093 .k(k)
9094 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009095 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009096 }
9097 }
9098 }
9099 }
9100
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009101 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009102 TEST_REQUIRES_ARM_NEON;
9103 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009104 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009105 GemmMicrokernelTester()
9106 .mr(4)
9107 .nr(8)
9108 .kr(2)
9109 .sr(4)
9110 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009111 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009112 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009113 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009114 }
9115 }
9116 }
9117
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009118 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009119 TEST_REQUIRES_ARM_NEON;
9120 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009121 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009122 GemmMicrokernelTester()
9123 .mr(4)
9124 .nr(8)
9125 .kr(2)
9126 .sr(4)
9127 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009128 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009129 .k(k)
9130 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009132 }
9133 }
9134 }
9135
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009136 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009137 TEST_REQUIRES_ARM_NEON;
9138 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009139 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009140 for (uint32_t m = 1; m <= 4; m++) {
9141 GemmMicrokernelTester()
9142 .mr(4)
9143 .nr(8)
9144 .kr(2)
9145 .sr(4)
9146 .m(m)
9147 .n(n)
9148 .k(k)
9149 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009150 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009151 }
9152 }
9153 }
9154 }
9155
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009156 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009157 TEST_REQUIRES_ARM_NEON;
9158 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009159 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009160 GemmMicrokernelTester()
9161 .mr(4)
9162 .nr(8)
9163 .kr(2)
9164 .sr(4)
9165 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009166 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009167 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009168 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009169 }
9170 }
9171 }
9172
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009173 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009174 TEST_REQUIRES_ARM_NEON;
9175 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009176 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009177 GemmMicrokernelTester()
9178 .mr(4)
9179 .nr(8)
9180 .kr(2)
9181 .sr(4)
9182 .m(4)
9183 .n(n)
9184 .k(k)
9185 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009186 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009187 }
9188 }
9189 }
9190
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009191 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009192 TEST_REQUIRES_ARM_NEON;
9193 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009194 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009195 for (uint32_t m = 1; m <= 4; m++) {
9196 GemmMicrokernelTester()
9197 .mr(4)
9198 .nr(8)
9199 .kr(2)
9200 .sr(4)
9201 .m(m)
9202 .n(n)
9203 .k(k)
9204 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009206 }
9207 }
9208 }
9209 }
9210
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009211 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009212 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009213 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009214 GemmMicrokernelTester()
9215 .mr(4)
9216 .nr(8)
9217 .kr(2)
9218 .sr(4)
9219 .m(4)
9220 .n(8)
9221 .k(k)
9222 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009223 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009224 }
9225 }
9226
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009227 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009228 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009229 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009230 for (uint32_t n = 1; n <= 8; n++) {
9231 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009232 GemmMicrokernelTester()
9233 .mr(4)
9234 .nr(8)
9235 .kr(2)
9236 .sr(4)
9237 .m(m)
9238 .n(n)
9239 .k(k)
9240 .ks(3)
9241 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009242 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009243 }
9244 }
9245 }
9246 }
9247
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009248 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_gt_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009249 TEST_REQUIRES_ARM_NEON;
9250 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009251 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009252 GemmMicrokernelTester()
9253 .mr(4)
9254 .nr(8)
9255 .kr(2)
9256 .sr(4)
9257 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009258 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009259 .k(k)
9260 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009261 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009262 }
9263 }
9264 }
9265
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009266 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, n_div_8_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009267 TEST_REQUIRES_ARM_NEON;
9268 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009269 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009270 GemmMicrokernelTester()
9271 .mr(4)
9272 .nr(8)
9273 .kr(2)
9274 .sr(4)
9275 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009276 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009277 .k(k)
9278 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009280 }
9281 }
9282 }
9283
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009284 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009285 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009286 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009287 for (uint32_t n = 1; n <= 8; n++) {
9288 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009289 GemmMicrokernelTester()
9290 .mr(4)
9291 .nr(8)
9292 .kr(2)
9293 .sr(4)
9294 .m(m)
9295 .n(n)
9296 .k(k)
9297 .cm_stride(11)
9298 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009300 }
9301 }
9302 }
9303 }
9304
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009305 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009306 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009307 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009308 GemmMicrokernelTester()
9309 .mr(4)
9310 .nr(8)
9311 .kr(2)
9312 .sr(4)
9313 .m(4)
9314 .n(8)
9315 .k(k)
9316 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009317 .a_offset(331)
9318 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009319 }
9320 }
9321
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009322 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009323 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009324 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009325 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009326 GemmMicrokernelTester()
9327 .mr(4)
9328 .nr(8)
9329 .kr(2)
9330 .sr(4)
9331 .m(4)
9332 .n(8)
9333 .k(k)
9334 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009335 .a_offset(331)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009336 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009337 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009338 }
9339 }
9340 }
9341
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009342 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009343 TEST_REQUIRES_ARM_NEON;
9344 GemmMicrokernelTester()
9345 .mr(4)
9346 .nr(8)
9347 .kr(2)
9348 .sr(4)
9349 .m(4)
9350 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009351 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009352 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009354 }
9355
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009356 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009357 TEST_REQUIRES_ARM_NEON;
9358 GemmMicrokernelTester()
9359 .mr(4)
9360 .nr(8)
9361 .kr(2)
9362 .sr(4)
9363 .m(4)
9364 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009365 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009366 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009367 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009368 }
9369
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009370 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2S4__NEON_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009371 TEST_REQUIRES_ARM_NEON;
9372 GemmMicrokernelTester()
9373 .mr(4)
9374 .nr(8)
9375 .kr(2)
9376 .sr(4)
9377 .m(4)
9378 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009379 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009380 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009381 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009382 }
9383#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9384
9385
9386#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009387 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009388 TEST_REQUIRES_ARM_NEON;
9389 GemmMicrokernelTester()
9390 .mr(2)
9391 .nr(16)
9392 .kr(2)
9393 .sr(4)
9394 .m(2)
9395 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009396 .k(16)
9397 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009398 }
9399
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009400 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009401 TEST_REQUIRES_ARM_NEON;
9402 GemmMicrokernelTester()
9403 .mr(2)
9404 .nr(16)
9405 .kr(2)
9406 .sr(4)
9407 .m(2)
9408 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009409 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009410 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009411 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009412 }
9413
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009414 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009415 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009416 for (uint32_t n = 1; n <= 16; n++) {
9417 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009418 GemmMicrokernelTester()
9419 .mr(2)
9420 .nr(16)
9421 .kr(2)
9422 .sr(4)
9423 .m(m)
9424 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009425 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009426 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009428 }
9429 }
9430 }
9431
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009432 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009433 TEST_REQUIRES_ARM_NEON;
9434 for (uint32_t m = 1; m <= 2; m++) {
9435 GemmMicrokernelTester()
9436 .mr(2)
9437 .nr(16)
9438 .kr(2)
9439 .sr(4)
9440 .m(m)
9441 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009442 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009443 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009444 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009445 }
9446 }
9447
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009448 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009449 TEST_REQUIRES_ARM_NEON;
9450 for (uint32_t n = 1; n <= 16; n++) {
9451 GemmMicrokernelTester()
9452 .mr(2)
9453 .nr(16)
9454 .kr(2)
9455 .sr(4)
9456 .m(2)
9457 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009458 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009459 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009460 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009461 }
9462 }
9463
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009464 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009465 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009466 for (size_t k = 1; k < 16; k++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009467 GemmMicrokernelTester()
9468 .mr(2)
9469 .nr(16)
9470 .kr(2)
9471 .sr(4)
9472 .m(2)
9473 .n(16)
9474 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009476 }
9477 }
9478
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009479 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_lt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009480 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009481 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009482 for (uint32_t n = 1; n <= 16; n++) {
9483 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009484 GemmMicrokernelTester()
9485 .mr(2)
9486 .nr(16)
9487 .kr(2)
9488 .sr(4)
9489 .m(m)
9490 .n(n)
9491 .k(k)
9492 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009493 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009494 }
9495 }
9496 }
9497 }
9498
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009499 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009500 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009501 for (size_t k = 17; k < 32; k++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009502 GemmMicrokernelTester()
9503 .mr(2)
9504 .nr(16)
9505 .kr(2)
9506 .sr(4)
9507 .m(2)
9508 .n(16)
9509 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009510 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009511 }
9512 }
9513
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009514 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009515 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009516 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009517 for (uint32_t n = 1; n <= 16; n++) {
9518 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009519 GemmMicrokernelTester()
9520 .mr(2)
9521 .nr(16)
9522 .kr(2)
9523 .sr(4)
9524 .m(m)
9525 .n(n)
9526 .k(k)
9527 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009528 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009529 }
9530 }
9531 }
9532 }
9533
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009534 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009535 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009536 for (size_t k = 32; k <= 160; k += 16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009537 GemmMicrokernelTester()
9538 .mr(2)
9539 .nr(16)
9540 .kr(2)
9541 .sr(4)
9542 .m(2)
9543 .n(16)
9544 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009545 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009546 }
9547 }
9548
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009549 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, k_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009550 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009551 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009552 for (uint32_t n = 1; n <= 16; n++) {
9553 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009554 GemmMicrokernelTester()
9555 .mr(2)
9556 .nr(16)
9557 .kr(2)
9558 .sr(4)
9559 .m(m)
9560 .n(n)
9561 .k(k)
9562 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009563 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009564 }
9565 }
9566 }
9567 }
9568
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009569 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009570 TEST_REQUIRES_ARM_NEON;
9571 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009572 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009573 GemmMicrokernelTester()
9574 .mr(2)
9575 .nr(16)
9576 .kr(2)
9577 .sr(4)
9578 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009579 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009580 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009581 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009582 }
9583 }
9584 }
9585
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009586 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009587 TEST_REQUIRES_ARM_NEON;
9588 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009589 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009590 GemmMicrokernelTester()
9591 .mr(2)
9592 .nr(16)
9593 .kr(2)
9594 .sr(4)
9595 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009596 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009597 .k(k)
9598 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009599 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009600 }
9601 }
9602 }
9603
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009604 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009605 TEST_REQUIRES_ARM_NEON;
9606 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009607 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009608 for (uint32_t m = 1; m <= 2; m++) {
9609 GemmMicrokernelTester()
9610 .mr(2)
9611 .nr(16)
9612 .kr(2)
9613 .sr(4)
9614 .m(m)
9615 .n(n)
9616 .k(k)
9617 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009618 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009619 }
9620 }
9621 }
9622 }
9623
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009624 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009625 TEST_REQUIRES_ARM_NEON;
9626 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009627 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009628 GemmMicrokernelTester()
9629 .mr(2)
9630 .nr(16)
9631 .kr(2)
9632 .sr(4)
9633 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009634 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009635 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009636 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009637 }
9638 }
9639 }
9640
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009641 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009642 TEST_REQUIRES_ARM_NEON;
9643 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009644 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009645 GemmMicrokernelTester()
9646 .mr(2)
9647 .nr(16)
9648 .kr(2)
9649 .sr(4)
9650 .m(2)
9651 .n(n)
9652 .k(k)
9653 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009654 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009655 }
9656 }
9657 }
9658
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009659 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009660 TEST_REQUIRES_ARM_NEON;
9661 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009662 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009663 for (uint32_t m = 1; m <= 2; m++) {
9664 GemmMicrokernelTester()
9665 .mr(2)
9666 .nr(16)
9667 .kr(2)
9668 .sr(4)
9669 .m(m)
9670 .n(n)
9671 .k(k)
9672 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009674 }
9675 }
9676 }
9677 }
9678
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009679 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009680 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009681 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009682 GemmMicrokernelTester()
9683 .mr(2)
9684 .nr(16)
9685 .kr(2)
9686 .sr(4)
9687 .m(2)
9688 .n(16)
9689 .k(k)
9690 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009691 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009692 }
9693 }
9694
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009695 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, small_kernel_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009696 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009697 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009698 for (uint32_t n = 1; n <= 16; n++) {
9699 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009700 GemmMicrokernelTester()
9701 .mr(2)
9702 .nr(16)
9703 .kr(2)
9704 .sr(4)
9705 .m(m)
9706 .n(n)
9707 .k(k)
9708 .ks(3)
9709 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009710 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009711 }
9712 }
9713 }
9714 }
9715
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009716 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009717 TEST_REQUIRES_ARM_NEON;
9718 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009719 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009720 GemmMicrokernelTester()
9721 .mr(2)
9722 .nr(16)
9723 .kr(2)
9724 .sr(4)
9725 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009726 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009727 .k(k)
9728 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009729 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009730 }
9731 }
9732 }
9733
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009734 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009735 TEST_REQUIRES_ARM_NEON;
9736 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009737 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009738 GemmMicrokernelTester()
9739 .mr(2)
9740 .nr(16)
9741 .kr(2)
9742 .sr(4)
9743 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009744 .n(n)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009745 .k(k)
9746 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009748 }
9749 }
9750 }
9751
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009752 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm_subtile) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009753 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009754 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009755 for (uint32_t n = 1; n <= 16; n++) {
9756 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009757 GemmMicrokernelTester()
9758 .mr(2)
9759 .nr(16)
9760 .kr(2)
9761 .sr(4)
9762 .m(m)
9763 .n(n)
9764 .k(k)
9765 .cm_stride(19)
9766 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009768 }
9769 }
9770 }
9771 }
9772
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009773 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, a_offset) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009774 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009775 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009776 GemmMicrokernelTester()
9777 .mr(2)
9778 .nr(16)
9779 .kr(2)
9780 .sr(4)
9781 .m(2)
9782 .n(16)
9783 .k(k)
9784 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009785 .a_offset(163)
9786 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009787 }
9788 }
9789
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009790 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, zero) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009791 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009792 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009793 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009794 GemmMicrokernelTester()
9795 .mr(2)
9796 .nr(16)
9797 .kr(2)
9798 .sr(4)
9799 .m(2)
9800 .n(16)
9801 .k(k)
9802 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009803 .a_offset(163)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009804 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009806 }
9807 }
9808 }
9809
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009810 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmin) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009811 TEST_REQUIRES_ARM_NEON;
9812 GemmMicrokernelTester()
9813 .mr(2)
9814 .nr(16)
9815 .kr(2)
9816 .sr(4)
9817 .m(2)
9818 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009819 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009820 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009822 }
9823
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009824 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, qmax) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009825 TEST_REQUIRES_ARM_NEON;
9826 GemmMicrokernelTester()
9827 .mr(2)
9828 .nr(16)
9829 .kr(2)
9830 .sr(4)
9831 .m(2)
9832 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009833 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009834 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009835 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009836 }
9837
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009838 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2S4__NEON_MLAL, strided_cm) {
Frank Barchardc7a032d2021-11-10 12:37:49 -08009839 TEST_REQUIRES_ARM_NEON;
9840 GemmMicrokernelTester()
9841 .mr(2)
9842 .nr(16)
9843 .kr(2)
9844 .sr(4)
9845 .m(2)
9846 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009847 .k(16)
Frank Barchardc7a032d2021-11-10 12:37:49 -08009848 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009849 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchardc7a032d2021-11-10 12:37:49 -08009850 }
9851#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9852
9853
9854#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009855 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07009856 TEST_REQUIRES_ARM_NEON;
9857 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009858 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009859 .nr(16)
9860 .kr(2)
9861 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009862 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009863 .n(16)
9864 .k(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009865 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009866 }
9867
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009868 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -07009869 TEST_REQUIRES_ARM_NEON;
9870 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009871 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009872 .nr(16)
9873 .kr(2)
9874 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009875 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009876 .n(16)
9877 .k(16)
9878 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009880 }
9881
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009882 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07009883 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009884 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009885 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009886 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009887 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009888 .nr(16)
9889 .kr(2)
9890 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -07009891 .m(m)
9892 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009893 .k(16)
Frank Barchard287952a2021-11-03 15:26:45 -07009894 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009896 }
9897 }
9898 }
9899
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009900 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -07009901 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009902 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009903 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009904 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009905 .nr(16)
9906 .kr(2)
9907 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -07009908 .m(m)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009909 .n(16)
9910 .k(16)
Frank Barchard287952a2021-11-03 15:26:45 -07009911 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009912 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009913 }
9914 }
9915
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009916 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -07009917 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009918 for (uint32_t n = 1; n <= 16; n++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009919 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009920 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009921 .nr(16)
9922 .kr(2)
9923 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009924 .m(3)
Frank Barchard287952a2021-11-03 15:26:45 -07009925 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009926 .k(16)
Frank Barchard287952a2021-11-03 15:26:45 -07009927 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009928 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009929 }
9930 }
9931
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009932 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07009933 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009934 for (size_t k = 1; k < 16; k++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009935 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009936 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009937 .nr(16)
9938 .kr(2)
9939 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009940 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009941 .n(16)
Frank Barchard287952a2021-11-03 15:26:45 -07009942 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009943 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009944 }
9945 }
9946
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009947 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07009948 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009949 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009950 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009951 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009952 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009953 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009954 .nr(16)
9955 .kr(2)
9956 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -07009957 .m(m)
9958 .n(n)
9959 .k(k)
9960 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009961 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009962 }
9963 }
9964 }
9965 }
9966
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009967 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -07009968 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009969 for (size_t k = 17; k < 32; k++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009970 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009971 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009972 .nr(16)
9973 .kr(2)
9974 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009975 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009976 .n(16)
Frank Barchard287952a2021-11-03 15:26:45 -07009977 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009978 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009979 }
9980 }
9981
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009982 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -07009983 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009984 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009985 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009986 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -07009987 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009988 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -08009989 .nr(16)
9990 .kr(2)
9991 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -07009992 .m(m)
9993 .n(n)
9994 .k(k)
9995 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -08009996 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -07009997 }
9998 }
9999 }
10000 }
10001
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010002 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010003 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010004 for (size_t k = 32; k <= 160; k += 16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010005 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010006 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010007 .nr(16)
10008 .kr(2)
10009 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010010 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010011 .n(16)
Frank Barchard287952a2021-11-03 15:26:45 -070010012 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010013 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010014 }
10015 }
10016
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010017 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010018 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010019 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010020 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010021 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010022 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010023 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010024 .nr(16)
10025 .kr(2)
10026 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -070010027 .m(m)
10028 .n(n)
10029 .k(k)
10030 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010031 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010032 }
10033 }
10034 }
10035 }
10036
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010037 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010038 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010039 for (uint32_t n = 17; n < 32; n++) {
10040 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010041 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010042 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010043 .nr(16)
10044 .kr(2)
10045 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010046 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010047 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010048 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010049 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010050 }
10051 }
10052 }
10053
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010054 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010055 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010056 for (uint32_t n = 17; n < 32; n++) {
10057 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010058 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010059 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010060 .nr(16)
10061 .kr(2)
10062 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010063 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010064 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010065 .k(k)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010066 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010067 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010068 }
10069 }
10070 }
10071
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010072 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010073 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010074 for (uint32_t n = 17; n < 32; n++) {
10075 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010076 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010077 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010078 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010079 .nr(16)
10080 .kr(2)
10081 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -070010082 .m(m)
10083 .n(n)
10084 .k(k)
10085 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010086 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010087 }
10088 }
10089 }
10090 }
10091
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010092 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070010093 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010094 for (uint32_t n = 32; n <= 48; n += 16) {
10095 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010096 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010097 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010098 .nr(16)
10099 .kr(2)
10100 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010101 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010102 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010103 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010104 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010105 }
10106 }
10107 }
10108
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010109 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010110 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010111 for (uint32_t n = 32; n <= 48; n += 16) {
10112 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010113 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010114 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010115 .nr(16)
10116 .kr(2)
10117 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010118 .m(3)
Frank Barchard287952a2021-11-03 15:26:45 -070010119 .n(n)
10120 .k(k)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010121 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010122 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010123 }
10124 }
10125 }
10126
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010127 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010128 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010129 for (uint32_t n = 32; n <= 48; n += 16) {
10130 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010131 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010132 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010133 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010134 .nr(16)
10135 .kr(2)
10136 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -070010137 .m(m)
10138 .n(n)
10139 .k(k)
10140 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010142 }
10143 }
10144 }
10145 }
10146
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010147 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010148 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010149 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010150 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010151 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010152 .nr(16)
10153 .kr(2)
10154 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010155 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010156 .n(16)
Frank Barchard287952a2021-11-03 15:26:45 -070010157 .k(k)
10158 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010159 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010160 }
10161 }
10162
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010163 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010164 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010165 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010166 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010167 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010168 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010169 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010170 .nr(16)
10171 .kr(2)
10172 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -070010173 .m(m)
10174 .n(n)
10175 .k(k)
10176 .ks(3)
10177 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010178 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010179 }
10180 }
10181 }
10182 }
10183
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010184 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_gt_16_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010185 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010186 for (uint32_t n = 17; n < 32; n++) {
10187 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010188 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010189 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010190 .nr(16)
10191 .kr(2)
10192 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010193 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010194 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010195 .k(k)
10196 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010197 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010198 }
10199 }
10200 }
10201
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010202 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, n_div_16_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010203 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010204 for (uint32_t n = 32; n <= 48; n += 16) {
10205 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010206 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010207 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010208 .nr(16)
10209 .kr(2)
10210 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010211 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010212 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010213 .k(k)
10214 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010216 }
10217 }
10218 }
10219
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010220 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010221 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010222 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010223 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010224 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010225 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010226 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010227 .nr(16)
10228 .kr(2)
10229 .sr(4)
Frank Barchard287952a2021-11-03 15:26:45 -070010230 .m(m)
10231 .n(n)
10232 .k(k)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010233 .cm_stride(19)
Frank Barchard287952a2021-11-03 15:26:45 -070010234 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010236 }
10237 }
10238 }
10239 }
10240
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010241 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070010242 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010243 for (size_t k = 1; k <= 80; k += 17) {
Frank Barchard287952a2021-11-03 15:26:45 -070010244 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010245 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010246 .nr(16)
10247 .kr(2)
10248 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010249 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010250 .n(16)
Frank Barchard287952a2021-11-03 15:26:45 -070010251 .k(k)
10252 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010253 .a_offset(251)
10254 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010255 }
10256 }
10257
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010258 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070010259 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010260 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010261 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010262 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010263 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010264 .nr(16)
10265 .kr(2)
10266 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010267 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010268 .n(16)
Frank Barchard287952a2021-11-03 15:26:45 -070010269 .k(k)
10270 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010271 .a_offset(251)
Frank Barchard287952a2021-11-03 15:26:45 -070010272 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010273 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010274 }
10275 }
10276 }
10277
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010278 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070010279 TEST_REQUIRES_ARM_NEON;
10280 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010281 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010282 .nr(16)
10283 .kr(2)
10284 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010285 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010286 .n(16)
10287 .k(16)
Frank Barchard287952a2021-11-03 15:26:45 -070010288 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010290 }
10291
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010292 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070010293 TEST_REQUIRES_ARM_NEON;
10294 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010295 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010296 .nr(16)
10297 .kr(2)
10298 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010299 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010300 .n(16)
10301 .k(16)
Frank Barchard287952a2021-11-03 15:26:45 -070010302 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010303 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010304 }
10305
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010306 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C2S4__NEON_MLAL, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070010307 TEST_REQUIRES_ARM_NEON;
10308 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010309 .mr(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010310 .nr(16)
10311 .kr(2)
10312 .sr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010313 .m(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080010314 .n(16)
10315 .k(16)
10316 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010317 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c2s4__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010318 }
10319#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10320
10321
10322#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010323 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010324 TEST_REQUIRES_ARM_NEON;
10325 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010326 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010327 .nr(8)
10328 .kr(4)
10329 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010330 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010331 .n(8)
10332 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010333 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010334 }
10335
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010336 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010337 TEST_REQUIRES_ARM_NEON;
10338 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010339 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010340 .nr(8)
10341 .kr(4)
10342 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010343 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010344 .n(8)
10345 .k(8)
10346 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010348 }
10349
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010350 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010351 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010352 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010353 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010354 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010355 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010356 .nr(8)
10357 .kr(4)
10358 .sr(1)
10359 .m(m)
10360 .n(n)
10361 .k(8)
10362 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010364 }
10365 }
10366 }
10367
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010368 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -070010369 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010370 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010371 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010372 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010373 .nr(8)
10374 .kr(4)
10375 .sr(1)
10376 .m(m)
10377 .n(8)
10378 .k(8)
10379 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010380 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010381 }
10382 }
10383
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010384 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -070010385 TEST_REQUIRES_ARM_NEON;
10386 for (uint32_t n = 1; n <= 8; n++) {
10387 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010388 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010389 .nr(8)
10390 .kr(4)
10391 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010392 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010393 .n(n)
10394 .k(8)
10395 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010396 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010397 }
10398 }
10399
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010400 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010401 TEST_REQUIRES_ARM_NEON;
10402 for (size_t k = 1; k < 8; k++) {
10403 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010404 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010405 .nr(8)
10406 .kr(4)
10407 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010408 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010409 .n(8)
10410 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010411 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010412 }
10413 }
10414
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010415 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010416 TEST_REQUIRES_ARM_NEON;
10417 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010418 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010419 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010420 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010421 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010422 .nr(8)
10423 .kr(4)
10424 .sr(1)
10425 .m(m)
10426 .n(n)
10427 .k(k)
10428 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010429 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010430 }
10431 }
10432 }
10433 }
10434
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010435 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010436 TEST_REQUIRES_ARM_NEON;
10437 for (size_t k = 9; k < 16; k++) {
10438 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010439 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010440 .nr(8)
10441 .kr(4)
10442 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010443 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010444 .n(8)
10445 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010447 }
10448 }
10449
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010450 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010451 TEST_REQUIRES_ARM_NEON;
10452 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010453 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010454 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010455 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010456 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010457 .nr(8)
10458 .kr(4)
10459 .sr(1)
10460 .m(m)
10461 .n(n)
10462 .k(k)
10463 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010464 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010465 }
10466 }
10467 }
10468 }
10469
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010470 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010471 TEST_REQUIRES_ARM_NEON;
10472 for (size_t k = 16; k <= 80; k += 8) {
10473 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010474 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010475 .nr(8)
10476 .kr(4)
10477 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010478 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010479 .n(8)
10480 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010482 }
10483 }
10484
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010485 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, k_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010486 TEST_REQUIRES_ARM_NEON;
10487 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010488 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010489 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010490 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010491 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010492 .nr(8)
10493 .kr(4)
10494 .sr(1)
10495 .m(m)
10496 .n(n)
10497 .k(k)
10498 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010499 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010500 }
10501 }
10502 }
10503 }
10504
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010505 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010506 TEST_REQUIRES_ARM_NEON;
10507 for (uint32_t n = 9; n < 16; n++) {
10508 for (size_t k = 1; k <= 40; k += 9) {
10509 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010510 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010511 .nr(8)
10512 .kr(4)
10513 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010514 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010515 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010516 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010517 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010518 }
10519 }
10520 }
10521
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010522 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010523 TEST_REQUIRES_ARM_NEON;
10524 for (uint32_t n = 9; n < 16; n++) {
10525 for (size_t k = 1; k <= 40; k += 9) {
10526 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010527 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010528 .nr(8)
10529 .kr(4)
10530 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010531 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010532 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010533 .k(k)
10534 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010535 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010536 }
10537 }
10538 }
10539
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010540 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010541 TEST_REQUIRES_ARM_NEON;
10542 for (uint32_t n = 9; n < 16; n++) {
10543 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010544 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010545 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010546 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010547 .nr(8)
10548 .kr(4)
10549 .sr(1)
10550 .m(m)
10551 .n(n)
10552 .k(k)
10553 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010554 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010555 }
10556 }
10557 }
10558 }
10559
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010560 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010561 TEST_REQUIRES_ARM_NEON;
10562 for (uint32_t n = 16; n <= 24; n += 8) {
10563 for (size_t k = 1; k <= 40; k += 9) {
10564 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010565 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010566 .nr(8)
10567 .kr(4)
10568 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010569 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010570 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010571 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010572 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010573 }
10574 }
10575 }
10576
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010577 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010578 TEST_REQUIRES_ARM_NEON;
10579 for (uint32_t n = 16; n <= 24; n += 8) {
10580 for (size_t k = 1; k <= 40; k += 9) {
10581 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010582 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010583 .nr(8)
10584 .kr(4)
10585 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010586 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010587 .n(n)
10588 .k(k)
10589 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010590 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010591 }
10592 }
10593 }
10594
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010595 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010596 TEST_REQUIRES_ARM_NEON;
10597 for (uint32_t n = 16; n <= 24; n += 8) {
10598 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010599 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010600 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010601 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010602 .nr(8)
10603 .kr(4)
10604 .sr(1)
10605 .m(m)
10606 .n(n)
10607 .k(k)
10608 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010610 }
10611 }
10612 }
10613 }
10614
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010615 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010616 TEST_REQUIRES_ARM_NEON;
10617 for (size_t k = 1; k <= 40; k += 9) {
10618 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010619 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010620 .nr(8)
10621 .kr(4)
10622 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010623 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010624 .n(8)
10625 .k(k)
10626 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010627 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010628 }
10629 }
10630
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010631 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010632 TEST_REQUIRES_ARM_NEON;
10633 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010634 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010635 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010636 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010637 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010638 .nr(8)
10639 .kr(4)
10640 .sr(1)
10641 .m(m)
10642 .n(n)
10643 .k(k)
10644 .ks(3)
10645 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010646 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010647 }
10648 }
10649 }
10650 }
10651
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010652 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010653 TEST_REQUIRES_ARM_NEON;
10654 for (uint32_t n = 9; n < 16; n++) {
10655 for (size_t k = 1; k <= 40; k += 9) {
10656 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010657 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010658 .nr(8)
10659 .kr(4)
10660 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010661 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010662 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010663 .k(k)
10664 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010665 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010666 }
10667 }
10668 }
10669
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010670 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070010671 TEST_REQUIRES_ARM_NEON;
10672 for (uint32_t n = 16; n <= 24; n += 8) {
10673 for (size_t k = 1; k <= 40; k += 9) {
10674 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010675 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010676 .nr(8)
10677 .kr(4)
10678 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010679 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010680 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010681 .k(k)
10682 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010684 }
10685 }
10686 }
10687
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010688 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010689 TEST_REQUIRES_ARM_NEON;
10690 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010691 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010692 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010693 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010694 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010695 .nr(8)
10696 .kr(4)
10697 .sr(1)
10698 .m(m)
10699 .n(n)
10700 .k(k)
10701 .cm_stride(11)
10702 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010703 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010704 }
10705 }
10706 }
10707 }
10708
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010709 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070010710 TEST_REQUIRES_ARM_NEON;
10711 for (size_t k = 1; k <= 40; k += 9) {
10712 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010713 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010714 .nr(8)
10715 .kr(4)
10716 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010717 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010718 .n(8)
10719 .k(k)
10720 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010721 .a_offset(43)
10722 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010723 }
10724 }
10725
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010726 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070010727 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010728 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010729 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010730 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010731 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010732 .nr(8)
10733 .kr(4)
10734 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010735 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010736 .n(8)
10737 .k(k)
10738 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010739 .a_offset(43)
Frank Barchard287952a2021-11-03 15:26:45 -070010740 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010741 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010742 }
10743 }
10744 }
10745
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010746 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070010747 TEST_REQUIRES_ARM_NEON;
10748 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010749 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010750 .nr(8)
10751 .kr(4)
10752 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010753 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010754 .n(8)
10755 .k(8)
10756 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010758 }
10759
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010760 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070010761 TEST_REQUIRES_ARM_NEON;
10762 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010763 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010764 .nr(8)
10765 .kr(4)
10766 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010767 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010768 .n(8)
10769 .k(8)
10770 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010771 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010772 }
10773
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010774 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070010775 TEST_REQUIRES_ARM_NEON;
10776 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010777 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010778 .nr(8)
10779 .kr(4)
10780 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010781 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070010782 .n(8)
10783 .k(8)
10784 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080010785 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010786 }
10787#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10788
10789
10790#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080010791 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010792 TEST_REQUIRES_ARM_NEON;
10793 GemmMicrokernelTester()
10794 .mr(3)
10795 .nr(8)
10796 .kr(4)
10797 .sr(1)
10798 .m(3)
10799 .n(8)
10800 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010801 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010802 }
10803
Frank Barcharde22685a2021-11-12 11:36:58 -080010804 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010805 TEST_REQUIRES_ARM_NEON;
10806 GemmMicrokernelTester()
10807 .mr(3)
10808 .nr(8)
10809 .kr(4)
10810 .sr(1)
10811 .m(3)
10812 .n(8)
10813 .k(8)
10814 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010815 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010816 }
10817
Frank Barcharde22685a2021-11-12 11:36:58 -080010818 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010819 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010820 for (uint32_t n = 1; n <= 8; n++) {
10821 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010822 GemmMicrokernelTester()
10823 .mr(3)
10824 .nr(8)
10825 .kr(4)
10826 .sr(1)
10827 .m(m)
10828 .n(n)
10829 .k(8)
10830 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010832 }
10833 }
10834 }
10835
Frank Barcharde22685a2021-11-12 11:36:58 -080010836 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -070010837 TEST_REQUIRES_ARM_NEON;
10838 for (uint32_t m = 1; m <= 3; m++) {
10839 GemmMicrokernelTester()
10840 .mr(3)
10841 .nr(8)
10842 .kr(4)
10843 .sr(1)
10844 .m(m)
10845 .n(8)
10846 .k(8)
10847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010848 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010849 }
10850 }
10851
Frank Barcharde22685a2021-11-12 11:36:58 -080010852 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -070010853 TEST_REQUIRES_ARM_NEON;
10854 for (uint32_t n = 1; n <= 8; n++) {
10855 GemmMicrokernelTester()
10856 .mr(3)
10857 .nr(8)
10858 .kr(4)
10859 .sr(1)
10860 .m(3)
10861 .n(n)
10862 .k(8)
10863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010864 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010865 }
10866 }
10867
Frank Barcharde22685a2021-11-12 11:36:58 -080010868 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_lt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010869 TEST_REQUIRES_ARM_NEON;
10870 for (size_t k = 1; k < 8; k++) {
10871 GemmMicrokernelTester()
10872 .mr(3)
10873 .nr(8)
10874 .kr(4)
10875 .sr(1)
10876 .m(3)
10877 .n(8)
10878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010879 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010880 }
10881 }
10882
Frank Barcharde22685a2021-11-12 11:36:58 -080010883 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_lt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010884 TEST_REQUIRES_ARM_NEON;
10885 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010886 for (uint32_t n = 1; n <= 8; n++) {
10887 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010888 GemmMicrokernelTester()
10889 .mr(3)
10890 .nr(8)
10891 .kr(4)
10892 .sr(1)
10893 .m(m)
10894 .n(n)
10895 .k(k)
10896 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010897 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010898 }
10899 }
10900 }
10901 }
10902
Frank Barcharde22685a2021-11-12 11:36:58 -080010903 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010904 TEST_REQUIRES_ARM_NEON;
10905 for (size_t k = 9; k < 16; k++) {
10906 GemmMicrokernelTester()
10907 .mr(3)
10908 .nr(8)
10909 .kr(4)
10910 .sr(1)
10911 .m(3)
10912 .n(8)
10913 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010915 }
10916 }
10917
Frank Barcharde22685a2021-11-12 11:36:58 -080010918 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010919 TEST_REQUIRES_ARM_NEON;
10920 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010921 for (uint32_t n = 1; n <= 8; n++) {
10922 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010923 GemmMicrokernelTester()
10924 .mr(3)
10925 .nr(8)
10926 .kr(4)
10927 .sr(1)
10928 .m(m)
10929 .n(n)
10930 .k(k)
10931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010932 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010933 }
10934 }
10935 }
10936 }
10937
Frank Barcharde22685a2021-11-12 11:36:58 -080010938 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010939 TEST_REQUIRES_ARM_NEON;
10940 for (size_t k = 16; k <= 80; k += 8) {
10941 GemmMicrokernelTester()
10942 .mr(3)
10943 .nr(8)
10944 .kr(4)
10945 .sr(1)
10946 .m(3)
10947 .n(8)
10948 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010950 }
10951 }
10952
Frank Barcharde22685a2021-11-12 11:36:58 -080010953 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, k_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070010954 TEST_REQUIRES_ARM_NEON;
10955 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010956 for (uint32_t n = 1; n <= 8; n++) {
10957 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070010958 GemmMicrokernelTester()
10959 .mr(3)
10960 .nr(8)
10961 .kr(4)
10962 .sr(1)
10963 .m(m)
10964 .n(n)
10965 .k(k)
10966 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010967 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010968 }
10969 }
10970 }
10971 }
10972
Frank Barcharde22685a2021-11-12 11:36:58 -080010973 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070010974 TEST_REQUIRES_ARM_NEON;
10975 for (uint32_t n = 9; n < 16; n++) {
10976 for (size_t k = 1; k <= 40; k += 9) {
10977 GemmMicrokernelTester()
10978 .mr(3)
10979 .nr(8)
10980 .kr(4)
10981 .sr(1)
10982 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010983 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070010984 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010985 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070010986 }
10987 }
10988 }
10989
Frank Barcharde22685a2021-11-12 11:36:58 -080010990 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070010991 TEST_REQUIRES_ARM_NEON;
10992 for (uint32_t n = 9; n < 16; n++) {
10993 for (size_t k = 1; k <= 40; k += 9) {
10994 GemmMicrokernelTester()
10995 .mr(3)
10996 .nr(8)
10997 .kr(4)
10998 .sr(1)
10999 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011000 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011001 .k(k)
11002 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011003 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011004 }
11005 }
11006 }
11007
Frank Barcharde22685a2021-11-12 11:36:58 -080011008 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011009 TEST_REQUIRES_ARM_NEON;
11010 for (uint32_t n = 9; n < 16; n++) {
11011 for (size_t k = 1; k <= 40; k += 9) {
11012 for (uint32_t m = 1; m <= 3; m++) {
11013 GemmMicrokernelTester()
11014 .mr(3)
11015 .nr(8)
11016 .kr(4)
11017 .sr(1)
11018 .m(m)
11019 .n(n)
11020 .k(k)
11021 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011022 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011023 }
11024 }
11025 }
11026 }
11027
Frank Barcharde22685a2021-11-12 11:36:58 -080011028 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011029 TEST_REQUIRES_ARM_NEON;
11030 for (uint32_t n = 16; n <= 24; n += 8) {
11031 for (size_t k = 1; k <= 40; k += 9) {
11032 GemmMicrokernelTester()
11033 .mr(3)
11034 .nr(8)
11035 .kr(4)
11036 .sr(1)
11037 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011038 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011039 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011040 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011041 }
11042 }
11043 }
11044
Frank Barcharde22685a2021-11-12 11:36:58 -080011045 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011046 TEST_REQUIRES_ARM_NEON;
11047 for (uint32_t n = 16; n <= 24; n += 8) {
11048 for (size_t k = 1; k <= 40; k += 9) {
11049 GemmMicrokernelTester()
11050 .mr(3)
11051 .nr(8)
11052 .kr(4)
11053 .sr(1)
11054 .m(3)
11055 .n(n)
11056 .k(k)
11057 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011058 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011059 }
11060 }
11061 }
11062
Frank Barcharde22685a2021-11-12 11:36:58 -080011063 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011064 TEST_REQUIRES_ARM_NEON;
11065 for (uint32_t n = 16; n <= 24; n += 8) {
11066 for (size_t k = 1; k <= 40; k += 9) {
11067 for (uint32_t m = 1; m <= 3; m++) {
11068 GemmMicrokernelTester()
11069 .mr(3)
11070 .nr(8)
11071 .kr(4)
11072 .sr(1)
11073 .m(m)
11074 .n(n)
11075 .k(k)
11076 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011078 }
11079 }
11080 }
11081 }
11082
Frank Barcharde22685a2021-11-12 11:36:58 -080011083 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011084 TEST_REQUIRES_ARM_NEON;
11085 for (size_t k = 1; k <= 40; k += 9) {
11086 GemmMicrokernelTester()
11087 .mr(3)
11088 .nr(8)
11089 .kr(4)
11090 .sr(1)
11091 .m(3)
11092 .n(8)
11093 .k(k)
11094 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011095 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011096 }
11097 }
11098
Frank Barcharde22685a2021-11-12 11:36:58 -080011099 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011100 TEST_REQUIRES_ARM_NEON;
11101 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011102 for (uint32_t n = 1; n <= 8; n++) {
11103 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011104 GemmMicrokernelTester()
11105 .mr(3)
11106 .nr(8)
11107 .kr(4)
11108 .sr(1)
11109 .m(m)
11110 .n(n)
11111 .k(k)
11112 .ks(3)
11113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011114 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011115 }
11116 }
11117 }
11118 }
11119
Frank Barcharde22685a2021-11-12 11:36:58 -080011120 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011121 TEST_REQUIRES_ARM_NEON;
11122 for (uint32_t n = 9; n < 16; n++) {
11123 for (size_t k = 1; k <= 40; k += 9) {
11124 GemmMicrokernelTester()
11125 .mr(3)
11126 .nr(8)
11127 .kr(4)
11128 .sr(1)
11129 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011130 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011131 .k(k)
11132 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011133 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011134 }
11135 }
11136 }
11137
Frank Barcharde22685a2021-11-12 11:36:58 -080011138 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011139 TEST_REQUIRES_ARM_NEON;
11140 for (uint32_t n = 16; n <= 24; n += 8) {
11141 for (size_t k = 1; k <= 40; k += 9) {
11142 GemmMicrokernelTester()
11143 .mr(3)
11144 .nr(8)
11145 .kr(4)
11146 .sr(1)
11147 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011148 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011149 .k(k)
11150 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080011151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011152 }
11153 }
11154 }
11155
Frank Barcharde22685a2021-11-12 11:36:58 -080011156 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011157 TEST_REQUIRES_ARM_NEON;
11158 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011159 for (uint32_t n = 1; n <= 8; n++) {
11160 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011161 GemmMicrokernelTester()
11162 .mr(3)
11163 .nr(8)
11164 .kr(4)
11165 .sr(1)
11166 .m(m)
11167 .n(n)
11168 .k(k)
11169 .cm_stride(11)
11170 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011172 }
11173 }
11174 }
11175 }
11176
Frank Barcharde22685a2021-11-12 11:36:58 -080011177 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070011178 TEST_REQUIRES_ARM_NEON;
11179 for (size_t k = 1; k <= 40; k += 9) {
11180 GemmMicrokernelTester()
11181 .mr(3)
11182 .nr(8)
11183 .kr(4)
11184 .sr(1)
11185 .m(3)
11186 .n(8)
11187 .k(k)
11188 .ks(3)
11189 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -080011190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011191 }
11192 }
11193
Frank Barcharde22685a2021-11-12 11:36:58 -080011194 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070011195 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011196 for (size_t k = 1; k <= 40; k += 9) {
11197 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011198 GemmMicrokernelTester()
11199 .mr(3)
11200 .nr(8)
11201 .kr(4)
11202 .sr(1)
11203 .m(3)
11204 .n(8)
11205 .k(k)
11206 .ks(3)
11207 .a_offset(127)
11208 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080011209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011210 }
11211 }
11212 }
11213
Frank Barcharde22685a2021-11-12 11:36:58 -080011214 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070011215 TEST_REQUIRES_ARM_NEON;
11216 GemmMicrokernelTester()
11217 .mr(3)
11218 .nr(8)
11219 .kr(4)
11220 .sr(1)
11221 .m(3)
11222 .n(8)
11223 .k(8)
11224 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011226 }
11227
Frank Barcharde22685a2021-11-12 11:36:58 -080011228 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070011229 TEST_REQUIRES_ARM_NEON;
11230 GemmMicrokernelTester()
11231 .mr(3)
11232 .nr(8)
11233 .kr(4)
11234 .sr(1)
11235 .m(3)
11236 .n(8)
11237 .k(8)
11238 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011240 }
11241
Frank Barcharde22685a2021-11-12 11:36:58 -080011242 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070011243 TEST_REQUIRES_ARM_NEON;
11244 GemmMicrokernelTester()
11245 .mr(3)
11246 .nr(8)
11247 .kr(4)
11248 .sr(1)
11249 .m(3)
11250 .n(8)
11251 .k(8)
11252 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011253 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011254 }
11255#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11256
11257
11258#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011259 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011260 TEST_REQUIRES_ARM_NEON;
11261 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011262 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011263 .nr(16)
11264 .kr(4)
11265 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011266 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011267 .n(16)
11268 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011269 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011270 }
11271
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011272 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011273 TEST_REQUIRES_ARM_NEON;
11274 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011275 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011276 .nr(16)
11277 .kr(4)
11278 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011279 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011280 .n(16)
11281 .k(8)
11282 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011284 }
11285
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011286 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011287 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011288 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011289 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011290 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011291 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011292 .nr(16)
11293 .kr(4)
11294 .sr(1)
11295 .m(m)
11296 .n(n)
11297 .k(8)
11298 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011300 }
11301 }
11302 }
11303
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011304 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -070011305 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011306 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011307 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011308 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011309 .nr(16)
11310 .kr(4)
11311 .sr(1)
11312 .m(m)
11313 .n(16)
11314 .k(8)
11315 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011316 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011317 }
11318 }
11319
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011320 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -070011321 TEST_REQUIRES_ARM_NEON;
11322 for (uint32_t n = 1; n <= 16; n++) {
11323 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011324 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011325 .nr(16)
11326 .kr(4)
11327 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011328 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011329 .n(n)
11330 .k(8)
11331 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011332 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011333 }
11334 }
11335
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011336 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011337 TEST_REQUIRES_ARM_NEON;
11338 for (size_t k = 1; k < 8; k++) {
11339 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011340 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011341 .nr(16)
11342 .kr(4)
11343 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011344 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011345 .n(16)
11346 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011347 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011348 }
11349 }
11350
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011351 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011352 TEST_REQUIRES_ARM_NEON;
11353 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011354 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011355 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011356 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011357 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011358 .nr(16)
11359 .kr(4)
11360 .sr(1)
11361 .m(m)
11362 .n(n)
11363 .k(k)
11364 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011365 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011366 }
11367 }
11368 }
11369 }
11370
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011371 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011372 TEST_REQUIRES_ARM_NEON;
11373 for (size_t k = 9; k < 16; k++) {
11374 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011375 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011376 .nr(16)
11377 .kr(4)
11378 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011379 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011380 .n(16)
11381 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011383 }
11384 }
11385
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011386 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011387 TEST_REQUIRES_ARM_NEON;
11388 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011389 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011390 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011391 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011392 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011393 .nr(16)
11394 .kr(4)
11395 .sr(1)
11396 .m(m)
11397 .n(n)
11398 .k(k)
11399 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011400 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011401 }
11402 }
11403 }
11404 }
11405
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011406 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011407 TEST_REQUIRES_ARM_NEON;
11408 for (size_t k = 16; k <= 80; k += 8) {
11409 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011410 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011411 .nr(16)
11412 .kr(4)
11413 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011414 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011415 .n(16)
11416 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011418 }
11419 }
11420
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011421 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011422 TEST_REQUIRES_ARM_NEON;
11423 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011424 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011425 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011426 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011427 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011428 .nr(16)
11429 .kr(4)
11430 .sr(1)
11431 .m(m)
11432 .n(n)
11433 .k(k)
11434 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011435 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011436 }
11437 }
11438 }
11439 }
11440
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011441 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070011442 TEST_REQUIRES_ARM_NEON;
11443 for (uint32_t n = 17; n < 32; n++) {
11444 for (size_t k = 1; k <= 40; k += 9) {
11445 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011446 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011447 .nr(16)
11448 .kr(4)
11449 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011450 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011451 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011452 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011453 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011454 }
11455 }
11456 }
11457
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011458 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011459 TEST_REQUIRES_ARM_NEON;
11460 for (uint32_t n = 17; n < 32; n++) {
11461 for (size_t k = 1; k <= 40; k += 9) {
11462 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011463 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011464 .nr(16)
11465 .kr(4)
11466 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011467 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011468 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011469 .k(k)
11470 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011471 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011472 }
11473 }
11474 }
11475
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011476 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011477 TEST_REQUIRES_ARM_NEON;
11478 for (uint32_t n = 17; n < 32; n++) {
11479 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011480 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011481 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011482 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011483 .nr(16)
11484 .kr(4)
11485 .sr(1)
11486 .m(m)
11487 .n(n)
11488 .k(k)
11489 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011490 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011491 }
11492 }
11493 }
11494 }
11495
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011496 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070011497 TEST_REQUIRES_ARM_NEON;
11498 for (uint32_t n = 32; n <= 48; n += 16) {
11499 for (size_t k = 1; k <= 40; k += 9) {
11500 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011501 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011502 .nr(16)
11503 .kr(4)
11504 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011505 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011506 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011507 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011508 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011509 }
11510 }
11511 }
11512
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011513 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011514 TEST_REQUIRES_ARM_NEON;
11515 for (uint32_t n = 32; n <= 48; n += 16) {
11516 for (size_t k = 1; k <= 40; k += 9) {
11517 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011518 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011519 .nr(16)
11520 .kr(4)
11521 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011522 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011523 .n(n)
11524 .k(k)
11525 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011526 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011527 }
11528 }
11529 }
11530
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011531 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011532 TEST_REQUIRES_ARM_NEON;
11533 for (uint32_t n = 32; n <= 48; n += 16) {
11534 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011535 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011536 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011537 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011538 .nr(16)
11539 .kr(4)
11540 .sr(1)
11541 .m(m)
11542 .n(n)
11543 .k(k)
11544 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011545 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011546 }
11547 }
11548 }
11549 }
11550
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011551 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011552 TEST_REQUIRES_ARM_NEON;
11553 for (size_t k = 1; k <= 40; k += 9) {
11554 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011555 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011556 .nr(16)
11557 .kr(4)
11558 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011559 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011560 .n(16)
11561 .k(k)
11562 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011563 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011564 }
11565 }
11566
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011567 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011568 TEST_REQUIRES_ARM_NEON;
11569 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011570 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011571 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011572 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011573 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011574 .nr(16)
11575 .kr(4)
11576 .sr(1)
11577 .m(m)
11578 .n(n)
11579 .k(k)
11580 .ks(3)
11581 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011582 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011583 }
11584 }
11585 }
11586 }
11587
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011588 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011589 TEST_REQUIRES_ARM_NEON;
11590 for (uint32_t n = 17; n < 32; n++) {
11591 for (size_t k = 1; k <= 40; k += 9) {
11592 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011593 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011594 .nr(16)
11595 .kr(4)
11596 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011597 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011598 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011599 .k(k)
11600 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011601 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011602 }
11603 }
11604 }
11605
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011606 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070011607 TEST_REQUIRES_ARM_NEON;
11608 for (uint32_t n = 32; n <= 48; n += 16) {
11609 for (size_t k = 1; k <= 40; k += 9) {
11610 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011611 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011612 .nr(16)
11613 .kr(4)
11614 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011615 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011616 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011617 .k(k)
11618 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011620 }
11621 }
11622 }
11623
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011624 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011625 TEST_REQUIRES_ARM_NEON;
11626 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011627 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011628 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011629 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011630 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011631 .nr(16)
11632 .kr(4)
11633 .sr(1)
11634 .m(m)
11635 .n(n)
11636 .k(k)
11637 .cm_stride(19)
11638 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011640 }
11641 }
11642 }
11643 }
11644
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011645 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070011646 TEST_REQUIRES_ARM_NEON;
11647 for (size_t k = 1; k <= 40; k += 9) {
11648 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011649 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011650 .nr(16)
11651 .kr(4)
11652 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011653 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011654 .n(16)
11655 .k(k)
11656 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011657 .a_offset(43)
11658 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011659 }
11660 }
11661
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011662 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070011663 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011664 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011665 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011666 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011667 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011668 .nr(16)
11669 .kr(4)
11670 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011671 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011672 .n(16)
11673 .k(k)
11674 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011675 .a_offset(43)
Frank Barchard287952a2021-11-03 15:26:45 -070011676 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011677 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011678 }
11679 }
11680 }
11681
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011682 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070011683 TEST_REQUIRES_ARM_NEON;
11684 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011685 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011686 .nr(16)
11687 .kr(4)
11688 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011689 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011690 .n(16)
11691 .k(8)
11692 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011694 }
11695
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011696 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070011697 TEST_REQUIRES_ARM_NEON;
11698 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011699 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011700 .nr(16)
11701 .kr(4)
11702 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011703 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011704 .n(16)
11705 .k(8)
11706 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011708 }
11709
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011710 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070011711 TEST_REQUIRES_ARM_NEON;
11712 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011713 .mr(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011714 .nr(16)
11715 .kr(4)
11716 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011717 .m(1)
Frank Barchard287952a2021-11-03 15:26:45 -070011718 .n(16)
11719 .k(8)
11720 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080011721 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011722 }
11723#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11724
11725
11726#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080011727 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070011728 TEST_REQUIRES_ARM_NEON;
11729 GemmMicrokernelTester()
11730 .mr(4)
11731 .nr(8)
11732 .kr(4)
11733 .sr(1)
11734 .m(4)
11735 .n(8)
11736 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080011737 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011738 }
11739
Frank Barcharde22685a2021-11-12 11:36:58 -080011740 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011741 TEST_REQUIRES_ARM_NEON;
11742 GemmMicrokernelTester()
11743 .mr(4)
11744 .nr(8)
11745 .kr(4)
11746 .sr(1)
11747 .m(4)
11748 .n(8)
11749 .k(16)
11750 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011751 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011752 }
11753
Frank Barcharde22685a2021-11-12 11:36:58 -080011754 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011755 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011756 for (uint32_t n = 1; n <= 8; n++) {
11757 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011758 GemmMicrokernelTester()
11759 .mr(4)
11760 .nr(8)
11761 .kr(4)
11762 .sr(1)
11763 .m(m)
11764 .n(n)
11765 .k(16)
11766 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011768 }
11769 }
11770 }
11771
Frank Barcharde22685a2021-11-12 11:36:58 -080011772 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
Frank Barchard287952a2021-11-03 15:26:45 -070011773 TEST_REQUIRES_ARM_NEON;
11774 for (uint32_t m = 1; m <= 4; m++) {
11775 GemmMicrokernelTester()
11776 .mr(4)
11777 .nr(8)
11778 .kr(4)
11779 .sr(1)
11780 .m(m)
11781 .n(8)
11782 .k(16)
11783 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011784 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011785 }
11786 }
11787
Frank Barcharde22685a2021-11-12 11:36:58 -080011788 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
Frank Barchard287952a2021-11-03 15:26:45 -070011789 TEST_REQUIRES_ARM_NEON;
11790 for (uint32_t n = 1; n <= 8; n++) {
11791 GemmMicrokernelTester()
11792 .mr(4)
11793 .nr(8)
11794 .kr(4)
11795 .sr(1)
11796 .m(4)
11797 .n(n)
11798 .k(16)
11799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011800 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011801 }
11802 }
11803
Frank Barcharde22685a2021-11-12 11:36:58 -080011804 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070011805 TEST_REQUIRES_ARM_NEON;
11806 for (size_t k = 1; k < 16; k++) {
11807 GemmMicrokernelTester()
11808 .mr(4)
11809 .nr(8)
11810 .kr(4)
11811 .sr(1)
11812 .m(4)
11813 .n(8)
11814 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011815 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011816 }
11817 }
11818
Frank Barcharde22685a2021-11-12 11:36:58 -080011819 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011820 TEST_REQUIRES_ARM_NEON;
11821 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011822 for (uint32_t n = 1; n <= 8; n++) {
11823 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011824 GemmMicrokernelTester()
11825 .mr(4)
11826 .nr(8)
11827 .kr(4)
11828 .sr(1)
11829 .m(m)
11830 .n(n)
11831 .k(k)
11832 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011833 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011834 }
11835 }
11836 }
11837 }
11838
Frank Barcharde22685a2021-11-12 11:36:58 -080011839 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070011840 TEST_REQUIRES_ARM_NEON;
11841 for (size_t k = 17; k < 32; k++) {
11842 GemmMicrokernelTester()
11843 .mr(4)
11844 .nr(8)
11845 .kr(4)
11846 .sr(1)
11847 .m(4)
11848 .n(8)
11849 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011850 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011851 }
11852 }
11853
Frank Barcharde22685a2021-11-12 11:36:58 -080011854 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011855 TEST_REQUIRES_ARM_NEON;
11856 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011857 for (uint32_t n = 1; n <= 8; n++) {
11858 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011859 GemmMicrokernelTester()
11860 .mr(4)
11861 .nr(8)
11862 .kr(4)
11863 .sr(1)
11864 .m(m)
11865 .n(n)
11866 .k(k)
11867 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011868 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011869 }
11870 }
11871 }
11872 }
11873
Frank Barcharde22685a2021-11-12 11:36:58 -080011874 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16) {
Frank Barchard287952a2021-11-03 15:26:45 -070011875 TEST_REQUIRES_ARM_NEON;
11876 for (size_t k = 32; k <= 160; k += 16) {
11877 GemmMicrokernelTester()
11878 .mr(4)
11879 .nr(8)
11880 .kr(4)
11881 .sr(1)
11882 .m(4)
11883 .n(8)
11884 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011885 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011886 }
11887 }
11888
Frank Barcharde22685a2021-11-12 11:36:58 -080011889 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011890 TEST_REQUIRES_ARM_NEON;
11891 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011892 for (uint32_t n = 1; n <= 8; n++) {
11893 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070011894 GemmMicrokernelTester()
11895 .mr(4)
11896 .nr(8)
11897 .kr(4)
11898 .sr(1)
11899 .m(m)
11900 .n(n)
11901 .k(k)
11902 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011903 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011904 }
11905 }
11906 }
11907 }
11908
Frank Barcharde22685a2021-11-12 11:36:58 -080011909 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011910 TEST_REQUIRES_ARM_NEON;
11911 for (uint32_t n = 9; n < 16; n++) {
11912 for (size_t k = 1; k <= 80; k += 17) {
11913 GemmMicrokernelTester()
11914 .mr(4)
11915 .nr(8)
11916 .kr(4)
11917 .sr(1)
11918 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011919 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011920 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011921 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011922 }
11923 }
11924 }
11925
Frank Barcharde22685a2021-11-12 11:36:58 -080011926 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011927 TEST_REQUIRES_ARM_NEON;
11928 for (uint32_t n = 9; n < 16; n++) {
11929 for (size_t k = 1; k <= 80; k += 17) {
11930 GemmMicrokernelTester()
11931 .mr(4)
11932 .nr(8)
11933 .kr(4)
11934 .sr(1)
11935 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011936 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011937 .k(k)
11938 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011939 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011940 }
11941 }
11942 }
11943
Frank Barcharde22685a2021-11-12 11:36:58 -080011944 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070011945 TEST_REQUIRES_ARM_NEON;
11946 for (uint32_t n = 9; n < 16; n++) {
11947 for (size_t k = 1; k <= 80; k += 17) {
11948 for (uint32_t m = 1; m <= 4; m++) {
11949 GemmMicrokernelTester()
11950 .mr(4)
11951 .nr(8)
11952 .kr(4)
11953 .sr(1)
11954 .m(m)
11955 .n(n)
11956 .k(k)
11957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011958 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011959 }
11960 }
11961 }
11962 }
11963
Frank Barcharde22685a2021-11-12 11:36:58 -080011964 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070011965 TEST_REQUIRES_ARM_NEON;
11966 for (uint32_t n = 16; n <= 24; n += 8) {
11967 for (size_t k = 1; k <= 80; k += 17) {
11968 GemmMicrokernelTester()
11969 .mr(4)
11970 .nr(8)
11971 .kr(4)
11972 .sr(1)
11973 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011974 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070011975 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011976 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011977 }
11978 }
11979 }
11980
Frank Barcharde22685a2021-11-12 11:36:58 -080011981 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
Frank Barchard287952a2021-11-03 15:26:45 -070011982 TEST_REQUIRES_ARM_NEON;
11983 for (uint32_t n = 16; n <= 24; n += 8) {
11984 for (size_t k = 1; k <= 80; k += 17) {
11985 GemmMicrokernelTester()
11986 .mr(4)
11987 .nr(8)
11988 .kr(4)
11989 .sr(1)
11990 .m(4)
11991 .n(n)
11992 .k(k)
11993 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011994 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070011995 }
11996 }
11997 }
11998
Frank Barcharde22685a2021-11-12 11:36:58 -080011999 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070012000 TEST_REQUIRES_ARM_NEON;
12001 for (uint32_t n = 16; n <= 24; n += 8) {
12002 for (size_t k = 1; k <= 80; k += 17) {
12003 for (uint32_t m = 1; m <= 4; m++) {
12004 GemmMicrokernelTester()
12005 .mr(4)
12006 .nr(8)
12007 .kr(4)
12008 .sr(1)
12009 .m(m)
12010 .n(n)
12011 .k(k)
12012 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012013 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012014 }
12015 }
12016 }
12017 }
12018
Frank Barcharde22685a2021-11-12 11:36:58 -080012019 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070012020 TEST_REQUIRES_ARM_NEON;
12021 for (size_t k = 1; k <= 80; k += 17) {
12022 GemmMicrokernelTester()
12023 .mr(4)
12024 .nr(8)
12025 .kr(4)
12026 .sr(1)
12027 .m(4)
12028 .n(8)
12029 .k(k)
12030 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012031 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012032 }
12033 }
12034
Frank Barcharde22685a2021-11-12 11:36:58 -080012035 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, small_kernel_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070012036 TEST_REQUIRES_ARM_NEON;
12037 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012038 for (uint32_t n = 1; n <= 8; n++) {
12039 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070012040 GemmMicrokernelTester()
12041 .mr(4)
12042 .nr(8)
12043 .kr(4)
12044 .sr(1)
12045 .m(m)
12046 .n(n)
12047 .k(k)
12048 .ks(3)
12049 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012050 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012051 }
12052 }
12053 }
12054 }
12055
Frank Barcharde22685a2021-11-12 11:36:58 -080012056 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_gt_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070012057 TEST_REQUIRES_ARM_NEON;
12058 for (uint32_t n = 9; n < 16; n++) {
12059 for (size_t k = 1; k <= 80; k += 17) {
12060 GemmMicrokernelTester()
12061 .mr(4)
12062 .nr(8)
12063 .kr(4)
12064 .sr(1)
12065 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012066 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070012067 .k(k)
12068 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012069 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012070 }
12071 }
12072 }
12073
Frank Barcharde22685a2021-11-12 11:36:58 -080012074 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, n_div_8_small_kernel) {
Frank Barchard287952a2021-11-03 15:26:45 -070012075 TEST_REQUIRES_ARM_NEON;
12076 for (uint32_t n = 16; n <= 24; n += 8) {
12077 for (size_t k = 1; k <= 80; k += 17) {
12078 GemmMicrokernelTester()
12079 .mr(4)
12080 .nr(8)
12081 .kr(4)
12082 .sr(1)
12083 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012084 .n(n)
Frank Barchard287952a2021-11-03 15:26:45 -070012085 .k(k)
12086 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080012087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012088 }
12089 }
12090 }
12091
Frank Barcharde22685a2021-11-12 11:36:58 -080012092 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
Frank Barchard287952a2021-11-03 15:26:45 -070012093 TEST_REQUIRES_ARM_NEON;
12094 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012095 for (uint32_t n = 1; n <= 8; n++) {
12096 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard287952a2021-11-03 15:26:45 -070012097 GemmMicrokernelTester()
12098 .mr(4)
12099 .nr(8)
12100 .kr(4)
12101 .sr(1)
12102 .m(m)
12103 .n(n)
12104 .k(k)
12105 .cm_stride(11)
12106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012108 }
12109 }
12110 }
12111 }
12112
Frank Barcharde22685a2021-11-12 11:36:58 -080012113 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, a_offset) {
Frank Barchard287952a2021-11-03 15:26:45 -070012114 TEST_REQUIRES_ARM_NEON;
12115 for (size_t k = 1; k <= 80; k += 17) {
12116 GemmMicrokernelTester()
12117 .mr(4)
12118 .nr(8)
12119 .kr(4)
12120 .sr(1)
12121 .m(4)
12122 .n(8)
12123 .k(k)
12124 .ks(3)
12125 .a_offset(331)
Marat Dukhan50323b82022-01-11 00:12:01 -080012126 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012127 }
12128 }
12129
Frank Barcharde22685a2021-11-12 11:36:58 -080012130 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, zero) {
Frank Barchard287952a2021-11-03 15:26:45 -070012131 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012132 for (size_t k = 1; k <= 80; k += 17) {
12133 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard287952a2021-11-03 15:26:45 -070012134 GemmMicrokernelTester()
12135 .mr(4)
12136 .nr(8)
12137 .kr(4)
12138 .sr(1)
12139 .m(4)
12140 .n(8)
12141 .k(k)
12142 .ks(3)
12143 .a_offset(331)
12144 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080012145 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012146 }
12147 }
12148 }
12149
Frank Barcharde22685a2021-11-12 11:36:58 -080012150 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmin) {
Frank Barchard287952a2021-11-03 15:26:45 -070012151 TEST_REQUIRES_ARM_NEON;
12152 GemmMicrokernelTester()
12153 .mr(4)
12154 .nr(8)
12155 .kr(4)
12156 .sr(1)
12157 .m(4)
12158 .n(8)
12159 .k(16)
12160 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012162 }
12163
Frank Barcharde22685a2021-11-12 11:36:58 -080012164 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, qmax) {
Frank Barchard287952a2021-11-03 15:26:45 -070012165 TEST_REQUIRES_ARM_NEON;
12166 GemmMicrokernelTester()
12167 .mr(4)
12168 .nr(8)
12169 .kr(4)
12170 .sr(1)
12171 .m(4)
12172 .n(8)
12173 .k(16)
12174 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012175 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012176 }
12177
Frank Barcharde22685a2021-11-12 11:36:58 -080012178 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C4__NEON_MLAL_DUP, strided_cm) {
Frank Barchard287952a2021-11-03 15:26:45 -070012179 TEST_REQUIRES_ARM_NEON;
12180 GemmMicrokernelTester()
12181 .mr(4)
12182 .nr(8)
12183 .kr(4)
12184 .sr(1)
12185 .m(4)
12186 .n(8)
12187 .k(16)
12188 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012189 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard287952a2021-11-03 15:26:45 -070012190 }
12191#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12192
12193
12194#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080012195 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16) {
12196 TEST_REQUIRES_ARM_NEON;
12197 GemmMicrokernelTester()
12198 .mr(2)
12199 .nr(16)
12200 .kr(4)
12201 .sr(1)
12202 .m(2)
12203 .n(16)
12204 .k(16)
12205 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12206 }
12207
12208 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cn) {
12209 TEST_REQUIRES_ARM_NEON;
12210 GemmMicrokernelTester()
12211 .mr(2)
12212 .nr(16)
12213 .kr(4)
12214 .sr(1)
12215 .m(2)
12216 .n(16)
12217 .k(16)
12218 .cn_stride(19)
12219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12220 }
12221
12222 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile) {
12223 TEST_REQUIRES_ARM_NEON;
12224 for (uint32_t n = 1; n <= 16; n++) {
12225 for (uint32_t m = 1; m <= 2; m++) {
12226 GemmMicrokernelTester()
12227 .mr(2)
12228 .nr(16)
12229 .kr(4)
12230 .sr(1)
12231 .m(m)
12232 .n(n)
12233 .k(16)
12234 .iterations(1)
12235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12236 }
12237 }
12238 }
12239
12240 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
12241 TEST_REQUIRES_ARM_NEON;
12242 for (uint32_t m = 1; m <= 2; m++) {
12243 GemmMicrokernelTester()
12244 .mr(2)
12245 .nr(16)
12246 .kr(4)
12247 .sr(1)
12248 .m(m)
12249 .n(16)
12250 .k(16)
12251 .iterations(1)
12252 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12253 }
12254 }
12255
12256 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
12257 TEST_REQUIRES_ARM_NEON;
12258 for (uint32_t n = 1; n <= 16; n++) {
12259 GemmMicrokernelTester()
12260 .mr(2)
12261 .nr(16)
12262 .kr(4)
12263 .sr(1)
12264 .m(2)
12265 .n(n)
12266 .k(16)
12267 .iterations(1)
12268 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12269 }
12270 }
12271
12272 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16) {
12273 TEST_REQUIRES_ARM_NEON;
12274 for (size_t k = 1; k < 16; k++) {
12275 GemmMicrokernelTester()
12276 .mr(2)
12277 .nr(16)
12278 .kr(4)
12279 .sr(1)
12280 .m(2)
12281 .n(16)
12282 .k(k)
12283 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12284 }
12285 }
12286
12287 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_lt_16_subtile) {
12288 TEST_REQUIRES_ARM_NEON;
12289 for (size_t k = 1; k < 16; k++) {
12290 for (uint32_t n = 1; n <= 16; n++) {
12291 for (uint32_t m = 1; m <= 2; m++) {
12292 GemmMicrokernelTester()
12293 .mr(2)
12294 .nr(16)
12295 .kr(4)
12296 .sr(1)
12297 .m(m)
12298 .n(n)
12299 .k(k)
12300 .iterations(1)
12301 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12302 }
12303 }
12304 }
12305 }
12306
12307 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16) {
12308 TEST_REQUIRES_ARM_NEON;
12309 for (size_t k = 17; k < 32; k++) {
12310 GemmMicrokernelTester()
12311 .mr(2)
12312 .nr(16)
12313 .kr(4)
12314 .sr(1)
12315 .m(2)
12316 .n(16)
12317 .k(k)
12318 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12319 }
12320 }
12321
12322 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_gt_16_subtile) {
12323 TEST_REQUIRES_ARM_NEON;
12324 for (size_t k = 17; k < 32; k++) {
12325 for (uint32_t n = 1; n <= 16; n++) {
12326 for (uint32_t m = 1; m <= 2; m++) {
12327 GemmMicrokernelTester()
12328 .mr(2)
12329 .nr(16)
12330 .kr(4)
12331 .sr(1)
12332 .m(m)
12333 .n(n)
12334 .k(k)
12335 .iterations(1)
12336 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12337 }
12338 }
12339 }
12340 }
12341
12342 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16) {
12343 TEST_REQUIRES_ARM_NEON;
12344 for (size_t k = 32; k <= 160; k += 16) {
12345 GemmMicrokernelTester()
12346 .mr(2)
12347 .nr(16)
12348 .kr(4)
12349 .sr(1)
12350 .m(2)
12351 .n(16)
12352 .k(k)
12353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12354 }
12355 }
12356
12357 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, k_div_16_subtile) {
12358 TEST_REQUIRES_ARM_NEON;
12359 for (size_t k = 32; k <= 160; k += 16) {
12360 for (uint32_t n = 1; n <= 16; n++) {
12361 for (uint32_t m = 1; m <= 2; m++) {
12362 GemmMicrokernelTester()
12363 .mr(2)
12364 .nr(16)
12365 .kr(4)
12366 .sr(1)
12367 .m(m)
12368 .n(n)
12369 .k(k)
12370 .iterations(1)
12371 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12372 }
12373 }
12374 }
12375 }
12376
12377 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16) {
12378 TEST_REQUIRES_ARM_NEON;
12379 for (uint32_t n = 17; n < 32; n++) {
12380 for (size_t k = 1; k <= 80; k += 17) {
12381 GemmMicrokernelTester()
12382 .mr(2)
12383 .nr(16)
12384 .kr(4)
12385 .sr(1)
12386 .m(2)
12387 .n(n)
12388 .k(k)
12389 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12390 }
12391 }
12392 }
12393
12394 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_strided_cn) {
12395 TEST_REQUIRES_ARM_NEON;
12396 for (uint32_t n = 17; n < 32; n++) {
12397 for (size_t k = 1; k <= 80; k += 17) {
12398 GemmMicrokernelTester()
12399 .mr(2)
12400 .nr(16)
12401 .kr(4)
12402 .sr(1)
12403 .m(2)
12404 .n(n)
12405 .k(k)
12406 .cn_stride(19)
12407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12408 }
12409 }
12410 }
12411
12412 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_subtile) {
12413 TEST_REQUIRES_ARM_NEON;
12414 for (uint32_t n = 17; n < 32; n++) {
12415 for (size_t k = 1; k <= 80; k += 17) {
12416 for (uint32_t m = 1; m <= 2; m++) {
12417 GemmMicrokernelTester()
12418 .mr(2)
12419 .nr(16)
12420 .kr(4)
12421 .sr(1)
12422 .m(m)
12423 .n(n)
12424 .k(k)
12425 .iterations(1)
12426 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12427 }
12428 }
12429 }
12430 }
12431
12432 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16) {
12433 TEST_REQUIRES_ARM_NEON;
12434 for (uint32_t n = 32; n <= 48; n += 16) {
12435 for (size_t k = 1; k <= 80; k += 17) {
12436 GemmMicrokernelTester()
12437 .mr(2)
12438 .nr(16)
12439 .kr(4)
12440 .sr(1)
12441 .m(2)
12442 .n(n)
12443 .k(k)
12444 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12445 }
12446 }
12447 }
12448
12449 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_strided_cn) {
12450 TEST_REQUIRES_ARM_NEON;
12451 for (uint32_t n = 32; n <= 48; n += 16) {
12452 for (size_t k = 1; k <= 80; k += 17) {
12453 GemmMicrokernelTester()
12454 .mr(2)
12455 .nr(16)
12456 .kr(4)
12457 .sr(1)
12458 .m(2)
12459 .n(n)
12460 .k(k)
12461 .cn_stride(19)
12462 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12463 }
12464 }
12465 }
12466
12467 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_subtile) {
12468 TEST_REQUIRES_ARM_NEON;
12469 for (uint32_t n = 32; n <= 48; n += 16) {
12470 for (size_t k = 1; k <= 80; k += 17) {
12471 for (uint32_t m = 1; m <= 2; m++) {
12472 GemmMicrokernelTester()
12473 .mr(2)
12474 .nr(16)
12475 .kr(4)
12476 .sr(1)
12477 .m(m)
12478 .n(n)
12479 .k(k)
12480 .iterations(1)
12481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12482 }
12483 }
12484 }
12485 }
12486
12487 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel) {
12488 TEST_REQUIRES_ARM_NEON;
12489 for (size_t k = 1; k <= 80; k += 17) {
12490 GemmMicrokernelTester()
12491 .mr(2)
12492 .nr(16)
12493 .kr(4)
12494 .sr(1)
12495 .m(2)
12496 .n(16)
12497 .k(k)
12498 .ks(3)
12499 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12500 }
12501 }
12502
12503 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, small_kernel_subtile) {
12504 TEST_REQUIRES_ARM_NEON;
12505 for (size_t k = 1; k <= 80; k += 17) {
12506 for (uint32_t n = 1; n <= 16; n++) {
12507 for (uint32_t m = 1; m <= 2; m++) {
12508 GemmMicrokernelTester()
12509 .mr(2)
12510 .nr(16)
12511 .kr(4)
12512 .sr(1)
12513 .m(m)
12514 .n(n)
12515 .k(k)
12516 .ks(3)
12517 .iterations(1)
12518 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12519 }
12520 }
12521 }
12522 }
12523
12524 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_gt_16_small_kernel) {
12525 TEST_REQUIRES_ARM_NEON;
12526 for (uint32_t n = 17; n < 32; n++) {
12527 for (size_t k = 1; k <= 80; k += 17) {
12528 GemmMicrokernelTester()
12529 .mr(2)
12530 .nr(16)
12531 .kr(4)
12532 .sr(1)
12533 .m(2)
12534 .n(n)
12535 .k(k)
12536 .ks(3)
12537 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12538 }
12539 }
12540 }
12541
12542 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, n_div_16_small_kernel) {
12543 TEST_REQUIRES_ARM_NEON;
12544 for (uint32_t n = 32; n <= 48; n += 16) {
12545 for (size_t k = 1; k <= 80; k += 17) {
12546 GemmMicrokernelTester()
12547 .mr(2)
12548 .nr(16)
12549 .kr(4)
12550 .sr(1)
12551 .m(2)
12552 .n(n)
12553 .k(k)
12554 .ks(3)
12555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12556 }
12557 }
12558 }
12559
12560 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm_subtile) {
12561 TEST_REQUIRES_ARM_NEON;
12562 for (size_t k = 1; k <= 80; k += 17) {
12563 for (uint32_t n = 1; n <= 16; n++) {
12564 for (uint32_t m = 1; m <= 2; m++) {
12565 GemmMicrokernelTester()
12566 .mr(2)
12567 .nr(16)
12568 .kr(4)
12569 .sr(1)
12570 .m(m)
12571 .n(n)
12572 .k(k)
12573 .cm_stride(19)
12574 .iterations(1)
12575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12576 }
12577 }
12578 }
12579 }
12580
12581 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, a_offset) {
12582 TEST_REQUIRES_ARM_NEON;
12583 for (size_t k = 1; k <= 80; k += 17) {
12584 GemmMicrokernelTester()
12585 .mr(2)
12586 .nr(16)
12587 .kr(4)
12588 .sr(1)
12589 .m(2)
12590 .n(16)
12591 .k(k)
12592 .ks(3)
12593 .a_offset(163)
12594 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12595 }
12596 }
12597
12598 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, zero) {
12599 TEST_REQUIRES_ARM_NEON;
12600 for (size_t k = 1; k <= 80; k += 17) {
12601 for (uint32_t mz = 0; mz < 2; mz++) {
12602 GemmMicrokernelTester()
12603 .mr(2)
12604 .nr(16)
12605 .kr(4)
12606 .sr(1)
12607 .m(2)
12608 .n(16)
12609 .k(k)
12610 .ks(3)
12611 .a_offset(163)
12612 .zero_index(mz)
12613 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12614 }
12615 }
12616 }
12617
12618 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmin) {
12619 TEST_REQUIRES_ARM_NEON;
12620 GemmMicrokernelTester()
12621 .mr(2)
12622 .nr(16)
12623 .kr(4)
12624 .sr(1)
12625 .m(2)
12626 .n(16)
12627 .k(16)
12628 .qmin(128)
12629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12630 }
12631
12632 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, qmax) {
12633 TEST_REQUIRES_ARM_NEON;
12634 GemmMicrokernelTester()
12635 .mr(2)
12636 .nr(16)
12637 .kr(4)
12638 .sr(1)
12639 .m(2)
12640 .n(16)
12641 .k(16)
12642 .qmax(128)
12643 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12644 }
12645
12646 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_DUP, strided_cm) {
12647 TEST_REQUIRES_ARM_NEON;
12648 GemmMicrokernelTester()
12649 .mr(2)
12650 .nr(16)
12651 .kr(4)
12652 .sr(1)
12653 .m(2)
12654 .n(16)
12655 .k(16)
12656 .cm_stride(19)
12657 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12658 }
12659#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12660
12661
12662#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12663 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8) {
12664 TEST_REQUIRES_ARM_NEON;
12665 GemmMicrokernelTester()
12666 .mr(2)
12667 .nr(8)
12668 .kr(4)
12669 .sr(1)
12670 .m(2)
12671 .n(8)
12672 .k(8)
12673 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12674 }
12675
12676 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cn) {
12677 TEST_REQUIRES_ARM_NEON;
12678 GemmMicrokernelTester()
12679 .mr(2)
12680 .nr(8)
12681 .kr(4)
12682 .sr(1)
12683 .m(2)
12684 .n(8)
12685 .k(8)
12686 .cn_stride(11)
12687 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12688 }
12689
12690 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
12691 TEST_REQUIRES_ARM_NEON;
12692 for (uint32_t n = 1; n <= 8; n++) {
12693 for (uint32_t m = 1; m <= 2; m++) {
12694 GemmMicrokernelTester()
12695 .mr(2)
12696 .nr(8)
12697 .kr(4)
12698 .sr(1)
12699 .m(m)
12700 .n(n)
12701 .k(8)
12702 .iterations(1)
12703 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12704 }
12705 }
12706 }
12707
12708 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
12709 TEST_REQUIRES_ARM_NEON;
12710 for (uint32_t m = 1; m <= 2; m++) {
12711 GemmMicrokernelTester()
12712 .mr(2)
12713 .nr(8)
12714 .kr(4)
12715 .sr(1)
12716 .m(m)
12717 .n(8)
12718 .k(8)
12719 .iterations(1)
12720 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12721 }
12722 }
12723
12724 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
12725 TEST_REQUIRES_ARM_NEON;
12726 for (uint32_t n = 1; n <= 8; n++) {
12727 GemmMicrokernelTester()
12728 .mr(2)
12729 .nr(8)
12730 .kr(4)
12731 .sr(1)
12732 .m(2)
12733 .n(n)
12734 .k(8)
12735 .iterations(1)
12736 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12737 }
12738 }
12739
12740 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8) {
12741 TEST_REQUIRES_ARM_NEON;
12742 for (size_t k = 1; k < 8; k++) {
12743 GemmMicrokernelTester()
12744 .mr(2)
12745 .nr(8)
12746 .kr(4)
12747 .sr(1)
12748 .m(2)
12749 .n(8)
12750 .k(k)
12751 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12752 }
12753 }
12754
12755 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
12756 TEST_REQUIRES_ARM_NEON;
12757 for (size_t k = 1; k < 8; k++) {
12758 for (uint32_t n = 1; n <= 8; n++) {
12759 for (uint32_t m = 1; m <= 2; m++) {
12760 GemmMicrokernelTester()
12761 .mr(2)
12762 .nr(8)
12763 .kr(4)
12764 .sr(1)
12765 .m(m)
12766 .n(n)
12767 .k(k)
12768 .iterations(1)
12769 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12770 }
12771 }
12772 }
12773 }
12774
12775 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8) {
12776 TEST_REQUIRES_ARM_NEON;
12777 for (size_t k = 9; k < 16; k++) {
12778 GemmMicrokernelTester()
12779 .mr(2)
12780 .nr(8)
12781 .kr(4)
12782 .sr(1)
12783 .m(2)
12784 .n(8)
12785 .k(k)
12786 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12787 }
12788 }
12789
12790 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
12791 TEST_REQUIRES_ARM_NEON;
12792 for (size_t k = 9; k < 16; k++) {
12793 for (uint32_t n = 1; n <= 8; n++) {
12794 for (uint32_t m = 1; m <= 2; m++) {
12795 GemmMicrokernelTester()
12796 .mr(2)
12797 .nr(8)
12798 .kr(4)
12799 .sr(1)
12800 .m(m)
12801 .n(n)
12802 .k(k)
12803 .iterations(1)
12804 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12805 }
12806 }
12807 }
12808 }
12809
12810 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8) {
12811 TEST_REQUIRES_ARM_NEON;
12812 for (size_t k = 16; k <= 80; k += 8) {
12813 GemmMicrokernelTester()
12814 .mr(2)
12815 .nr(8)
12816 .kr(4)
12817 .sr(1)
12818 .m(2)
12819 .n(8)
12820 .k(k)
12821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12822 }
12823 }
12824
12825 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
12826 TEST_REQUIRES_ARM_NEON;
12827 for (size_t k = 16; k <= 80; k += 8) {
12828 for (uint32_t n = 1; n <= 8; n++) {
12829 for (uint32_t m = 1; m <= 2; m++) {
12830 GemmMicrokernelTester()
12831 .mr(2)
12832 .nr(8)
12833 .kr(4)
12834 .sr(1)
12835 .m(m)
12836 .n(n)
12837 .k(k)
12838 .iterations(1)
12839 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12840 }
12841 }
12842 }
12843 }
12844
12845 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8) {
12846 TEST_REQUIRES_ARM_NEON;
12847 for (uint32_t n = 9; n < 16; n++) {
12848 for (size_t k = 1; k <= 40; k += 9) {
12849 GemmMicrokernelTester()
12850 .mr(2)
12851 .nr(8)
12852 .kr(4)
12853 .sr(1)
12854 .m(2)
12855 .n(n)
12856 .k(k)
12857 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12858 }
12859 }
12860 }
12861
12862 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
12863 TEST_REQUIRES_ARM_NEON;
12864 for (uint32_t n = 9; n < 16; n++) {
12865 for (size_t k = 1; k <= 40; k += 9) {
12866 GemmMicrokernelTester()
12867 .mr(2)
12868 .nr(8)
12869 .kr(4)
12870 .sr(1)
12871 .m(2)
12872 .n(n)
12873 .k(k)
12874 .cn_stride(11)
12875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12876 }
12877 }
12878 }
12879
12880 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
12881 TEST_REQUIRES_ARM_NEON;
12882 for (uint32_t n = 9; n < 16; n++) {
12883 for (size_t k = 1; k <= 40; k += 9) {
12884 for (uint32_t m = 1; m <= 2; m++) {
12885 GemmMicrokernelTester()
12886 .mr(2)
12887 .nr(8)
12888 .kr(4)
12889 .sr(1)
12890 .m(m)
12891 .n(n)
12892 .k(k)
12893 .iterations(1)
12894 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12895 }
12896 }
12897 }
12898 }
12899
12900 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8) {
12901 TEST_REQUIRES_ARM_NEON;
12902 for (uint32_t n = 16; n <= 24; n += 8) {
12903 for (size_t k = 1; k <= 40; k += 9) {
12904 GemmMicrokernelTester()
12905 .mr(2)
12906 .nr(8)
12907 .kr(4)
12908 .sr(1)
12909 .m(2)
12910 .n(n)
12911 .k(k)
12912 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12913 }
12914 }
12915 }
12916
12917 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
12918 TEST_REQUIRES_ARM_NEON;
12919 for (uint32_t n = 16; n <= 24; n += 8) {
12920 for (size_t k = 1; k <= 40; k += 9) {
12921 GemmMicrokernelTester()
12922 .mr(2)
12923 .nr(8)
12924 .kr(4)
12925 .sr(1)
12926 .m(2)
12927 .n(n)
12928 .k(k)
12929 .cn_stride(11)
12930 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12931 }
12932 }
12933 }
12934
12935 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
12936 TEST_REQUIRES_ARM_NEON;
12937 for (uint32_t n = 16; n <= 24; n += 8) {
12938 for (size_t k = 1; k <= 40; k += 9) {
12939 for (uint32_t m = 1; m <= 2; m++) {
12940 GemmMicrokernelTester()
12941 .mr(2)
12942 .nr(8)
12943 .kr(4)
12944 .sr(1)
12945 .m(m)
12946 .n(n)
12947 .k(k)
12948 .iterations(1)
12949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12950 }
12951 }
12952 }
12953 }
12954
12955 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel) {
12956 TEST_REQUIRES_ARM_NEON;
12957 for (size_t k = 1; k <= 40; k += 9) {
12958 GemmMicrokernelTester()
12959 .mr(2)
12960 .nr(8)
12961 .kr(4)
12962 .sr(1)
12963 .m(2)
12964 .n(8)
12965 .k(k)
12966 .ks(3)
12967 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12968 }
12969 }
12970
12971 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, small_kernel_subtile) {
12972 TEST_REQUIRES_ARM_NEON;
12973 for (size_t k = 1; k <= 40; k += 9) {
12974 for (uint32_t n = 1; n <= 8; n++) {
12975 for (uint32_t m = 1; m <= 2; m++) {
12976 GemmMicrokernelTester()
12977 .mr(2)
12978 .nr(8)
12979 .kr(4)
12980 .sr(1)
12981 .m(m)
12982 .n(n)
12983 .k(k)
12984 .ks(3)
12985 .iterations(1)
12986 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
12987 }
12988 }
12989 }
12990 }
12991
12992 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_small_kernel) {
12993 TEST_REQUIRES_ARM_NEON;
12994 for (uint32_t n = 9; n < 16; n++) {
12995 for (size_t k = 1; k <= 40; k += 9) {
12996 GemmMicrokernelTester()
12997 .mr(2)
12998 .nr(8)
12999 .kr(4)
13000 .sr(1)
13001 .m(2)
13002 .n(n)
13003 .k(k)
13004 .ks(3)
13005 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13006 }
13007 }
13008 }
13009
13010 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_small_kernel) {
13011 TEST_REQUIRES_ARM_NEON;
13012 for (uint32_t n = 16; n <= 24; n += 8) {
13013 for (size_t k = 1; k <= 40; k += 9) {
13014 GemmMicrokernelTester()
13015 .mr(2)
13016 .nr(8)
13017 .kr(4)
13018 .sr(1)
13019 .m(2)
13020 .n(n)
13021 .k(k)
13022 .ks(3)
13023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13024 }
13025 }
13026 }
13027
13028 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
13029 TEST_REQUIRES_ARM_NEON;
13030 for (size_t k = 1; k <= 40; k += 9) {
13031 for (uint32_t n = 1; n <= 8; n++) {
13032 for (uint32_t m = 1; m <= 2; m++) {
13033 GemmMicrokernelTester()
13034 .mr(2)
13035 .nr(8)
13036 .kr(4)
13037 .sr(1)
13038 .m(m)
13039 .n(n)
13040 .k(k)
13041 .cm_stride(11)
13042 .iterations(1)
13043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13044 }
13045 }
13046 }
13047 }
13048
13049 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, a_offset) {
13050 TEST_REQUIRES_ARM_NEON;
13051 for (size_t k = 1; k <= 40; k += 9) {
13052 GemmMicrokernelTester()
13053 .mr(2)
13054 .nr(8)
13055 .kr(4)
13056 .sr(1)
13057 .m(2)
13058 .n(8)
13059 .k(k)
13060 .ks(3)
13061 .a_offset(83)
13062 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13063 }
13064 }
13065
13066 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, zero) {
13067 TEST_REQUIRES_ARM_NEON;
13068 for (size_t k = 1; k <= 40; k += 9) {
13069 for (uint32_t mz = 0; mz < 2; mz++) {
13070 GemmMicrokernelTester()
13071 .mr(2)
13072 .nr(8)
13073 .kr(4)
13074 .sr(1)
13075 .m(2)
13076 .n(8)
13077 .k(k)
13078 .ks(3)
13079 .a_offset(83)
13080 .zero_index(mz)
13081 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13082 }
13083 }
13084 }
13085
13086 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmin) {
13087 TEST_REQUIRES_ARM_NEON;
13088 GemmMicrokernelTester()
13089 .mr(2)
13090 .nr(8)
13091 .kr(4)
13092 .sr(1)
13093 .m(2)
13094 .n(8)
13095 .k(8)
13096 .qmin(128)
13097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13098 }
13099
13100 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmax) {
13101 TEST_REQUIRES_ARM_NEON;
13102 GemmMicrokernelTester()
13103 .mr(2)
13104 .nr(8)
13105 .kr(4)
13106 .sr(1)
13107 .m(2)
13108 .n(8)
13109 .k(8)
13110 .qmax(128)
13111 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13112 }
13113
13114 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm) {
13115 TEST_REQUIRES_ARM_NEON;
13116 GemmMicrokernelTester()
13117 .mr(2)
13118 .nr(8)
13119 .kr(4)
13120 .sr(1)
13121 .m(2)
13122 .n(8)
13123 .k(8)
13124 .cm_stride(11)
13125 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13126 }
13127#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13128
13129
13130#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13131 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16) {
13132 TEST_REQUIRES_ARM_NEON;
13133 GemmMicrokernelTester()
13134 .mr(2)
13135 .nr(8)
13136 .kr(4)
13137 .sr(1)
13138 .m(2)
13139 .n(8)
13140 .k(16)
13141 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13142 }
13143
13144 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cn) {
13145 TEST_REQUIRES_ARM_NEON;
13146 GemmMicrokernelTester()
13147 .mr(2)
13148 .nr(8)
13149 .kr(4)
13150 .sr(1)
13151 .m(2)
13152 .n(8)
13153 .k(16)
13154 .cn_stride(11)
13155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13156 }
13157
13158 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile) {
13159 TEST_REQUIRES_ARM_NEON;
13160 for (uint32_t n = 1; n <= 8; n++) {
13161 for (uint32_t m = 1; m <= 2; m++) {
13162 GemmMicrokernelTester()
13163 .mr(2)
13164 .nr(8)
13165 .kr(4)
13166 .sr(1)
13167 .m(m)
13168 .n(n)
13169 .k(16)
13170 .iterations(1)
13171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13172 }
13173 }
13174 }
13175
13176 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
13177 TEST_REQUIRES_ARM_NEON;
13178 for (uint32_t m = 1; m <= 2; m++) {
13179 GemmMicrokernelTester()
13180 .mr(2)
13181 .nr(8)
13182 .kr(4)
13183 .sr(1)
13184 .m(m)
13185 .n(8)
13186 .k(16)
13187 .iterations(1)
13188 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13189 }
13190 }
13191
13192 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
13193 TEST_REQUIRES_ARM_NEON;
13194 for (uint32_t n = 1; n <= 8; n++) {
13195 GemmMicrokernelTester()
13196 .mr(2)
13197 .nr(8)
13198 .kr(4)
13199 .sr(1)
13200 .m(2)
13201 .n(n)
13202 .k(16)
13203 .iterations(1)
13204 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13205 }
13206 }
13207
13208 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16) {
13209 TEST_REQUIRES_ARM_NEON;
13210 for (size_t k = 1; k < 16; k++) {
13211 GemmMicrokernelTester()
13212 .mr(2)
13213 .nr(8)
13214 .kr(4)
13215 .sr(1)
13216 .m(2)
13217 .n(8)
13218 .k(k)
13219 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13220 }
13221 }
13222
13223 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_lt_16_subtile) {
13224 TEST_REQUIRES_ARM_NEON;
13225 for (size_t k = 1; k < 16; k++) {
13226 for (uint32_t n = 1; n <= 8; n++) {
13227 for (uint32_t m = 1; m <= 2; m++) {
13228 GemmMicrokernelTester()
13229 .mr(2)
13230 .nr(8)
13231 .kr(4)
13232 .sr(1)
13233 .m(m)
13234 .n(n)
13235 .k(k)
13236 .iterations(1)
13237 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13238 }
13239 }
13240 }
13241 }
13242
13243 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16) {
13244 TEST_REQUIRES_ARM_NEON;
13245 for (size_t k = 17; k < 32; k++) {
13246 GemmMicrokernelTester()
13247 .mr(2)
13248 .nr(8)
13249 .kr(4)
13250 .sr(1)
13251 .m(2)
13252 .n(8)
13253 .k(k)
13254 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13255 }
13256 }
13257
13258 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_gt_16_subtile) {
13259 TEST_REQUIRES_ARM_NEON;
13260 for (size_t k = 17; k < 32; k++) {
13261 for (uint32_t n = 1; n <= 8; n++) {
13262 for (uint32_t m = 1; m <= 2; m++) {
13263 GemmMicrokernelTester()
13264 .mr(2)
13265 .nr(8)
13266 .kr(4)
13267 .sr(1)
13268 .m(m)
13269 .n(n)
13270 .k(k)
13271 .iterations(1)
13272 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13273 }
13274 }
13275 }
13276 }
13277
13278 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16) {
13279 TEST_REQUIRES_ARM_NEON;
13280 for (size_t k = 32; k <= 160; k += 16) {
13281 GemmMicrokernelTester()
13282 .mr(2)
13283 .nr(8)
13284 .kr(4)
13285 .sr(1)
13286 .m(2)
13287 .n(8)
13288 .k(k)
13289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13290 }
13291 }
13292
13293 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, k_div_16_subtile) {
13294 TEST_REQUIRES_ARM_NEON;
13295 for (size_t k = 32; k <= 160; k += 16) {
13296 for (uint32_t n = 1; n <= 8; n++) {
13297 for (uint32_t m = 1; m <= 2; m++) {
13298 GemmMicrokernelTester()
13299 .mr(2)
13300 .nr(8)
13301 .kr(4)
13302 .sr(1)
13303 .m(m)
13304 .n(n)
13305 .k(k)
13306 .iterations(1)
13307 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13308 }
13309 }
13310 }
13311 }
13312
13313 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8) {
13314 TEST_REQUIRES_ARM_NEON;
13315 for (uint32_t n = 9; n < 16; n++) {
13316 for (size_t k = 1; k <= 80; k += 17) {
13317 GemmMicrokernelTester()
13318 .mr(2)
13319 .nr(8)
13320 .kr(4)
13321 .sr(1)
13322 .m(2)
13323 .n(n)
13324 .k(k)
13325 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13326 }
13327 }
13328 }
13329
13330 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_strided_cn) {
13331 TEST_REQUIRES_ARM_NEON;
13332 for (uint32_t n = 9; n < 16; n++) {
13333 for (size_t k = 1; k <= 80; k += 17) {
13334 GemmMicrokernelTester()
13335 .mr(2)
13336 .nr(8)
13337 .kr(4)
13338 .sr(1)
13339 .m(2)
13340 .n(n)
13341 .k(k)
13342 .cn_stride(11)
13343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13344 }
13345 }
13346 }
13347
13348 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_subtile) {
13349 TEST_REQUIRES_ARM_NEON;
13350 for (uint32_t n = 9; n < 16; n++) {
13351 for (size_t k = 1; k <= 80; k += 17) {
13352 for (uint32_t m = 1; m <= 2; m++) {
13353 GemmMicrokernelTester()
13354 .mr(2)
13355 .nr(8)
13356 .kr(4)
13357 .sr(1)
13358 .m(m)
13359 .n(n)
13360 .k(k)
13361 .iterations(1)
13362 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13363 }
13364 }
13365 }
13366 }
13367
13368 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8) {
13369 TEST_REQUIRES_ARM_NEON;
13370 for (uint32_t n = 16; n <= 24; n += 8) {
13371 for (size_t k = 1; k <= 80; k += 17) {
13372 GemmMicrokernelTester()
13373 .mr(2)
13374 .nr(8)
13375 .kr(4)
13376 .sr(1)
13377 .m(2)
13378 .n(n)
13379 .k(k)
13380 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13381 }
13382 }
13383 }
13384
13385 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_strided_cn) {
13386 TEST_REQUIRES_ARM_NEON;
13387 for (uint32_t n = 16; n <= 24; n += 8) {
13388 for (size_t k = 1; k <= 80; k += 17) {
13389 GemmMicrokernelTester()
13390 .mr(2)
13391 .nr(8)
13392 .kr(4)
13393 .sr(1)
13394 .m(2)
13395 .n(n)
13396 .k(k)
13397 .cn_stride(11)
13398 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13399 }
13400 }
13401 }
13402
13403 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_subtile) {
13404 TEST_REQUIRES_ARM_NEON;
13405 for (uint32_t n = 16; n <= 24; n += 8) {
13406 for (size_t k = 1; k <= 80; k += 17) {
13407 for (uint32_t m = 1; m <= 2; m++) {
13408 GemmMicrokernelTester()
13409 .mr(2)
13410 .nr(8)
13411 .kr(4)
13412 .sr(1)
13413 .m(m)
13414 .n(n)
13415 .k(k)
13416 .iterations(1)
13417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13418 }
13419 }
13420 }
13421 }
13422
13423 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel) {
13424 TEST_REQUIRES_ARM_NEON;
13425 for (size_t k = 1; k <= 80; k += 17) {
13426 GemmMicrokernelTester()
13427 .mr(2)
13428 .nr(8)
13429 .kr(4)
13430 .sr(1)
13431 .m(2)
13432 .n(8)
13433 .k(k)
13434 .ks(3)
13435 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13436 }
13437 }
13438
13439 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, small_kernel_subtile) {
13440 TEST_REQUIRES_ARM_NEON;
13441 for (size_t k = 1; k <= 80; k += 17) {
13442 for (uint32_t n = 1; n <= 8; n++) {
13443 for (uint32_t m = 1; m <= 2; m++) {
13444 GemmMicrokernelTester()
13445 .mr(2)
13446 .nr(8)
13447 .kr(4)
13448 .sr(1)
13449 .m(m)
13450 .n(n)
13451 .k(k)
13452 .ks(3)
13453 .iterations(1)
13454 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13455 }
13456 }
13457 }
13458 }
13459
13460 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_gt_8_small_kernel) {
13461 TEST_REQUIRES_ARM_NEON;
13462 for (uint32_t n = 9; n < 16; n++) {
13463 for (size_t k = 1; k <= 80; k += 17) {
13464 GemmMicrokernelTester()
13465 .mr(2)
13466 .nr(8)
13467 .kr(4)
13468 .sr(1)
13469 .m(2)
13470 .n(n)
13471 .k(k)
13472 .ks(3)
13473 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13474 }
13475 }
13476 }
13477
13478 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, n_div_8_small_kernel) {
13479 TEST_REQUIRES_ARM_NEON;
13480 for (uint32_t n = 16; n <= 24; n += 8) {
13481 for (size_t k = 1; k <= 80; k += 17) {
13482 GemmMicrokernelTester()
13483 .mr(2)
13484 .nr(8)
13485 .kr(4)
13486 .sr(1)
13487 .m(2)
13488 .n(n)
13489 .k(k)
13490 .ks(3)
13491 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13492 }
13493 }
13494 }
13495
13496 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm_subtile) {
13497 TEST_REQUIRES_ARM_NEON;
13498 for (size_t k = 1; k <= 80; k += 17) {
13499 for (uint32_t n = 1; n <= 8; n++) {
13500 for (uint32_t m = 1; m <= 2; m++) {
13501 GemmMicrokernelTester()
13502 .mr(2)
13503 .nr(8)
13504 .kr(4)
13505 .sr(1)
13506 .m(m)
13507 .n(n)
13508 .k(k)
13509 .cm_stride(11)
13510 .iterations(1)
13511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13512 }
13513 }
13514 }
13515 }
13516
13517 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, a_offset) {
13518 TEST_REQUIRES_ARM_NEON;
13519 for (size_t k = 1; k <= 80; k += 17) {
13520 GemmMicrokernelTester()
13521 .mr(2)
13522 .nr(8)
13523 .kr(4)
13524 .sr(1)
13525 .m(2)
13526 .n(8)
13527 .k(k)
13528 .ks(3)
13529 .a_offset(163)
13530 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13531 }
13532 }
13533
13534 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, zero) {
13535 TEST_REQUIRES_ARM_NEON;
13536 for (size_t k = 1; k <= 80; k += 17) {
13537 for (uint32_t mz = 0; mz < 2; mz++) {
13538 GemmMicrokernelTester()
13539 .mr(2)
13540 .nr(8)
13541 .kr(4)
13542 .sr(1)
13543 .m(2)
13544 .n(8)
13545 .k(k)
13546 .ks(3)
13547 .a_offset(163)
13548 .zero_index(mz)
13549 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13550 }
13551 }
13552 }
13553
13554 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmin) {
13555 TEST_REQUIRES_ARM_NEON;
13556 GemmMicrokernelTester()
13557 .mr(2)
13558 .nr(8)
13559 .kr(4)
13560 .sr(1)
13561 .m(2)
13562 .n(8)
13563 .k(16)
13564 .qmin(128)
13565 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13566 }
13567
13568 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, qmax) {
13569 TEST_REQUIRES_ARM_NEON;
13570 GemmMicrokernelTester()
13571 .mr(2)
13572 .nr(8)
13573 .kr(4)
13574 .sr(1)
13575 .m(2)
13576 .n(8)
13577 .k(16)
13578 .qmax(128)
13579 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13580 }
13581
13582 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C4__NEON_MLAL_LD1R, strided_cm) {
13583 TEST_REQUIRES_ARM_NEON;
13584 GemmMicrokernelTester()
13585 .mr(2)
13586 .nr(8)
13587 .kr(4)
13588 .sr(1)
13589 .m(2)
13590 .n(8)
13591 .k(16)
13592 .cm_stride(11)
13593 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c4__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
13594 }
13595#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13596
13597
13598#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13599 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8) {
Frank Barchard287952a2021-11-03 15:26:45 -070013600 TEST_REQUIRES_ARM_NEON;
13601 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013602 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013603 .nr(8)
13604 .kr(4)
13605 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013606 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013607 .n(8)
13608 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013609 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013610 }
13611
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013612 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013613 TEST_REQUIRES_ARM_NEON;
13614 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013615 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013616 .nr(8)
13617 .kr(4)
13618 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013619 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013620 .n(8)
13621 .k(8)
13622 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013623 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013624 }
13625
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013626 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013627 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013628 for (uint32_t n = 1; n <= 8; n++) {
13629 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013630 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013631 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013632 .nr(8)
13633 .kr(4)
13634 .sr(1)
13635 .m(m)
13636 .n(n)
13637 .k(8)
13638 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013640 }
13641 }
13642 }
13643
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013644 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013645 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013646 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013647 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013648 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013649 .nr(8)
13650 .kr(4)
13651 .sr(1)
13652 .m(m)
13653 .n(8)
13654 .k(8)
13655 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013656 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013657 }
13658 }
13659
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013660 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013661 TEST_REQUIRES_ARM_NEON;
13662 for (uint32_t n = 1; n <= 8; n++) {
13663 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013664 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013665 .nr(8)
13666 .kr(4)
13667 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013668 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013669 .n(n)
13670 .k(8)
13671 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013672 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013673 }
13674 }
13675
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013676 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013677 TEST_REQUIRES_ARM_NEON;
13678 for (size_t k = 1; k < 8; k++) {
13679 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013680 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013681 .nr(8)
13682 .kr(4)
13683 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013684 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013685 .n(8)
13686 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013687 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013688 }
13689 }
13690
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013691 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_lt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013692 TEST_REQUIRES_ARM_NEON;
13693 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013694 for (uint32_t n = 1; n <= 8; n++) {
13695 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013696 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013697 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013698 .nr(8)
13699 .kr(4)
13700 .sr(1)
13701 .m(m)
13702 .n(n)
13703 .k(k)
13704 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013705 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013706 }
13707 }
13708 }
13709 }
13710
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013711 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013712 TEST_REQUIRES_ARM_NEON;
13713 for (size_t k = 9; k < 16; k++) {
13714 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013715 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013716 .nr(8)
13717 .kr(4)
13718 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013719 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013720 .n(8)
13721 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013722 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013723 }
13724 }
13725
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013726 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_gt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013727 TEST_REQUIRES_ARM_NEON;
13728 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013729 for (uint32_t n = 1; n <= 8; n++) {
13730 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013731 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013732 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013733 .nr(8)
13734 .kr(4)
13735 .sr(1)
13736 .m(m)
13737 .n(n)
13738 .k(k)
13739 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013740 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013741 }
13742 }
13743 }
13744 }
13745
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013746 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013747 TEST_REQUIRES_ARM_NEON;
13748 for (size_t k = 16; k <= 80; k += 8) {
13749 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013750 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013751 .nr(8)
13752 .kr(4)
13753 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013754 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013755 .n(8)
13756 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013758 }
13759 }
13760
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013761 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, k_div_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013762 TEST_REQUIRES_ARM_NEON;
13763 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013764 for (uint32_t n = 1; n <= 8; n++) {
13765 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013766 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013767 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013768 .nr(8)
13769 .kr(4)
13770 .sr(1)
13771 .m(m)
13772 .n(n)
13773 .k(k)
13774 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013775 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013776 }
13777 }
13778 }
13779 }
13780
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013781 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013782 TEST_REQUIRES_ARM_NEON;
13783 for (uint32_t n = 9; n < 16; n++) {
13784 for (size_t k = 1; k <= 40; k += 9) {
13785 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013786 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013787 .nr(8)
13788 .kr(4)
13789 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013790 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013791 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013792 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013793 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013794 }
13795 }
13796 }
13797
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013798 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013799 TEST_REQUIRES_ARM_NEON;
13800 for (uint32_t n = 9; n < 16; n++) {
13801 for (size_t k = 1; k <= 40; k += 9) {
13802 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013803 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013804 .nr(8)
13805 .kr(4)
13806 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013807 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013808 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013809 .k(k)
13810 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013812 }
13813 }
13814 }
13815
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013816 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013817 TEST_REQUIRES_ARM_NEON;
13818 for (uint32_t n = 9; n < 16; n++) {
13819 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013820 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013821 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013822 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013823 .nr(8)
13824 .kr(4)
13825 .sr(1)
13826 .m(m)
13827 .n(n)
13828 .k(k)
13829 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013830 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013831 }
13832 }
13833 }
13834 }
13835
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013836 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013837 TEST_REQUIRES_ARM_NEON;
13838 for (uint32_t n = 16; n <= 24; n += 8) {
13839 for (size_t k = 1; k <= 40; k += 9) {
13840 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013841 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013842 .nr(8)
13843 .kr(4)
13844 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013845 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013846 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013847 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013848 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013849 }
13850 }
13851 }
13852
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013853 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013854 TEST_REQUIRES_ARM_NEON;
13855 for (uint32_t n = 16; n <= 24; n += 8) {
13856 for (size_t k = 1; k <= 40; k += 9) {
13857 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013858 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013859 .nr(8)
13860 .kr(4)
13861 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013862 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013863 .n(n)
13864 .k(k)
13865 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013866 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013867 }
13868 }
13869 }
13870
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013871 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013872 TEST_REQUIRES_ARM_NEON;
13873 for (uint32_t n = 16; n <= 24; n += 8) {
13874 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013875 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013876 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013877 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013878 .nr(8)
13879 .kr(4)
13880 .sr(1)
13881 .m(m)
13882 .n(n)
13883 .k(k)
13884 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013885 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013886 }
13887 }
13888 }
13889 }
13890
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013891 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013892 TEST_REQUIRES_ARM_NEON;
13893 for (size_t k = 1; k <= 40; k += 9) {
13894 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013895 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013896 .nr(8)
13897 .kr(4)
13898 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013899 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013900 .n(8)
13901 .k(k)
13902 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013903 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013904 }
13905 }
13906
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013907 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, small_kernel_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013908 TEST_REQUIRES_ARM_NEON;
13909 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013910 for (uint32_t n = 1; n <= 8; n++) {
13911 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013912 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013913 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013914 .nr(8)
13915 .kr(4)
13916 .sr(1)
13917 .m(m)
13918 .n(n)
13919 .k(k)
13920 .ks(3)
13921 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013922 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013923 }
13924 }
13925 }
13926 }
13927
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013928 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_gt_8_small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013929 TEST_REQUIRES_ARM_NEON;
13930 for (uint32_t n = 9; n < 16; n++) {
13931 for (size_t k = 1; k <= 40; k += 9) {
13932 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013933 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013934 .nr(8)
13935 .kr(4)
13936 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013937 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013938 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013939 .k(k)
13940 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013941 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013942 }
13943 }
13944 }
13945
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013946 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, n_div_8_small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013947 TEST_REQUIRES_ARM_NEON;
13948 for (uint32_t n = 16; n <= 24; n += 8) {
13949 for (size_t k = 1; k <= 40; k += 9) {
13950 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013951 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013952 .nr(8)
13953 .kr(4)
13954 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013955 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013956 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013957 .k(k)
13958 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013960 }
13961 }
13962 }
13963
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013964 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013965 TEST_REQUIRES_ARM_NEON;
13966 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013967 for (uint32_t n = 1; n <= 8; n++) {
13968 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013969 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013970 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013971 .nr(8)
13972 .kr(4)
13973 .sr(1)
13974 .m(m)
13975 .n(n)
13976 .k(k)
13977 .cm_stride(11)
13978 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013980 }
13981 }
13982 }
13983 }
13984
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013985 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, a_offset) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080013986 TEST_REQUIRES_ARM_NEON;
13987 for (size_t k = 1; k <= 40; k += 9) {
13988 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013989 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013990 .nr(8)
13991 .kr(4)
13992 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013993 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080013994 .n(8)
13995 .k(k)
13996 .ks(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080013997 .a_offset(43)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080013998 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080013999 }
14000 }
14001
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014002 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, zero) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014003 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014004 for (size_t k = 1; k <= 40; k += 9) {
14005 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014006 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014007 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014008 .nr(8)
14009 .kr(4)
14010 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014011 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014012 .n(8)
14013 .k(k)
14014 .ks(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014015 .a_offset(43)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014016 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014017 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014018 }
14019 }
14020 }
14021
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014022 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmin) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014023 TEST_REQUIRES_ARM_NEON;
14024 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014025 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014026 .nr(8)
14027 .kr(4)
14028 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014029 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014030 .n(8)
14031 .k(8)
14032 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014033 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014034 }
14035
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014036 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, qmax) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014037 TEST_REQUIRES_ARM_NEON;
14038 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014039 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014040 .nr(8)
14041 .kr(4)
14042 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014043 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014044 .n(8)
14045 .k(8)
14046 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014047 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014048 }
14049
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014050 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MULL_LD2R, strided_cm) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014051 TEST_REQUIRES_ARM_NEON;
14052 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014053 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014054 .nr(8)
14055 .kr(4)
14056 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014057 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014058 .n(8)
14059 .k(8)
14060 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014061 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014062 }
14063#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14064
14065
14066#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014067 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014068 TEST_REQUIRES_ARM_NEON;
14069 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014070 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014071 .nr(8)
14072 .kr(4)
14073 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014074 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014075 .n(8)
14076 .k(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014077 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014078 }
14079
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014080 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014081 TEST_REQUIRES_ARM_NEON;
14082 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014083 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014084 .nr(8)
14085 .kr(4)
14086 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014087 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014088 .n(8)
14089 .k(16)
14090 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014091 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014092 }
14093
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014094 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014095 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014096 for (uint32_t n = 1; n <= 8; n++) {
14097 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014098 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014099 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014100 .nr(8)
14101 .kr(4)
14102 .sr(1)
14103 .m(m)
14104 .n(n)
14105 .k(16)
14106 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014108 }
14109 }
14110 }
14111
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014112 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014113 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014114 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014115 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014116 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014117 .nr(8)
14118 .kr(4)
14119 .sr(1)
14120 .m(m)
14121 .n(8)
14122 .k(16)
14123 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014124 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014125 }
14126 }
14127
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014128 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014129 TEST_REQUIRES_ARM_NEON;
14130 for (uint32_t n = 1; n <= 8; n++) {
14131 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014132 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014133 .nr(8)
14134 .kr(4)
14135 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014136 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014137 .n(n)
14138 .k(16)
14139 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014140 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014141 }
14142 }
14143
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014144 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014145 TEST_REQUIRES_ARM_NEON;
14146 for (size_t k = 1; k < 16; k++) {
14147 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014148 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014149 .nr(8)
14150 .kr(4)
14151 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014152 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014153 .n(8)
14154 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014155 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014156 }
14157 }
14158
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014159 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014160 TEST_REQUIRES_ARM_NEON;
14161 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014162 for (uint32_t n = 1; n <= 8; n++) {
14163 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014164 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014165 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014166 .nr(8)
14167 .kr(4)
14168 .sr(1)
14169 .m(m)
14170 .n(n)
14171 .k(k)
14172 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014173 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014174 }
14175 }
14176 }
14177 }
14178
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014179 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014180 TEST_REQUIRES_ARM_NEON;
14181 for (size_t k = 17; k < 32; k++) {
14182 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014183 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014184 .nr(8)
14185 .kr(4)
14186 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014187 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014188 .n(8)
14189 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014191 }
14192 }
14193
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014194 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014195 TEST_REQUIRES_ARM_NEON;
14196 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014197 for (uint32_t n = 1; n <= 8; n++) {
14198 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014199 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014200 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014201 .nr(8)
14202 .kr(4)
14203 .sr(1)
14204 .m(m)
14205 .n(n)
14206 .k(k)
14207 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014208 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014209 }
14210 }
14211 }
14212 }
14213
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014214 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014215 TEST_REQUIRES_ARM_NEON;
14216 for (size_t k = 32; k <= 160; k += 16) {
14217 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014218 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014219 .nr(8)
14220 .kr(4)
14221 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014222 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014223 .n(8)
14224 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014226 }
14227 }
14228
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014229 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, k_div_16_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014230 TEST_REQUIRES_ARM_NEON;
14231 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014232 for (uint32_t n = 1; n <= 8; n++) {
14233 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014234 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014235 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014236 .nr(8)
14237 .kr(4)
14238 .sr(1)
14239 .m(m)
14240 .n(n)
14241 .k(k)
14242 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014243 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014244 }
14245 }
14246 }
14247 }
14248
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014249 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014250 TEST_REQUIRES_ARM_NEON;
14251 for (uint32_t n = 9; n < 16; n++) {
14252 for (size_t k = 1; k <= 80; k += 17) {
14253 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014254 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014255 .nr(8)
14256 .kr(4)
14257 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014258 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014259 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014260 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014261 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014262 }
14263 }
14264 }
14265
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014266 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014267 TEST_REQUIRES_ARM_NEON;
14268 for (uint32_t n = 9; n < 16; n++) {
14269 for (size_t k = 1; k <= 80; k += 17) {
14270 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014271 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014272 .nr(8)
14273 .kr(4)
14274 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014275 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014276 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014277 .k(k)
14278 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014280 }
14281 }
14282 }
14283
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014284 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014285 TEST_REQUIRES_ARM_NEON;
14286 for (uint32_t n = 9; n < 16; n++) {
14287 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014288 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014289 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014290 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014291 .nr(8)
14292 .kr(4)
14293 .sr(1)
14294 .m(m)
14295 .n(n)
14296 .k(k)
14297 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014298 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014299 }
14300 }
14301 }
14302 }
14303
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014304 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014305 TEST_REQUIRES_ARM_NEON;
14306 for (uint32_t n = 16; n <= 24; n += 8) {
14307 for (size_t k = 1; k <= 80; k += 17) {
14308 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014309 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014310 .nr(8)
14311 .kr(4)
14312 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014313 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014314 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014315 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014316 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014317 }
14318 }
14319 }
14320
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014321 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014322 TEST_REQUIRES_ARM_NEON;
14323 for (uint32_t n = 16; n <= 24; n += 8) {
14324 for (size_t k = 1; k <= 80; k += 17) {
14325 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014326 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014327 .nr(8)
14328 .kr(4)
14329 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014330 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014331 .n(n)
14332 .k(k)
14333 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014334 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014335 }
14336 }
14337 }
14338
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014339 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014340 TEST_REQUIRES_ARM_NEON;
14341 for (uint32_t n = 16; n <= 24; n += 8) {
14342 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014343 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014344 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014345 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014346 .nr(8)
14347 .kr(4)
14348 .sr(1)
14349 .m(m)
14350 .n(n)
14351 .k(k)
14352 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014354 }
14355 }
14356 }
14357 }
14358
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014359 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014360 TEST_REQUIRES_ARM_NEON;
14361 for (size_t k = 1; k <= 80; k += 17) {
14362 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014363 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014364 .nr(8)
14365 .kr(4)
14366 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014367 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014368 .n(8)
14369 .k(k)
14370 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014371 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014372 }
14373 }
14374
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014375 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, small_kernel_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014376 TEST_REQUIRES_ARM_NEON;
14377 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014378 for (uint32_t n = 1; n <= 8; n++) {
14379 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014380 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014381 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014382 .nr(8)
14383 .kr(4)
14384 .sr(1)
14385 .m(m)
14386 .n(n)
14387 .k(k)
14388 .ks(3)
14389 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014390 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014391 }
14392 }
14393 }
14394 }
14395
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014396 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_gt_8_small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014397 TEST_REQUIRES_ARM_NEON;
14398 for (uint32_t n = 9; n < 16; n++) {
14399 for (size_t k = 1; k <= 80; k += 17) {
14400 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014401 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014402 .nr(8)
14403 .kr(4)
14404 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014405 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014406 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014407 .k(k)
14408 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014409 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014410 }
14411 }
14412 }
14413
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014414 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, n_div_8_small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014415 TEST_REQUIRES_ARM_NEON;
14416 for (uint32_t n = 16; n <= 24; n += 8) {
14417 for (size_t k = 1; k <= 80; k += 17) {
14418 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014419 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014420 .nr(8)
14421 .kr(4)
14422 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014423 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014424 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014425 .k(k)
14426 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014428 }
14429 }
14430 }
14431
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014432 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014433 TEST_REQUIRES_ARM_NEON;
14434 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014435 for (uint32_t n = 1; n <= 8; n++) {
14436 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014437 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014438 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014439 .nr(8)
14440 .kr(4)
14441 .sr(1)
14442 .m(m)
14443 .n(n)
14444 .k(k)
14445 .cm_stride(11)
14446 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014448 }
14449 }
14450 }
14451 }
14452
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014453 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, a_offset) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014454 TEST_REQUIRES_ARM_NEON;
14455 for (size_t k = 1; k <= 80; k += 17) {
14456 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014457 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014458 .nr(8)
14459 .kr(4)
14460 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014461 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014462 .n(8)
14463 .k(k)
14464 .ks(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014465 .a_offset(83)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014466 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014467 }
14468 }
14469
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014470 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, zero) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014471 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014472 for (size_t k = 1; k <= 80; k += 17) {
14473 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014474 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014475 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014476 .nr(8)
14477 .kr(4)
14478 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014479 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014480 .n(8)
14481 .k(k)
14482 .ks(3)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014483 .a_offset(83)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014484 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014485 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014486 }
14487 }
14488 }
14489
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014490 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmin) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014491 TEST_REQUIRES_ARM_NEON;
14492 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014493 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014494 .nr(8)
14495 .kr(4)
14496 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014497 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014498 .n(8)
14499 .k(16)
14500 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014501 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014502 }
14503
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014504 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, qmax) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014505 TEST_REQUIRES_ARM_NEON;
14506 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014507 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014508 .nr(8)
14509 .kr(4)
14510 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014511 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014512 .n(8)
14513 .k(16)
14514 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014515 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014516 }
14517
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014518 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEON_MLAL_LD2R, strided_cm) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014519 TEST_REQUIRES_ARM_NEON;
14520 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014521 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014522 .nr(8)
14523 .kr(4)
14524 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080014525 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014526 .n(8)
14527 .k(16)
14528 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014529 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014530 }
14531#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14532
14533
14534#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014535 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014536 TEST_REQUIRES_ARM_NEON;
14537 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014538 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014539 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014540 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014541 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014542 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014543 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014544 .k(8)
14545 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014546 }
14547
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014548 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014549 TEST_REQUIRES_ARM_NEON;
14550 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014551 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014552 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014553 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014554 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014555 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014556 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014557 .k(8)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014558 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014559 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014560 }
14561
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014562 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014563 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014564 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014565 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014566 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014567 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014568 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014569 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014570 .sr(1)
14571 .m(m)
14572 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014573 .k(8)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014574 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014576 }
14577 }
14578 }
14579
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014580 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014581 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014582 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014583 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014584 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014585 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014586 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014587 .sr(1)
14588 .m(m)
14589 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014590 .k(8)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014591 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014592 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014593 }
14594 }
14595
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014596 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014597 TEST_REQUIRES_ARM_NEON;
14598 for (uint32_t n = 1; n <= 8; n++) {
14599 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014600 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014601 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014602 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014603 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014604 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014605 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014606 .k(8)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014607 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014608 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014609 }
14610 }
14611
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014612 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014613 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014614 for (size_t k = 1; k < 8; k++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014615 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014616 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014617 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014618 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014619 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014620 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014621 .n(8)
14622 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014623 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014624 }
14625 }
14626
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014627 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014628 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014629 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014630 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014631 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014632 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014633 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014634 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014635 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014636 .sr(1)
14637 .m(m)
14638 .n(n)
14639 .k(k)
14640 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014641 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014642 }
14643 }
14644 }
14645 }
14646
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014647 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014648 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014649 for (size_t k = 9; k < 16; k++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014650 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014651 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014652 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014653 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014654 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014655 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014656 .n(8)
14657 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014658 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014659 }
14660 }
14661
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014662 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014663 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014664 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014665 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014666 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014667 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014668 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014669 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014670 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014671 .sr(1)
14672 .m(m)
14673 .n(n)
14674 .k(k)
14675 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014676 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014677 }
14678 }
14679 }
14680 }
14681
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014682 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014683 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014684 for (size_t k = 16; k <= 80; k += 8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014685 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014686 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014687 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014688 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014689 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014690 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014691 .n(8)
14692 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014694 }
14695 }
14696
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014697 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, k_div_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014698 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014699 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014700 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014701 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014702 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014703 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014704 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014705 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014706 .sr(1)
14707 .m(m)
14708 .n(n)
14709 .k(k)
14710 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014711 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014712 }
14713 }
14714 }
14715 }
14716
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014717 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014718 TEST_REQUIRES_ARM_NEON;
14719 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014720 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014721 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014722 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014723 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014724 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014725 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014726 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014727 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014728 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014729 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014730 }
14731 }
14732 }
14733
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014734 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014735 TEST_REQUIRES_ARM_NEON;
14736 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014737 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014738 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014739 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014740 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014741 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014742 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014743 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014744 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014745 .k(k)
14746 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014748 }
14749 }
14750 }
14751
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014752 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014753 TEST_REQUIRES_ARM_NEON;
14754 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014755 for (size_t k = 1; k <= 40; k += 9) {
14756 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014757 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014758 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014759 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014760 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014761 .sr(1)
14762 .m(m)
14763 .n(n)
14764 .k(k)
14765 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014766 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014767 }
14768 }
14769 }
14770 }
14771
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014772 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014773 TEST_REQUIRES_ARM_NEON;
14774 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014775 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014776 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014777 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014778 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014779 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014780 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014781 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014782 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014783 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014784 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014785 }
14786 }
14787 }
14788
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014789 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014790 TEST_REQUIRES_ARM_NEON;
14791 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014792 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014793 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014794 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014795 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014796 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014797 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014798 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014799 .n(n)
14800 .k(k)
14801 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014802 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014803 }
14804 }
14805 }
14806
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014807 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014808 TEST_REQUIRES_ARM_NEON;
14809 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014810 for (size_t k = 1; k <= 40; k += 9) {
14811 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014812 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014813 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014814 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014815 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014816 .sr(1)
14817 .m(m)
14818 .n(n)
14819 .k(k)
14820 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014822 }
14823 }
14824 }
14825 }
14826
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014827 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014828 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014829 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014830 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014831 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014832 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014833 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014834 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014835 .m(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014836 .n(8)
14837 .k(k)
14838 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014839 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014840 }
14841 }
14842
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014843 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, small_kernel_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014844 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014845 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014846 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014847 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014848 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014849 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014850 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014851 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014852 .sr(1)
14853 .m(m)
14854 .n(n)
14855 .k(k)
14856 .ks(3)
14857 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014858 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014859 }
14860 }
14861 }
14862 }
14863
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014864 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014865 TEST_REQUIRES_ARM_NEON;
14866 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014867 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014868 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014869 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014870 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014871 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014872 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014873 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014874 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014875 .k(k)
14876 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014877 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014878 }
14879 }
14880 }
14881
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014882 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014883 TEST_REQUIRES_ARM_NEON;
14884 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014885 for (size_t k = 1; k <= 40; k += 9) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014886 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014887 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014888 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014889 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014890 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014891 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014892 .n(n)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014893 .k(k)
14894 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014896 }
14897 }
14898 }
14899
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014900 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014901 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014902 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014903 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014904 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014905 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014906 .mr(1)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014907 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014908 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080014909 .sr(1)
14910 .m(m)
14911 .n(n)
14912 .k(k)
14913 .cm_stride(11)
14914 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080014916 }
14917 }
14918 }
14919 }
14920
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014921 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, a_offset) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080014922 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080014923 for (size_t k = 1; k <= 40; k += 9) {
14924 GemmMicrokernelTester()
14925 .mr(1)
14926 .nr(8)
14927 .kr(2)
14928 .sr(1)
14929 .m(1)
14930 .n(8)
14931 .k(k)
14932 .ks(3)
14933 .a_offset(43)
14934 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
14935 }
14936 }
14937
14938 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, zero) {
14939 TEST_REQUIRES_ARM_NEON;
14940 for (size_t k = 1; k <= 40; k += 9) {
14941 for (uint32_t mz = 0; mz < 1; mz++) {
14942 GemmMicrokernelTester()
14943 .mr(1)
14944 .nr(8)
14945 .kr(2)
14946 .sr(1)
14947 .m(1)
14948 .n(8)
14949 .k(k)
14950 .ks(3)
14951 .a_offset(43)
14952 .zero_index(mz)
14953 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
14954 }
14955 }
14956 }
14957
14958 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmin) {
14959 TEST_REQUIRES_ARM_NEON;
14960 GemmMicrokernelTester()
14961 .mr(1)
14962 .nr(8)
14963 .kr(2)
14964 .sr(1)
14965 .m(1)
14966 .n(8)
14967 .k(8)
14968 .qmin(128)
14969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
14970 }
14971
14972 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, qmax) {
14973 TEST_REQUIRES_ARM_NEON;
14974 GemmMicrokernelTester()
14975 .mr(1)
14976 .nr(8)
14977 .kr(2)
14978 .sr(1)
14979 .m(1)
14980 .n(8)
14981 .k(8)
14982 .qmax(128)
14983 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
14984 }
14985
14986 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_DUP, strided_cm) {
14987 TEST_REQUIRES_ARM_NEON;
14988 GemmMicrokernelTester()
14989 .mr(1)
14990 .nr(8)
14991 .kr(2)
14992 .sr(1)
14993 .m(1)
14994 .n(8)
14995 .k(8)
14996 .cm_stride(11)
14997 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
14998 }
14999#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15000
15001
15002#if XNN_ARCH_ARM || XNN_ARCH_ARM64
15003 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8) {
15004 TEST_REQUIRES_ARM_NEON;
15005 GemmMicrokernelTester()
15006 .mr(3)
15007 .nr(8)
15008 .kr(2)
15009 .sr(1)
15010 .m(3)
15011 .n(8)
15012 .k(8)
15013 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15014 }
15015
15016 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cn) {
15017 TEST_REQUIRES_ARM_NEON;
15018 GemmMicrokernelTester()
15019 .mr(3)
15020 .nr(8)
15021 .kr(2)
15022 .sr(1)
15023 .m(3)
15024 .n(8)
15025 .k(8)
15026 .cn_stride(11)
15027 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15028 }
15029
15030 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
15031 TEST_REQUIRES_ARM_NEON;
15032 for (uint32_t n = 1; n <= 8; n++) {
15033 for (uint32_t m = 1; m <= 3; m++) {
15034 GemmMicrokernelTester()
15035 .mr(3)
15036 .nr(8)
15037 .kr(2)
15038 .sr(1)
15039 .m(m)
15040 .n(n)
15041 .k(8)
15042 .iterations(1)
15043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15044 }
15045 }
15046 }
15047
15048 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
15049 TEST_REQUIRES_ARM_NEON;
15050 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080015051 GemmMicrokernelTester()
15052 .mr(3)
15053 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015054 .kr(2)
15055 .sr(1)
15056 .m(m)
15057 .n(8)
15058 .k(8)
15059 .iterations(1)
15060 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15061 }
15062 }
15063
15064 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
15065 TEST_REQUIRES_ARM_NEON;
15066 for (uint32_t n = 1; n <= 8; n++) {
15067 GemmMicrokernelTester()
15068 .mr(3)
15069 .nr(8)
15070 .kr(2)
15071 .sr(1)
15072 .m(3)
15073 .n(n)
15074 .k(8)
15075 .iterations(1)
15076 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15077 }
15078 }
15079
15080 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8) {
15081 TEST_REQUIRES_ARM_NEON;
15082 for (size_t k = 1; k < 8; k++) {
15083 GemmMicrokernelTester()
15084 .mr(3)
15085 .nr(8)
15086 .kr(2)
15087 .sr(1)
15088 .m(3)
15089 .n(8)
15090 .k(k)
15091 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15092 }
15093 }
15094
15095 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
15096 TEST_REQUIRES_ARM_NEON;
15097 for (size_t k = 1; k < 8; k++) {
15098 for (uint32_t n = 1; n <= 8; n++) {
15099 for (uint32_t m = 1; m <= 3; m++) {
15100 GemmMicrokernelTester()
15101 .mr(3)
15102 .nr(8)
15103 .kr(2)
15104 .sr(1)
15105 .m(m)
15106 .n(n)
15107 .k(k)
15108 .iterations(1)
15109 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15110 }
15111 }
15112 }
15113 }
15114
15115 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8) {
15116 TEST_REQUIRES_ARM_NEON;
15117 for (size_t k = 9; k < 16; k++) {
15118 GemmMicrokernelTester()
15119 .mr(3)
15120 .nr(8)
15121 .kr(2)
15122 .sr(1)
15123 .m(3)
15124 .n(8)
15125 .k(k)
15126 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15127 }
15128 }
15129
15130 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
15131 TEST_REQUIRES_ARM_NEON;
15132 for (size_t k = 9; k < 16; k++) {
15133 for (uint32_t n = 1; n <= 8; n++) {
15134 for (uint32_t m = 1; m <= 3; m++) {
15135 GemmMicrokernelTester()
15136 .mr(3)
15137 .nr(8)
15138 .kr(2)
15139 .sr(1)
15140 .m(m)
15141 .n(n)
15142 .k(k)
15143 .iterations(1)
15144 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15145 }
15146 }
15147 }
15148 }
15149
15150 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8) {
15151 TEST_REQUIRES_ARM_NEON;
15152 for (size_t k = 16; k <= 80; k += 8) {
15153 GemmMicrokernelTester()
15154 .mr(3)
15155 .nr(8)
15156 .kr(2)
15157 .sr(1)
15158 .m(3)
15159 .n(8)
15160 .k(k)
15161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15162 }
15163 }
15164
15165 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, k_div_8_subtile) {
15166 TEST_REQUIRES_ARM_NEON;
15167 for (size_t k = 16; k <= 80; k += 8) {
15168 for (uint32_t n = 1; n <= 8; n++) {
15169 for (uint32_t m = 1; m <= 3; m++) {
15170 GemmMicrokernelTester()
15171 .mr(3)
15172 .nr(8)
15173 .kr(2)
15174 .sr(1)
15175 .m(m)
15176 .n(n)
15177 .k(k)
15178 .iterations(1)
15179 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15180 }
15181 }
15182 }
15183 }
15184
15185 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8) {
15186 TEST_REQUIRES_ARM_NEON;
15187 for (uint32_t n = 9; n < 16; n++) {
15188 for (size_t k = 1; k <= 40; k += 9) {
15189 GemmMicrokernelTester()
15190 .mr(3)
15191 .nr(8)
15192 .kr(2)
15193 .sr(1)
15194 .m(3)
15195 .n(n)
15196 .k(k)
15197 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15198 }
15199 }
15200 }
15201
15202 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
15203 TEST_REQUIRES_ARM_NEON;
15204 for (uint32_t n = 9; n < 16; n++) {
15205 for (size_t k = 1; k <= 40; k += 9) {
15206 GemmMicrokernelTester()
15207 .mr(3)
15208 .nr(8)
15209 .kr(2)
15210 .sr(1)
15211 .m(3)
15212 .n(n)
15213 .k(k)
15214 .cn_stride(11)
15215 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15216 }
15217 }
15218 }
15219
15220 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
15221 TEST_REQUIRES_ARM_NEON;
15222 for (uint32_t n = 9; n < 16; n++) {
15223 for (size_t k = 1; k <= 40; k += 9) {
15224 for (uint32_t m = 1; m <= 3; m++) {
15225 GemmMicrokernelTester()
15226 .mr(3)
15227 .nr(8)
15228 .kr(2)
15229 .sr(1)
15230 .m(m)
15231 .n(n)
15232 .k(k)
15233 .iterations(1)
15234 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15235 }
15236 }
15237 }
15238 }
15239
15240 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8) {
15241 TEST_REQUIRES_ARM_NEON;
15242 for (uint32_t n = 16; n <= 24; n += 8) {
15243 for (size_t k = 1; k <= 40; k += 9) {
15244 GemmMicrokernelTester()
15245 .mr(3)
15246 .nr(8)
15247 .kr(2)
15248 .sr(1)
15249 .m(3)
15250 .n(n)
15251 .k(k)
15252 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15253 }
15254 }
15255 }
15256
15257 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
15258 TEST_REQUIRES_ARM_NEON;
15259 for (uint32_t n = 16; n <= 24; n += 8) {
15260 for (size_t k = 1; k <= 40; k += 9) {
15261 GemmMicrokernelTester()
15262 .mr(3)
15263 .nr(8)
15264 .kr(2)
15265 .sr(1)
15266 .m(3)
15267 .n(n)
15268 .k(k)
15269 .cn_stride(11)
15270 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15271 }
15272 }
15273 }
15274
15275 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_subtile) {
15276 TEST_REQUIRES_ARM_NEON;
15277 for (uint32_t n = 16; n <= 24; n += 8) {
15278 for (size_t k = 1; k <= 40; k += 9) {
15279 for (uint32_t m = 1; m <= 3; m++) {
15280 GemmMicrokernelTester()
15281 .mr(3)
15282 .nr(8)
15283 .kr(2)
15284 .sr(1)
15285 .m(m)
15286 .n(n)
15287 .k(k)
15288 .iterations(1)
15289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15290 }
15291 }
15292 }
15293 }
15294
15295 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel) {
15296 TEST_REQUIRES_ARM_NEON;
15297 for (size_t k = 1; k <= 40; k += 9) {
15298 GemmMicrokernelTester()
15299 .mr(3)
15300 .nr(8)
15301 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080015302 .sr(1)
15303 .m(3)
15304 .n(8)
15305 .k(k)
15306 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015307 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080015308 }
15309 }
15310
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015311 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, small_kernel_subtile) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080015312 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015313 for (size_t k = 1; k <= 40; k += 9) {
15314 for (uint32_t n = 1; n <= 8; n++) {
15315 for (uint32_t m = 1; m <= 3; m++) {
15316 GemmMicrokernelTester()
15317 .mr(3)
15318 .nr(8)
15319 .kr(2)
15320 .sr(1)
15321 .m(m)
15322 .n(n)
15323 .k(k)
15324 .ks(3)
15325 .iterations(1)
15326 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15327 }
15328 }
15329 }
15330 }
15331
15332 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_gt_8_small_kernel) {
15333 TEST_REQUIRES_ARM_NEON;
15334 for (uint32_t n = 9; n < 16; n++) {
15335 for (size_t k = 1; k <= 40; k += 9) {
15336 GemmMicrokernelTester()
15337 .mr(3)
15338 .nr(8)
15339 .kr(2)
15340 .sr(1)
15341 .m(3)
15342 .n(n)
15343 .k(k)
15344 .ks(3)
15345 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15346 }
15347 }
15348 }
15349
15350 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, n_div_8_small_kernel) {
15351 TEST_REQUIRES_ARM_NEON;
15352 for (uint32_t n = 16; n <= 24; n += 8) {
15353 for (size_t k = 1; k <= 40; k += 9) {
15354 GemmMicrokernelTester()
15355 .mr(3)
15356 .nr(8)
15357 .kr(2)
15358 .sr(1)
15359 .m(3)
15360 .n(n)
15361 .k(k)
15362 .ks(3)
15363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15364 }
15365 }
15366 }
15367
15368 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm_subtile) {
15369 TEST_REQUIRES_ARM_NEON;
15370 for (size_t k = 1; k <= 40; k += 9) {
15371 for (uint32_t n = 1; n <= 8; n++) {
15372 for (uint32_t m = 1; m <= 3; m++) {
15373 GemmMicrokernelTester()
15374 .mr(3)
15375 .nr(8)
15376 .kr(2)
15377 .sr(1)
15378 .m(m)
15379 .n(n)
15380 .k(k)
15381 .cm_stride(11)
15382 .iterations(1)
15383 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15384 }
15385 }
15386 }
15387 }
15388
15389 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, a_offset) {
15390 TEST_REQUIRES_ARM_NEON;
15391 for (size_t k = 1; k <= 40; k += 9) {
15392 GemmMicrokernelTester()
15393 .mr(3)
15394 .nr(8)
15395 .kr(2)
15396 .sr(1)
15397 .m(3)
15398 .n(8)
15399 .k(k)
15400 .ks(3)
15401 .a_offset(127)
15402 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15403 }
15404 }
15405
15406 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, zero) {
15407 TEST_REQUIRES_ARM_NEON;
15408 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015409 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080015410 GemmMicrokernelTester()
15411 .mr(3)
15412 .nr(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015413 .kr(2)
Frank Barchard64ab1b72021-11-22 10:57:40 -080015414 .sr(1)
15415 .m(3)
15416 .n(8)
15417 .k(k)
15418 .ks(3)
Frank Barchard64ab1b72021-11-22 10:57:40 -080015419 .a_offset(127)
15420 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015421 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard64ab1b72021-11-22 10:57:40 -080015422 }
15423 }
15424 }
15425
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015426 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmin) {
Frank Barchard64ab1b72021-11-22 10:57:40 -080015427 TEST_REQUIRES_ARM_NEON;
15428 GemmMicrokernelTester()
15429 .mr(3)
Frank Barchard1d412472021-10-25 17:27:21 -070015430 .nr(8)
15431 .kr(2)
15432 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015433 .m(3)
Frank Barchard1d412472021-10-25 17:27:21 -070015434 .n(8)
15435 .k(8)
15436 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015437 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015438 }
15439
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015440 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, qmax) {
Frank Barchard1d412472021-10-25 17:27:21 -070015441 TEST_REQUIRES_ARM_NEON;
15442 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015443 .mr(3)
Frank Barchard1d412472021-10-25 17:27:21 -070015444 .nr(8)
15445 .kr(2)
15446 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015447 .m(3)
Frank Barchard1d412472021-10-25 17:27:21 -070015448 .n(8)
15449 .k(8)
15450 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015451 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015452 }
15453
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015454 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8C2__NEON_MULL_DUP, strided_cm) {
Frank Barchard1d412472021-10-25 17:27:21 -070015455 TEST_REQUIRES_ARM_NEON;
15456 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015457 .mr(3)
Frank Barchard1d412472021-10-25 17:27:21 -070015458 .nr(8)
15459 .kr(2)
15460 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015461 .m(3)
Frank Barchard1d412472021-10-25 17:27:21 -070015462 .n(8)
15463 .k(8)
15464 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015465 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015466 }
15467#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15468
15469
15470#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barcharde22685a2021-11-12 11:36:58 -080015471 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8) {
Frank Barchard1d412472021-10-25 17:27:21 -070015472 TEST_REQUIRES_ARM_NEON;
15473 GemmMicrokernelTester()
15474 .mr(1)
15475 .nr(16)
15476 .kr(2)
15477 .sr(1)
15478 .m(1)
15479 .n(16)
15480 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015481 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015482 }
15483
Frank Barcharde22685a2021-11-12 11:36:58 -080015484 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, strided_cn) {
Frank Barchard1d412472021-10-25 17:27:21 -070015485 TEST_REQUIRES_ARM_NEON;
15486 GemmMicrokernelTester()
15487 .mr(1)
15488 .nr(16)
15489 .kr(2)
15490 .sr(1)
15491 .m(1)
15492 .n(16)
15493 .k(8)
15494 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015495 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015496 }
15497
Frank Barcharde22685a2021-11-12 11:36:58 -080015498 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015499 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015500 for (uint32_t n = 1; n <= 16; n++) {
15501 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015502 GemmMicrokernelTester()
15503 .mr(1)
15504 .nr(16)
15505 .kr(2)
15506 .sr(1)
15507 .m(m)
15508 .n(n)
15509 .k(8)
15510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015512 }
15513 }
15514 }
15515
Frank Barcharde22685a2021-11-12 11:36:58 -080015516 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
Frank Barchard1d412472021-10-25 17:27:21 -070015517 TEST_REQUIRES_ARM_NEON;
15518 for (uint32_t m = 1; m <= 1; m++) {
15519 GemmMicrokernelTester()
15520 .mr(1)
15521 .nr(16)
15522 .kr(2)
15523 .sr(1)
15524 .m(m)
15525 .n(16)
15526 .k(8)
15527 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015528 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015529 }
15530 }
15531
Frank Barcharde22685a2021-11-12 11:36:58 -080015532 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
Frank Barchard1d412472021-10-25 17:27:21 -070015533 TEST_REQUIRES_ARM_NEON;
15534 for (uint32_t n = 1; n <= 16; n++) {
15535 GemmMicrokernelTester()
15536 .mr(1)
15537 .nr(16)
15538 .kr(2)
15539 .sr(1)
15540 .m(1)
15541 .n(n)
15542 .k(8)
15543 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015544 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015545 }
15546 }
15547
Frank Barcharde22685a2021-11-12 11:36:58 -080015548 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_lt_8) {
Frank Barchard1d412472021-10-25 17:27:21 -070015549 TEST_REQUIRES_ARM_NEON;
15550 for (size_t k = 1; k < 8; k++) {
15551 GemmMicrokernelTester()
15552 .mr(1)
15553 .nr(16)
15554 .kr(2)
15555 .sr(1)
15556 .m(1)
15557 .n(16)
15558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015559 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015560 }
15561 }
15562
Frank Barcharde22685a2021-11-12 11:36:58 -080015563 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015564 TEST_REQUIRES_ARM_NEON;
15565 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015566 for (uint32_t n = 1; n <= 16; n++) {
15567 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015568 GemmMicrokernelTester()
15569 .mr(1)
15570 .nr(16)
15571 .kr(2)
15572 .sr(1)
15573 .m(m)
15574 .n(n)
15575 .k(k)
15576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015577 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015578 }
15579 }
15580 }
15581 }
15582
Frank Barcharde22685a2021-11-12 11:36:58 -080015583 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_gt_8) {
Frank Barchard1d412472021-10-25 17:27:21 -070015584 TEST_REQUIRES_ARM_NEON;
15585 for (size_t k = 9; k < 16; k++) {
15586 GemmMicrokernelTester()
15587 .mr(1)
15588 .nr(16)
15589 .kr(2)
15590 .sr(1)
15591 .m(1)
15592 .n(16)
15593 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015594 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015595 }
15596 }
15597
Frank Barcharde22685a2021-11-12 11:36:58 -080015598 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015599 TEST_REQUIRES_ARM_NEON;
15600 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015601 for (uint32_t n = 1; n <= 16; n++) {
15602 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015603 GemmMicrokernelTester()
15604 .mr(1)
15605 .nr(16)
15606 .kr(2)
15607 .sr(1)
15608 .m(m)
15609 .n(n)
15610 .k(k)
15611 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015612 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015613 }
15614 }
15615 }
15616 }
15617
Frank Barcharde22685a2021-11-12 11:36:58 -080015618 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_div_8) {
Frank Barchard1d412472021-10-25 17:27:21 -070015619 TEST_REQUIRES_ARM_NEON;
15620 for (size_t k = 16; k <= 80; k += 8) {
15621 GemmMicrokernelTester()
15622 .mr(1)
15623 .nr(16)
15624 .kr(2)
15625 .sr(1)
15626 .m(1)
15627 .n(16)
15628 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015630 }
15631 }
15632
Frank Barcharde22685a2021-11-12 11:36:58 -080015633 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, k_div_8_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015634 TEST_REQUIRES_ARM_NEON;
15635 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015636 for (uint32_t n = 1; n <= 16; n++) {
15637 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015638 GemmMicrokernelTester()
15639 .mr(1)
15640 .nr(16)
15641 .kr(2)
15642 .sr(1)
15643 .m(m)
15644 .n(n)
15645 .k(k)
15646 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015647 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015648 }
15649 }
15650 }
15651 }
15652
Frank Barcharde22685a2021-11-12 11:36:58 -080015653 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16) {
Frank Barchard1d412472021-10-25 17:27:21 -070015654 TEST_REQUIRES_ARM_NEON;
15655 for (uint32_t n = 17; n < 32; n++) {
15656 for (size_t k = 1; k <= 40; k += 9) {
15657 GemmMicrokernelTester()
15658 .mr(1)
15659 .nr(16)
15660 .kr(2)
15661 .sr(1)
15662 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015663 .n(n)
Frank Barchard1d412472021-10-25 17:27:21 -070015664 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015665 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015666 }
15667 }
15668 }
15669
Frank Barcharde22685a2021-11-12 11:36:58 -080015670 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
Frank Barchard1d412472021-10-25 17:27:21 -070015671 TEST_REQUIRES_ARM_NEON;
15672 for (uint32_t n = 17; n < 32; n++) {
15673 for (size_t k = 1; k <= 40; k += 9) {
15674 GemmMicrokernelTester()
15675 .mr(1)
15676 .nr(16)
15677 .kr(2)
15678 .sr(1)
15679 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015680 .n(n)
Frank Barchard1d412472021-10-25 17:27:21 -070015681 .k(k)
15682 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015683 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015684 }
15685 }
15686 }
15687
Frank Barcharde22685a2021-11-12 11:36:58 -080015688 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015689 TEST_REQUIRES_ARM_NEON;
15690 for (uint32_t n = 17; n < 32; n++) {
15691 for (size_t k = 1; k <= 40; k += 9) {
15692 for (uint32_t m = 1; m <= 1; m++) {
15693 GemmMicrokernelTester()
15694 .mr(1)
15695 .nr(16)
15696 .kr(2)
15697 .sr(1)
15698 .m(m)
15699 .n(n)
15700 .k(k)
15701 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015702 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015703 }
15704 }
15705 }
15706 }
15707
Frank Barcharde22685a2021-11-12 11:36:58 -080015708 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16) {
Frank Barchard1d412472021-10-25 17:27:21 -070015709 TEST_REQUIRES_ARM_NEON;
15710 for (uint32_t n = 32; n <= 48; n += 16) {
15711 for (size_t k = 1; k <= 40; k += 9) {
15712 GemmMicrokernelTester()
15713 .mr(1)
15714 .nr(16)
15715 .kr(2)
15716 .sr(1)
15717 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015718 .n(n)
Frank Barchard1d412472021-10-25 17:27:21 -070015719 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015720 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015721 }
15722 }
15723 }
15724
Frank Barcharde22685a2021-11-12 11:36:58 -080015725 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
Frank Barchard1d412472021-10-25 17:27:21 -070015726 TEST_REQUIRES_ARM_NEON;
15727 for (uint32_t n = 32; n <= 48; n += 16) {
15728 for (size_t k = 1; k <= 40; k += 9) {
15729 GemmMicrokernelTester()
15730 .mr(1)
15731 .nr(16)
15732 .kr(2)
15733 .sr(1)
15734 .m(1)
15735 .n(n)
15736 .k(k)
15737 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015738 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015739 }
15740 }
15741 }
15742
Frank Barcharde22685a2021-11-12 11:36:58 -080015743 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015744 TEST_REQUIRES_ARM_NEON;
15745 for (uint32_t n = 32; n <= 48; n += 16) {
15746 for (size_t k = 1; k <= 40; k += 9) {
15747 for (uint32_t m = 1; m <= 1; m++) {
15748 GemmMicrokernelTester()
15749 .mr(1)
15750 .nr(16)
15751 .kr(2)
15752 .sr(1)
15753 .m(m)
15754 .n(n)
15755 .k(k)
15756 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015758 }
15759 }
15760 }
15761 }
15762
Frank Barcharde22685a2021-11-12 11:36:58 -080015763 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, small_kernel) {
Frank Barchard1d412472021-10-25 17:27:21 -070015764 TEST_REQUIRES_ARM_NEON;
15765 for (size_t k = 1; k <= 40; k += 9) {
15766 GemmMicrokernelTester()
15767 .mr(1)
15768 .nr(16)
15769 .kr(2)
15770 .sr(1)
15771 .m(1)
15772 .n(16)
15773 .k(k)
15774 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015775 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015776 }
15777 }
15778
Frank Barcharde22685a2021-11-12 11:36:58 -080015779 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, small_kernel_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015780 TEST_REQUIRES_ARM_NEON;
15781 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015782 for (uint32_t n = 1; n <= 16; n++) {
15783 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015784 GemmMicrokernelTester()
15785 .mr(1)
15786 .nr(16)
15787 .kr(2)
15788 .sr(1)
15789 .m(m)
15790 .n(n)
15791 .k(k)
15792 .ks(3)
15793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015794 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015795 }
15796 }
15797 }
15798 }
15799
Frank Barcharde22685a2021-11-12 11:36:58 -080015800 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_gt_16_small_kernel) {
Frank Barchard1d412472021-10-25 17:27:21 -070015801 TEST_REQUIRES_ARM_NEON;
15802 for (uint32_t n = 17; n < 32; n++) {
15803 for (size_t k = 1; k <= 40; k += 9) {
15804 GemmMicrokernelTester()
15805 .mr(1)
15806 .nr(16)
15807 .kr(2)
15808 .sr(1)
15809 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015810 .n(n)
Frank Barchard1d412472021-10-25 17:27:21 -070015811 .k(k)
15812 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015813 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015814 }
15815 }
15816 }
15817
Frank Barcharde22685a2021-11-12 11:36:58 -080015818 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, n_div_16_small_kernel) {
Frank Barchard1d412472021-10-25 17:27:21 -070015819 TEST_REQUIRES_ARM_NEON;
15820 for (uint32_t n = 32; n <= 48; n += 16) {
15821 for (size_t k = 1; k <= 40; k += 9) {
15822 GemmMicrokernelTester()
15823 .mr(1)
15824 .nr(16)
15825 .kr(2)
15826 .sr(1)
15827 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015828 .n(n)
Frank Barchard1d412472021-10-25 17:27:21 -070015829 .k(k)
15830 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080015831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015832 }
15833 }
15834 }
15835
Frank Barcharde22685a2021-11-12 11:36:58 -080015836 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, strided_cm_subtile) {
Frank Barchard1d412472021-10-25 17:27:21 -070015837 TEST_REQUIRES_ARM_NEON;
15838 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015839 for (uint32_t n = 1; n <= 16; n++) {
15840 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015841 GemmMicrokernelTester()
15842 .mr(1)
15843 .nr(16)
15844 .kr(2)
15845 .sr(1)
15846 .m(m)
15847 .n(n)
15848 .k(k)
15849 .cm_stride(19)
15850 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015851 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015852 }
15853 }
15854 }
15855 }
15856
Frank Barcharde22685a2021-11-12 11:36:58 -080015857 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, a_offset) {
Frank Barchard1d412472021-10-25 17:27:21 -070015858 TEST_REQUIRES_ARM_NEON;
15859 for (size_t k = 1; k <= 40; k += 9) {
15860 GemmMicrokernelTester()
15861 .mr(1)
15862 .nr(16)
15863 .kr(2)
15864 .sr(1)
15865 .m(1)
15866 .n(16)
15867 .k(k)
15868 .ks(3)
15869 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015870 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015871 }
15872 }
15873
Frank Barcharde22685a2021-11-12 11:36:58 -080015874 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, zero) {
Frank Barchard1d412472021-10-25 17:27:21 -070015875 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015876 for (size_t k = 1; k <= 40; k += 9) {
15877 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard1d412472021-10-25 17:27:21 -070015878 GemmMicrokernelTester()
15879 .mr(1)
15880 .nr(16)
15881 .kr(2)
15882 .sr(1)
15883 .m(1)
15884 .n(16)
15885 .k(k)
15886 .ks(3)
15887 .a_offset(43)
15888 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080015889 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015890 }
15891 }
15892 }
15893
Frank Barcharde22685a2021-11-12 11:36:58 -080015894 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, qmin) {
Frank Barchard1d412472021-10-25 17:27:21 -070015895 TEST_REQUIRES_ARM_NEON;
15896 GemmMicrokernelTester()
15897 .mr(1)
15898 .nr(16)
15899 .kr(2)
15900 .sr(1)
15901 .m(1)
15902 .n(16)
15903 .k(8)
15904 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015906 }
15907
Frank Barcharde22685a2021-11-12 11:36:58 -080015908 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, qmax) {
Frank Barchard1d412472021-10-25 17:27:21 -070015909 TEST_REQUIRES_ARM_NEON;
15910 GemmMicrokernelTester()
15911 .mr(1)
15912 .nr(16)
15913 .kr(2)
15914 .sr(1)
15915 .m(1)
15916 .n(16)
15917 .k(8)
15918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015919 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015920 }
15921
Frank Barcharde22685a2021-11-12 11:36:58 -080015922 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_DUP, strided_cm) {
Frank Barchard1d412472021-10-25 17:27:21 -070015923 TEST_REQUIRES_ARM_NEON;
15924 GemmMicrokernelTester()
15925 .mr(1)
15926 .nr(16)
15927 .kr(2)
15928 .sr(1)
15929 .m(1)
15930 .n(16)
15931 .k(8)
15932 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015933 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015934 }
15935#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15936
15937
15938#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015939 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16) {
Frank Barchard1d412472021-10-25 17:27:21 -070015940 TEST_REQUIRES_ARM_NEON;
15941 GemmMicrokernelTester()
15942 .mr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015943 .nr(8)
Frank Barchard1d412472021-10-25 17:27:21 -070015944 .kr(2)
15945 .sr(1)
15946 .m(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015947 .n(8)
15948 .k(16)
15949 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard1d412472021-10-25 17:27:21 -070015950 }
15951
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015952 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cn) {
Frank Barchard1d412472021-10-25 17:27:21 -070015953 TEST_REQUIRES_ARM_NEON;
15954 GemmMicrokernelTester()
15955 .mr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015956 .nr(8)
Frank Barchard1d412472021-10-25 17:27:21 -070015957 .kr(2)
15958 .sr(1)
15959 .m(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015960 .n(8)
15961 .k(16)
15962 .cn_stride(11)
15963 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080015964 }
15965
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015966 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080015967 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015968 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015969 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080015970 GemmMicrokernelTester()
15971 .mr(4)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080015972 .nr(8)
15973 .kr(2)
15974 .sr(1)
15975 .m(m)
15976 .n(n)
15977 .k(16)
15978 .iterations(1)
15979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15980 }
15981 }
15982 }
15983
15984 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
15985 TEST_REQUIRES_ARM_NEON;
15986 for (uint32_t m = 1; m <= 4; m++) {
15987 GemmMicrokernelTester()
15988 .mr(4)
15989 .nr(8)
15990 .kr(2)
15991 .sr(1)
15992 .m(m)
15993 .n(8)
15994 .k(16)
15995 .iterations(1)
15996 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
15997 }
15998 }
15999
16000 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
16001 TEST_REQUIRES_ARM_NEON;
16002 for (uint32_t n = 1; n <= 8; n++) {
16003 GemmMicrokernelTester()
16004 .mr(4)
16005 .nr(8)
16006 .kr(2)
16007 .sr(1)
16008 .m(4)
16009 .n(n)
16010 .k(16)
16011 .iterations(1)
16012 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16013 }
16014 }
16015
16016 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16) {
16017 TEST_REQUIRES_ARM_NEON;
16018 for (size_t k = 1; k < 16; k++) {
16019 GemmMicrokernelTester()
16020 .mr(4)
16021 .nr(8)
16022 .kr(2)
16023 .sr(1)
16024 .m(4)
16025 .n(8)
16026 .k(k)
16027 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16028 }
16029 }
16030
16031 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
16032 TEST_REQUIRES_ARM_NEON;
16033 for (size_t k = 1; k < 16; k++) {
16034 for (uint32_t n = 1; n <= 8; n++) {
16035 for (uint32_t m = 1; m <= 4; m++) {
16036 GemmMicrokernelTester()
16037 .mr(4)
16038 .nr(8)
16039 .kr(2)
16040 .sr(1)
16041 .m(m)
16042 .n(n)
16043 .k(k)
16044 .iterations(1)
16045 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16046 }
16047 }
16048 }
16049 }
16050
16051 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16) {
16052 TEST_REQUIRES_ARM_NEON;
16053 for (size_t k = 17; k < 32; k++) {
16054 GemmMicrokernelTester()
16055 .mr(4)
16056 .nr(8)
16057 .kr(2)
16058 .sr(1)
16059 .m(4)
16060 .n(8)
16061 .k(k)
16062 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16063 }
16064 }
16065
16066 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
16067 TEST_REQUIRES_ARM_NEON;
16068 for (size_t k = 17; k < 32; k++) {
16069 for (uint32_t n = 1; n <= 8; n++) {
16070 for (uint32_t m = 1; m <= 4; m++) {
16071 GemmMicrokernelTester()
16072 .mr(4)
16073 .nr(8)
16074 .kr(2)
16075 .sr(1)
16076 .m(m)
16077 .n(n)
16078 .k(k)
16079 .iterations(1)
16080 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16081 }
16082 }
16083 }
16084 }
16085
16086 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16) {
16087 TEST_REQUIRES_ARM_NEON;
16088 for (size_t k = 32; k <= 160; k += 16) {
16089 GemmMicrokernelTester()
16090 .mr(4)
16091 .nr(8)
16092 .kr(2)
16093 .sr(1)
16094 .m(4)
16095 .n(8)
16096 .k(k)
16097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16098 }
16099 }
16100
16101 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
16102 TEST_REQUIRES_ARM_NEON;
16103 for (size_t k = 32; k <= 160; k += 16) {
16104 for (uint32_t n = 1; n <= 8; n++) {
16105 for (uint32_t m = 1; m <= 4; m++) {
16106 GemmMicrokernelTester()
16107 .mr(4)
16108 .nr(8)
16109 .kr(2)
16110 .sr(1)
16111 .m(m)
16112 .n(n)
16113 .k(k)
16114 .iterations(1)
16115 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16116 }
16117 }
16118 }
16119 }
16120
16121 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8) {
16122 TEST_REQUIRES_ARM_NEON;
16123 for (uint32_t n = 9; n < 16; n++) {
16124 for (size_t k = 1; k <= 80; k += 17) {
16125 GemmMicrokernelTester()
16126 .mr(4)
16127 .nr(8)
16128 .kr(2)
16129 .sr(1)
16130 .m(4)
16131 .n(n)
16132 .k(k)
16133 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16134 }
16135 }
16136 }
16137
16138 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
16139 TEST_REQUIRES_ARM_NEON;
16140 for (uint32_t n = 9; n < 16; n++) {
16141 for (size_t k = 1; k <= 80; k += 17) {
16142 GemmMicrokernelTester()
16143 .mr(4)
16144 .nr(8)
16145 .kr(2)
16146 .sr(1)
16147 .m(4)
16148 .n(n)
16149 .k(k)
16150 .cn_stride(11)
16151 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16152 }
16153 }
16154 }
16155
16156 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
16157 TEST_REQUIRES_ARM_NEON;
16158 for (uint32_t n = 9; n < 16; n++) {
16159 for (size_t k = 1; k <= 80; k += 17) {
16160 for (uint32_t m = 1; m <= 4; m++) {
16161 GemmMicrokernelTester()
16162 .mr(4)
16163 .nr(8)
16164 .kr(2)
16165 .sr(1)
16166 .m(m)
16167 .n(n)
16168 .k(k)
16169 .iterations(1)
16170 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16171 }
16172 }
16173 }
16174 }
16175
16176 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8) {
16177 TEST_REQUIRES_ARM_NEON;
16178 for (uint32_t n = 16; n <= 24; n += 8) {
16179 for (size_t k = 1; k <= 80; k += 17) {
16180 GemmMicrokernelTester()
16181 .mr(4)
16182 .nr(8)
16183 .kr(2)
16184 .sr(1)
16185 .m(4)
16186 .n(n)
16187 .k(k)
16188 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16189 }
16190 }
16191 }
16192
16193 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
16194 TEST_REQUIRES_ARM_NEON;
16195 for (uint32_t n = 16; n <= 24; n += 8) {
16196 for (size_t k = 1; k <= 80; k += 17) {
16197 GemmMicrokernelTester()
16198 .mr(4)
16199 .nr(8)
16200 .kr(2)
16201 .sr(1)
16202 .m(4)
16203 .n(n)
16204 .k(k)
16205 .cn_stride(11)
16206 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16207 }
16208 }
16209 }
16210
16211 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
16212 TEST_REQUIRES_ARM_NEON;
16213 for (uint32_t n = 16; n <= 24; n += 8) {
16214 for (size_t k = 1; k <= 80; k += 17) {
16215 for (uint32_t m = 1; m <= 4; m++) {
16216 GemmMicrokernelTester()
16217 .mr(4)
16218 .nr(8)
16219 .kr(2)
16220 .sr(1)
16221 .m(m)
16222 .n(n)
16223 .k(k)
16224 .iterations(1)
16225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16226 }
16227 }
16228 }
16229 }
16230
16231 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel) {
16232 TEST_REQUIRES_ARM_NEON;
16233 for (size_t k = 1; k <= 80; k += 17) {
16234 GemmMicrokernelTester()
16235 .mr(4)
16236 .nr(8)
16237 .kr(2)
16238 .sr(1)
16239 .m(4)
16240 .n(8)
16241 .k(k)
16242 .ks(3)
16243 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16244 }
16245 }
16246
16247 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, small_kernel_subtile) {
16248 TEST_REQUIRES_ARM_NEON;
16249 for (size_t k = 1; k <= 80; k += 17) {
16250 for (uint32_t n = 1; n <= 8; n++) {
16251 for (uint32_t m = 1; m <= 4; m++) {
16252 GemmMicrokernelTester()
16253 .mr(4)
16254 .nr(8)
16255 .kr(2)
16256 .sr(1)
16257 .m(m)
16258 .n(n)
16259 .k(k)
16260 .ks(3)
16261 .iterations(1)
16262 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16263 }
16264 }
16265 }
16266 }
16267
16268 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_small_kernel) {
16269 TEST_REQUIRES_ARM_NEON;
16270 for (uint32_t n = 9; n < 16; n++) {
16271 for (size_t k = 1; k <= 80; k += 17) {
16272 GemmMicrokernelTester()
16273 .mr(4)
16274 .nr(8)
16275 .kr(2)
16276 .sr(1)
16277 .m(4)
16278 .n(n)
16279 .k(k)
16280 .ks(3)
16281 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16282 }
16283 }
16284 }
16285
16286 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_small_kernel) {
16287 TEST_REQUIRES_ARM_NEON;
16288 for (uint32_t n = 16; n <= 24; n += 8) {
16289 for (size_t k = 1; k <= 80; k += 17) {
16290 GemmMicrokernelTester()
16291 .mr(4)
16292 .nr(8)
16293 .kr(2)
16294 .sr(1)
16295 .m(4)
16296 .n(n)
16297 .k(k)
16298 .ks(3)
16299 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16300 }
16301 }
16302 }
16303
16304 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
16305 TEST_REQUIRES_ARM_NEON;
16306 for (size_t k = 1; k <= 80; k += 17) {
16307 for (uint32_t n = 1; n <= 8; n++) {
16308 for (uint32_t m = 1; m <= 4; m++) {
16309 GemmMicrokernelTester()
16310 .mr(4)
16311 .nr(8)
16312 .kr(2)
16313 .sr(1)
16314 .m(m)
16315 .n(n)
16316 .k(k)
16317 .cm_stride(11)
16318 .iterations(1)
16319 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16320 }
16321 }
16322 }
16323 }
16324
16325 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, a_offset) {
16326 TEST_REQUIRES_ARM_NEON;
16327 for (size_t k = 1; k <= 80; k += 17) {
16328 GemmMicrokernelTester()
16329 .mr(4)
16330 .nr(8)
16331 .kr(2)
16332 .sr(1)
16333 .m(4)
16334 .n(8)
16335 .k(k)
16336 .ks(3)
16337 .a_offset(331)
16338 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16339 }
16340 }
16341
16342 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, zero) {
16343 TEST_REQUIRES_ARM_NEON;
16344 for (size_t k = 1; k <= 80; k += 17) {
16345 for (uint32_t mz = 0; mz < 4; mz++) {
16346 GemmMicrokernelTester()
16347 .mr(4)
16348 .nr(8)
16349 .kr(2)
16350 .sr(1)
16351 .m(4)
16352 .n(8)
16353 .k(k)
16354 .ks(3)
16355 .a_offset(331)
16356 .zero_index(mz)
16357 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16358 }
16359 }
16360 }
16361
16362 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmin) {
16363 TEST_REQUIRES_ARM_NEON;
16364 GemmMicrokernelTester()
16365 .mr(4)
16366 .nr(8)
16367 .kr(2)
16368 .sr(1)
16369 .m(4)
16370 .n(8)
16371 .k(16)
16372 .qmin(128)
16373 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16374 }
16375
16376 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmax) {
16377 TEST_REQUIRES_ARM_NEON;
16378 GemmMicrokernelTester()
16379 .mr(4)
16380 .nr(8)
16381 .kr(2)
16382 .sr(1)
16383 .m(4)
16384 .n(8)
16385 .k(16)
16386 .qmax(128)
16387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16388 }
16389
16390 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm) {
16391 TEST_REQUIRES_ARM_NEON;
16392 GemmMicrokernelTester()
16393 .mr(4)
16394 .nr(8)
16395 .kr(2)
16396 .sr(1)
16397 .m(4)
16398 .n(8)
16399 .k(16)
16400 .cm_stride(11)
16401 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16402 }
16403#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16404
16405
16406#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16407 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16) {
16408 TEST_REQUIRES_ARM_NEON;
16409 GemmMicrokernelTester()
16410 .mr(2)
16411 .nr(16)
16412 .kr(2)
16413 .sr(1)
16414 .m(2)
16415 .n(16)
16416 .k(16)
16417 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16418 }
16419
16420 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cn) {
16421 TEST_REQUIRES_ARM_NEON;
16422 GemmMicrokernelTester()
16423 .mr(2)
16424 .nr(16)
16425 .kr(2)
16426 .sr(1)
16427 .m(2)
16428 .n(16)
16429 .k(16)
16430 .cn_stride(19)
16431 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
16432 }
16433
16434 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
16435 TEST_REQUIRES_ARM_NEON;
16436 for (uint32_t n = 1; n <= 16; n++) {
16437 for (uint32_t m = 1; m <= 2; m++) {
16438 GemmMicrokernelTester()
16439 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016440 .nr(16)
16441 .kr(2)
16442 .sr(1)
16443 .m(m)
16444 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016445 .k(16)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016446 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016447 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016448 }
16449 }
16450 }
16451
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016452 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016453 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016454 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016455 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016456 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016457 .nr(16)
16458 .kr(2)
16459 .sr(1)
16460 .m(m)
16461 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016462 .k(16)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016463 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016464 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016465 }
16466 }
16467
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016468 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016469 TEST_REQUIRES_ARM_NEON;
16470 for (uint32_t n = 1; n <= 16; n++) {
16471 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016472 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016473 .nr(16)
16474 .kr(2)
16475 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016476 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016477 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016478 .k(16)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016479 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016480 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016481 }
16482 }
16483
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016484 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016485 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016486 for (size_t k = 1; k < 16; k++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016487 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016488 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016489 .nr(16)
16490 .kr(2)
16491 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016492 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016493 .n(16)
16494 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016495 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016496 }
16497 }
16498
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016499 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016500 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016501 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016502 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016503 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016504 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016505 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016506 .nr(16)
16507 .kr(2)
16508 .sr(1)
16509 .m(m)
16510 .n(n)
16511 .k(k)
16512 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016513 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016514 }
16515 }
16516 }
16517 }
16518
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016519 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016520 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016521 for (size_t k = 17; k < 32; k++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016522 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016523 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016524 .nr(16)
16525 .kr(2)
16526 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016527 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016528 .n(16)
16529 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016530 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016531 }
16532 }
16533
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016534 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016535 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016536 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016537 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016538 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016539 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016540 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016541 .nr(16)
16542 .kr(2)
16543 .sr(1)
16544 .m(m)
16545 .n(n)
16546 .k(k)
16547 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016548 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016549 }
16550 }
16551 }
16552 }
16553
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016554 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016555 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016556 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016557 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016558 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016559 .nr(16)
16560 .kr(2)
16561 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016562 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016563 .n(16)
16564 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016565 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016566 }
16567 }
16568
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016569 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016570 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016571 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016572 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016573 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016574 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016575 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016576 .nr(16)
16577 .kr(2)
16578 .sr(1)
16579 .m(m)
16580 .n(n)
16581 .k(k)
16582 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016583 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016584 }
16585 }
16586 }
16587 }
16588
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016589 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016590 TEST_REQUIRES_ARM_NEON;
16591 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016592 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016593 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016594 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016595 .nr(16)
16596 .kr(2)
16597 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016598 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016599 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016600 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016601 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016602 }
16603 }
16604 }
16605
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016606 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016607 TEST_REQUIRES_ARM_NEON;
16608 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016609 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016610 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016611 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016612 .nr(16)
16613 .kr(2)
16614 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016615 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016616 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016617 .k(k)
16618 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016619 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016620 }
16621 }
16622 }
16623
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016624 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016625 TEST_REQUIRES_ARM_NEON;
16626 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016627 for (size_t k = 1; k <= 80; k += 17) {
16628 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016629 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016630 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016631 .nr(16)
16632 .kr(2)
16633 .sr(1)
16634 .m(m)
16635 .n(n)
16636 .k(k)
16637 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016638 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016639 }
16640 }
16641 }
16642 }
16643
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016644 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016645 TEST_REQUIRES_ARM_NEON;
16646 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016647 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016648 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016649 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016650 .nr(16)
16651 .kr(2)
16652 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016653 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016654 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016655 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016656 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016657 }
16658 }
16659 }
16660
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016661 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016662 TEST_REQUIRES_ARM_NEON;
16663 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016664 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016665 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016666 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016667 .nr(16)
16668 .kr(2)
16669 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016670 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016671 .n(n)
16672 .k(k)
16673 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016674 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016675 }
16676 }
16677 }
16678
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016679 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016680 TEST_REQUIRES_ARM_NEON;
16681 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016682 for (size_t k = 1; k <= 80; k += 17) {
16683 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016684 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016685 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016686 .nr(16)
16687 .kr(2)
16688 .sr(1)
16689 .m(m)
16690 .n(n)
16691 .k(k)
16692 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016694 }
16695 }
16696 }
16697 }
16698
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016699 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016700 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016701 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016702 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016703 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016704 .nr(16)
16705 .kr(2)
16706 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016707 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016708 .n(16)
16709 .k(k)
16710 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016711 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016712 }
16713 }
16714
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016715 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, small_kernel_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016716 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016717 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016718 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016719 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016720 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016721 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016722 .nr(16)
16723 .kr(2)
16724 .sr(1)
16725 .m(m)
16726 .n(n)
16727 .k(k)
16728 .ks(3)
16729 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016730 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016731 }
16732 }
16733 }
16734 }
16735
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016736 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_gt_16_small_kernel) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016737 TEST_REQUIRES_ARM_NEON;
16738 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016739 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016740 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016741 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016742 .nr(16)
16743 .kr(2)
16744 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016745 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016746 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016747 .k(k)
16748 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016749 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016750 }
16751 }
16752 }
16753
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016754 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, n_div_16_small_kernel) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016755 TEST_REQUIRES_ARM_NEON;
16756 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016757 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016758 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016759 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016760 .nr(16)
16761 .kr(2)
16762 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016763 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016764 .n(n)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016765 .k(k)
16766 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016767 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016768 }
16769 }
16770 }
16771
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016772 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016773 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016774 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016775 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016776 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016777 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016778 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016779 .nr(16)
16780 .kr(2)
16781 .sr(1)
16782 .m(m)
16783 .n(n)
16784 .k(k)
16785 .cm_stride(19)
16786 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016788 }
16789 }
16790 }
16791 }
16792
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016793 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, a_offset) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016794 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016795 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016796 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016797 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016798 .nr(16)
16799 .kr(2)
16800 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016801 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016802 .n(16)
16803 .k(k)
16804 .ks(3)
16805 .a_offset(163)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016806 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016807 }
16808 }
16809
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016810 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, zero) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016811 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016812 for (size_t k = 1; k <= 80; k += 17) {
16813 for (uint32_t mz = 0; mz < 2; mz++) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016814 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016815 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016816 .nr(16)
16817 .kr(2)
16818 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016819 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016820 .n(16)
16821 .k(k)
16822 .ks(3)
16823 .a_offset(163)
16824 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016825 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016826 }
16827 }
16828 }
16829
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016830 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmin) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016831 TEST_REQUIRES_ARM_NEON;
16832 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016833 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016834 .nr(16)
16835 .kr(2)
16836 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016837 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016838 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016839 .k(16)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016840 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016842 }
16843
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016844 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, qmax) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016845 TEST_REQUIRES_ARM_NEON;
16846 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016847 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016848 .nr(16)
16849 .kr(2)
16850 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016851 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016852 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016853 .k(16)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016854 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016855 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016856 }
16857
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016858 TEST(QS8_IGEMM_MINMAX_RNDNU_2X16C2__NEON_MLAL_DUP, strided_cm) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016859 TEST_REQUIRES_ARM_NEON;
16860 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016861 .mr(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016862 .nr(16)
16863 .kr(2)
16864 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016865 .m(2)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016866 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016867 .k(16)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016868 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080016869 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ngc27f04b2022-01-11 09:34:07 -080016870 }
16871#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16872
16873
Frank Barcharde31f29e2021-12-21 15:57:10 -080016874#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barcharde22685a2021-11-12 11:36:58 -080016875 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016876 TEST_REQUIRES_ARM_NEON;
16877 GemmMicrokernelTester()
16878 .mr(1)
16879 .nr(8)
16880 .kr(8)
16881 .sr(1)
16882 .m(1)
16883 .n(8)
16884 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016885 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016886 }
16887
Frank Barcharde22685a2021-11-12 11:36:58 -080016888 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016889 TEST_REQUIRES_ARM_NEON;
16890 GemmMicrokernelTester()
16891 .mr(1)
16892 .nr(8)
16893 .kr(8)
16894 .sr(1)
16895 .m(1)
16896 .n(8)
16897 .k(16)
16898 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016899 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016900 }
16901
Frank Barcharde22685a2021-11-12 11:36:58 -080016902 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016903 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016904 for (uint32_t n = 1; n <= 8; n++) {
16905 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016906 GemmMicrokernelTester()
16907 .mr(1)
16908 .nr(8)
16909 .kr(8)
16910 .sr(1)
16911 .m(m)
16912 .n(n)
16913 .k(16)
16914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016915 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016916 }
16917 }
16918 }
16919
Frank Barcharde22685a2021-11-12 11:36:58 -080016920 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_m) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016921 TEST_REQUIRES_ARM_NEON;
16922 for (uint32_t m = 1; m <= 1; m++) {
16923 GemmMicrokernelTester()
16924 .mr(1)
16925 .nr(8)
16926 .kr(8)
16927 .sr(1)
16928 .m(m)
16929 .n(8)
16930 .k(16)
16931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016932 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016933 }
16934 }
16935
Frank Barcharde22685a2021-11-12 11:36:58 -080016936 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_eq_16_subtile_n) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016937 TEST_REQUIRES_ARM_NEON;
16938 for (uint32_t n = 1; n <= 8; n++) {
16939 GemmMicrokernelTester()
16940 .mr(1)
16941 .nr(8)
16942 .kr(8)
16943 .sr(1)
16944 .m(1)
16945 .n(n)
16946 .k(16)
16947 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016948 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016949 }
16950 }
16951
Frank Barcharde22685a2021-11-12 11:36:58 -080016952 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016953 TEST_REQUIRES_ARM_NEON;
16954 for (size_t k = 1; k < 16; k++) {
16955 GemmMicrokernelTester()
16956 .mr(1)
16957 .nr(8)
16958 .kr(8)
16959 .sr(1)
16960 .m(1)
16961 .n(8)
16962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016963 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016964 }
16965 }
16966
Frank Barcharde22685a2021-11-12 11:36:58 -080016967 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_lt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016968 TEST_REQUIRES_ARM_NEON;
16969 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016970 for (uint32_t n = 1; n <= 8; n++) {
16971 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016972 GemmMicrokernelTester()
16973 .mr(1)
16974 .nr(8)
16975 .kr(8)
16976 .sr(1)
16977 .m(m)
16978 .n(n)
16979 .k(k)
16980 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016981 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016982 }
16983 }
16984 }
16985 }
16986
Frank Barcharde22685a2021-11-12 11:36:58 -080016987 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070016988 TEST_REQUIRES_ARM_NEON;
16989 for (size_t k = 17; k < 32; k++) {
16990 GemmMicrokernelTester()
16991 .mr(1)
16992 .nr(8)
16993 .kr(8)
16994 .sr(1)
16995 .m(1)
16996 .n(8)
16997 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016998 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070016999 }
17000 }
17001
Frank Barcharde22685a2021-11-12 11:36:58 -080017002 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_gt_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017003 TEST_REQUIRES_ARM_NEON;
17004 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017005 for (uint32_t n = 1; n <= 8; n++) {
17006 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017007 GemmMicrokernelTester()
17008 .mr(1)
17009 .nr(8)
17010 .kr(8)
17011 .sr(1)
17012 .m(m)
17013 .n(n)
17014 .k(k)
17015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017016 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017017 }
17018 }
17019 }
17020 }
17021
Frank Barcharde22685a2021-11-12 11:36:58 -080017022 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017023 TEST_REQUIRES_ARM_NEON;
17024 for (size_t k = 32; k <= 160; k += 16) {
17025 GemmMicrokernelTester()
17026 .mr(1)
17027 .nr(8)
17028 .kr(8)
17029 .sr(1)
17030 .m(1)
17031 .n(8)
17032 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017033 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017034 }
17035 }
17036
Frank Barcharde22685a2021-11-12 11:36:58 -080017037 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, k_div_16_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017038 TEST_REQUIRES_ARM_NEON;
17039 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017040 for (uint32_t n = 1; n <= 8; n++) {
17041 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017042 GemmMicrokernelTester()
17043 .mr(1)
17044 .nr(8)
17045 .kr(8)
17046 .sr(1)
17047 .m(m)
17048 .n(n)
17049 .k(k)
17050 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017051 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017052 }
17053 }
17054 }
17055 }
17056
Frank Barcharde22685a2021-11-12 11:36:58 -080017057 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017058 TEST_REQUIRES_ARM_NEON;
17059 for (uint32_t n = 9; n < 16; n++) {
17060 for (size_t k = 1; k <= 80; k += 17) {
17061 GemmMicrokernelTester()
17062 .mr(1)
17063 .nr(8)
17064 .kr(8)
17065 .sr(1)
17066 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017067 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017068 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017069 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017070 }
17071 }
17072 }
17073
Frank Barcharde22685a2021-11-12 11:36:58 -080017074 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017075 TEST_REQUIRES_ARM_NEON;
17076 for (uint32_t n = 9; n < 16; n++) {
17077 for (size_t k = 1; k <= 80; k += 17) {
17078 GemmMicrokernelTester()
17079 .mr(1)
17080 .nr(8)
17081 .kr(8)
17082 .sr(1)
17083 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017084 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017085 .k(k)
17086 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017087 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017088 }
17089 }
17090 }
17091
Frank Barcharde22685a2021-11-12 11:36:58 -080017092 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017093 TEST_REQUIRES_ARM_NEON;
17094 for (uint32_t n = 9; n < 16; n++) {
17095 for (size_t k = 1; k <= 80; k += 17) {
17096 for (uint32_t m = 1; m <= 1; m++) {
17097 GemmMicrokernelTester()
17098 .mr(1)
17099 .nr(8)
17100 .kr(8)
17101 .sr(1)
17102 .m(m)
17103 .n(n)
17104 .k(k)
17105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017106 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017107 }
17108 }
17109 }
17110 }
17111
Frank Barcharde22685a2021-11-12 11:36:58 -080017112 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017113 TEST_REQUIRES_ARM_NEON;
17114 for (uint32_t n = 16; n <= 24; n += 8) {
17115 for (size_t k = 1; k <= 80; k += 17) {
17116 GemmMicrokernelTester()
17117 .mr(1)
17118 .nr(8)
17119 .kr(8)
17120 .sr(1)
17121 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017122 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017124 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017125 }
17126 }
17127 }
17128
Frank Barcharde22685a2021-11-12 11:36:58 -080017129 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_strided_cn) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017130 TEST_REQUIRES_ARM_NEON;
17131 for (uint32_t n = 16; n <= 24; n += 8) {
17132 for (size_t k = 1; k <= 80; k += 17) {
17133 GemmMicrokernelTester()
17134 .mr(1)
17135 .nr(8)
17136 .kr(8)
17137 .sr(1)
17138 .m(1)
17139 .n(n)
17140 .k(k)
17141 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017142 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017143 }
17144 }
17145 }
17146
Frank Barcharde22685a2021-11-12 11:36:58 -080017147 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017148 TEST_REQUIRES_ARM_NEON;
17149 for (uint32_t n = 16; n <= 24; n += 8) {
17150 for (size_t k = 1; k <= 80; k += 17) {
17151 for (uint32_t m = 1; m <= 1; m++) {
17152 GemmMicrokernelTester()
17153 .mr(1)
17154 .nr(8)
17155 .kr(8)
17156 .sr(1)
17157 .m(m)
17158 .n(n)
17159 .k(k)
17160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017162 }
17163 }
17164 }
17165 }
17166
Frank Barcharde22685a2021-11-12 11:36:58 -080017167 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017168 TEST_REQUIRES_ARM_NEON;
17169 for (size_t k = 1; k <= 80; k += 17) {
17170 GemmMicrokernelTester()
17171 .mr(1)
17172 .nr(8)
17173 .kr(8)
17174 .sr(1)
17175 .m(1)
17176 .n(8)
17177 .k(k)
17178 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017179 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017180 }
17181 }
17182
Frank Barcharde22685a2021-11-12 11:36:58 -080017183 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, small_kernel_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017184 TEST_REQUIRES_ARM_NEON;
17185 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017186 for (uint32_t n = 1; n <= 8; n++) {
17187 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017188 GemmMicrokernelTester()
17189 .mr(1)
17190 .nr(8)
17191 .kr(8)
17192 .sr(1)
17193 .m(m)
17194 .n(n)
17195 .k(k)
17196 .ks(3)
17197 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017198 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017199 }
17200 }
17201 }
17202 }
17203
Frank Barcharde22685a2021-11-12 11:36:58 -080017204 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_gt_8_small_kernel) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017205 TEST_REQUIRES_ARM_NEON;
17206 for (uint32_t n = 9; n < 16; n++) {
17207 for (size_t k = 1; k <= 80; k += 17) {
17208 GemmMicrokernelTester()
17209 .mr(1)
17210 .nr(8)
17211 .kr(8)
17212 .sr(1)
17213 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017214 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017215 .k(k)
17216 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017217 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017218 }
17219 }
17220 }
17221
Frank Barcharde22685a2021-11-12 11:36:58 -080017222 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, n_div_8_small_kernel) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017223 TEST_REQUIRES_ARM_NEON;
17224 for (uint32_t n = 16; n <= 24; n += 8) {
17225 for (size_t k = 1; k <= 80; k += 17) {
17226 GemmMicrokernelTester()
17227 .mr(1)
17228 .nr(8)
17229 .kr(8)
17230 .sr(1)
17231 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017232 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070017233 .k(k)
17234 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080017235 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017236 }
17237 }
17238 }
17239
Frank Barcharde22685a2021-11-12 11:36:58 -080017240 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm_subtile) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017241 TEST_REQUIRES_ARM_NEON;
17242 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017243 for (uint32_t n = 1; n <= 8; n++) {
17244 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017245 GemmMicrokernelTester()
17246 .mr(1)
17247 .nr(8)
17248 .kr(8)
17249 .sr(1)
17250 .m(m)
17251 .n(n)
17252 .k(k)
17253 .cm_stride(11)
17254 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017256 }
17257 }
17258 }
17259 }
17260
Frank Barcharde22685a2021-11-12 11:36:58 -080017261 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, a_offset) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017262 TEST_REQUIRES_ARM_NEON;
17263 for (size_t k = 1; k <= 80; k += 17) {
17264 GemmMicrokernelTester()
17265 .mr(1)
17266 .nr(8)
17267 .kr(8)
17268 .sr(1)
17269 .m(1)
17270 .n(8)
17271 .k(k)
17272 .ks(3)
17273 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017274 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017275 }
17276 }
17277
Frank Barcharde22685a2021-11-12 11:36:58 -080017278 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, zero) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017279 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017280 for (size_t k = 1; k <= 80; k += 17) {
17281 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017282 GemmMicrokernelTester()
17283 .mr(1)
17284 .nr(8)
17285 .kr(8)
17286 .sr(1)
17287 .m(1)
17288 .n(8)
17289 .k(k)
17290 .ks(3)
17291 .a_offset(83)
17292 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080017293 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017294 }
17295 }
17296 }
17297
Frank Barcharde22685a2021-11-12 11:36:58 -080017298 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmin) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017299 TEST_REQUIRES_ARM_NEON;
17300 GemmMicrokernelTester()
17301 .mr(1)
17302 .nr(8)
17303 .kr(8)
17304 .sr(1)
17305 .m(1)
17306 .n(8)
17307 .k(16)
17308 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017309 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017310 }
17311
Frank Barcharde22685a2021-11-12 11:36:58 -080017312 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, qmax) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017313 TEST_REQUIRES_ARM_NEON;
17314 GemmMicrokernelTester()
17315 .mr(1)
17316 .nr(8)
17317 .kr(8)
17318 .sr(1)
17319 .m(1)
17320 .n(8)
17321 .k(16)
17322 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017323 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017324 }
17325
Frank Barcharde22685a2021-11-12 11:36:58 -080017326 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__AARCH64_NEON_MLAL_CORTEX_A53, strided_cm) {
Frank Barchard13db60f2021-07-20 14:34:35 -070017327 TEST_REQUIRES_ARM_NEON;
17328 GemmMicrokernelTester()
17329 .mr(1)
17330 .nr(8)
17331 .kr(8)
17332 .sr(1)
17333 .m(1)
17334 .n(8)
17335 .k(16)
17336 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017337 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__aarch64_neon_mlal_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070017338 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080017339#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070017340
17341
Frank Barcharde31f29e2021-12-21 15:57:10 -080017342#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017343 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017344 TEST_REQUIRES_ARM_NEON;
17345 GemmMicrokernelTester()
17346 .mr(4)
17347 .nr(16)
17348 .kr(1)
17349 .sr(1)
17350 .m(4)
17351 .n(16)
17352 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017353 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017354 }
17355
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017356 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017357 TEST_REQUIRES_ARM_NEON;
17358 GemmMicrokernelTester()
17359 .mr(4)
17360 .nr(16)
17361 .kr(1)
17362 .sr(1)
17363 .m(4)
17364 .n(16)
17365 .k(8)
17366 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017367 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017368 }
17369
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017370 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017371 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017372 for (uint32_t n = 1; n <= 16; n++) {
17373 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017374 GemmMicrokernelTester()
17375 .mr(4)
17376 .nr(16)
17377 .kr(1)
17378 .sr(1)
17379 .m(m)
17380 .n(n)
17381 .k(8)
17382 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017383 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017384 }
17385 }
17386 }
17387
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017388 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017389 TEST_REQUIRES_ARM_NEON;
17390 for (uint32_t m = 1; m <= 4; m++) {
17391 GemmMicrokernelTester()
17392 .mr(4)
17393 .nr(16)
17394 .kr(1)
17395 .sr(1)
17396 .m(m)
17397 .n(16)
17398 .k(8)
17399 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017400 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017401 }
17402 }
17403
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017404 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017405 TEST_REQUIRES_ARM_NEON;
17406 for (uint32_t n = 1; n <= 16; n++) {
17407 GemmMicrokernelTester()
17408 .mr(4)
17409 .nr(16)
17410 .kr(1)
17411 .sr(1)
17412 .m(4)
17413 .n(n)
17414 .k(8)
17415 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017416 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017417 }
17418 }
17419
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017420 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017421 TEST_REQUIRES_ARM_NEON;
17422 for (size_t k = 1; k < 8; k++) {
17423 GemmMicrokernelTester()
17424 .mr(4)
17425 .nr(16)
17426 .kr(1)
17427 .sr(1)
17428 .m(4)
17429 .n(16)
17430 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017431 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017432 }
17433 }
17434
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017435 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017436 TEST_REQUIRES_ARM_NEON;
17437 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017438 for (uint32_t n = 1; n <= 16; n++) {
17439 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017440 GemmMicrokernelTester()
17441 .mr(4)
17442 .nr(16)
17443 .kr(1)
17444 .sr(1)
17445 .m(m)
17446 .n(n)
17447 .k(k)
17448 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017449 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017450 }
17451 }
17452 }
17453 }
17454
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017455 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017456 TEST_REQUIRES_ARM_NEON;
17457 for (size_t k = 9; k < 16; k++) {
17458 GemmMicrokernelTester()
17459 .mr(4)
17460 .nr(16)
17461 .kr(1)
17462 .sr(1)
17463 .m(4)
17464 .n(16)
17465 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017466 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017467 }
17468 }
17469
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017470 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017471 TEST_REQUIRES_ARM_NEON;
17472 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017473 for (uint32_t n = 1; n <= 16; n++) {
17474 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017475 GemmMicrokernelTester()
17476 .mr(4)
17477 .nr(16)
17478 .kr(1)
17479 .sr(1)
17480 .m(m)
17481 .n(n)
17482 .k(k)
17483 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017484 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017485 }
17486 }
17487 }
17488 }
17489
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017490 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017491 TEST_REQUIRES_ARM_NEON;
17492 for (size_t k = 16; k <= 80; k += 8) {
17493 GemmMicrokernelTester()
17494 .mr(4)
17495 .nr(16)
17496 .kr(1)
17497 .sr(1)
17498 .m(4)
17499 .n(16)
17500 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017501 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017502 }
17503 }
17504
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017505 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017506 TEST_REQUIRES_ARM_NEON;
17507 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017508 for (uint32_t n = 1; n <= 16; n++) {
17509 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017510 GemmMicrokernelTester()
17511 .mr(4)
17512 .nr(16)
17513 .kr(1)
17514 .sr(1)
17515 .m(m)
17516 .n(n)
17517 .k(k)
17518 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017520 }
17521 }
17522 }
17523 }
17524
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017525 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017526 TEST_REQUIRES_ARM_NEON;
17527 for (uint32_t n = 17; n < 32; n++) {
17528 for (size_t k = 1; k <= 40; k += 9) {
17529 GemmMicrokernelTester()
17530 .mr(4)
17531 .nr(16)
17532 .kr(1)
17533 .sr(1)
17534 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017535 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017536 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017537 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017538 }
17539 }
17540 }
17541
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017542 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017543 TEST_REQUIRES_ARM_NEON;
17544 for (uint32_t n = 17; n < 32; n++) {
17545 for (size_t k = 1; k <= 40; k += 9) {
17546 GemmMicrokernelTester()
17547 .mr(4)
17548 .nr(16)
17549 .kr(1)
17550 .sr(1)
17551 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017552 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017553 .k(k)
17554 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017555 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017556 }
17557 }
17558 }
17559
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017560 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017561 TEST_REQUIRES_ARM_NEON;
17562 for (uint32_t n = 17; n < 32; n++) {
17563 for (size_t k = 1; k <= 40; k += 9) {
17564 for (uint32_t m = 1; m <= 4; m++) {
17565 GemmMicrokernelTester()
17566 .mr(4)
17567 .nr(16)
17568 .kr(1)
17569 .sr(1)
17570 .m(m)
17571 .n(n)
17572 .k(k)
17573 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017574 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017575 }
17576 }
17577 }
17578 }
17579
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017580 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017581 TEST_REQUIRES_ARM_NEON;
17582 for (uint32_t n = 32; n <= 48; n += 16) {
17583 for (size_t k = 1; k <= 40; k += 9) {
17584 GemmMicrokernelTester()
17585 .mr(4)
17586 .nr(16)
17587 .kr(1)
17588 .sr(1)
17589 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017590 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017591 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017592 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017593 }
17594 }
17595 }
17596
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017597 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017598 TEST_REQUIRES_ARM_NEON;
17599 for (uint32_t n = 32; n <= 48; n += 16) {
17600 for (size_t k = 1; k <= 40; k += 9) {
17601 GemmMicrokernelTester()
17602 .mr(4)
17603 .nr(16)
17604 .kr(1)
17605 .sr(1)
17606 .m(4)
17607 .n(n)
17608 .k(k)
17609 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017610 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017611 }
17612 }
17613 }
17614
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017615 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017616 TEST_REQUIRES_ARM_NEON;
17617 for (uint32_t n = 32; n <= 48; n += 16) {
17618 for (size_t k = 1; k <= 40; k += 9) {
17619 for (uint32_t m = 1; m <= 4; m++) {
17620 GemmMicrokernelTester()
17621 .mr(4)
17622 .nr(16)
17623 .kr(1)
17624 .sr(1)
17625 .m(m)
17626 .n(n)
17627 .k(k)
17628 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017630 }
17631 }
17632 }
17633 }
17634
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017635 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017636 TEST_REQUIRES_ARM_NEON;
17637 for (size_t k = 1; k <= 40; k += 9) {
17638 GemmMicrokernelTester()
17639 .mr(4)
17640 .nr(16)
17641 .kr(1)
17642 .sr(1)
17643 .m(4)
17644 .n(16)
17645 .k(k)
17646 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017647 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017648 }
17649 }
17650
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017651 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017652 TEST_REQUIRES_ARM_NEON;
17653 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017654 for (uint32_t n = 1; n <= 16; n++) {
17655 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017656 GemmMicrokernelTester()
17657 .mr(4)
17658 .nr(16)
17659 .kr(1)
17660 .sr(1)
17661 .m(m)
17662 .n(n)
17663 .k(k)
17664 .ks(3)
17665 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017666 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017667 }
17668 }
17669 }
17670 }
17671
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017672 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017673 TEST_REQUIRES_ARM_NEON;
17674 for (uint32_t n = 17; n < 32; n++) {
17675 for (size_t k = 1; k <= 40; k += 9) {
17676 GemmMicrokernelTester()
17677 .mr(4)
17678 .nr(16)
17679 .kr(1)
17680 .sr(1)
17681 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017682 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017683 .k(k)
17684 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017685 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017686 }
17687 }
17688 }
17689
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017690 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017691 TEST_REQUIRES_ARM_NEON;
17692 for (uint32_t n = 32; n <= 48; n += 16) {
17693 for (size_t k = 1; k <= 40; k += 9) {
17694 GemmMicrokernelTester()
17695 .mr(4)
17696 .nr(16)
17697 .kr(1)
17698 .sr(1)
17699 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017700 .n(n)
Frank Barchard5cffb642021-11-22 13:59:43 -080017701 .k(k)
17702 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017703 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017704 }
17705 }
17706 }
17707
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017708 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017709 TEST_REQUIRES_ARM_NEON;
17710 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017711 for (uint32_t n = 1; n <= 16; n++) {
17712 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017713 GemmMicrokernelTester()
17714 .mr(4)
17715 .nr(16)
17716 .kr(1)
17717 .sr(1)
17718 .m(m)
17719 .n(n)
17720 .k(k)
17721 .cm_stride(19)
17722 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017724 }
17725 }
17726 }
17727 }
17728
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017729 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017730 TEST_REQUIRES_ARM_NEON;
17731 for (size_t k = 1; k <= 40; k += 9) {
17732 GemmMicrokernelTester()
17733 .mr(4)
17734 .nr(16)
17735 .kr(1)
17736 .sr(1)
17737 .m(4)
17738 .n(16)
17739 .k(k)
17740 .ks(3)
17741 .a_offset(163)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017742 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017743 }
17744 }
17745
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017746 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017747 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017748 for (size_t k = 1; k <= 40; k += 9) {
17749 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017750 GemmMicrokernelTester()
17751 .mr(4)
17752 .nr(16)
17753 .kr(1)
17754 .sr(1)
17755 .m(4)
17756 .n(16)
17757 .k(k)
17758 .ks(3)
17759 .a_offset(163)
17760 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017761 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017762 }
17763 }
17764 }
17765
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017766 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017767 TEST_REQUIRES_ARM_NEON;
17768 GemmMicrokernelTester()
17769 .mr(4)
17770 .nr(16)
17771 .kr(1)
17772 .sr(1)
17773 .m(4)
17774 .n(16)
17775 .k(8)
17776 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017777 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017778 }
17779
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017780 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017781 TEST_REQUIRES_ARM_NEON;
17782 GemmMicrokernelTester()
17783 .mr(4)
17784 .nr(16)
17785 .kr(1)
17786 .sr(1)
17787 .m(4)
17788 .n(16)
17789 .k(8)
17790 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017791 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080017792 }
17793
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017794 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
Frank Barchard5cffb642021-11-22 13:59:43 -080017795 TEST_REQUIRES_ARM_NEON;
17796 GemmMicrokernelTester()
17797 .mr(4)
17798 .nr(16)
17799 .kr(1)
17800 .sr(1)
17801 .m(4)
17802 .n(16)
17803 .k(8)
17804 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080017805 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17806 }
17807#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
17808
17809
17810#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
17811 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
17812 TEST_REQUIRES_ARM_NEON;
17813 GemmMicrokernelTester()
17814 .mr(4)
17815 .nr(16)
17816 .kr(1)
17817 .sr(1)
17818 .m(4)
17819 .n(16)
17820 .k(8)
17821 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17822 }
17823
17824 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
17825 TEST_REQUIRES_ARM_NEON;
17826 GemmMicrokernelTester()
17827 .mr(4)
17828 .nr(16)
17829 .kr(1)
17830 .sr(1)
17831 .m(4)
17832 .n(16)
17833 .k(8)
17834 .cn_stride(19)
17835 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17836 }
17837
17838 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
17839 TEST_REQUIRES_ARM_NEON;
17840 for (uint32_t n = 1; n <= 16; n++) {
17841 for (uint32_t m = 1; m <= 4; m++) {
17842 GemmMicrokernelTester()
17843 .mr(4)
17844 .nr(16)
17845 .kr(1)
17846 .sr(1)
17847 .m(m)
17848 .n(n)
17849 .k(8)
17850 .iterations(1)
17851 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17852 }
17853 }
17854 }
17855
17856 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
17857 TEST_REQUIRES_ARM_NEON;
17858 for (uint32_t m = 1; m <= 4; m++) {
17859 GemmMicrokernelTester()
17860 .mr(4)
17861 .nr(16)
17862 .kr(1)
17863 .sr(1)
17864 .m(m)
17865 .n(16)
17866 .k(8)
17867 .iterations(1)
17868 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17869 }
17870 }
17871
17872 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
17873 TEST_REQUIRES_ARM_NEON;
17874 for (uint32_t n = 1; n <= 16; n++) {
17875 GemmMicrokernelTester()
17876 .mr(4)
17877 .nr(16)
17878 .kr(1)
17879 .sr(1)
17880 .m(4)
17881 .n(n)
17882 .k(8)
17883 .iterations(1)
17884 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17885 }
17886 }
17887
17888 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
17889 TEST_REQUIRES_ARM_NEON;
17890 for (size_t k = 1; k < 8; k++) {
17891 GemmMicrokernelTester()
17892 .mr(4)
17893 .nr(16)
17894 .kr(1)
17895 .sr(1)
17896 .m(4)
17897 .n(16)
17898 .k(k)
17899 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17900 }
17901 }
17902
17903 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
17904 TEST_REQUIRES_ARM_NEON;
17905 for (size_t k = 1; k < 8; k++) {
17906 for (uint32_t n = 1; n <= 16; n++) {
17907 for (uint32_t m = 1; m <= 4; m++) {
17908 GemmMicrokernelTester()
17909 .mr(4)
17910 .nr(16)
17911 .kr(1)
17912 .sr(1)
17913 .m(m)
17914 .n(n)
17915 .k(k)
17916 .iterations(1)
17917 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17918 }
17919 }
17920 }
17921 }
17922
17923 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
17924 TEST_REQUIRES_ARM_NEON;
17925 for (size_t k = 9; k < 16; k++) {
17926 GemmMicrokernelTester()
17927 .mr(4)
17928 .nr(16)
17929 .kr(1)
17930 .sr(1)
17931 .m(4)
17932 .n(16)
17933 .k(k)
17934 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17935 }
17936 }
17937
17938 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
17939 TEST_REQUIRES_ARM_NEON;
17940 for (size_t k = 9; k < 16; k++) {
17941 for (uint32_t n = 1; n <= 16; n++) {
17942 for (uint32_t m = 1; m <= 4; m++) {
17943 GemmMicrokernelTester()
17944 .mr(4)
17945 .nr(16)
17946 .kr(1)
17947 .sr(1)
17948 .m(m)
17949 .n(n)
17950 .k(k)
17951 .iterations(1)
17952 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17953 }
17954 }
17955 }
17956 }
17957
17958 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
17959 TEST_REQUIRES_ARM_NEON;
17960 for (size_t k = 16; k <= 80; k += 8) {
17961 GemmMicrokernelTester()
17962 .mr(4)
17963 .nr(16)
17964 .kr(1)
17965 .sr(1)
17966 .m(4)
17967 .n(16)
17968 .k(k)
17969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17970 }
17971 }
17972
17973 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
17974 TEST_REQUIRES_ARM_NEON;
17975 for (size_t k = 16; k <= 80; k += 8) {
17976 for (uint32_t n = 1; n <= 16; n++) {
17977 for (uint32_t m = 1; m <= 4; m++) {
17978 GemmMicrokernelTester()
17979 .mr(4)
17980 .nr(16)
17981 .kr(1)
17982 .sr(1)
17983 .m(m)
17984 .n(n)
17985 .k(k)
17986 .iterations(1)
17987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
17988 }
17989 }
17990 }
17991 }
17992
17993 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
17994 TEST_REQUIRES_ARM_NEON;
17995 for (uint32_t n = 17; n < 32; n++) {
17996 for (size_t k = 1; k <= 40; k += 9) {
17997 GemmMicrokernelTester()
17998 .mr(4)
17999 .nr(16)
18000 .kr(1)
18001 .sr(1)
18002 .m(4)
18003 .n(n)
18004 .k(k)
18005 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18006 }
18007 }
18008 }
18009
18010 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
18011 TEST_REQUIRES_ARM_NEON;
18012 for (uint32_t n = 17; n < 32; n++) {
18013 for (size_t k = 1; k <= 40; k += 9) {
18014 GemmMicrokernelTester()
18015 .mr(4)
18016 .nr(16)
18017 .kr(1)
18018 .sr(1)
18019 .m(4)
18020 .n(n)
18021 .k(k)
18022 .cn_stride(19)
18023 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18024 }
18025 }
18026 }
18027
18028 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
18029 TEST_REQUIRES_ARM_NEON;
18030 for (uint32_t n = 17; n < 32; n++) {
18031 for (size_t k = 1; k <= 40; k += 9) {
18032 for (uint32_t m = 1; m <= 4; m++) {
18033 GemmMicrokernelTester()
18034 .mr(4)
18035 .nr(16)
18036 .kr(1)
18037 .sr(1)
18038 .m(m)
18039 .n(n)
18040 .k(k)
18041 .iterations(1)
18042 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18043 }
18044 }
18045 }
18046 }
18047
18048 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
18049 TEST_REQUIRES_ARM_NEON;
18050 for (uint32_t n = 32; n <= 48; n += 16) {
18051 for (size_t k = 1; k <= 40; k += 9) {
18052 GemmMicrokernelTester()
18053 .mr(4)
18054 .nr(16)
18055 .kr(1)
18056 .sr(1)
18057 .m(4)
18058 .n(n)
18059 .k(k)
18060 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18061 }
18062 }
18063 }
18064
18065 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
18066 TEST_REQUIRES_ARM_NEON;
18067 for (uint32_t n = 32; n <= 48; n += 16) {
18068 for (size_t k = 1; k <= 40; k += 9) {
18069 GemmMicrokernelTester()
18070 .mr(4)
18071 .nr(16)
18072 .kr(1)
18073 .sr(1)
18074 .m(4)
18075 .n(n)
18076 .k(k)
18077 .cn_stride(19)
18078 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18079 }
18080 }
18081 }
18082
18083 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
18084 TEST_REQUIRES_ARM_NEON;
18085 for (uint32_t n = 32; n <= 48; n += 16) {
18086 for (size_t k = 1; k <= 40; k += 9) {
18087 for (uint32_t m = 1; m <= 4; m++) {
18088 GemmMicrokernelTester()
18089 .mr(4)
18090 .nr(16)
18091 .kr(1)
18092 .sr(1)
18093 .m(m)
18094 .n(n)
18095 .k(k)
18096 .iterations(1)
18097 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18098 }
18099 }
18100 }
18101 }
18102
18103 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
18104 TEST_REQUIRES_ARM_NEON;
18105 for (size_t k = 1; k <= 40; k += 9) {
18106 GemmMicrokernelTester()
18107 .mr(4)
18108 .nr(16)
18109 .kr(1)
18110 .sr(1)
18111 .m(4)
18112 .n(16)
18113 .k(k)
18114 .ks(3)
18115 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18116 }
18117 }
18118
18119 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
18120 TEST_REQUIRES_ARM_NEON;
18121 for (size_t k = 1; k <= 40; k += 9) {
18122 for (uint32_t n = 1; n <= 16; n++) {
18123 for (uint32_t m = 1; m <= 4; m++) {
18124 GemmMicrokernelTester()
18125 .mr(4)
18126 .nr(16)
18127 .kr(1)
18128 .sr(1)
18129 .m(m)
18130 .n(n)
18131 .k(k)
18132 .ks(3)
18133 .iterations(1)
18134 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18135 }
18136 }
18137 }
18138 }
18139
18140 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
18141 TEST_REQUIRES_ARM_NEON;
18142 for (uint32_t n = 17; n < 32; n++) {
18143 for (size_t k = 1; k <= 40; k += 9) {
18144 GemmMicrokernelTester()
18145 .mr(4)
18146 .nr(16)
18147 .kr(1)
18148 .sr(1)
18149 .m(4)
18150 .n(n)
18151 .k(k)
18152 .ks(3)
18153 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18154 }
18155 }
18156 }
18157
18158 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
18159 TEST_REQUIRES_ARM_NEON;
18160 for (uint32_t n = 32; n <= 48; n += 16) {
18161 for (size_t k = 1; k <= 40; k += 9) {
18162 GemmMicrokernelTester()
18163 .mr(4)
18164 .nr(16)
18165 .kr(1)
18166 .sr(1)
18167 .m(4)
18168 .n(n)
18169 .k(k)
18170 .ks(3)
18171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18172 }
18173 }
18174 }
18175
18176 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
18177 TEST_REQUIRES_ARM_NEON;
18178 for (size_t k = 1; k <= 40; k += 9) {
18179 for (uint32_t n = 1; n <= 16; n++) {
18180 for (uint32_t m = 1; m <= 4; m++) {
18181 GemmMicrokernelTester()
18182 .mr(4)
18183 .nr(16)
18184 .kr(1)
18185 .sr(1)
18186 .m(m)
18187 .n(n)
18188 .k(k)
18189 .cm_stride(19)
18190 .iterations(1)
18191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18192 }
18193 }
18194 }
18195 }
18196
18197 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
18198 TEST_REQUIRES_ARM_NEON;
18199 for (size_t k = 1; k <= 40; k += 9) {
18200 GemmMicrokernelTester()
18201 .mr(4)
18202 .nr(16)
18203 .kr(1)
18204 .sr(1)
18205 .m(4)
18206 .n(16)
18207 .k(k)
18208 .ks(3)
18209 .a_offset(163)
18210 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18211 }
18212 }
18213
18214 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
18215 TEST_REQUIRES_ARM_NEON;
18216 for (size_t k = 1; k <= 40; k += 9) {
18217 for (uint32_t mz = 0; mz < 4; mz++) {
18218 GemmMicrokernelTester()
18219 .mr(4)
18220 .nr(16)
18221 .kr(1)
18222 .sr(1)
18223 .m(4)
18224 .n(16)
18225 .k(k)
18226 .ks(3)
18227 .a_offset(163)
18228 .zero_index(mz)
18229 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18230 }
18231 }
18232 }
18233
18234 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
18235 TEST_REQUIRES_ARM_NEON;
18236 GemmMicrokernelTester()
18237 .mr(4)
18238 .nr(16)
18239 .kr(1)
18240 .sr(1)
18241 .m(4)
18242 .n(16)
18243 .k(8)
18244 .qmin(128)
18245 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18246 }
18247
18248 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
18249 TEST_REQUIRES_ARM_NEON;
18250 GemmMicrokernelTester()
18251 .mr(4)
18252 .nr(16)
18253 .kr(1)
18254 .sr(1)
18255 .m(4)
18256 .n(16)
18257 .k(8)
18258 .qmax(128)
18259 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18260 }
18261
18262 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
18263 TEST_REQUIRES_ARM_NEON;
18264 GemmMicrokernelTester()
18265 .mr(4)
18266 .nr(16)
18267 .kr(1)
18268 .sr(1)
18269 .m(4)
18270 .n(16)
18271 .k(8)
18272 .cm_stride(19)
18273 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard5cffb642021-11-22 13:59:43 -080018274 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080018275#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard5cffb642021-11-22 13:59:43 -080018276
18277
Frank Barcharde31f29e2021-12-21 15:57:10 -080018278#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070018279 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
18280 TEST_REQUIRES_ARM_NEON_DOT;
18281 GemmMicrokernelTester()
18282 .mr(4)
18283 .nr(16)
18284 .kr(4)
18285 .sr(1)
18286 .m(4)
18287 .n(16)
18288 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080018289 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018290 }
18291
18292 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
18293 TEST_REQUIRES_ARM_NEON_DOT;
18294 GemmMicrokernelTester()
18295 .mr(4)
18296 .nr(16)
18297 .kr(4)
18298 .sr(1)
18299 .m(4)
18300 .n(16)
18301 .k(16)
18302 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018303 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018304 }
18305
18306 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
18307 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018308 for (uint32_t n = 1; n <= 16; n++) {
18309 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018310 GemmMicrokernelTester()
18311 .mr(4)
18312 .nr(16)
18313 .kr(4)
18314 .sr(1)
18315 .m(m)
18316 .n(n)
18317 .k(16)
18318 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018319 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018320 }
18321 }
18322 }
18323
18324 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
18325 TEST_REQUIRES_ARM_NEON_DOT;
18326 for (uint32_t m = 1; m <= 4; m++) {
18327 GemmMicrokernelTester()
18328 .mr(4)
18329 .nr(16)
18330 .kr(4)
18331 .sr(1)
18332 .m(m)
18333 .n(16)
18334 .k(16)
18335 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018336 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018337 }
18338 }
18339
18340 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
18341 TEST_REQUIRES_ARM_NEON_DOT;
18342 for (uint32_t n = 1; n <= 16; n++) {
18343 GemmMicrokernelTester()
18344 .mr(4)
18345 .nr(16)
18346 .kr(4)
18347 .sr(1)
18348 .m(4)
18349 .n(n)
18350 .k(16)
18351 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018352 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018353 }
18354 }
18355
18356 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
18357 TEST_REQUIRES_ARM_NEON_DOT;
18358 for (size_t k = 1; k < 16; k++) {
18359 GemmMicrokernelTester()
18360 .mr(4)
18361 .nr(16)
18362 .kr(4)
18363 .sr(1)
18364 .m(4)
18365 .n(16)
18366 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018367 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018368 }
18369 }
18370
18371 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
18372 TEST_REQUIRES_ARM_NEON_DOT;
18373 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018374 for (uint32_t n = 1; n <= 16; n++) {
18375 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018376 GemmMicrokernelTester()
18377 .mr(4)
18378 .nr(16)
18379 .kr(4)
18380 .sr(1)
18381 .m(m)
18382 .n(n)
18383 .k(k)
18384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018386 }
18387 }
18388 }
18389 }
18390
18391 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
18392 TEST_REQUIRES_ARM_NEON_DOT;
18393 for (size_t k = 17; k < 32; k++) {
18394 GemmMicrokernelTester()
18395 .mr(4)
18396 .nr(16)
18397 .kr(4)
18398 .sr(1)
18399 .m(4)
18400 .n(16)
18401 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018402 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018403 }
18404 }
18405
18406 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
18407 TEST_REQUIRES_ARM_NEON_DOT;
18408 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018409 for (uint32_t n = 1; n <= 16; n++) {
18410 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018411 GemmMicrokernelTester()
18412 .mr(4)
18413 .nr(16)
18414 .kr(4)
18415 .sr(1)
18416 .m(m)
18417 .n(n)
18418 .k(k)
18419 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018420 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018421 }
18422 }
18423 }
18424 }
18425
18426 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
18427 TEST_REQUIRES_ARM_NEON_DOT;
18428 for (size_t k = 32; k <= 160; k += 16) {
18429 GemmMicrokernelTester()
18430 .mr(4)
18431 .nr(16)
18432 .kr(4)
18433 .sr(1)
18434 .m(4)
18435 .n(16)
18436 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018437 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018438 }
18439 }
18440
18441 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
18442 TEST_REQUIRES_ARM_NEON_DOT;
18443 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018444 for (uint32_t n = 1; n <= 16; n++) {
18445 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018446 GemmMicrokernelTester()
18447 .mr(4)
18448 .nr(16)
18449 .kr(4)
18450 .sr(1)
18451 .m(m)
18452 .n(n)
18453 .k(k)
18454 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018456 }
18457 }
18458 }
18459 }
18460
18461 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
18462 TEST_REQUIRES_ARM_NEON_DOT;
18463 for (uint32_t n = 17; n < 32; n++) {
18464 for (size_t k = 1; k <= 80; k += 17) {
18465 GemmMicrokernelTester()
18466 .mr(4)
18467 .nr(16)
18468 .kr(4)
18469 .sr(1)
18470 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018471 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070018472 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018473 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018474 }
18475 }
18476 }
18477
18478 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
18479 TEST_REQUIRES_ARM_NEON_DOT;
18480 for (uint32_t n = 17; n < 32; n++) {
18481 for (size_t k = 1; k <= 80; k += 17) {
18482 GemmMicrokernelTester()
18483 .mr(4)
18484 .nr(16)
18485 .kr(4)
18486 .sr(1)
18487 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018488 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070018489 .k(k)
18490 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018491 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018492 }
18493 }
18494 }
18495
18496 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
18497 TEST_REQUIRES_ARM_NEON_DOT;
18498 for (uint32_t n = 17; n < 32; n++) {
18499 for (size_t k = 1; k <= 80; k += 17) {
18500 for (uint32_t m = 1; m <= 4; m++) {
18501 GemmMicrokernelTester()
18502 .mr(4)
18503 .nr(16)
18504 .kr(4)
18505 .sr(1)
18506 .m(m)
18507 .n(n)
18508 .k(k)
18509 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018510 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018511 }
18512 }
18513 }
18514 }
18515
18516 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
18517 TEST_REQUIRES_ARM_NEON_DOT;
18518 for (uint32_t n = 32; n <= 48; n += 16) {
18519 for (size_t k = 1; k <= 80; k += 17) {
18520 GemmMicrokernelTester()
18521 .mr(4)
18522 .nr(16)
18523 .kr(4)
18524 .sr(1)
18525 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018526 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070018527 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018528 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018529 }
18530 }
18531 }
18532
18533 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
18534 TEST_REQUIRES_ARM_NEON_DOT;
18535 for (uint32_t n = 32; n <= 48; n += 16) {
18536 for (size_t k = 1; k <= 80; k += 17) {
18537 GemmMicrokernelTester()
18538 .mr(4)
18539 .nr(16)
18540 .kr(4)
18541 .sr(1)
18542 .m(4)
18543 .n(n)
18544 .k(k)
18545 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018546 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018547 }
18548 }
18549 }
18550
18551 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
18552 TEST_REQUIRES_ARM_NEON_DOT;
18553 for (uint32_t n = 32; n <= 48; n += 16) {
18554 for (size_t k = 1; k <= 80; k += 17) {
18555 for (uint32_t m = 1; m <= 4; m++) {
18556 GemmMicrokernelTester()
18557 .mr(4)
18558 .nr(16)
18559 .kr(4)
18560 .sr(1)
18561 .m(m)
18562 .n(n)
18563 .k(k)
18564 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018565 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018566 }
18567 }
18568 }
18569 }
18570
18571 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
18572 TEST_REQUIRES_ARM_NEON_DOT;
18573 for (size_t k = 1; k <= 80; k += 17) {
18574 GemmMicrokernelTester()
18575 .mr(4)
18576 .nr(16)
18577 .kr(4)
18578 .sr(1)
18579 .m(4)
18580 .n(16)
18581 .k(k)
18582 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018583 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018584 }
18585 }
18586
18587 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
18588 TEST_REQUIRES_ARM_NEON_DOT;
18589 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018590 for (uint32_t n = 1; n <= 16; n++) {
18591 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018592 GemmMicrokernelTester()
18593 .mr(4)
18594 .nr(16)
18595 .kr(4)
18596 .sr(1)
18597 .m(m)
18598 .n(n)
18599 .k(k)
18600 .ks(3)
18601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018602 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018603 }
18604 }
18605 }
18606 }
18607
18608 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_small_kernel) {
18609 TEST_REQUIRES_ARM_NEON_DOT;
18610 for (uint32_t n = 17; n < 32; n++) {
18611 for (size_t k = 1; k <= 80; k += 17) {
18612 GemmMicrokernelTester()
18613 .mr(4)
18614 .nr(16)
18615 .kr(4)
18616 .sr(1)
18617 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018618 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070018619 .k(k)
18620 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018621 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018622 }
18623 }
18624 }
18625
18626 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_small_kernel) {
18627 TEST_REQUIRES_ARM_NEON_DOT;
18628 for (uint32_t n = 32; n <= 48; n += 16) {
18629 for (size_t k = 1; k <= 80; k += 17) {
18630 GemmMicrokernelTester()
18631 .mr(4)
18632 .nr(16)
18633 .kr(4)
18634 .sr(1)
18635 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018636 .n(n)
Frank Barchard13db60f2021-07-20 14:34:35 -070018637 .k(k)
18638 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080018639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018640 }
18641 }
18642 }
18643
18644 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
18645 TEST_REQUIRES_ARM_NEON_DOT;
18646 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018647 for (uint32_t n = 1; n <= 16; n++) {
18648 for (uint32_t m = 1; m <= 4; m++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018649 GemmMicrokernelTester()
18650 .mr(4)
18651 .nr(16)
18652 .kr(4)
18653 .sr(1)
18654 .m(m)
18655 .n(n)
18656 .k(k)
18657 .cm_stride(19)
18658 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018659 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018660 }
18661 }
18662 }
18663 }
18664
18665 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
18666 TEST_REQUIRES_ARM_NEON_DOT;
18667 for (size_t k = 1; k <= 80; k += 17) {
18668 GemmMicrokernelTester()
18669 .mr(4)
18670 .nr(16)
18671 .kr(4)
18672 .sr(1)
18673 .m(4)
18674 .n(16)
18675 .k(k)
18676 .ks(3)
18677 .a_offset(331)
Marat Dukhan50323b82022-01-11 00:12:01 -080018678 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018679 }
18680 }
18681
18682 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
18683 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018684 for (size_t k = 1; k <= 80; k += 17) {
18685 for (uint32_t mz = 0; mz < 4; mz++) {
Frank Barchard13db60f2021-07-20 14:34:35 -070018686 GemmMicrokernelTester()
18687 .mr(4)
18688 .nr(16)
18689 .kr(4)
18690 .sr(1)
18691 .m(4)
18692 .n(16)
18693 .k(k)
18694 .ks(3)
18695 .a_offset(331)
18696 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080018697 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018698 }
18699 }
18700 }
18701
18702 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
18703 TEST_REQUIRES_ARM_NEON_DOT;
18704 GemmMicrokernelTester()
18705 .mr(4)
18706 .nr(16)
18707 .kr(4)
18708 .sr(1)
18709 .m(4)
18710 .n(16)
18711 .k(16)
18712 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018713 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018714 }
18715
18716 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
18717 TEST_REQUIRES_ARM_NEON_DOT;
18718 GemmMicrokernelTester()
18719 .mr(4)
18720 .nr(16)
18721 .kr(4)
18722 .sr(1)
18723 .m(4)
18724 .n(16)
18725 .k(16)
18726 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018727 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018728 }
18729
18730 TEST(QS8_IGEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
18731 TEST_REQUIRES_ARM_NEON_DOT;
18732 GemmMicrokernelTester()
18733 .mr(4)
18734 .nr(16)
18735 .kr(4)
18736 .sr(1)
18737 .m(4)
18738 .n(16)
18739 .k(16)
18740 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018741 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard13db60f2021-07-20 14:34:35 -070018742 }
Frank Barcharde31f29e2021-12-21 15:57:10 -080018743#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
Frank Barchard13db60f2021-07-20 14:34:35 -070018744
18745
Marat Dukhane903dff2021-07-16 19:43:41 -070018746#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018747 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8) {
Marat Dukhan89991902021-12-06 00:54:36 -080018748 TEST_REQUIRES_ARM_NEON;
18749 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018750 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080018751 .nr(8)
18752 .kr(8)
18753 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018754 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080018755 .n(8)
18756 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018757 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018758 }
18759
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018760 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080018761 TEST_REQUIRES_ARM_NEON;
18762 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018763 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080018764 .nr(8)
18765 .kr(8)
18766 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018767 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080018768 .n(8)
18769 .k(8)
18770 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018771 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080018772 }
18773
Zhi An Nge96b6bc2022-02-03 10:49:46 -080018774 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile) {
18775 TEST_REQUIRES_ARM_NEON;
18776 for (uint32_t n = 1; n <= 8; n++) {
18777 for (uint32_t m = 1; m <= 1; m++) {
18778 GemmMicrokernelTester()
18779 .mr(1)
18780 .nr(8)
18781 .kr(8)
18782 .sr(1)
18783 .m(m)
18784 .n(n)
18785 .k(8)
18786 .iterations(1)
18787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18788 }
18789 }
18790 }
18791
18792 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_m) {
18793 TEST_REQUIRES_ARM_NEON;
18794 for (uint32_t m = 1; m <= 1; m++) {
18795 GemmMicrokernelTester()
18796 .mr(1)
18797 .nr(8)
18798 .kr(8)
18799 .sr(1)
18800 .m(m)
18801 .n(8)
18802 .k(8)
18803 .iterations(1)
18804 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18805 }
18806 }
18807
18808 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_eq_8_subtile_n) {
18809 TEST_REQUIRES_ARM_NEON;
18810 for (uint32_t n = 1; n <= 8; n++) {
18811 GemmMicrokernelTester()
18812 .mr(1)
18813 .nr(8)
18814 .kr(8)
18815 .sr(1)
18816 .m(1)
18817 .n(n)
18818 .k(8)
18819 .iterations(1)
18820 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18821 }
18822 }
18823
18824 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8) {
18825 TEST_REQUIRES_ARM_NEON;
18826 for (size_t k = 1; k < 8; k++) {
18827 GemmMicrokernelTester()
18828 .mr(1)
18829 .nr(8)
18830 .kr(8)
18831 .sr(1)
18832 .m(1)
18833 .n(8)
18834 .k(k)
18835 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18836 }
18837 }
18838
18839 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_lt_8_subtile) {
18840 TEST_REQUIRES_ARM_NEON;
18841 for (size_t k = 1; k < 8; k++) {
18842 for (uint32_t n = 1; n <= 8; n++) {
18843 for (uint32_t m = 1; m <= 1; m++) {
18844 GemmMicrokernelTester()
18845 .mr(1)
18846 .nr(8)
18847 .kr(8)
18848 .sr(1)
18849 .m(m)
18850 .n(n)
18851 .k(k)
18852 .iterations(1)
18853 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18854 }
18855 }
18856 }
18857 }
18858
18859 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8) {
18860 TEST_REQUIRES_ARM_NEON;
18861 for (size_t k = 9; k < 16; k++) {
18862 GemmMicrokernelTester()
18863 .mr(1)
18864 .nr(8)
18865 .kr(8)
18866 .sr(1)
18867 .m(1)
18868 .n(8)
18869 .k(k)
18870 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18871 }
18872 }
18873
18874 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_gt_8_subtile) {
18875 TEST_REQUIRES_ARM_NEON;
18876 for (size_t k = 9; k < 16; k++) {
18877 for (uint32_t n = 1; n <= 8; n++) {
18878 for (uint32_t m = 1; m <= 1; m++) {
18879 GemmMicrokernelTester()
18880 .mr(1)
18881 .nr(8)
18882 .kr(8)
18883 .sr(1)
18884 .m(m)
18885 .n(n)
18886 .k(k)
18887 .iterations(1)
18888 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18889 }
18890 }
18891 }
18892 }
18893
18894 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8) {
18895 TEST_REQUIRES_ARM_NEON;
18896 for (size_t k = 16; k <= 80; k += 8) {
18897 GemmMicrokernelTester()
18898 .mr(1)
18899 .nr(8)
18900 .kr(8)
18901 .sr(1)
18902 .m(1)
18903 .n(8)
18904 .k(k)
18905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18906 }
18907 }
18908
18909 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, k_div_8_subtile) {
18910 TEST_REQUIRES_ARM_NEON;
18911 for (size_t k = 16; k <= 80; k += 8) {
18912 for (uint32_t n = 1; n <= 8; n++) {
18913 for (uint32_t m = 1; m <= 1; m++) {
18914 GemmMicrokernelTester()
18915 .mr(1)
18916 .nr(8)
18917 .kr(8)
18918 .sr(1)
18919 .m(m)
18920 .n(n)
18921 .k(k)
18922 .iterations(1)
18923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18924 }
18925 }
18926 }
18927 }
18928
18929 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8) {
18930 TEST_REQUIRES_ARM_NEON;
18931 for (uint32_t n = 9; n < 16; n++) {
18932 for (size_t k = 1; k <= 40; k += 9) {
18933 GemmMicrokernelTester()
18934 .mr(1)
18935 .nr(8)
18936 .kr(8)
18937 .sr(1)
18938 .m(1)
18939 .n(n)
18940 .k(k)
18941 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18942 }
18943 }
18944 }
18945
18946 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_strided_cn) {
18947 TEST_REQUIRES_ARM_NEON;
18948 for (uint32_t n = 9; n < 16; n++) {
18949 for (size_t k = 1; k <= 40; k += 9) {
18950 GemmMicrokernelTester()
18951 .mr(1)
18952 .nr(8)
18953 .kr(8)
18954 .sr(1)
18955 .m(1)
18956 .n(n)
18957 .k(k)
18958 .cn_stride(11)
18959 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18960 }
18961 }
18962 }
18963
18964 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_subtile) {
18965 TEST_REQUIRES_ARM_NEON;
18966 for (uint32_t n = 9; n < 16; n++) {
18967 for (size_t k = 1; k <= 40; k += 9) {
18968 for (uint32_t m = 1; m <= 1; m++) {
18969 GemmMicrokernelTester()
18970 .mr(1)
18971 .nr(8)
18972 .kr(8)
18973 .sr(1)
18974 .m(m)
18975 .n(n)
18976 .k(k)
18977 .iterations(1)
18978 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18979 }
18980 }
18981 }
18982 }
18983
18984 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8) {
18985 TEST_REQUIRES_ARM_NEON;
18986 for (uint32_t n = 16; n <= 24; n += 8) {
18987 for (size_t k = 1; k <= 40; k += 9) {
18988 GemmMicrokernelTester()
18989 .mr(1)
18990 .nr(8)
18991 .kr(8)
18992 .sr(1)
18993 .m(1)
18994 .n(n)
18995 .k(k)
18996 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
18997 }
18998 }
18999 }
19000
19001 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_strided_cn) {
19002 TEST_REQUIRES_ARM_NEON;
19003 for (uint32_t n = 16; n <= 24; n += 8) {
19004 for (size_t k = 1; k <= 40; k += 9) {
19005 GemmMicrokernelTester()
19006 .mr(1)
19007 .nr(8)
19008 .kr(8)
19009 .sr(1)
19010 .m(1)
19011 .n(n)
19012 .k(k)
19013 .cn_stride(11)
19014 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19015 }
19016 }
19017 }
19018
19019 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_subtile) {
19020 TEST_REQUIRES_ARM_NEON;
19021 for (uint32_t n = 16; n <= 24; n += 8) {
19022 for (size_t k = 1; k <= 40; k += 9) {
19023 for (uint32_t m = 1; m <= 1; m++) {
19024 GemmMicrokernelTester()
19025 .mr(1)
19026 .nr(8)
19027 .kr(8)
19028 .sr(1)
19029 .m(m)
19030 .n(n)
19031 .k(k)
19032 .iterations(1)
19033 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19034 }
19035 }
19036 }
19037 }
19038
19039 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel) {
19040 TEST_REQUIRES_ARM_NEON;
19041 for (size_t k = 1; k <= 40; k += 9) {
19042 GemmMicrokernelTester()
19043 .mr(1)
19044 .nr(8)
19045 .kr(8)
19046 .sr(1)
19047 .m(1)
19048 .n(8)
19049 .k(k)
19050 .ks(3)
19051 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19052 }
19053 }
19054
19055 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, small_kernel_subtile) {
19056 TEST_REQUIRES_ARM_NEON;
19057 for (size_t k = 1; k <= 40; k += 9) {
19058 for (uint32_t n = 1; n <= 8; n++) {
19059 for (uint32_t m = 1; m <= 1; m++) {
19060 GemmMicrokernelTester()
19061 .mr(1)
19062 .nr(8)
19063 .kr(8)
19064 .sr(1)
19065 .m(m)
19066 .n(n)
19067 .k(k)
19068 .ks(3)
19069 .iterations(1)
19070 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19071 }
19072 }
19073 }
19074 }
19075
19076 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_gt_8_small_kernel) {
19077 TEST_REQUIRES_ARM_NEON;
19078 for (uint32_t n = 9; n < 16; n++) {
19079 for (size_t k = 1; k <= 40; k += 9) {
19080 GemmMicrokernelTester()
19081 .mr(1)
19082 .nr(8)
19083 .kr(8)
19084 .sr(1)
19085 .m(1)
19086 .n(n)
19087 .k(k)
19088 .ks(3)
19089 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19090 }
19091 }
19092 }
19093
19094 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, n_div_8_small_kernel) {
19095 TEST_REQUIRES_ARM_NEON;
19096 for (uint32_t n = 16; n <= 24; n += 8) {
19097 for (size_t k = 1; k <= 40; k += 9) {
19098 GemmMicrokernelTester()
19099 .mr(1)
19100 .nr(8)
19101 .kr(8)
19102 .sr(1)
19103 .m(1)
19104 .n(n)
19105 .k(k)
19106 .ks(3)
19107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19108 }
19109 }
19110 }
19111
19112 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm_subtile) {
19113 TEST_REQUIRES_ARM_NEON;
19114 for (size_t k = 1; k <= 40; k += 9) {
19115 for (uint32_t n = 1; n <= 8; n++) {
19116 for (uint32_t m = 1; m <= 1; m++) {
19117 GemmMicrokernelTester()
19118 .mr(1)
19119 .nr(8)
19120 .kr(8)
19121 .sr(1)
19122 .m(m)
19123 .n(n)
19124 .k(k)
19125 .cm_stride(11)
19126 .iterations(1)
19127 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19128 }
19129 }
19130 }
19131 }
19132
19133 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, a_offset) {
19134 TEST_REQUIRES_ARM_NEON;
19135 for (size_t k = 1; k <= 40; k += 9) {
19136 GemmMicrokernelTester()
19137 .mr(1)
19138 .nr(8)
19139 .kr(8)
19140 .sr(1)
19141 .m(1)
19142 .n(8)
19143 .k(k)
19144 .ks(3)
19145 .a_offset(43)
19146 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19147 }
19148 }
19149
19150 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, zero) {
19151 TEST_REQUIRES_ARM_NEON;
19152 for (size_t k = 1; k <= 40; k += 9) {
19153 for (uint32_t mz = 0; mz < 1; mz++) {
19154 GemmMicrokernelTester()
19155 .mr(1)
19156 .nr(8)
19157 .kr(8)
19158 .sr(1)
19159 .m(1)
19160 .n(8)
19161 .k(k)
19162 .ks(3)
19163 .a_offset(43)
19164 .zero_index(mz)
19165 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19166 }
19167 }
19168 }
19169
19170 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmin) {
19171 TEST_REQUIRES_ARM_NEON;
19172 GemmMicrokernelTester()
19173 .mr(1)
19174 .nr(8)
19175 .kr(8)
19176 .sr(1)
19177 .m(1)
19178 .n(8)
19179 .k(8)
19180 .qmin(128)
19181 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19182 }
19183
19184 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, qmax) {
19185 TEST_REQUIRES_ARM_NEON;
19186 GemmMicrokernelTester()
19187 .mr(1)
19188 .nr(8)
19189 .kr(8)
19190 .sr(1)
19191 .m(1)
19192 .n(8)
19193 .k(8)
19194 .qmax(128)
19195 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19196 }
19197
19198 TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C8__NEON_MULL, strided_cm) {
19199 TEST_REQUIRES_ARM_NEON;
19200 GemmMicrokernelTester()
19201 .mr(1)
19202 .nr(8)
19203 .kr(8)
19204 .sr(1)
19205 .m(1)
19206 .n(8)
19207 .k(8)
19208 .cm_stride(11)
19209 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19210 }
19211#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19212
19213
19214#if XNN_ARCH_ARM || XNN_ARCH_ARM64
19215 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8) {
19216 TEST_REQUIRES_ARM_NEON;
19217 GemmMicrokernelTester()
19218 .mr(3)
19219 .nr(16)
19220 .kr(8)
19221 .sr(1)
19222 .m(3)
19223 .n(16)
19224 .k(8)
19225 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19226 }
19227
19228 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cn) {
19229 TEST_REQUIRES_ARM_NEON;
19230 GemmMicrokernelTester()
19231 .mr(3)
19232 .nr(16)
19233 .kr(8)
19234 .sr(1)
19235 .m(3)
19236 .n(16)
19237 .k(8)
19238 .cn_stride(19)
19239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19240 }
19241
19242 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile) {
19243 TEST_REQUIRES_ARM_NEON;
19244 for (uint32_t n = 1; n <= 16; n++) {
19245 for (uint32_t m = 1; m <= 3; m++) {
19246 GemmMicrokernelTester()
19247 .mr(3)
19248 .nr(16)
19249 .kr(8)
19250 .sr(1)
19251 .m(m)
19252 .n(n)
19253 .k(8)
19254 .iterations(1)
19255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19256 }
19257 }
19258 }
19259
19260 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_m) {
19261 TEST_REQUIRES_ARM_NEON;
19262 for (uint32_t m = 1; m <= 3; m++) {
19263 GemmMicrokernelTester()
19264 .mr(3)
19265 .nr(16)
19266 .kr(8)
19267 .sr(1)
19268 .m(m)
19269 .n(16)
19270 .k(8)
19271 .iterations(1)
19272 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19273 }
19274 }
19275
19276 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_eq_8_subtile_n) {
19277 TEST_REQUIRES_ARM_NEON;
19278 for (uint32_t n = 1; n <= 16; n++) {
19279 GemmMicrokernelTester()
19280 .mr(3)
19281 .nr(16)
19282 .kr(8)
19283 .sr(1)
19284 .m(3)
19285 .n(n)
19286 .k(8)
19287 .iterations(1)
19288 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19289 }
19290 }
19291
19292 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8) {
19293 TEST_REQUIRES_ARM_NEON;
19294 for (size_t k = 1; k < 8; k++) {
19295 GemmMicrokernelTester()
19296 .mr(3)
19297 .nr(16)
19298 .kr(8)
19299 .sr(1)
19300 .m(3)
19301 .n(16)
19302 .k(k)
19303 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19304 }
19305 }
19306
19307 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_lt_8_subtile) {
19308 TEST_REQUIRES_ARM_NEON;
19309 for (size_t k = 1; k < 8; k++) {
19310 for (uint32_t n = 1; n <= 16; n++) {
19311 for (uint32_t m = 1; m <= 3; m++) {
19312 GemmMicrokernelTester()
19313 .mr(3)
19314 .nr(16)
19315 .kr(8)
19316 .sr(1)
19317 .m(m)
19318 .n(n)
19319 .k(k)
19320 .iterations(1)
19321 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19322 }
19323 }
19324 }
19325 }
19326
19327 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8) {
19328 TEST_REQUIRES_ARM_NEON;
19329 for (size_t k = 9; k < 16; k++) {
19330 GemmMicrokernelTester()
19331 .mr(3)
19332 .nr(16)
19333 .kr(8)
19334 .sr(1)
19335 .m(3)
19336 .n(16)
19337 .k(k)
19338 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19339 }
19340 }
19341
19342 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_gt_8_subtile) {
19343 TEST_REQUIRES_ARM_NEON;
19344 for (size_t k = 9; k < 16; k++) {
19345 for (uint32_t n = 1; n <= 16; n++) {
19346 for (uint32_t m = 1; m <= 3; m++) {
19347 GemmMicrokernelTester()
19348 .mr(3)
19349 .nr(16)
19350 .kr(8)
19351 .sr(1)
19352 .m(m)
19353 .n(n)
19354 .k(k)
19355 .iterations(1)
19356 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19357 }
19358 }
19359 }
19360 }
19361
19362 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8) {
19363 TEST_REQUIRES_ARM_NEON;
19364 for (size_t k = 16; k <= 80; k += 8) {
19365 GemmMicrokernelTester()
19366 .mr(3)
19367 .nr(16)
19368 .kr(8)
19369 .sr(1)
19370 .m(3)
19371 .n(16)
19372 .k(k)
19373 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19374 }
19375 }
19376
19377 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, k_div_8_subtile) {
19378 TEST_REQUIRES_ARM_NEON;
19379 for (size_t k = 16; k <= 80; k += 8) {
19380 for (uint32_t n = 1; n <= 16; n++) {
19381 for (uint32_t m = 1; m <= 3; m++) {
19382 GemmMicrokernelTester()
19383 .mr(3)
19384 .nr(16)
19385 .kr(8)
19386 .sr(1)
19387 .m(m)
19388 .n(n)
19389 .k(k)
19390 .iterations(1)
19391 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19392 }
19393 }
19394 }
19395 }
19396
19397 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16) {
19398 TEST_REQUIRES_ARM_NEON;
19399 for (uint32_t n = 17; n < 32; n++) {
19400 for (size_t k = 1; k <= 40; k += 9) {
19401 GemmMicrokernelTester()
19402 .mr(3)
19403 .nr(16)
19404 .kr(8)
19405 .sr(1)
19406 .m(3)
19407 .n(n)
19408 .k(k)
19409 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19410 }
19411 }
19412 }
19413
19414 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_strided_cn) {
19415 TEST_REQUIRES_ARM_NEON;
19416 for (uint32_t n = 17; n < 32; n++) {
19417 for (size_t k = 1; k <= 40; k += 9) {
19418 GemmMicrokernelTester()
19419 .mr(3)
19420 .nr(16)
19421 .kr(8)
19422 .sr(1)
19423 .m(3)
19424 .n(n)
19425 .k(k)
19426 .cn_stride(19)
19427 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19428 }
19429 }
19430 }
19431
19432 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_subtile) {
19433 TEST_REQUIRES_ARM_NEON;
19434 for (uint32_t n = 17; n < 32; n++) {
19435 for (size_t k = 1; k <= 40; k += 9) {
19436 for (uint32_t m = 1; m <= 3; m++) {
19437 GemmMicrokernelTester()
19438 .mr(3)
19439 .nr(16)
19440 .kr(8)
19441 .sr(1)
19442 .m(m)
19443 .n(n)
19444 .k(k)
19445 .iterations(1)
19446 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19447 }
19448 }
19449 }
19450 }
19451
19452 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16) {
19453 TEST_REQUIRES_ARM_NEON;
19454 for (uint32_t n = 32; n <= 48; n += 16) {
19455 for (size_t k = 1; k <= 40; k += 9) {
19456 GemmMicrokernelTester()
19457 .mr(3)
19458 .nr(16)
19459 .kr(8)
19460 .sr(1)
19461 .m(3)
19462 .n(n)
19463 .k(k)
19464 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19465 }
19466 }
19467 }
19468
19469 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_strided_cn) {
19470 TEST_REQUIRES_ARM_NEON;
19471 for (uint32_t n = 32; n <= 48; n += 16) {
19472 for (size_t k = 1; k <= 40; k += 9) {
19473 GemmMicrokernelTester()
19474 .mr(3)
19475 .nr(16)
19476 .kr(8)
19477 .sr(1)
19478 .m(3)
19479 .n(n)
19480 .k(k)
19481 .cn_stride(19)
19482 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19483 }
19484 }
19485 }
19486
19487 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_subtile) {
19488 TEST_REQUIRES_ARM_NEON;
19489 for (uint32_t n = 32; n <= 48; n += 16) {
19490 for (size_t k = 1; k <= 40; k += 9) {
19491 for (uint32_t m = 1; m <= 3; m++) {
19492 GemmMicrokernelTester()
19493 .mr(3)
19494 .nr(16)
19495 .kr(8)
19496 .sr(1)
19497 .m(m)
19498 .n(n)
19499 .k(k)
19500 .iterations(1)
19501 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19502 }
19503 }
19504 }
19505 }
19506
19507 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel) {
19508 TEST_REQUIRES_ARM_NEON;
19509 for (size_t k = 1; k <= 40; k += 9) {
19510 GemmMicrokernelTester()
19511 .mr(3)
19512 .nr(16)
19513 .kr(8)
19514 .sr(1)
19515 .m(3)
19516 .n(16)
19517 .k(k)
19518 .ks(3)
19519 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19520 }
19521 }
19522
19523 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, small_kernel_subtile) {
19524 TEST_REQUIRES_ARM_NEON;
19525 for (size_t k = 1; k <= 40; k += 9) {
19526 for (uint32_t n = 1; n <= 16; n++) {
19527 for (uint32_t m = 1; m <= 3; m++) {
19528 GemmMicrokernelTester()
19529 .mr(3)
19530 .nr(16)
19531 .kr(8)
19532 .sr(1)
19533 .m(m)
19534 .n(n)
19535 .k(k)
19536 .ks(3)
19537 .iterations(1)
19538 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19539 }
19540 }
19541 }
19542 }
19543
19544 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_gt_16_small_kernel) {
19545 TEST_REQUIRES_ARM_NEON;
19546 for (uint32_t n = 17; n < 32; n++) {
19547 for (size_t k = 1; k <= 40; k += 9) {
19548 GemmMicrokernelTester()
19549 .mr(3)
19550 .nr(16)
19551 .kr(8)
19552 .sr(1)
19553 .m(3)
19554 .n(n)
19555 .k(k)
19556 .ks(3)
19557 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19558 }
19559 }
19560 }
19561
19562 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, n_div_16_small_kernel) {
19563 TEST_REQUIRES_ARM_NEON;
19564 for (uint32_t n = 32; n <= 48; n += 16) {
19565 for (size_t k = 1; k <= 40; k += 9) {
19566 GemmMicrokernelTester()
19567 .mr(3)
19568 .nr(16)
19569 .kr(8)
19570 .sr(1)
19571 .m(3)
19572 .n(n)
19573 .k(k)
19574 .ks(3)
19575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19576 }
19577 }
19578 }
19579
19580 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm_subtile) {
19581 TEST_REQUIRES_ARM_NEON;
19582 for (size_t k = 1; k <= 40; k += 9) {
19583 for (uint32_t n = 1; n <= 16; n++) {
19584 for (uint32_t m = 1; m <= 3; m++) {
19585 GemmMicrokernelTester()
19586 .mr(3)
19587 .nr(16)
19588 .kr(8)
19589 .sr(1)
19590 .m(m)
19591 .n(n)
19592 .k(k)
19593 .cm_stride(19)
19594 .iterations(1)
19595 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19596 }
19597 }
19598 }
19599 }
19600
19601 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, a_offset) {
19602 TEST_REQUIRES_ARM_NEON;
19603 for (size_t k = 1; k <= 40; k += 9) {
19604 GemmMicrokernelTester()
19605 .mr(3)
19606 .nr(16)
19607 .kr(8)
19608 .sr(1)
19609 .m(3)
19610 .n(16)
19611 .k(k)
19612 .ks(3)
19613 .a_offset(127)
19614 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19615 }
19616 }
19617
19618 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, zero) {
19619 TEST_REQUIRES_ARM_NEON;
19620 for (size_t k = 1; k <= 40; k += 9) {
19621 for (uint32_t mz = 0; mz < 3; mz++) {
19622 GemmMicrokernelTester()
19623 .mr(3)
19624 .nr(16)
19625 .kr(8)
19626 .sr(1)
19627 .m(3)
19628 .n(16)
19629 .k(k)
19630 .ks(3)
19631 .a_offset(127)
19632 .zero_index(mz)
19633 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19634 }
19635 }
19636 }
19637
19638 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmin) {
19639 TEST_REQUIRES_ARM_NEON;
19640 GemmMicrokernelTester()
19641 .mr(3)
19642 .nr(16)
19643 .kr(8)
19644 .sr(1)
19645 .m(3)
19646 .n(16)
19647 .k(8)
19648 .qmin(128)
19649 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19650 }
19651
19652 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, qmax) {
19653 TEST_REQUIRES_ARM_NEON;
19654 GemmMicrokernelTester()
19655 .mr(3)
19656 .nr(16)
19657 .kr(8)
19658 .sr(1)
19659 .m(3)
19660 .n(16)
19661 .k(8)
19662 .qmax(128)
19663 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19664 }
19665
19666 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16C8__NEON_MULL, strided_cm) {
19667 TEST_REQUIRES_ARM_NEON;
19668 GemmMicrokernelTester()
19669 .mr(3)
19670 .nr(16)
19671 .kr(8)
19672 .sr(1)
19673 .m(3)
19674 .n(16)
19675 .k(8)
19676 .cm_stride(19)
19677 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19678 }
19679#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
19680
19681
19682#if XNN_ARCH_ARM || XNN_ARCH_ARM64
19683 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16) {
19684 TEST_REQUIRES_ARM_NEON;
19685 GemmMicrokernelTester()
19686 .mr(2)
19687 .nr(8)
19688 .kr(8)
19689 .sr(1)
19690 .m(2)
19691 .n(8)
19692 .k(16)
19693 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19694 }
19695
19696 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cn) {
19697 TEST_REQUIRES_ARM_NEON;
19698 GemmMicrokernelTester()
19699 .mr(2)
19700 .nr(8)
19701 .kr(8)
19702 .sr(1)
19703 .m(2)
19704 .n(8)
19705 .k(16)
19706 .cn_stride(11)
19707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
19708 }
19709
19710 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019711 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019712 for (uint32_t n = 1; n <= 8; n++) {
19713 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019714 GemmMicrokernelTester()
19715 .mr(2)
19716 .nr(8)
19717 .kr(8)
19718 .sr(1)
19719 .m(m)
19720 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019721 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080019722 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019724 }
19725 }
19726 }
19727
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019728 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_m) {
Marat Dukhan89991902021-12-06 00:54:36 -080019729 TEST_REQUIRES_ARM_NEON;
19730 for (uint32_t m = 1; m <= 2; m++) {
19731 GemmMicrokernelTester()
19732 .mr(2)
19733 .nr(8)
19734 .kr(8)
19735 .sr(1)
19736 .m(m)
19737 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019738 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080019739 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019740 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019741 }
19742 }
19743
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019744 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_eq_16_subtile_n) {
Marat Dukhan89991902021-12-06 00:54:36 -080019745 TEST_REQUIRES_ARM_NEON;
19746 for (uint32_t n = 1; n <= 8; n++) {
19747 GemmMicrokernelTester()
19748 .mr(2)
19749 .nr(8)
19750 .kr(8)
19751 .sr(1)
19752 .m(2)
19753 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019754 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080019755 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019756 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019757 }
19758 }
19759
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019760 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080019761 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019762 for (size_t k = 1; k < 16; k++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019763 GemmMicrokernelTester()
19764 .mr(2)
19765 .nr(8)
19766 .kr(8)
19767 .sr(1)
19768 .m(2)
19769 .n(8)
19770 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019771 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019772 }
19773 }
19774
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019775 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_lt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019776 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019777 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019778 for (uint32_t n = 1; n <= 8; n++) {
19779 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019780 GemmMicrokernelTester()
19781 .mr(2)
19782 .nr(8)
19783 .kr(8)
19784 .sr(1)
19785 .m(m)
19786 .n(n)
19787 .k(k)
19788 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019789 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019790 }
19791 }
19792 }
19793 }
19794
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019795 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080019796 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019797 for (size_t k = 17; k < 32; k++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019798 GemmMicrokernelTester()
19799 .mr(2)
19800 .nr(8)
19801 .kr(8)
19802 .sr(1)
19803 .m(2)
19804 .n(8)
19805 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019806 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019807 }
19808 }
19809
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019810 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_gt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019811 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019812 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019813 for (uint32_t n = 1; n <= 8; n++) {
19814 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019815 GemmMicrokernelTester()
19816 .mr(2)
19817 .nr(8)
19818 .kr(8)
19819 .sr(1)
19820 .m(m)
19821 .n(n)
19822 .k(k)
19823 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019824 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019825 }
19826 }
19827 }
19828 }
19829
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019830 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080019831 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019832 for (size_t k = 32; k <= 160; k += 16) {
Marat Dukhan89991902021-12-06 00:54:36 -080019833 GemmMicrokernelTester()
19834 .mr(2)
19835 .nr(8)
19836 .kr(8)
19837 .sr(1)
19838 .m(2)
19839 .n(8)
19840 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019842 }
19843 }
19844
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019845 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, k_div_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019846 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019847 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019848 for (uint32_t n = 1; n <= 8; n++) {
19849 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019850 GemmMicrokernelTester()
19851 .mr(2)
19852 .nr(8)
19853 .kr(8)
19854 .sr(1)
19855 .m(m)
19856 .n(n)
19857 .k(k)
19858 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019859 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019860 }
19861 }
19862 }
19863 }
19864
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019865 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8) {
Marat Dukhan89991902021-12-06 00:54:36 -080019866 TEST_REQUIRES_ARM_NEON;
19867 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019868 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019869 GemmMicrokernelTester()
19870 .mr(2)
19871 .nr(8)
19872 .kr(8)
19873 .sr(1)
19874 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019875 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019876 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019877 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019878 }
19879 }
19880 }
19881
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019882 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080019883 TEST_REQUIRES_ARM_NEON;
19884 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019885 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019886 GemmMicrokernelTester()
19887 .mr(2)
19888 .nr(8)
19889 .kr(8)
19890 .sr(1)
19891 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019892 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019893 .k(k)
19894 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019895 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019896 }
19897 }
19898 }
19899
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019900 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019901 TEST_REQUIRES_ARM_NEON;
19902 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019903 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019904 for (uint32_t m = 1; m <= 2; m++) {
19905 GemmMicrokernelTester()
19906 .mr(2)
19907 .nr(8)
19908 .kr(8)
19909 .sr(1)
19910 .m(m)
19911 .n(n)
19912 .k(k)
19913 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019914 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019915 }
19916 }
19917 }
19918 }
19919
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019920 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8) {
Marat Dukhan89991902021-12-06 00:54:36 -080019921 TEST_REQUIRES_ARM_NEON;
19922 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019923 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019924 GemmMicrokernelTester()
19925 .mr(2)
19926 .nr(8)
19927 .kr(8)
19928 .sr(1)
19929 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019930 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080019931 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019932 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019933 }
19934 }
19935 }
19936
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019937 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080019938 TEST_REQUIRES_ARM_NEON;
19939 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019940 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019941 GemmMicrokernelTester()
19942 .mr(2)
19943 .nr(8)
19944 .kr(8)
19945 .sr(1)
19946 .m(2)
19947 .n(n)
19948 .k(k)
19949 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019950 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019951 }
19952 }
19953 }
19954
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019955 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019956 TEST_REQUIRES_ARM_NEON;
19957 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019958 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019959 for (uint32_t m = 1; m <= 2; m++) {
19960 GemmMicrokernelTester()
19961 .mr(2)
19962 .nr(8)
19963 .kr(8)
19964 .sr(1)
19965 .m(m)
19966 .n(n)
19967 .k(k)
19968 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019970 }
19971 }
19972 }
19973 }
19974
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019975 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080019976 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019977 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080019978 GemmMicrokernelTester()
19979 .mr(2)
19980 .nr(8)
19981 .kr(8)
19982 .sr(1)
19983 .m(2)
19984 .n(8)
19985 .k(k)
19986 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019987 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080019988 }
19989 }
19990
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019991 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, small_kernel_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080019992 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080019993 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019994 for (uint32_t n = 1; n <= 8; n++) {
19995 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080019996 GemmMicrokernelTester()
19997 .mr(2)
19998 .nr(8)
19999 .kr(8)
20000 .sr(1)
20001 .m(m)
20002 .n(n)
20003 .k(k)
20004 .ks(3)
20005 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020006 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020007 }
20008 }
20009 }
20010 }
20011
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020012 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_gt_8_small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020013 TEST_REQUIRES_ARM_NEON;
20014 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020015 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020016 GemmMicrokernelTester()
20017 .mr(2)
20018 .nr(8)
20019 .kr(8)
20020 .sr(1)
20021 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020022 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020023 .k(k)
20024 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020025 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020026 }
20027 }
20028 }
20029
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020030 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, n_div_8_small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020031 TEST_REQUIRES_ARM_NEON;
20032 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020033 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020034 GemmMicrokernelTester()
20035 .mr(2)
20036 .nr(8)
20037 .kr(8)
20038 .sr(1)
20039 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020040 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020041 .k(k)
20042 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020044 }
20045 }
20046 }
20047
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020048 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020049 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020050 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020051 for (uint32_t n = 1; n <= 8; n++) {
20052 for (uint32_t m = 1; m <= 2; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020053 GemmMicrokernelTester()
20054 .mr(2)
20055 .nr(8)
20056 .kr(8)
20057 .sr(1)
20058 .m(m)
20059 .n(n)
20060 .k(k)
20061 .cm_stride(11)
20062 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020063 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020064 }
20065 }
20066 }
20067 }
20068
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020069 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, a_offset) {
Marat Dukhan89991902021-12-06 00:54:36 -080020070 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020071 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020072 GemmMicrokernelTester()
20073 .mr(2)
20074 .nr(8)
20075 .kr(8)
20076 .sr(1)
20077 .m(2)
20078 .n(8)
20079 .k(k)
20080 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020081 .a_offset(163)
20082 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020083 }
20084 }
20085
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020086 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, zero) {
Marat Dukhan89991902021-12-06 00:54:36 -080020087 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020088 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020089 for (uint32_t mz = 0; mz < 2; mz++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020090 GemmMicrokernelTester()
20091 .mr(2)
20092 .nr(8)
20093 .kr(8)
20094 .sr(1)
20095 .m(2)
20096 .n(8)
20097 .k(k)
20098 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020099 .a_offset(163)
Marat Dukhan89991902021-12-06 00:54:36 -080020100 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020101 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020102 }
20103 }
20104 }
20105
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020106 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmin) {
Marat Dukhan89991902021-12-06 00:54:36 -080020107 TEST_REQUIRES_ARM_NEON;
20108 GemmMicrokernelTester()
20109 .mr(2)
20110 .nr(8)
20111 .kr(8)
20112 .sr(1)
20113 .m(2)
20114 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020115 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020116 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020117 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020118 }
20119
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020120 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, qmax) {
Marat Dukhan89991902021-12-06 00:54:36 -080020121 TEST_REQUIRES_ARM_NEON;
20122 GemmMicrokernelTester()
20123 .mr(2)
20124 .nr(8)
20125 .kr(8)
20126 .sr(1)
20127 .m(2)
20128 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020129 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020130 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020131 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020132 }
20133
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020134 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8C8__NEON_MLAL, strided_cm) {
Marat Dukhan89991902021-12-06 00:54:36 -080020135 TEST_REQUIRES_ARM_NEON;
20136 GemmMicrokernelTester()
20137 .mr(2)
20138 .nr(8)
20139 .kr(8)
20140 .sr(1)
20141 .m(2)
20142 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020143 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020144 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020145 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020146 }
20147#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20148
20149
20150#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020151 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020152 TEST_REQUIRES_ARM_NEON;
20153 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020154 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020155 .nr(8)
20156 .kr(8)
20157 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020158 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020159 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020160 .k(16)
20161 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020162 }
20163
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020164 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080020165 TEST_REQUIRES_ARM_NEON;
20166 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020167 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020168 .nr(8)
20169 .kr(8)
20170 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020171 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020172 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020173 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020174 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020175 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020176 }
20177
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020178 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020179 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020180 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020181 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020182 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020183 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020184 .nr(8)
20185 .kr(8)
20186 .sr(1)
20187 .m(m)
20188 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020189 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020190 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020192 }
20193 }
20194 }
20195
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020196 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_m) {
Marat Dukhan89991902021-12-06 00:54:36 -080020197 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020198 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020199 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020200 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020201 .nr(8)
20202 .kr(8)
20203 .sr(1)
20204 .m(m)
20205 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020206 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020207 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020208 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020209 }
20210 }
20211
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020212 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_eq_16_subtile_n) {
Marat Dukhan89991902021-12-06 00:54:36 -080020213 TEST_REQUIRES_ARM_NEON;
20214 for (uint32_t n = 1; n <= 8; n++) {
20215 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020216 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020217 .nr(8)
20218 .kr(8)
20219 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020220 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020221 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020222 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020223 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020224 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020225 }
20226 }
20227
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020228 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020229 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020230 for (size_t k = 1; k < 16; k++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020231 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020232 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020233 .nr(8)
20234 .kr(8)
20235 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020236 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020237 .n(8)
20238 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020239 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020240 }
20241 }
20242
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020243 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_lt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020244 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020245 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020246 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020247 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020248 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020249 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020250 .nr(8)
20251 .kr(8)
20252 .sr(1)
20253 .m(m)
20254 .n(n)
20255 .k(k)
20256 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020257 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020258 }
20259 }
20260 }
20261 }
20262
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020263 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020264 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020265 for (size_t k = 17; k < 32; k++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020266 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020267 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020268 .nr(8)
20269 .kr(8)
20270 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020271 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020272 .n(8)
20273 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020274 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020275 }
20276 }
20277
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020278 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_gt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020279 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020280 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020281 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020282 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020283 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020284 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020285 .nr(8)
20286 .kr(8)
20287 .sr(1)
20288 .m(m)
20289 .n(n)
20290 .k(k)
20291 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020292 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020293 }
20294 }
20295 }
20296 }
20297
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020298 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020299 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020300 for (size_t k = 32; k <= 160; k += 16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020301 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020302 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020303 .nr(8)
20304 .kr(8)
20305 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020306 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020307 .n(8)
20308 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020309 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020310 }
20311 }
20312
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020313 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, k_div_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020314 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020315 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020316 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020317 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020318 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020319 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020320 .nr(8)
20321 .kr(8)
20322 .sr(1)
20323 .m(m)
20324 .n(n)
20325 .k(k)
20326 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020327 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020328 }
20329 }
20330 }
20331 }
20332
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020333 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8) {
Marat Dukhan89991902021-12-06 00:54:36 -080020334 TEST_REQUIRES_ARM_NEON;
20335 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020336 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020337 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020338 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020339 .nr(8)
20340 .kr(8)
20341 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020342 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020343 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020344 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020345 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020346 }
20347 }
20348 }
20349
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020350 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080020351 TEST_REQUIRES_ARM_NEON;
20352 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020353 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020354 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020355 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020356 .nr(8)
20357 .kr(8)
20358 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020359 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020360 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020361 .k(k)
20362 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020363 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020364 }
20365 }
20366 }
20367
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020368 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020369 TEST_REQUIRES_ARM_NEON;
20370 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020371 for (size_t k = 1; k <= 80; k += 17) {
20372 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020373 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020374 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020375 .nr(8)
20376 .kr(8)
20377 .sr(1)
20378 .m(m)
20379 .n(n)
20380 .k(k)
20381 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020382 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020383 }
20384 }
20385 }
20386 }
20387
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020388 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8) {
Marat Dukhan89991902021-12-06 00:54:36 -080020389 TEST_REQUIRES_ARM_NEON;
20390 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020391 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020392 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020393 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020394 .nr(8)
20395 .kr(8)
20396 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020397 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020398 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020399 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020400 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020401 }
20402 }
20403 }
20404
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020405 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080020406 TEST_REQUIRES_ARM_NEON;
20407 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020408 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020409 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020410 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020411 .nr(8)
20412 .kr(8)
20413 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020414 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020415 .n(n)
20416 .k(k)
20417 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020418 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020419 }
20420 }
20421 }
20422
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020423 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020424 TEST_REQUIRES_ARM_NEON;
20425 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020426 for (size_t k = 1; k <= 80; k += 17) {
20427 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020428 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020429 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020430 .nr(8)
20431 .kr(8)
20432 .sr(1)
20433 .m(m)
20434 .n(n)
20435 .k(k)
20436 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020437 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020438 }
20439 }
20440 }
20441 }
20442
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020443 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020444 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020445 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020446 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020447 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020448 .nr(8)
20449 .kr(8)
20450 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020451 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020452 .n(8)
20453 .k(k)
20454 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020455 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020456 }
20457 }
20458
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020459 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, small_kernel_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020460 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020461 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020462 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020463 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020464 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020465 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020466 .nr(8)
20467 .kr(8)
20468 .sr(1)
20469 .m(m)
20470 .n(n)
20471 .k(k)
20472 .ks(3)
20473 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020474 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020475 }
20476 }
20477 }
20478 }
20479
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020480 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_gt_8_small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020481 TEST_REQUIRES_ARM_NEON;
20482 for (uint32_t n = 9; n < 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020483 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020484 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020485 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020486 .nr(8)
20487 .kr(8)
20488 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020489 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020490 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020491 .k(k)
20492 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020493 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020494 }
20495 }
20496 }
20497
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020498 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, n_div_8_small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020499 TEST_REQUIRES_ARM_NEON;
20500 for (uint32_t n = 16; n <= 24; n += 8) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020501 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020502 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020503 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020504 .nr(8)
20505 .kr(8)
20506 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020507 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020508 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020509 .k(k)
20510 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020512 }
20513 }
20514 }
20515
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020516 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020517 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020518 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020519 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020520 for (uint32_t m = 1; m <= 4; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020521 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020522 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020523 .nr(8)
20524 .kr(8)
20525 .sr(1)
20526 .m(m)
20527 .n(n)
20528 .k(k)
20529 .cm_stride(11)
20530 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020531 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020532 }
20533 }
20534 }
20535 }
20536
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020537 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, a_offset) {
Marat Dukhan89991902021-12-06 00:54:36 -080020538 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020539 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020540 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020541 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020542 .nr(8)
20543 .kr(8)
20544 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020545 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020546 .n(8)
20547 .k(k)
20548 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020549 .a_offset(331)
20550 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020551 }
20552 }
20553
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020554 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, zero) {
Marat Dukhan89991902021-12-06 00:54:36 -080020555 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020556 for (size_t k = 1; k <= 80; k += 17) {
20557 for (uint32_t mz = 0; mz < 4; mz++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020558 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020559 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020560 .nr(8)
20561 .kr(8)
20562 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020563 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020564 .n(8)
20565 .k(k)
20566 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020567 .a_offset(331)
Marat Dukhan89991902021-12-06 00:54:36 -080020568 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020569 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020570 }
20571 }
20572 }
20573
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020574 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmin) {
Marat Dukhan89991902021-12-06 00:54:36 -080020575 TEST_REQUIRES_ARM_NEON;
20576 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020577 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020578 .nr(8)
20579 .kr(8)
20580 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020581 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020582 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020583 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020584 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020585 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020586 }
20587
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020588 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, qmax) {
Marat Dukhan89991902021-12-06 00:54:36 -080020589 TEST_REQUIRES_ARM_NEON;
20590 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020591 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020592 .nr(8)
20593 .kr(8)
20594 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020595 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020596 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020597 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020598 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020599 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020600 }
20601
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020602 TEST(QS8_IGEMM_MINMAX_RNDNU_4X8C8__NEON_MLAL, strided_cm) {
Marat Dukhan89991902021-12-06 00:54:36 -080020603 TEST_REQUIRES_ARM_NEON;
20604 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020605 .mr(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020606 .nr(8)
20607 .kr(8)
20608 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020609 .m(4)
Marat Dukhan89991902021-12-06 00:54:36 -080020610 .n(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020611 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020612 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020613 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_4x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020614 }
20615#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20616
20617
20618#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020619 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020620 TEST_REQUIRES_ARM_NEON;
20621 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020622 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020623 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020624 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020625 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020626 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020627 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020628 .k(16)
20629 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020630 }
20631
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020632 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080020633 TEST_REQUIRES_ARM_NEON;
20634 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020635 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020636 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020637 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020638 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020639 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020640 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020641 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020642 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020643 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020644 }
20645
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020646 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020647 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020648 for (uint32_t n = 1; n <= 16; n++) {
20649 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020650 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020651 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020652 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020653 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020654 .sr(1)
20655 .m(m)
20656 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020657 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020658 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020659 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020660 }
20661 }
20662 }
20663
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020664 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_m) {
Marat Dukhan89991902021-12-06 00:54:36 -080020665 TEST_REQUIRES_ARM_NEON;
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020666 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020667 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020668 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020669 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020670 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020671 .sr(1)
20672 .m(m)
20673 .n(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020674 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020675 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020676 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020677 }
20678 }
20679
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020680 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_eq_16_subtile_n) {
Marat Dukhan89991902021-12-06 00:54:36 -080020681 TEST_REQUIRES_ARM_NEON;
20682 for (uint32_t n = 1; n <= 16; n++) {
20683 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020684 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020685 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020686 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020687 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020688 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020689 .n(n)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020690 .k(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020691 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020692 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020693 }
20694 }
20695
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020696 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020697 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020698 for (size_t k = 1; k < 16; k++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020699 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020700 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020701 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020702 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020703 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020704 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020705 .n(16)
20706 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020707 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020708 }
20709 }
20710
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020711 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_lt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020712 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020713 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020714 for (uint32_t n = 1; n <= 16; n++) {
20715 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020716 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020717 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020718 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020719 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020720 .sr(1)
20721 .m(m)
20722 .n(n)
20723 .k(k)
20724 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020725 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020726 }
20727 }
20728 }
20729 }
20730
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020731 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020732 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020733 for (size_t k = 17; k < 32; k++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020734 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020735 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020736 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020737 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020738 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020739 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020740 .n(16)
20741 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020742 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020743 }
20744 }
20745
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020746 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_gt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020747 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020748 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020749 for (uint32_t n = 1; n <= 16; n++) {
20750 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020751 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020752 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020753 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020754 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020755 .sr(1)
20756 .m(m)
20757 .n(n)
20758 .k(k)
20759 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020760 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020761 }
20762 }
20763 }
20764 }
20765
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020766 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020767 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020768 for (size_t k = 32; k <= 160; k += 16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020769 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020770 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020771 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020772 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020773 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020774 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020775 .n(16)
20776 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020777 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020778 }
20779 }
20780
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020781 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, k_div_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020782 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020783 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020784 for (uint32_t n = 1; n <= 16; n++) {
20785 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020786 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020787 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020788 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020789 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020790 .sr(1)
20791 .m(m)
20792 .n(n)
20793 .k(k)
20794 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020795 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020796 }
20797 }
20798 }
20799 }
20800
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020801 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020802 TEST_REQUIRES_ARM_NEON;
20803 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020804 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020805 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020806 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020807 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020808 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020809 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020810 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020811 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020812 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020813 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020814 }
20815 }
20816 }
20817
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020818 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080020819 TEST_REQUIRES_ARM_NEON;
20820 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020821 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020822 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020823 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020824 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020825 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020826 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020827 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020828 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020829 .k(k)
20830 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020831 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020832 }
20833 }
20834 }
20835
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020836 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020837 TEST_REQUIRES_ARM_NEON;
20838 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020839 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020840 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020841 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020842 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020843 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020844 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020845 .sr(1)
20846 .m(m)
20847 .n(n)
20848 .k(k)
20849 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020850 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020851 }
20852 }
20853 }
20854 }
20855
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020856 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16) {
Marat Dukhan89991902021-12-06 00:54:36 -080020857 TEST_REQUIRES_ARM_NEON;
20858 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020859 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020860 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020861 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020862 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020863 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020864 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020865 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020866 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020867 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020868 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020869 }
20870 }
20871 }
20872
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020873 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_strided_cn) {
Marat Dukhan89991902021-12-06 00:54:36 -080020874 TEST_REQUIRES_ARM_NEON;
20875 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020876 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020877 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020878 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020879 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020880 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020881 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020882 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020883 .n(n)
20884 .k(k)
20885 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020886 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020887 }
20888 }
20889 }
20890
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020891 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020892 TEST_REQUIRES_ARM_NEON;
20893 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020894 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020895 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020896 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020897 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020898 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020899 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020900 .sr(1)
20901 .m(m)
20902 .n(n)
20903 .k(k)
20904 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020906 }
20907 }
20908 }
20909 }
20910
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020911 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020912 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020913 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020914 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020915 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020916 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020917 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020918 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020919 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020920 .n(16)
20921 .k(k)
20922 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020923 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020924 }
20925 }
20926
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020927 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, small_kernel_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020928 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020929 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020930 for (uint32_t n = 1; n <= 16; n++) {
20931 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020932 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020933 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020934 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020935 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020936 .sr(1)
20937 .m(m)
20938 .n(n)
20939 .k(k)
20940 .ks(3)
20941 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020942 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020943 }
20944 }
20945 }
20946 }
20947
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020948 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_gt_16_small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020949 TEST_REQUIRES_ARM_NEON;
20950 for (uint32_t n = 17; n < 32; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020951 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020952 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020953 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020954 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020955 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020956 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020957 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020958 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020959 .k(k)
20960 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020961 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020962 }
20963 }
20964 }
20965
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020966 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, n_div_16_small_kernel) {
Marat Dukhan89991902021-12-06 00:54:36 -080020967 TEST_REQUIRES_ARM_NEON;
20968 for (uint32_t n = 32; n <= 48; n += 16) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020969 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080020970 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020971 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020972 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020973 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020974 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020975 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020976 .n(n)
Marat Dukhan89991902021-12-06 00:54:36 -080020977 .k(k)
20978 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020979 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080020980 }
20981 }
20982 }
20983
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020984 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm_subtile) {
Marat Dukhan89991902021-12-06 00:54:36 -080020985 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020986 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020987 for (uint32_t n = 1; n <= 16; n++) {
20988 for (uint32_t m = 1; m <= 1; m++) {
Marat Dukhan89991902021-12-06 00:54:36 -080020989 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080020990 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080020991 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020992 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080020993 .sr(1)
20994 .m(m)
20995 .n(n)
20996 .k(k)
20997 .cm_stride(19)
20998 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080020999 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021000 }
21001 }
21002 }
21003 }
21004
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021005 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, a_offset) {
Marat Dukhan89991902021-12-06 00:54:36 -080021006 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021007 for (size_t k = 1; k <= 80; k += 17) {
Marat Dukhan89991902021-12-06 00:54:36 -080021008 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080021009 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080021010 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021011 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080021012 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080021013 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080021014 .n(16)
21015 .k(k)
21016 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021017 .a_offset(83)
21018 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Marat Dukhan89991902021-12-06 00:54:36 -080021019 }
21020 }
21021
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021022 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, zero) {
Marat Dukhan89991902021-12-06 00:54:36 -080021023 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021024 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021025 for (uint32_t mz = 0; mz < 1; mz++) {
Marat Dukhan89991902021-12-06 00:54:36 -080021026 GemmMicrokernelTester()
Zhi An Ngc27f04b2022-01-11 09:34:07 -080021027 .mr(1)
Marat Dukhan89991902021-12-06 00:54:36 -080021028 .nr(16)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021029 .kr(16)
Marat Dukhan89991902021-12-06 00:54:36 -080021030 .sr(1)
Zhi An Ngc27f04b2022-01-11 09:34:07 -080021031 .m(1)
Marat Dukhan89991902021-12-06 00:54:36 -080021032 .n(16)
21033 .k(k)
21034 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080021035 .a_offset(83)
21036 .zero_index(mz)
21037 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
21038 }
21039 }
21040 }
21041
21042 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmin) {
21043 TEST_REQUIRES_ARM_NEON;
21044 GemmMicrokernelTester()
21045 .mr(1)
21046 .nr(16)
21047 .kr(16)
21048 .sr(1)
21049 .m(1)
21050 .n(16)
21051 .k(16)
21052 .qmin(128)
21053 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
21054 }
21055
21056 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, qmax) {
21057 TEST_REQUIRES_ARM_NEON;
21058 GemmMicrokernelTester()
21059 .mr(1)
21060 .nr(16)
21061 .kr(16)
21062 .sr(1)
21063 .m(1)
21064 .n(16)
21065 .k(16)
21066 .qmax(128)
21067 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
21068 }
21069
21070 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16C16__NEON_MLAL, strided_cm) {
21071 TEST_REQUIRES_ARM_NEON;
21072 GemmMicrokernelTester()
21073 .mr(1)
21074 .nr(16)
21075 .kr(16)
21076 .sr(1)
21077 .m(1)
21078 .n(16)
21079 .k(16)
21080 .cm_stride(19)
21081 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
21082 }
21083#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21084
21085
#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot.
  // Every test configures a GemmMicrokernelTester with mr=1, nr=8, kr=4, sr=1
  // and starts with TEST_REQUIRES_ARM_NEON_DOT (presumably an ISA guard from
  // <xnnpack/isa-checks.h> -- confirm against that header).

  // Full 1x8 tile with k = 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // k = 8; every (m, n) with 1 <= m <= 1 and 1 <= n <= 8, iterations(1) each.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 1; m++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // k = 8, n = 8; m swept over 1..1.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t m = 1; m <= 1; m++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(m).n(8).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k = 8, m = 1; n swept over 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // Full tile; k swept over 1..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k swept over 1..7 for every (m, n) with m in 1..1 and n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile; k swept over 9..15.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k swept over 9..15 for every (m, n) with m in 1..1 and n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile; k swept over 16, 24, ..., 80.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k swept over 16, 24, ..., 80 for every (m, n) with m in 1..1 and n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // n swept over 9..15; k over 1, 10, ..., 37; m = 1.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_gt_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n swept over 9..15; k over 1, 10, ..., 37; m over 1..1.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // n in {16, 24}; k over 1, 10, ..., 37; m = 1.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_div_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n in {16, 24}; k over 1, 10, ..., 37; m over 1..1.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks = 3; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // ks = 3; k over 1, 10, ..., 37 for every (m, n) with m in 1..1, n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // ks = 3; n swept over 9..15; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // ks = 3; n in {16, 24}; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // cm_stride = 11; k over 1, 10, ..., 37 for every (m, n) with m in 1..1, n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 1; m++) {
          GemmMicrokernelTester()
            .mr(1).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks = 3 and a_offset = 43; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(1).nr(8).kr(4).sr(1)
        .m(1).n(8).k(k)
        .ks(3).a_offset(43)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // As a_offset, additionally sweeping zero_index over every mz in [0, 1).
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t mz = 0; mz < 1; mz++) {
        GemmMicrokernelTester()
          .mr(1).nr(8).kr(4).sr(1)
          .m(1).n(8).k(k)
          .ks(3).a_offset(43).zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Full tile, k = 8, with qmin set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with qmax set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with cm_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(1).nr(8).kr(4).sr(1)
      .m(1).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
#endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
Marat Dukhane903dff2021-07-16 19:43:41 -070021552
21553
#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
  // Test suite for xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot.
  // Every test configures a GemmMicrokernelTester with mr=6, nr=8, kr=4, sr=1
  // and starts with TEST_REQUIRES_ARM_NEON_DOT (presumably an ISA guard from
  // <xnnpack/isa-checks.h> -- confirm against that header).

  // Full 6x8 tile with k = 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .cn_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // k = 8; every (m, n) with 1 <= m <= 6 and 1 <= n <= 8, iterations(1) each.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 1; n <= 8; n++) {
      for (uint32_t m = 1; m <= 6; m++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(m).n(n).k(8)
          .iterations(1)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // k = 8, n = 8; m swept over 1..6.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t m = 1; m <= 6; m++) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(m).n(8).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k = 8, m = 6; n swept over 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 1; n <= 8; n++) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(n).k(8)
        .iterations(1)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // Full tile; k swept over 1..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k < 8; k++) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k swept over 1..7 for every (m, n) with m in 1..6 and n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k < 8; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile; k swept over 9..15.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 9; k < 16; k++) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k swept over 9..15 for every (m, n) with m in 1..6 and n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 9; k < 16; k++) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile; k swept over 16, 24, ..., 80.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 16; k <= 80; k += 8) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // k swept over 16, 24, ..., 80 for every (m, n) with m in 1..6 and n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // n swept over 9..15; k over 1, 10, ..., 37; m = 6.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_gt_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n swept over 9..15; k over 1, 10, ..., 37; m over 1..6.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // n in {16, 24}; k over 1, 10, ..., 37; m = 6.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Same sweep as n_div_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .cn_stride(11)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // n in {16, 24}; k over 1, 10, ..., 37; m over 1..6.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks = 3; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .ks(3)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // ks = 3; k over 1, 10, ..., 37 for every (m, n) with m in 1..6, n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .ks(3)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // ks = 3; n swept over 9..15; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; n++) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // ks = 3; n in {16, 24}; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(n).k(k)
          .ks(3)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // cm_stride = 11; k over 1, 10, ..., 37 for every (m, n) with m in 1..6, n in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; n++) {
        for (uint32_t m = 1; m <= 6; m++) {
          GemmMicrokernelTester()
            .mr(6).nr(8).kr(4).sr(1)
            .m(m).n(n).k(k)
            .cm_stride(11)
            .iterations(1)
            .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
        }
      }
    }
  }

  // Full tile with ks = 3 and a_offset = 251; k over 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, a_offset) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      GemmMicrokernelTester()
        .mr(6).nr(8).kr(4).sr(1)
        .m(6).n(8).k(k)
        .ks(3).a_offset(251)
        .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }

  // As a_offset, additionally sweeping zero_index over every mz in [0, 6).
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, zero) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t mz = 0; mz < 6; mz++) {
        GemmMicrokernelTester()
          .mr(6).nr(8).kr(4).sr(1)
          .m(6).n(8).k(k)
          .ks(3).a_offset(251).zero_index(mz)
          .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
      }
    }
  }

  // Full tile, k = 8, with qmin set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .qmin(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with qmax set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .qmax(128)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }

  // Full tile, k = 8, with cm_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) {
    TEST_REQUIRES_ARM_NEON_DOT;
    GemmMicrokernelTester()
      .mr(6).nr(8).kr(4).sr(1)
      .m(6).n(8).k(8)
      .cm_stride(11)
      .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
#endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
22020
22021
#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
  // Returns a tester carrying the tile parameters that every
  // QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT case sets first: mr=8, nr=8, kr=4, sr=1.
  static GemmMicrokernelTester Tile8x8c4Neondot() {
    return GemmMicrokernelTester().mr(8).nr(8).kr(4).sr(1);
  }

  // Calls Test() on a configured tester with the 8x8c4 NEONDOT microkernel and
  // the params-init / requantize arguments shared by every case in this group.
  static void Check8x8c4Neondot(GemmMicrokernelTester tester) {
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_8x8c4__neondot,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }

  // Full tile, k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(8));
  }

  // Full tile, k == 8, cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(8).cn_stride(11));
  }

  // Every (m, n) with 1 <= m <= 8 and 1 <= n <= 8 at k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 8; ++m) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(8).iterations(1));
      }
    }
  }

  // Sweeps m over 1..8 with n == 8 and k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t m = 1; m <= 8; ++m) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(8).k(8).iterations(1));
    }
  }

  // Sweeps n over 1..8 with m == 8 and k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 1; n <= 8; ++n) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(8).iterations(1));
    }
  }

  // Full tile for each k in 1..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k < 8; ++k) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(k));
    }
  }

  // Every (m, n) sub-tile for each k in 1..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k < 8; ++k) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // Full tile for each k in 9..15.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 9; k < 16; ++k) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(k));
    }
  }

  // Every (m, n) sub-tile for each k in 9..15.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 9; k < 16; ++k) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // Full tile for k = 16, 24, ..., 80.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 16; k <= 80; k += 8) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(k));
    }
  }

  // Every (m, n) sub-tile for k = 16, 24, ..., 80.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // n in 9..15 (wider than nr), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(k));
      }
    }
  }

  // Same sweep as n_gt_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(k).cn_stride(11));
      }
    }
  }

  // Same sweep as n_gt_8, additionally over m in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // n = 16 and 24 (multiples of nr), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(k));
      }
    }
  }

  // Same sweep as n_div_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(k).cn_stride(11));
      }
    }
  }

  // Same sweep as n_div_8, additionally over m in 1..8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // Full tile with ks == 3, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(k).ks(3));
    }
  }

  // Every (m, n) sub-tile with ks == 3, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).ks(3).iterations(1));
        }
      }
    }
  }

  // n in 9..15 with ks == 3.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(k).ks(3));
      }
    }
  }

  // n = 16 and 24 with ks == 3.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(n).k(k).ks(3));
      }
    }
  }

  // Every (m, n) sub-tile with cm_stride set to 11, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 8; ++m) {
          Check8x8c4Neondot(Tile8x8c4Neondot().m(m).n(n).k(k).cm_stride(11).iterations(1));
        }
      }
    }
  }

  // Full tile with ks == 3 and a_offset == 331.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, a_offset) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(k).ks(3).a_offset(331));
    }
  }

  // Same configuration as a_offset, with zero_index swept over 0..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, zero) {
    TEST_REQUIRES_ARM_NEON_DOT;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t mz = 0; mz < 8; ++mz) {
        Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(k).ks(3).a_offset(331).zero_index(mz));
      }
    }
  }

  // Full tile, k == 8, qmin set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmin) {
    TEST_REQUIRES_ARM_NEON_DOT;
    Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(8).qmin(128));
  }

  // Full tile, k == 8, qmax set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmax) {
    TEST_REQUIRES_ARM_NEON_DOT;
    Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(8).qmax(128));
  }

  // Full tile, k == 8, cm_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm) {
    TEST_REQUIRES_ARM_NEON_DOT;
    Check8x8c4Neondot(Tile8x8c4Neondot().m(8).n(8).k(8).cm_stride(11));
  }
#endif  // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
22488
22489
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Returns a tester carrying the tile parameters that every
  // QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE case sets first: mr=1, nr=8, kr=1, sr=1.
  static GemmMicrokernelTester Tile1x8NeonMlalLane() {
    return GemmMicrokernelTester().mr(1).nr(8).kr(1).sr(1);
  }

  // Calls Test() on a configured tester with the 1x8 NEON MLAL-lane microkernel
  // and the params-init / requantize arguments shared by every case in this group.
  static void Check1x8NeonMlalLane(GemmMicrokernelTester tester) {
    tester.Test(
      xnn_qs8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane,
      xnn_init_qs8_conv_minmax_rndnu_neon_params,
      xnn_qs8_requantize_rndnu);
  }

  // Full tile, k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(8));
  }

  // Full tile, k == 8, cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(8).cn_stride(11));
  }

  // Every (m, n) with m == 1 and 1 <= n <= 8 at k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 8; ++n) {
      for (uint32_t m = 1; m <= 1; ++m) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(8).iterations(1));
      }
    }
  }

  // Sweeps m over its single value 1 with n == 8 and k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t m = 1; m <= 1; ++m) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(8).k(8).iterations(1));
    }
  }

  // Sweeps n over 1..8 with m == 1 and k == 8.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 1; n <= 8; ++n) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(8).iterations(1));
    }
  }

  // Full tile for each k in 1..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; ++k) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(k));
    }
  }

  // Every (m, n) sub-tile for each k in 1..7.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k < 8; ++k) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // Full tile for each k in 9..15.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; ++k) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(k));
    }
  }

  // Every (m, n) sub-tile for each k in 9..15.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 9; k < 16; ++k) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // Full tile for k = 16, 24, ..., 80.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(k));
    }
  }

  // Every (m, n) sub-tile for k = 16, 24, ..., 80.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 16; k <= 80; k += 8) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // n in 9..15 (wider than nr), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(k));
      }
    }
  }

  // Same sweep as n_gt_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(k).cn_stride(11));
      }
    }
  }

  // Same sweep as n_gt_8, additionally over m (single value 1).
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // n = 16 and 24 (multiples of nr), k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(k));
      }
    }
  }

  // Same sweep as n_div_8, with cn_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(k).cn_stride(11));
      }
    }
  }

  // Same sweep as n_div_8, additionally over m (single value 1).
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).iterations(1));
        }
      }
    }
  }

  // Full tile with ks == 3, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(k).ks(3));
    }
  }

  // Every (m, n) sub-tile with ks == 3, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).ks(3).iterations(1));
        }
      }
    }
  }

  // n in 9..15 with ks == 3.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 9; n < 16; ++n) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(k).ks(3));
      }
    }
  }

  // n = 16 and 24 with ks == 3.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
    TEST_REQUIRES_ARM_NEON;
    for (uint32_t n = 16; n <= 24; n += 8) {
      for (size_t k = 1; k <= 40; k += 9) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(n).k(k).ks(3));
      }
    }
  }

  // Every (m, n) sub-tile with cm_stride set to 11, k = 1, 10, ..., 37.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t n = 1; n <= 8; ++n) {
        for (uint32_t m = 1; m <= 1; ++m) {
          Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(m).n(n).k(k).cm_stride(11).iterations(1));
        }
      }
    }
  }

  // Full tile with ks == 3 and a_offset == 43.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, a_offset) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(k).ks(3).a_offset(43));
    }
  }

  // Same configuration as a_offset, with zero_index swept over its single value 0.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, zero) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t k = 1; k <= 40; k += 9) {
      for (uint32_t mz = 0; mz < 1; ++mz) {
        Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(k).ks(3).a_offset(43).zero_index(mz));
      }
    }
  }

  // Full tile, k == 8, qmin set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmin) {
    TEST_REQUIRES_ARM_NEON;
    Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(8).qmin(128));
  }

  // Full tile, k == 8, qmax set to 128.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmax) {
    TEST_REQUIRES_ARM_NEON;
    Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(8).qmax(128));
  }

  // Full tile, k == 8, cm_stride set to 11.
  TEST(QS8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm) {
    TEST_REQUIRES_ARM_NEON;
    Check1x8NeonMlalLane(Tile1x8NeonMlalLane().m(1).n(8).k(8).cm_stride(11));
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
22956
22957
22958#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard22fbe772021-07-20 15:56:32 -070022959 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) {
22960 TEST_REQUIRES_ARM_NEON;
22961 GemmMicrokernelTester()
22962 .mr(1)
22963 .nr(16)
22964 .kr(1)
22965 .sr(1)
22966 .m(1)
22967 .n(16)
22968 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080022969 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070022970 }
22971
22972 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cn) {
22973 TEST_REQUIRES_ARM_NEON;
22974 GemmMicrokernelTester()
22975 .mr(1)
22976 .nr(16)
22977 .kr(1)
22978 .sr(1)
22979 .m(1)
22980 .n(16)
22981 .k(8)
22982 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080022983 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070022984 }
22985
22986 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
22987 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080022988 for (uint32_t n = 1; n <= 16; n++) {
22989 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070022990 GemmMicrokernelTester()
22991 .mr(1)
22992 .nr(16)
22993 .kr(1)
22994 .sr(1)
22995 .m(m)
22996 .n(n)
22997 .k(8)
22998 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080022999 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023000 }
23001 }
23002 }
23003
23004 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
23005 TEST_REQUIRES_ARM_NEON;
23006 for (uint32_t m = 1; m <= 1; m++) {
23007 GemmMicrokernelTester()
23008 .mr(1)
23009 .nr(16)
23010 .kr(1)
23011 .sr(1)
23012 .m(m)
23013 .n(16)
23014 .k(8)
23015 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023016 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023017 }
23018 }
23019
23020 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
23021 TEST_REQUIRES_ARM_NEON;
23022 for (uint32_t n = 1; n <= 16; n++) {
23023 GemmMicrokernelTester()
23024 .mr(1)
23025 .nr(16)
23026 .kr(1)
23027 .sr(1)
23028 .m(1)
23029 .n(n)
23030 .k(8)
23031 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023032 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023033 }
23034 }
23035
23036 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8) {
23037 TEST_REQUIRES_ARM_NEON;
23038 for (size_t k = 1; k < 8; k++) {
23039 GemmMicrokernelTester()
23040 .mr(1)
23041 .nr(16)
23042 .kr(1)
23043 .sr(1)
23044 .m(1)
23045 .n(16)
23046 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023047 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023048 }
23049 }
23050
23051 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
23052 TEST_REQUIRES_ARM_NEON;
23053 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023054 for (uint32_t n = 1; n <= 16; n++) {
23055 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023056 GemmMicrokernelTester()
23057 .mr(1)
23058 .nr(16)
23059 .kr(1)
23060 .sr(1)
23061 .m(m)
23062 .n(n)
23063 .k(k)
23064 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023065 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023066 }
23067 }
23068 }
23069 }
23070
23071 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8) {
23072 TEST_REQUIRES_ARM_NEON;
23073 for (size_t k = 9; k < 16; k++) {
23074 GemmMicrokernelTester()
23075 .mr(1)
23076 .nr(16)
23077 .kr(1)
23078 .sr(1)
23079 .m(1)
23080 .n(16)
23081 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023082 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023083 }
23084 }
23085
23086 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
23087 TEST_REQUIRES_ARM_NEON;
23088 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023089 for (uint32_t n = 1; n <= 16; n++) {
23090 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023091 GemmMicrokernelTester()
23092 .mr(1)
23093 .nr(16)
23094 .kr(1)
23095 .sr(1)
23096 .m(m)
23097 .n(n)
23098 .k(k)
23099 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023100 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023101 }
23102 }
23103 }
23104 }
23105
23106 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8) {
23107 TEST_REQUIRES_ARM_NEON;
23108 for (size_t k = 16; k <= 80; k += 8) {
23109 GemmMicrokernelTester()
23110 .mr(1)
23111 .nr(16)
23112 .kr(1)
23113 .sr(1)
23114 .m(1)
23115 .n(16)
23116 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023117 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023118 }
23119 }
23120
23121 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
23122 TEST_REQUIRES_ARM_NEON;
23123 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023124 for (uint32_t n = 1; n <= 16; n++) {
23125 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023126 GemmMicrokernelTester()
23127 .mr(1)
23128 .nr(16)
23129 .kr(1)
23130 .sr(1)
23131 .m(m)
23132 .n(n)
23133 .k(k)
23134 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023135 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023136 }
23137 }
23138 }
23139 }
23140
23141 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16) {
23142 TEST_REQUIRES_ARM_NEON;
23143 for (uint32_t n = 17; n < 32; n++) {
23144 for (size_t k = 1; k <= 40; k += 9) {
23145 GemmMicrokernelTester()
23146 .mr(1)
23147 .nr(16)
23148 .kr(1)
23149 .sr(1)
23150 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023151 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070023152 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023153 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023154 }
23155 }
23156 }
23157
23158 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
23159 TEST_REQUIRES_ARM_NEON;
23160 for (uint32_t n = 17; n < 32; n++) {
23161 for (size_t k = 1; k <= 40; k += 9) {
23162 GemmMicrokernelTester()
23163 .mr(1)
23164 .nr(16)
23165 .kr(1)
23166 .sr(1)
23167 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023168 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070023169 .k(k)
23170 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023171 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023172 }
23173 }
23174 }
23175
23176 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
23177 TEST_REQUIRES_ARM_NEON;
23178 for (uint32_t n = 17; n < 32; n++) {
23179 for (size_t k = 1; k <= 40; k += 9) {
23180 for (uint32_t m = 1; m <= 1; m++) {
23181 GemmMicrokernelTester()
23182 .mr(1)
23183 .nr(16)
23184 .kr(1)
23185 .sr(1)
23186 .m(m)
23187 .n(n)
23188 .k(k)
23189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023190 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023191 }
23192 }
23193 }
23194 }
23195
23196 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16) {
23197 TEST_REQUIRES_ARM_NEON;
23198 for (uint32_t n = 32; n <= 48; n += 16) {
23199 for (size_t k = 1; k <= 40; k += 9) {
23200 GemmMicrokernelTester()
23201 .mr(1)
23202 .nr(16)
23203 .kr(1)
23204 .sr(1)
23205 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023206 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070023207 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080023208 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023209 }
23210 }
23211 }
23212
23213 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
23214 TEST_REQUIRES_ARM_NEON;
23215 for (uint32_t n = 32; n <= 48; n += 16) {
23216 for (size_t k = 1; k <= 40; k += 9) {
23217 GemmMicrokernelTester()
23218 .mr(1)
23219 .nr(16)
23220 .kr(1)
23221 .sr(1)
23222 .m(1)
23223 .n(n)
23224 .k(k)
23225 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023226 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023227 }
23228 }
23229 }
23230
23231 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
23232 TEST_REQUIRES_ARM_NEON;
23233 for (uint32_t n = 32; n <= 48; n += 16) {
23234 for (size_t k = 1; k <= 40; k += 9) {
23235 for (uint32_t m = 1; m <= 1; m++) {
23236 GemmMicrokernelTester()
23237 .mr(1)
23238 .nr(16)
23239 .kr(1)
23240 .sr(1)
23241 .m(m)
23242 .n(n)
23243 .k(k)
23244 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023245 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023246 }
23247 }
23248 }
23249 }
23250
23251 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel) {
23252 TEST_REQUIRES_ARM_NEON;
23253 for (size_t k = 1; k <= 40; k += 9) {
23254 GemmMicrokernelTester()
23255 .mr(1)
23256 .nr(16)
23257 .kr(1)
23258 .sr(1)
23259 .m(1)
23260 .n(16)
23261 .k(k)
23262 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023263 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023264 }
23265 }
23266
23267 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
23268 TEST_REQUIRES_ARM_NEON;
23269 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023270 for (uint32_t n = 1; n <= 16; n++) {
23271 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023272 GemmMicrokernelTester()
23273 .mr(1)
23274 .nr(16)
23275 .kr(1)
23276 .sr(1)
23277 .m(m)
23278 .n(n)
23279 .k(k)
23280 .ks(3)
23281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023282 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023283 }
23284 }
23285 }
23286 }
23287
23288 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
23289 TEST_REQUIRES_ARM_NEON;
23290 for (uint32_t n = 17; n < 32; n++) {
23291 for (size_t k = 1; k <= 40; k += 9) {
23292 GemmMicrokernelTester()
23293 .mr(1)
23294 .nr(16)
23295 .kr(1)
23296 .sr(1)
23297 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023298 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070023299 .k(k)
23300 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023301 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023302 }
23303 }
23304 }
23305
23306 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
23307 TEST_REQUIRES_ARM_NEON;
23308 for (uint32_t n = 32; n <= 48; n += 16) {
23309 for (size_t k = 1; k <= 40; k += 9) {
23310 GemmMicrokernelTester()
23311 .mr(1)
23312 .nr(16)
23313 .kr(1)
23314 .sr(1)
23315 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023316 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070023317 .k(k)
23318 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080023319 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023320 }
23321 }
23322 }
23323
23324 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
23325 TEST_REQUIRES_ARM_NEON;
23326 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023327 for (uint32_t n = 1; n <= 16; n++) {
23328 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023329 GemmMicrokernelTester()
23330 .mr(1)
23331 .nr(16)
23332 .kr(1)
23333 .sr(1)
23334 .m(m)
23335 .n(n)
23336 .k(k)
23337 .cm_stride(19)
23338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080023339 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023340 }
23341 }
23342 }
23343 }
23344
23345 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, a_offset) {
23346 TEST_REQUIRES_ARM_NEON;
23347 for (size_t k = 1; k <= 40; k += 9) {
23348 GemmMicrokernelTester()
23349 .mr(1)
23350 .nr(16)
23351 .kr(1)
23352 .sr(1)
23353 .m(1)
23354 .n(16)
23355 .k(k)
23356 .ks(3)
23357 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080023358 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023359 }
23360 }
23361
23362 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, zero) {
23363 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023364 for (size_t k = 1; k <= 40; k += 9) {
23365 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023366 GemmMicrokernelTester()
23367 .mr(1)
23368 .nr(16)
23369 .kr(1)
23370 .sr(1)
23371 .m(1)
23372 .n(16)
23373 .k(k)
23374 .ks(3)
23375 .a_offset(43)
23376 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080023377 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023378 }
23379 }
23380 }
23381
23382 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmin) {
23383 TEST_REQUIRES_ARM_NEON;
23384 GemmMicrokernelTester()
23385 .mr(1)
23386 .nr(16)
23387 .kr(1)
23388 .sr(1)
23389 .m(1)
23390 .n(16)
23391 .k(8)
23392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023393 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023394 }
23395
23396 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmax) {
23397 TEST_REQUIRES_ARM_NEON;
23398 GemmMicrokernelTester()
23399 .mr(1)
23400 .nr(16)
23401 .kr(1)
23402 .sr(1)
23403 .m(1)
23404 .n(16)
23405 .k(8)
23406 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080023407 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023408 }
23409
23410 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm) {
23411 TEST_REQUIRES_ARM_NEON;
23412 GemmMicrokernelTester()
23413 .mr(1)
23414 .nr(16)
23415 .kr(1)
23416 .sr(1)
23417 .m(1)
23418 .n(16)
23419 .k(8)
23420 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080023421 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023422 }
23423#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
23424
23425
23426#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023427 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023428 TEST_REQUIRES_ARM_NEON;
23429 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023430 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023431 .nr(16)
23432 .kr(1)
23433 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023434 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023435 .n(16)
23436 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023437 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023438 }
23439
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023440 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cn) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023441 TEST_REQUIRES_ARM_NEON;
23442 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023443 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023444 .nr(16)
23445 .kr(1)
23446 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023447 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023448 .n(16)
23449 .k(8)
23450 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023451 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023452 }
23453
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023454 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023455 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023456 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023457 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023458 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023459 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023460 .nr(16)
23461 .kr(1)
23462 .sr(1)
23463 .m(m)
23464 .n(n)
23465 .k(8)
23466 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023467 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023468 }
23469 }
23470 }
23471
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023472 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023473 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023474 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023475 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023476 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023477 .nr(16)
23478 .kr(1)
23479 .sr(1)
23480 .m(m)
23481 .n(16)
23482 .k(8)
23483 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023484 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023485 }
23486 }
23487
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023488 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023489 TEST_REQUIRES_ARM_NEON;
23490 for (uint32_t n = 1; n <= 16; n++) {
23491 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023492 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023493 .nr(16)
23494 .kr(1)
23495 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023496 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023497 .n(n)
23498 .k(8)
23499 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023500 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023501 }
23502 }
23503
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023504 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023505 TEST_REQUIRES_ARM_NEON;
23506 for (size_t k = 1; k < 8; k++) {
23507 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023508 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023509 .nr(16)
23510 .kr(1)
23511 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023512 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023513 .n(16)
23514 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023515 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023516 }
23517 }
23518
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023519 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_lt_8_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023520 TEST_REQUIRES_ARM_NEON;
23521 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023522 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023523 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023524 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023525 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023526 .nr(16)
23527 .kr(1)
23528 .sr(1)
23529 .m(m)
23530 .n(n)
23531 .k(k)
23532 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023533 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023534 }
23535 }
23536 }
23537 }
23538
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023539 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023540 TEST_REQUIRES_ARM_NEON;
23541 for (size_t k = 9; k < 16; k++) {
23542 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023543 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023544 .nr(16)
23545 .kr(1)
23546 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023547 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023548 .n(16)
23549 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023550 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023551 }
23552 }
23553
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023554 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_gt_8_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023555 TEST_REQUIRES_ARM_NEON;
23556 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023557 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023558 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023559 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023560 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023561 .nr(16)
23562 .kr(1)
23563 .sr(1)
23564 .m(m)
23565 .n(n)
23566 .k(k)
23567 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023568 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023569 }
23570 }
23571 }
23572 }
23573
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023574 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023575 TEST_REQUIRES_ARM_NEON;
23576 for (size_t k = 16; k <= 80; k += 8) {
23577 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023578 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023579 .nr(16)
23580 .kr(1)
23581 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023582 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023583 .n(16)
23584 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023585 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023586 }
23587 }
23588
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023589 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, k_div_8_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023590 TEST_REQUIRES_ARM_NEON;
23591 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023592 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023593 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023594 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023595 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023596 .nr(16)
23597 .kr(1)
23598 .sr(1)
23599 .m(m)
23600 .n(n)
23601 .k(k)
23602 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023603 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023604 }
23605 }
23606 }
23607 }
23608
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023609 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023610 TEST_REQUIRES_ARM_NEON;
23611 for (uint32_t n = 17; n < 32; n++) {
23612 for (size_t k = 1; k <= 40; k += 9) {
23613 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023614 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023615 .nr(16)
23616 .kr(1)
23617 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023618 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023619 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023620 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023621 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023622 }
23623 }
23624 }
23625
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023626 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023627 TEST_REQUIRES_ARM_NEON;
23628 for (uint32_t n = 17; n < 32; n++) {
23629 for (size_t k = 1; k <= 40; k += 9) {
23630 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023631 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023632 .nr(16)
23633 .kr(1)
23634 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023635 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023636 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023637 .k(k)
23638 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023639 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023640 }
23641 }
23642 }
23643
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023644 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023645 TEST_REQUIRES_ARM_NEON;
23646 for (uint32_t n = 17; n < 32; n++) {
23647 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023648 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023649 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023650 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023651 .nr(16)
23652 .kr(1)
23653 .sr(1)
23654 .m(m)
23655 .n(n)
23656 .k(k)
23657 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023658 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023659 }
23660 }
23661 }
23662 }
23663
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023664 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023665 TEST_REQUIRES_ARM_NEON;
23666 for (uint32_t n = 32; n <= 48; n += 16) {
23667 for (size_t k = 1; k <= 40; k += 9) {
23668 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023669 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023670 .nr(16)
23671 .kr(1)
23672 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023673 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023674 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023675 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023676 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023677 }
23678 }
23679 }
23680
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023681 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023682 TEST_REQUIRES_ARM_NEON;
23683 for (uint32_t n = 32; n <= 48; n += 16) {
23684 for (size_t k = 1; k <= 40; k += 9) {
23685 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023686 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023687 .nr(16)
23688 .kr(1)
23689 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023690 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023691 .n(n)
23692 .k(k)
23693 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023694 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023695 }
23696 }
23697 }
23698
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023699 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023700 TEST_REQUIRES_ARM_NEON;
23701 for (uint32_t n = 32; n <= 48; n += 16) {
23702 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023703 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023704 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023705 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023706 .nr(16)
23707 .kr(1)
23708 .sr(1)
23709 .m(m)
23710 .n(n)
23711 .k(k)
23712 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023713 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023714 }
23715 }
23716 }
23717 }
23718
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023719 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023720 TEST_REQUIRES_ARM_NEON;
23721 for (size_t k = 1; k <= 40; k += 9) {
23722 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023723 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023724 .nr(16)
23725 .kr(1)
23726 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023727 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023728 .n(16)
23729 .k(k)
23730 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023731 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023732 }
23733 }
23734
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023735 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, small_kernel_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023736 TEST_REQUIRES_ARM_NEON;
23737 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023738 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023739 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023740 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023741 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023742 .nr(16)
23743 .kr(1)
23744 .sr(1)
23745 .m(m)
23746 .n(n)
23747 .k(k)
23748 .ks(3)
23749 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023750 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023751 }
23752 }
23753 }
23754 }
23755
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023756 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023757 TEST_REQUIRES_ARM_NEON;
23758 for (uint32_t n = 17; n < 32; n++) {
23759 for (size_t k = 1; k <= 40; k += 9) {
23760 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023761 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023762 .nr(16)
23763 .kr(1)
23764 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023765 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023766 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023767 .k(k)
23768 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023769 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023770 }
23771 }
23772 }
23773
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023774 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023775 TEST_REQUIRES_ARM_NEON;
23776 for (uint32_t n = 32; n <= 48; n += 16) {
23777 for (size_t k = 1; k <= 40; k += 9) {
23778 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023779 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023780 .nr(16)
23781 .kr(1)
23782 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023783 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080023784 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023785 .k(k)
23786 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023787 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023788 }
23789 }
23790 }
23791
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023792 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm_subtile) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023793 TEST_REQUIRES_ARM_NEON;
23794 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023795 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023796 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023797 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023798 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023799 .nr(16)
23800 .kr(1)
23801 .sr(1)
23802 .m(m)
23803 .n(n)
23804 .k(k)
23805 .cm_stride(19)
23806 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023807 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023808 }
23809 }
23810 }
23811 }
23812
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023813 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, a_offset) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023814 TEST_REQUIRES_ARM_NEON;
23815 for (size_t k = 1; k <= 40; k += 9) {
23816 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023817 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023818 .nr(16)
23819 .kr(1)
23820 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023821 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023822 .n(16)
23823 .k(k)
23824 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023825 .a_offset(127)
23826 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023827 }
23828 }
23829
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023830 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, zero) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023831 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023832 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023833 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023834 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023835 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023836 .nr(16)
23837 .kr(1)
23838 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023839 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023840 .n(16)
23841 .k(k)
23842 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023843 .a_offset(127)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023844 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023845 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023846 }
23847 }
23848 }
23849
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023850 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmin) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023851 TEST_REQUIRES_ARM_NEON;
23852 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023853 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023854 .nr(16)
23855 .kr(1)
23856 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023857 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023858 .n(16)
23859 .k(8)
23860 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023861 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023862 }
23863
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023864 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, qmax) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023865 TEST_REQUIRES_ARM_NEON;
23866 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023867 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023868 .nr(16)
23869 .kr(1)
23870 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023871 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023872 .n(16)
23873 .k(8)
23874 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023875 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023876 }
23877
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023878 TEST(QS8_IGEMM_MINMAX_RNDNU_3X16__NEON_MLAL_LANE, strided_cm) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023879 TEST_REQUIRES_ARM_NEON;
23880 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023881 .mr(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023882 .nr(16)
23883 .kr(1)
23884 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023885 .m(3)
Frank Barchard27bf92c2021-11-24 15:47:52 -080023886 .n(16)
23887 .k(8)
23888 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023889 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080023890 }
23891#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
23892
23893
23894#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023895 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080023896 TEST_REQUIRES_ARM_NEON;
23897 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023898 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023899 .nr(16)
23900 .kr(1)
23901 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023902 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023903 .n(16)
23904 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023905 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023906 }
23907
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023908 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cn) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023909 TEST_REQUIRES_ARM_NEON;
23910 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023911 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023912 .nr(16)
23913 .kr(1)
23914 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023915 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023916 .n(16)
23917 .k(8)
23918 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023919 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023920 }
23921
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023922 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023923 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080023924 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023925 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023926 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023927 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023928 .nr(16)
23929 .kr(1)
23930 .sr(1)
23931 .m(m)
23932 .n(n)
23933 .k(8)
23934 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023935 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023936 }
23937 }
23938 }
23939
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023940 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023941 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023942 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023943 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023944 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023945 .nr(16)
23946 .kr(1)
23947 .sr(1)
23948 .m(m)
23949 .n(16)
23950 .k(8)
23951 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023952 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023953 }
23954 }
23955
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023956 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023957 TEST_REQUIRES_ARM_NEON;
23958 for (uint32_t n = 1; n <= 16; n++) {
23959 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023960 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023961 .nr(16)
23962 .kr(1)
23963 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023964 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023965 .n(n)
23966 .k(8)
23967 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023968 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023969 }
23970 }
23971
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023972 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023973 TEST_REQUIRES_ARM_NEON;
23974 for (size_t k = 1; k < 8; k++) {
23975 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023976 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023977 .nr(16)
23978 .kr(1)
23979 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023980 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023981 .n(16)
23982 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023983 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070023984 }
23985 }
23986
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023987 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023988 TEST_REQUIRES_ARM_NEON;
23989 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080023990 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023991 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070023992 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080023993 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070023994 .nr(16)
23995 .kr(1)
23996 .sr(1)
23997 .m(m)
23998 .n(n)
23999 .k(k)
24000 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024001 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024002 }
24003 }
24004 }
24005 }
24006
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024007 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024008 TEST_REQUIRES_ARM_NEON;
24009 for (size_t k = 9; k < 16; k++) {
24010 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024011 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024012 .nr(16)
24013 .kr(1)
24014 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024015 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024016 .n(16)
24017 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024018 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024019 }
24020 }
24021
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024022 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024023 TEST_REQUIRES_ARM_NEON;
24024 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024025 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024026 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024027 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024028 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024029 .nr(16)
24030 .kr(1)
24031 .sr(1)
24032 .m(m)
24033 .n(n)
24034 .k(k)
24035 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024036 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024037 }
24038 }
24039 }
24040 }
24041
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024042 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024043 TEST_REQUIRES_ARM_NEON;
24044 for (size_t k = 16; k <= 80; k += 8) {
24045 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024046 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024047 .nr(16)
24048 .kr(1)
24049 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024050 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024051 .n(16)
24052 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024053 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024054 }
24055 }
24056
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024057 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024058 TEST_REQUIRES_ARM_NEON;
24059 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024060 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024061 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024062 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024063 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024064 .nr(16)
24065 .kr(1)
24066 .sr(1)
24067 .m(m)
24068 .n(n)
24069 .k(k)
24070 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024071 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024072 }
24073 }
24074 }
24075 }
24076
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024077 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024078 TEST_REQUIRES_ARM_NEON;
24079 for (uint32_t n = 17; n < 32; n++) {
24080 for (size_t k = 1; k <= 40; k += 9) {
24081 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024082 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024083 .nr(16)
24084 .kr(1)
24085 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024086 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024087 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070024088 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024089 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024090 }
24091 }
24092 }
24093
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024094 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024095 TEST_REQUIRES_ARM_NEON;
24096 for (uint32_t n = 17; n < 32; n++) {
24097 for (size_t k = 1; k <= 40; k += 9) {
24098 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024099 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024100 .nr(16)
24101 .kr(1)
24102 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024103 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024104 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070024105 .k(k)
24106 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024107 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024108 }
24109 }
24110 }
24111
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024112 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024113 TEST_REQUIRES_ARM_NEON;
24114 for (uint32_t n = 17; n < 32; n++) {
24115 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024116 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024117 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024118 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024119 .nr(16)
24120 .kr(1)
24121 .sr(1)
24122 .m(m)
24123 .n(n)
24124 .k(k)
24125 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024126 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024127 }
24128 }
24129 }
24130 }
24131
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024132 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024133 TEST_REQUIRES_ARM_NEON;
24134 for (uint32_t n = 32; n <= 48; n += 16) {
24135 for (size_t k = 1; k <= 40; k += 9) {
24136 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024137 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024138 .nr(16)
24139 .kr(1)
24140 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024141 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024142 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070024143 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024144 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024145 }
24146 }
24147 }
24148
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024149 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024150 TEST_REQUIRES_ARM_NEON;
24151 for (uint32_t n = 32; n <= 48; n += 16) {
24152 for (size_t k = 1; k <= 40; k += 9) {
24153 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024154 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024155 .nr(16)
24156 .kr(1)
24157 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024158 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024159 .n(n)
24160 .k(k)
24161 .cn_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024162 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024163 }
24164 }
24165 }
24166
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024167 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024168 TEST_REQUIRES_ARM_NEON;
24169 for (uint32_t n = 32; n <= 48; n += 16) {
24170 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024171 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024172 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024173 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024174 .nr(16)
24175 .kr(1)
24176 .sr(1)
24177 .m(m)
24178 .n(n)
24179 .k(k)
24180 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024181 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024182 }
24183 }
24184 }
24185 }
24186
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024187 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024188 TEST_REQUIRES_ARM_NEON;
24189 for (size_t k = 1; k <= 40; k += 9) {
24190 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024191 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024192 .nr(16)
24193 .kr(1)
24194 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024195 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024196 .n(16)
24197 .k(k)
24198 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024199 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024200 }
24201 }
24202
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024203 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024204 TEST_REQUIRES_ARM_NEON;
24205 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024206 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024207 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024208 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024209 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024210 .nr(16)
24211 .kr(1)
24212 .sr(1)
24213 .m(m)
24214 .n(n)
24215 .k(k)
24216 .ks(3)
24217 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024218 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024219 }
24220 }
24221 }
24222 }
24223
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024224 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024225 TEST_REQUIRES_ARM_NEON;
24226 for (uint32_t n = 17; n < 32; n++) {
24227 for (size_t k = 1; k <= 40; k += 9) {
24228 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024229 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024230 .nr(16)
24231 .kr(1)
24232 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024233 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024234 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070024235 .k(k)
24236 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024237 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024238 }
24239 }
24240 }
24241
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024242 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024243 TEST_REQUIRES_ARM_NEON;
24244 for (uint32_t n = 32; n <= 48; n += 16) {
24245 for (size_t k = 1; k <= 40; k += 9) {
24246 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024247 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024248 .nr(16)
24249 .kr(1)
24250 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024251 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024252 .n(n)
Frank Barchard22fbe772021-07-20 15:56:32 -070024253 .k(k)
24254 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024255 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024256 }
24257 }
24258 }
24259
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024260 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024261 TEST_REQUIRES_ARM_NEON;
24262 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024263 for (uint32_t n = 1; n <= 16; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024264 for (uint32_t m = 1; m <= 1; m++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024265 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024266 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024267 .nr(16)
24268 .kr(1)
24269 .sr(1)
24270 .m(m)
24271 .n(n)
24272 .k(k)
24273 .cm_stride(19)
24274 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024275 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024276 }
24277 }
24278 }
24279 }
24280
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024281 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, a_offset) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024282 TEST_REQUIRES_ARM_NEON;
24283 for (size_t k = 1; k <= 40; k += 9) {
24284 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024285 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024286 .nr(16)
24287 .kr(1)
24288 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024289 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024290 .n(16)
24291 .k(k)
24292 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024293 .a_offset(43)
24294 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024295 }
24296 }
24297
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024298 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, zero) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024299 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024300 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024301 for (uint32_t mz = 0; mz < 1; mz++) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024302 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024303 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024304 .nr(16)
24305 .kr(1)
24306 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024307 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024308 .n(16)
24309 .k(k)
24310 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024311 .a_offset(43)
Frank Barchard22fbe772021-07-20 15:56:32 -070024312 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024313 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024314 }
24315 }
24316 }
24317
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024318 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmin) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024319 TEST_REQUIRES_ARM_NEON;
24320 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024321 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024322 .nr(16)
24323 .kr(1)
24324 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024325 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024326 .n(16)
24327 .k(8)
24328 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024329 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024330 }
24331
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024332 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, qmax) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024333 TEST_REQUIRES_ARM_NEON;
24334 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024335 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024336 .nr(16)
24337 .kr(1)
24338 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024339 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024340 .n(16)
24341 .k(8)
24342 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024343 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024344 }
24345
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024346 TEST(QS8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE_PRFM, strided_cm) {
Frank Barchard22fbe772021-07-20 15:56:32 -070024347 TEST_REQUIRES_ARM_NEON;
24348 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024349 .mr(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024350 .nr(16)
24351 .kr(1)
24352 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024353 .m(1)
Frank Barchard22fbe772021-07-20 15:56:32 -070024354 .n(16)
24355 .k(8)
24356 .cm_stride(19)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024357 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard22fbe772021-07-20 15:56:32 -070024358 }
24359#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
24360
24361
24362#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard27bf92c2021-11-24 15:47:52 -080024363 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8) {
24364 TEST_REQUIRES_ARM_NEON;
24365 GemmMicrokernelTester()
24366 .mr(6)
24367 .nr(16)
24368 .kr(1)
24369 .sr(1)
24370 .m(6)
24371 .n(16)
24372 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080024373 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024374 }
24375
24376 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cn) {
24377 TEST_REQUIRES_ARM_NEON;
24378 GemmMicrokernelTester()
24379 .mr(6)
24380 .nr(16)
24381 .kr(1)
24382 .sr(1)
24383 .m(6)
24384 .n(16)
24385 .k(8)
24386 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024388 }
24389
24390 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile) {
24391 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024392 for (uint32_t n = 1; n <= 16; n++) {
24393 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024394 GemmMicrokernelTester()
24395 .mr(6)
24396 .nr(16)
24397 .kr(1)
24398 .sr(1)
24399 .m(m)
24400 .n(n)
24401 .k(8)
24402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024403 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024404 }
24405 }
24406 }
24407
24408 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_m) {
24409 TEST_REQUIRES_ARM_NEON;
24410 for (uint32_t m = 1; m <= 6; m++) {
24411 GemmMicrokernelTester()
24412 .mr(6)
24413 .nr(16)
24414 .kr(1)
24415 .sr(1)
24416 .m(m)
24417 .n(16)
24418 .k(8)
24419 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024420 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024421 }
24422 }
24423
24424 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_eq_8_subtile_n) {
24425 TEST_REQUIRES_ARM_NEON;
24426 for (uint32_t n = 1; n <= 16; n++) {
24427 GemmMicrokernelTester()
24428 .mr(6)
24429 .nr(16)
24430 .kr(1)
24431 .sr(1)
24432 .m(6)
24433 .n(n)
24434 .k(8)
24435 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024436 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024437 }
24438 }
24439
24440 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8) {
24441 TEST_REQUIRES_ARM_NEON;
24442 for (size_t k = 1; k < 8; k++) {
24443 GemmMicrokernelTester()
24444 .mr(6)
24445 .nr(16)
24446 .kr(1)
24447 .sr(1)
24448 .m(6)
24449 .n(16)
24450 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024451 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024452 }
24453 }
24454
24455 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_lt_8_subtile) {
24456 TEST_REQUIRES_ARM_NEON;
24457 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024458 for (uint32_t n = 1; n <= 16; n++) {
24459 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024460 GemmMicrokernelTester()
24461 .mr(6)
24462 .nr(16)
24463 .kr(1)
24464 .sr(1)
24465 .m(m)
24466 .n(n)
24467 .k(k)
24468 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024469 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024470 }
24471 }
24472 }
24473 }
24474
24475 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8) {
24476 TEST_REQUIRES_ARM_NEON;
24477 for (size_t k = 9; k < 16; k++) {
24478 GemmMicrokernelTester()
24479 .mr(6)
24480 .nr(16)
24481 .kr(1)
24482 .sr(1)
24483 .m(6)
24484 .n(16)
24485 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024486 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024487 }
24488 }
24489
24490 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_gt_8_subtile) {
24491 TEST_REQUIRES_ARM_NEON;
24492 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024493 for (uint32_t n = 1; n <= 16; n++) {
24494 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024495 GemmMicrokernelTester()
24496 .mr(6)
24497 .nr(16)
24498 .kr(1)
24499 .sr(1)
24500 .m(m)
24501 .n(n)
24502 .k(k)
24503 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024504 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024505 }
24506 }
24507 }
24508 }
24509
24510 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8) {
24511 TEST_REQUIRES_ARM_NEON;
24512 for (size_t k = 16; k <= 80; k += 8) {
24513 GemmMicrokernelTester()
24514 .mr(6)
24515 .nr(16)
24516 .kr(1)
24517 .sr(1)
24518 .m(6)
24519 .n(16)
24520 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024521 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024522 }
24523 }
24524
24525 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, k_div_8_subtile) {
24526 TEST_REQUIRES_ARM_NEON;
24527 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024528 for (uint32_t n = 1; n <= 16; n++) {
24529 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024530 GemmMicrokernelTester()
24531 .mr(6)
24532 .nr(16)
24533 .kr(1)
24534 .sr(1)
24535 .m(m)
24536 .n(n)
24537 .k(k)
24538 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024539 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024540 }
24541 }
24542 }
24543 }
24544
24545 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16) {
24546 TEST_REQUIRES_ARM_NEON;
24547 for (uint32_t n = 17; n < 32; n++) {
24548 for (size_t k = 1; k <= 40; k += 9) {
24549 GemmMicrokernelTester()
24550 .mr(6)
24551 .nr(16)
24552 .kr(1)
24553 .sr(1)
24554 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024555 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024556 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024557 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024558 }
24559 }
24560 }
24561
24562 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_strided_cn) {
24563 TEST_REQUIRES_ARM_NEON;
24564 for (uint32_t n = 17; n < 32; n++) {
24565 for (size_t k = 1; k <= 40; k += 9) {
24566 GemmMicrokernelTester()
24567 .mr(6)
24568 .nr(16)
24569 .kr(1)
24570 .sr(1)
24571 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024572 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024573 .k(k)
24574 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024575 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024576 }
24577 }
24578 }
24579
24580 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_subtile) {
24581 TEST_REQUIRES_ARM_NEON;
24582 for (uint32_t n = 17; n < 32; n++) {
24583 for (size_t k = 1; k <= 40; k += 9) {
24584 for (uint32_t m = 1; m <= 6; m++) {
24585 GemmMicrokernelTester()
24586 .mr(6)
24587 .nr(16)
24588 .kr(1)
24589 .sr(1)
24590 .m(m)
24591 .n(n)
24592 .k(k)
24593 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024594 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024595 }
24596 }
24597 }
24598 }
24599
24600 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16) {
24601 TEST_REQUIRES_ARM_NEON;
24602 for (uint32_t n = 32; n <= 48; n += 16) {
24603 for (size_t k = 1; k <= 40; k += 9) {
24604 GemmMicrokernelTester()
24605 .mr(6)
24606 .nr(16)
24607 .kr(1)
24608 .sr(1)
24609 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024610 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024611 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080024612 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024613 }
24614 }
24615 }
24616
24617 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_strided_cn) {
24618 TEST_REQUIRES_ARM_NEON;
24619 for (uint32_t n = 32; n <= 48; n += 16) {
24620 for (size_t k = 1; k <= 40; k += 9) {
24621 GemmMicrokernelTester()
24622 .mr(6)
24623 .nr(16)
24624 .kr(1)
24625 .sr(1)
24626 .m(6)
24627 .n(n)
24628 .k(k)
24629 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024630 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024631 }
24632 }
24633 }
24634
24635 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_subtile) {
24636 TEST_REQUIRES_ARM_NEON;
24637 for (uint32_t n = 32; n <= 48; n += 16) {
24638 for (size_t k = 1; k <= 40; k += 9) {
24639 for (uint32_t m = 1; m <= 6; m++) {
24640 GemmMicrokernelTester()
24641 .mr(6)
24642 .nr(16)
24643 .kr(1)
24644 .sr(1)
24645 .m(m)
24646 .n(n)
24647 .k(k)
24648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024649 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024650 }
24651 }
24652 }
24653 }
24654
24655 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, small_kernel) {
24656 TEST_REQUIRES_ARM_NEON;
24657 for (size_t k = 1; k <= 40; k += 9) {
24658 GemmMicrokernelTester()
24659 .mr(6)
24660 .nr(16)
24661 .kr(1)
24662 .sr(1)
24663 .m(6)
24664 .n(16)
24665 .k(k)
24666 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024667 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024668 }
24669 }
24670
24671 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, small_kernel_subtile) {
24672 TEST_REQUIRES_ARM_NEON;
24673 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024674 for (uint32_t n = 1; n <= 16; n++) {
24675 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024676 GemmMicrokernelTester()
24677 .mr(6)
24678 .nr(16)
24679 .kr(1)
24680 .sr(1)
24681 .m(m)
24682 .n(n)
24683 .k(k)
24684 .ks(3)
24685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024686 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024687 }
24688 }
24689 }
24690 }
24691
24692 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_gt_16_small_kernel) {
24693 TEST_REQUIRES_ARM_NEON;
24694 for (uint32_t n = 17; n < 32; n++) {
24695 for (size_t k = 1; k <= 40; k += 9) {
24696 GemmMicrokernelTester()
24697 .mr(6)
24698 .nr(16)
24699 .kr(1)
24700 .sr(1)
24701 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024702 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024703 .k(k)
24704 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024705 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024706 }
24707 }
24708 }
24709
24710 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, n_div_16_small_kernel) {
24711 TEST_REQUIRES_ARM_NEON;
24712 for (uint32_t n = 32; n <= 48; n += 16) {
24713 for (size_t k = 1; k <= 40; k += 9) {
24714 GemmMicrokernelTester()
24715 .mr(6)
24716 .nr(16)
24717 .kr(1)
24718 .sr(1)
24719 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080024720 .n(n)
Frank Barchard27bf92c2021-11-24 15:47:52 -080024721 .k(k)
24722 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080024723 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024724 }
24725 }
24726 }
24727
24728 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cm_subtile) {
24729 TEST_REQUIRES_ARM_NEON;
24730 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024731 for (uint32_t n = 1; n <= 16; n++) {
24732 for (uint32_t m = 1; m <= 6; m++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024733 GemmMicrokernelTester()
24734 .mr(6)
24735 .nr(16)
24736 .kr(1)
24737 .sr(1)
24738 .m(m)
24739 .n(n)
24740 .k(k)
24741 .cm_stride(19)
24742 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080024743 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024744 }
24745 }
24746 }
24747 }
24748
24749 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, a_offset) {
24750 TEST_REQUIRES_ARM_NEON;
24751 for (size_t k = 1; k <= 40; k += 9) {
24752 GemmMicrokernelTester()
24753 .mr(6)
24754 .nr(16)
24755 .kr(1)
24756 .sr(1)
24757 .m(6)
24758 .n(16)
24759 .k(k)
24760 .ks(3)
24761 .a_offset(251)
Marat Dukhan50323b82022-01-11 00:12:01 -080024762 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024763 }
24764 }
24765
24766 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, zero) {
24767 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024768 for (size_t k = 1; k <= 40; k += 9) {
24769 for (uint32_t mz = 0; mz < 6; mz++) {
Frank Barchard27bf92c2021-11-24 15:47:52 -080024770 GemmMicrokernelTester()
24771 .mr(6)
24772 .nr(16)
24773 .kr(1)
24774 .sr(1)
24775 .m(6)
24776 .n(16)
24777 .k(k)
24778 .ks(3)
24779 .a_offset(251)
24780 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080024781 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024782 }
24783 }
24784 }
24785
24786 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, qmin) {
24787 TEST_REQUIRES_ARM_NEON;
24788 GemmMicrokernelTester()
24789 .mr(6)
24790 .nr(16)
24791 .kr(1)
24792 .sr(1)
24793 .m(6)
24794 .n(16)
24795 .k(8)
24796 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024797 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024798 }
24799
24800 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, qmax) {
24801 TEST_REQUIRES_ARM_NEON;
24802 GemmMicrokernelTester()
24803 .mr(6)
24804 .nr(16)
24805 .kr(1)
24806 .sr(1)
24807 .m(6)
24808 .n(16)
24809 .k(8)
24810 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080024811 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024812 }
24813
24814 TEST(QS8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE_PRFM, strided_cm) {
24815 TEST_REQUIRES_ARM_NEON;
24816 GemmMicrokernelTester()
24817 .mr(6)
24818 .nr(16)
24819 .kr(1)
24820 .sr(1)
24821 .m(6)
24822 .n(16)
24823 .k(8)
24824 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080024825 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard27bf92c2021-11-24 15:47:52 -080024826 }
24827#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
24828
24829
24830#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024831 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024832 TEST_REQUIRES_ARM_NEON;
24833 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024834 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024835 .nr(8)
24836 .kr(1)
24837 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024838 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024839 .n(8)
24840 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024841 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024842 }
24843
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024844 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024845 TEST_REQUIRES_ARM_NEON;
24846 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024847 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024848 .nr(8)
24849 .kr(1)
24850 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024851 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024852 .n(8)
24853 .k(8)
24854 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024855 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024856 }
24857
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024858 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024859 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080024860 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024861 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024862 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024863 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024864 .nr(8)
24865 .kr(1)
24866 .sr(1)
24867 .m(m)
24868 .n(n)
24869 .k(8)
24870 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024871 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024872 }
24873 }
24874 }
24875
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024876 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024877 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024878 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024879 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024880 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024881 .nr(8)
24882 .kr(1)
24883 .sr(1)
24884 .m(m)
24885 .n(8)
24886 .k(8)
24887 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024888 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024889 }
24890 }
24891
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024892 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024893 TEST_REQUIRES_ARM_NEON;
24894 for (uint32_t n = 1; n <= 8; n++) {
24895 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024896 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024897 .nr(8)
24898 .kr(1)
24899 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024900 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024901 .n(n)
24902 .k(8)
24903 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024904 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024905 }
24906 }
24907
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024908 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024909 TEST_REQUIRES_ARM_NEON;
24910 for (size_t k = 1; k < 8; k++) {
24911 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024912 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024913 .nr(8)
24914 .kr(1)
24915 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024916 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024917 .n(8)
24918 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024919 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024920 }
24921 }
24922
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024923 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024924 TEST_REQUIRES_ARM_NEON;
24925 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024926 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024927 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024928 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024929 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024930 .nr(8)
24931 .kr(1)
24932 .sr(1)
24933 .m(m)
24934 .n(n)
24935 .k(k)
24936 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024937 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024938 }
24939 }
24940 }
24941 }
24942
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024943 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024944 TEST_REQUIRES_ARM_NEON;
24945 for (size_t k = 9; k < 16; k++) {
24946 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024947 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024948 .nr(8)
24949 .kr(1)
24950 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024951 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024952 .n(8)
24953 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024954 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024955 }
24956 }
24957
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024958 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024959 TEST_REQUIRES_ARM_NEON;
24960 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024961 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024962 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024963 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024964 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024965 .nr(8)
24966 .kr(1)
24967 .sr(1)
24968 .m(m)
24969 .n(n)
24970 .k(k)
24971 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024972 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024973 }
24974 }
24975 }
24976 }
24977
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024978 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024979 TEST_REQUIRES_ARM_NEON;
24980 for (size_t k = 16; k <= 80; k += 8) {
24981 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024982 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024983 .nr(8)
24984 .kr(1)
24985 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024986 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070024987 .n(8)
24988 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024989 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070024990 }
24991 }
24992
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024993 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024994 TEST_REQUIRES_ARM_NEON;
24995 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080024996 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024997 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070024998 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080024999 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025000 .nr(8)
25001 .kr(1)
25002 .sr(1)
25003 .m(m)
25004 .n(n)
25005 .k(k)
25006 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025007 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025008 }
25009 }
25010 }
25011 }
25012
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025013 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025014 TEST_REQUIRES_ARM_NEON;
25015 for (uint32_t n = 9; n < 16; n++) {
25016 for (size_t k = 1; k <= 40; k += 9) {
25017 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025018 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025019 .nr(8)
25020 .kr(1)
25021 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025022 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025023 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025024 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025025 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025026 }
25027 }
25028 }
25029
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025030 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025031 TEST_REQUIRES_ARM_NEON;
25032 for (uint32_t n = 9; n < 16; n++) {
25033 for (size_t k = 1; k <= 40; k += 9) {
25034 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025035 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025036 .nr(8)
25037 .kr(1)
25038 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025039 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025040 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025041 .k(k)
25042 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025043 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025044 }
25045 }
25046 }
25047
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025048 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025049 TEST_REQUIRES_ARM_NEON;
25050 for (uint32_t n = 9; n < 16; n++) {
25051 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025052 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025053 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025054 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025055 .nr(8)
25056 .kr(1)
25057 .sr(1)
25058 .m(m)
25059 .n(n)
25060 .k(k)
25061 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025062 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025063 }
25064 }
25065 }
25066 }
25067
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025068 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025069 TEST_REQUIRES_ARM_NEON;
25070 for (uint32_t n = 16; n <= 24; n += 8) {
25071 for (size_t k = 1; k <= 40; k += 9) {
25072 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025073 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025074 .nr(8)
25075 .kr(1)
25076 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025077 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025078 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025079 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025080 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025081 }
25082 }
25083 }
25084
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025085 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025086 TEST_REQUIRES_ARM_NEON;
25087 for (uint32_t n = 16; n <= 24; n += 8) {
25088 for (size_t k = 1; k <= 40; k += 9) {
25089 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025090 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025091 .nr(8)
25092 .kr(1)
25093 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025094 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025095 .n(n)
25096 .k(k)
25097 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025098 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025099 }
25100 }
25101 }
25102
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025103 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025104 TEST_REQUIRES_ARM_NEON;
25105 for (uint32_t n = 16; n <= 24; n += 8) {
25106 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025107 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025108 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025109 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025110 .nr(8)
25111 .kr(1)
25112 .sr(1)
25113 .m(m)
25114 .n(n)
25115 .k(k)
25116 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025117 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025118 }
25119 }
25120 }
25121 }
25122
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025123 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025124 TEST_REQUIRES_ARM_NEON;
25125 for (size_t k = 1; k <= 40; k += 9) {
25126 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025127 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025128 .nr(8)
25129 .kr(1)
25130 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025131 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025132 .n(8)
25133 .k(k)
25134 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025135 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025136 }
25137 }
25138
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025139 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025140 TEST_REQUIRES_ARM_NEON;
25141 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025142 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025143 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025144 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025145 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025146 .nr(8)
25147 .kr(1)
25148 .sr(1)
25149 .m(m)
25150 .n(n)
25151 .k(k)
25152 .ks(3)
25153 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025154 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025155 }
25156 }
25157 }
25158 }
25159
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025160 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025161 TEST_REQUIRES_ARM_NEON;
25162 for (uint32_t n = 9; n < 16; n++) {
25163 for (size_t k = 1; k <= 40; k += 9) {
25164 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025165 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025166 .nr(8)
25167 .kr(1)
25168 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025169 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025170 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025171 .k(k)
25172 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025173 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025174 }
25175 }
25176 }
25177
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025178 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025179 TEST_REQUIRES_ARM_NEON;
25180 for (uint32_t n = 16; n <= 24; n += 8) {
25181 for (size_t k = 1; k <= 40; k += 9) {
25182 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025183 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025184 .nr(8)
25185 .kr(1)
25186 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025187 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025188 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025189 .k(k)
25190 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025191 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025192 }
25193 }
25194 }
25195
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025196 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025197 TEST_REQUIRES_ARM_NEON;
25198 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025199 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025200 for (uint32_t m = 1; m <= 2; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025201 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025202 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025203 .nr(8)
25204 .kr(1)
25205 .sr(1)
25206 .m(m)
25207 .n(n)
25208 .k(k)
25209 .cm_stride(11)
25210 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025211 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025212 }
25213 }
25214 }
25215 }
25216
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025217 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, a_offset) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025218 TEST_REQUIRES_ARM_NEON;
25219 for (size_t k = 1; k <= 40; k += 9) {
25220 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025221 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025222 .nr(8)
25223 .kr(1)
25224 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025225 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025226 .n(8)
25227 .k(k)
25228 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025229 .a_offset(83)
25230 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025231 }
25232 }
25233
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025234 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, zero) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025235 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025236 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025237 for (uint32_t mz = 0; mz < 2; mz++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025238 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025239 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025240 .nr(8)
25241 .kr(1)
25242 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025243 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025244 .n(8)
25245 .k(k)
25246 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025247 .a_offset(83)
Frank Barchard510b8e02021-07-26 17:25:18 -070025248 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025249 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025250 }
25251 }
25252 }
25253
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025254 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmin) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025255 TEST_REQUIRES_ARM_NEON;
25256 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025257 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025258 .nr(8)
25259 .kr(1)
25260 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025261 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025262 .n(8)
25263 .k(8)
25264 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025265 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025266 }
25267
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025268 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmax) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025269 TEST_REQUIRES_ARM_NEON;
25270 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025271 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025272 .nr(8)
25273 .kr(1)
25274 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025275 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025276 .n(8)
25277 .k(8)
25278 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025279 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025280 }
25281
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025282 TEST(QS8_IGEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025283 TEST_REQUIRES_ARM_NEON;
25284 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025285 .mr(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025286 .nr(8)
25287 .kr(1)
25288 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025289 .m(2)
Frank Barchard510b8e02021-07-26 17:25:18 -070025290 .n(8)
25291 .k(8)
25292 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025293 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025294 }
25295#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
25296
25297
25298#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025299 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025300 TEST_REQUIRES_ARM_NEON;
25301 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025302 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025303 .nr(8)
25304 .kr(1)
25305 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025306 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025307 .n(8)
25308 .k(8)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025309 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025310 }
25311
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025312 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cn) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025313 TEST_REQUIRES_ARM_NEON;
25314 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025315 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025316 .nr(8)
25317 .kr(1)
25318 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025319 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025320 .n(8)
25321 .k(8)
25322 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025323 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025324 }
25325
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025326 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025327 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025328 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025329 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025330 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025331 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025332 .nr(8)
25333 .kr(1)
25334 .sr(1)
25335 .m(m)
25336 .n(n)
25337 .k(8)
25338 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025339 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025340 }
25341 }
25342 }
25343
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025344 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025345 TEST_REQUIRES_ARM_NEON;
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025346 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025347 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025348 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025349 .nr(8)
25350 .kr(1)
25351 .sr(1)
25352 .m(m)
25353 .n(8)
25354 .k(8)
25355 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025356 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025357 }
25358 }
25359
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025360 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025361 TEST_REQUIRES_ARM_NEON;
25362 for (uint32_t n = 1; n <= 8; n++) {
25363 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025364 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025365 .nr(8)
25366 .kr(1)
25367 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025368 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025369 .n(n)
25370 .k(8)
25371 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025372 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025373 }
25374 }
25375
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025376 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025377 TEST_REQUIRES_ARM_NEON;
25378 for (size_t k = 1; k < 8; k++) {
25379 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025380 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025381 .nr(8)
25382 .kr(1)
25383 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025384 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025385 .n(8)
25386 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025387 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025388 }
25389 }
25390
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025391 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025392 TEST_REQUIRES_ARM_NEON;
25393 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025394 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025395 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025396 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025397 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025398 .nr(8)
25399 .kr(1)
25400 .sr(1)
25401 .m(m)
25402 .n(n)
25403 .k(k)
25404 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025405 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025406 }
25407 }
25408 }
25409 }
25410
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025411 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025412 TEST_REQUIRES_ARM_NEON;
25413 for (size_t k = 9; k < 16; k++) {
25414 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025415 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025416 .nr(8)
25417 .kr(1)
25418 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025419 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025420 .n(8)
25421 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025422 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025423 }
25424 }
25425
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025426 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025427 TEST_REQUIRES_ARM_NEON;
25428 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025429 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025430 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025431 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025432 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025433 .nr(8)
25434 .kr(1)
25435 .sr(1)
25436 .m(m)
25437 .n(n)
25438 .k(k)
25439 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025440 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025441 }
25442 }
25443 }
25444 }
25445
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025446 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025447 TEST_REQUIRES_ARM_NEON;
25448 for (size_t k = 16; k <= 80; k += 8) {
25449 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025450 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025451 .nr(8)
25452 .kr(1)
25453 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025454 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025455 .n(8)
25456 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025457 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025458 }
25459 }
25460
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025461 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025462 TEST_REQUIRES_ARM_NEON;
25463 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025464 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025465 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025466 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025467 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025468 .nr(8)
25469 .kr(1)
25470 .sr(1)
25471 .m(m)
25472 .n(n)
25473 .k(k)
25474 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025475 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025476 }
25477 }
25478 }
25479 }
25480
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025481 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025482 TEST_REQUIRES_ARM_NEON;
25483 for (uint32_t n = 9; n < 16; n++) {
25484 for (size_t k = 1; k <= 40; k += 9) {
25485 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025486 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025487 .nr(8)
25488 .kr(1)
25489 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025490 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025491 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025492 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025493 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025494 }
25495 }
25496 }
25497
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025498 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025499 TEST_REQUIRES_ARM_NEON;
25500 for (uint32_t n = 9; n < 16; n++) {
25501 for (size_t k = 1; k <= 40; k += 9) {
25502 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025503 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025504 .nr(8)
25505 .kr(1)
25506 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025507 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025508 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025509 .k(k)
25510 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025511 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025512 }
25513 }
25514 }
25515
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025516 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025517 TEST_REQUIRES_ARM_NEON;
25518 for (uint32_t n = 9; n < 16; n++) {
25519 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025520 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025521 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025522 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025523 .nr(8)
25524 .kr(1)
25525 .sr(1)
25526 .m(m)
25527 .n(n)
25528 .k(k)
25529 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025530 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025531 }
25532 }
25533 }
25534 }
25535
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025536 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025537 TEST_REQUIRES_ARM_NEON;
25538 for (uint32_t n = 16; n <= 24; n += 8) {
25539 for (size_t k = 1; k <= 40; k += 9) {
25540 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025541 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025542 .nr(8)
25543 .kr(1)
25544 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025545 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025546 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025547 .k(k)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025548 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025549 }
25550 }
25551 }
25552
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025553 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025554 TEST_REQUIRES_ARM_NEON;
25555 for (uint32_t n = 16; n <= 24; n += 8) {
25556 for (size_t k = 1; k <= 40; k += 9) {
25557 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025558 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025559 .nr(8)
25560 .kr(1)
25561 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025562 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025563 .n(n)
25564 .k(k)
25565 .cn_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025566 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025567 }
25568 }
25569 }
25570
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025571 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025572 TEST_REQUIRES_ARM_NEON;
25573 for (uint32_t n = 16; n <= 24; n += 8) {
25574 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025575 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025576 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025577 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025578 .nr(8)
25579 .kr(1)
25580 .sr(1)
25581 .m(m)
25582 .n(n)
25583 .k(k)
25584 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025585 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025586 }
25587 }
25588 }
25589 }
25590
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025591 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025592 TEST_REQUIRES_ARM_NEON;
25593 for (size_t k = 1; k <= 40; k += 9) {
25594 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025595 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025596 .nr(8)
25597 .kr(1)
25598 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025599 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025600 .n(8)
25601 .k(k)
25602 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025603 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025604 }
25605 }
25606
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025607 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, small_kernel_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025608 TEST_REQUIRES_ARM_NEON;
25609 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025610 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025611 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025612 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025613 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025614 .nr(8)
25615 .kr(1)
25616 .sr(1)
25617 .m(m)
25618 .n(n)
25619 .k(k)
25620 .ks(3)
25621 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025622 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025623 }
25624 }
25625 }
25626 }
25627
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025628 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_gt_8_small_kernel) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025629 TEST_REQUIRES_ARM_NEON;
25630 for (uint32_t n = 9; n < 16; n++) {
25631 for (size_t k = 1; k <= 40; k += 9) {
25632 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025633 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025634 .nr(8)
25635 .kr(1)
25636 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025637 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025638 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025639 .k(k)
25640 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025641 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025642 }
25643 }
25644 }
25645
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025646 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, n_div_8_small_kernel) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025647 TEST_REQUIRES_ARM_NEON;
25648 for (uint32_t n = 16; n <= 24; n += 8) {
25649 for (size_t k = 1; k <= 40; k += 9) {
25650 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025651 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025652 .nr(8)
25653 .kr(1)
25654 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025655 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080025656 .n(n)
Frank Barchard510b8e02021-07-26 17:25:18 -070025657 .k(k)
25658 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025659 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025660 }
25661 }
25662 }
25663
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025664 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025665 TEST_REQUIRES_ARM_NEON;
25666 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080025667 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025668 for (uint32_t m = 1; m <= 3; m++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025669 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025670 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025671 .nr(8)
25672 .kr(1)
25673 .sr(1)
25674 .m(m)
25675 .n(n)
25676 .k(k)
25677 .cm_stride(11)
25678 .iterations(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025679 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025680 }
25681 }
25682 }
25683 }
25684
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025685 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, a_offset) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025686 TEST_REQUIRES_ARM_NEON;
25687 for (size_t k = 1; k <= 40; k += 9) {
25688 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025689 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025690 .nr(8)
25691 .kr(1)
25692 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025693 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025694 .n(8)
25695 .k(k)
25696 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025697 .a_offset(127)
25698 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025699 }
25700 }
25701
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025702 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, zero) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025703 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080025704 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025705 for (uint32_t mz = 0; mz < 3; mz++) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025706 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025707 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025708 .nr(8)
25709 .kr(1)
25710 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025711 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025712 .n(8)
25713 .k(k)
25714 .ks(3)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025715 .a_offset(127)
Frank Barchard510b8e02021-07-26 17:25:18 -070025716 .zero_index(mz)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025717 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025718 }
25719 }
25720 }
25721
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025722 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmin) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025723 TEST_REQUIRES_ARM_NEON;
25724 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025725 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025726 .nr(8)
25727 .kr(1)
25728 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025729 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025730 .n(8)
25731 .k(8)
25732 .qmin(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025733 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025734 }
25735
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025736 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, qmax) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025737 TEST_REQUIRES_ARM_NEON;
25738 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025739 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025740 .nr(8)
25741 .kr(1)
25742 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025743 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025744 .n(8)
25745 .k(8)
25746 .qmax(128)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025747 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025748 }
25749
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025750 TEST(QS8_IGEMM_MINMAX_RNDNU_3X8__NEON_MULL_ADDW_DUP, strided_cm) {
Frank Barchard510b8e02021-07-26 17:25:18 -070025751 TEST_REQUIRES_ARM_NEON;
25752 GemmMicrokernelTester()
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025753 .mr(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025754 .nr(8)
25755 .kr(1)
25756 .sr(1)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025757 .m(3)
Frank Barchard510b8e02021-07-26 17:25:18 -070025758 .n(8)
25759 .k(8)
25760 .cm_stride(11)
Zhi An Nge96b6bc2022-02-03 10:49:46 -080025761 .Test(xnn_qs8_igemm_minmax_rndnu_ukernel_3x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Frank Barchard510b8e02021-07-26 17:25:18 -070025762 }
25763#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64