blob: 24438a7ed04141d2deb7b3448d55340e16b98def [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qu8-igemm-minmax-rndnu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/common.h>
17#include <xnnpack/isa-checks.h>
18
19#include <xnnpack/gemm.h>
20#include <xnnpack/igemm.h>
21#include <xnnpack/ppmm.h>
22#include "gemm-microkernel-tester.h"
23
24
Frank Barchard88e839c2021-08-11 00:12:31 -070025#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
26 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
27 TEST_REQUIRES_ARM_NEON_DOT;
28 GemmMicrokernelTester()
29 .mr(1)
30 .nr(8)
31 .kr(4)
32 .sr(1)
33 .m(1)
34 .n(8)
35 .k(8)
36 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
37 }
38
39 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
40 TEST_REQUIRES_ARM_NEON_DOT;
41 GemmMicrokernelTester()
42 .mr(1)
43 .nr(8)
44 .kr(4)
45 .sr(1)
46 .m(1)
47 .n(8)
48 .k(8)
49 .cn_stride(11)
50 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
51 }
52
53 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
54 TEST_REQUIRES_ARM_NEON_DOT;
55 for (uint32_t m = 1; m <= 1; m++) {
56 for (uint32_t n = 1; n <= 8; n++) {
57 GemmMicrokernelTester()
58 .mr(1)
59 .nr(8)
60 .kr(4)
61 .sr(1)
62 .m(m)
63 .n(n)
64 .k(8)
65 .iterations(1)
66 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
67 }
68 }
69 }
70
71 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
72 TEST_REQUIRES_ARM_NEON_DOT;
73 for (uint32_t m = 1; m <= 1; m++) {
74 GemmMicrokernelTester()
75 .mr(1)
76 .nr(8)
77 .kr(4)
78 .sr(1)
79 .m(m)
80 .n(8)
81 .k(8)
82 .iterations(1)
83 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
84 }
85 }
86
87 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
88 TEST_REQUIRES_ARM_NEON_DOT;
89 for (uint32_t n = 1; n <= 8; n++) {
90 GemmMicrokernelTester()
91 .mr(1)
92 .nr(8)
93 .kr(4)
94 .sr(1)
95 .m(1)
96 .n(n)
97 .k(8)
98 .iterations(1)
99 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
100 }
101 }
102
103 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
104 TEST_REQUIRES_ARM_NEON_DOT;
105 for (size_t k = 1; k < 8; k++) {
106 GemmMicrokernelTester()
107 .mr(1)
108 .nr(8)
109 .kr(4)
110 .sr(1)
111 .m(1)
112 .n(8)
113 .k(k)
114 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
115 }
116 }
117
118 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
119 TEST_REQUIRES_ARM_NEON_DOT;
120 for (size_t k = 1; k < 8; k++) {
121 for (uint32_t m = 1; m <= 1; m++) {
122 for (uint32_t n = 1; n <= 8; n++) {
123 GemmMicrokernelTester()
124 .mr(1)
125 .nr(8)
126 .kr(4)
127 .sr(1)
128 .m(m)
129 .n(n)
130 .k(k)
131 .iterations(1)
132 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
133 }
134 }
135 }
136 }
137
138 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
139 TEST_REQUIRES_ARM_NEON_DOT;
140 for (size_t k = 9; k < 16; k++) {
141 GemmMicrokernelTester()
142 .mr(1)
143 .nr(8)
144 .kr(4)
145 .sr(1)
146 .m(1)
147 .n(8)
148 .k(k)
149 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
150 }
151 }
152
153 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
154 TEST_REQUIRES_ARM_NEON_DOT;
155 for (size_t k = 9; k < 16; k++) {
156 for (uint32_t m = 1; m <= 1; m++) {
157 for (uint32_t n = 1; n <= 8; n++) {
158 GemmMicrokernelTester()
159 .mr(1)
160 .nr(8)
161 .kr(4)
162 .sr(1)
163 .m(m)
164 .n(n)
165 .k(k)
166 .iterations(1)
167 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
168 }
169 }
170 }
171 }
172
173 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
174 TEST_REQUIRES_ARM_NEON_DOT;
175 for (size_t k = 16; k <= 80; k += 8) {
176 GemmMicrokernelTester()
177 .mr(1)
178 .nr(8)
179 .kr(4)
180 .sr(1)
181 .m(1)
182 .n(8)
183 .k(k)
184 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
185 }
186 }
187
188 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
189 TEST_REQUIRES_ARM_NEON_DOT;
190 for (size_t k = 16; k <= 80; k += 8) {
191 for (uint32_t m = 1; m <= 1; m++) {
192 for (uint32_t n = 1; n <= 8; n++) {
193 GemmMicrokernelTester()
194 .mr(1)
195 .nr(8)
196 .kr(4)
197 .sr(1)
198 .m(m)
199 .n(n)
200 .k(k)
201 .iterations(1)
202 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
203 }
204 }
205 }
206 }
207
208 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
209 TEST_REQUIRES_ARM_NEON_DOT;
210 for (uint32_t n = 9; n < 16; n++) {
211 for (size_t k = 1; k <= 40; k += 9) {
212 GemmMicrokernelTester()
213 .mr(1)
214 .nr(8)
215 .kr(4)
216 .sr(1)
217 .m(1)
218 .n(8)
219 .k(k)
220 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
221 }
222 }
223 }
224
225 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
226 TEST_REQUIRES_ARM_NEON_DOT;
227 for (uint32_t n = 9; n < 16; n++) {
228 for (size_t k = 1; k <= 40; k += 9) {
229 GemmMicrokernelTester()
230 .mr(1)
231 .nr(8)
232 .kr(4)
233 .sr(1)
234 .m(1)
235 .n(8)
236 .k(k)
237 .cn_stride(11)
238 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
239 }
240 }
241 }
242
243 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
244 TEST_REQUIRES_ARM_NEON_DOT;
245 for (uint32_t n = 9; n < 16; n++) {
246 for (size_t k = 1; k <= 40; k += 9) {
247 for (uint32_t m = 1; m <= 1; m++) {
248 GemmMicrokernelTester()
249 .mr(1)
250 .nr(8)
251 .kr(4)
252 .sr(1)
253 .m(m)
254 .n(n)
255 .k(k)
256 .iterations(1)
257 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
258 }
259 }
260 }
261 }
262
263 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
264 TEST_REQUIRES_ARM_NEON_DOT;
265 for (uint32_t n = 16; n <= 24; n += 8) {
266 for (size_t k = 1; k <= 40; k += 9) {
267 GemmMicrokernelTester()
268 .mr(1)
269 .nr(8)
270 .kr(4)
271 .sr(1)
272 .m(1)
273 .n(8)
274 .k(k)
275 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
276 }
277 }
278 }
279
280 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
281 TEST_REQUIRES_ARM_NEON_DOT;
282 for (uint32_t n = 16; n <= 24; n += 8) {
283 for (size_t k = 1; k <= 40; k += 9) {
284 GemmMicrokernelTester()
285 .mr(1)
286 .nr(8)
287 .kr(4)
288 .sr(1)
289 .m(1)
290 .n(n)
291 .k(k)
292 .cn_stride(11)
293 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
294 }
295 }
296 }
297
298 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
299 TEST_REQUIRES_ARM_NEON_DOT;
300 for (uint32_t n = 16; n <= 24; n += 8) {
301 for (size_t k = 1; k <= 40; k += 9) {
302 for (uint32_t m = 1; m <= 1; m++) {
303 GemmMicrokernelTester()
304 .mr(1)
305 .nr(8)
306 .kr(4)
307 .sr(1)
308 .m(m)
309 .n(n)
310 .k(k)
311 .iterations(1)
312 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
313 }
314 }
315 }
316 }
317
318 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel) {
319 TEST_REQUIRES_ARM_NEON_DOT;
320 for (size_t k = 1; k <= 40; k += 9) {
321 GemmMicrokernelTester()
322 .mr(1)
323 .nr(8)
324 .kr(4)
325 .sr(1)
326 .m(1)
327 .n(8)
328 .k(k)
329 .ks(3)
330 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
331 }
332 }
333
334 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, small_kernel_subtile) {
335 TEST_REQUIRES_ARM_NEON_DOT;
336 for (size_t k = 1; k <= 40; k += 9) {
337 for (uint32_t m = 1; m <= 1; m++) {
338 for (uint32_t n = 1; n <= 8; n++) {
339 GemmMicrokernelTester()
340 .mr(1)
341 .nr(8)
342 .kr(4)
343 .sr(1)
344 .m(m)
345 .n(n)
346 .k(k)
347 .ks(3)
348 .iterations(1)
349 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
350 }
351 }
352 }
353 }
354
355 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_small_kernel) {
356 TEST_REQUIRES_ARM_NEON_DOT;
357 for (uint32_t n = 9; n < 16; n++) {
358 for (size_t k = 1; k <= 40; k += 9) {
359 GemmMicrokernelTester()
360 .mr(1)
361 .nr(8)
362 .kr(4)
363 .sr(1)
364 .m(1)
365 .n(8)
366 .k(k)
367 .ks(3)
368 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
369 }
370 }
371 }
372
373 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_small_kernel) {
374 TEST_REQUIRES_ARM_NEON_DOT;
375 for (uint32_t n = 16; n <= 24; n += 8) {
376 for (size_t k = 1; k <= 40; k += 9) {
377 GemmMicrokernelTester()
378 .mr(1)
379 .nr(8)
380 .kr(4)
381 .sr(1)
382 .m(1)
383 .n(8)
384 .k(k)
385 .ks(3)
386 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
387 }
388 }
389 }
390
391 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
392 TEST_REQUIRES_ARM_NEON_DOT;
393 for (size_t k = 1; k <= 40; k += 9) {
394 for (uint32_t m = 1; m <= 1; m++) {
395 for (uint32_t n = 1; n <= 8; n++) {
396 GemmMicrokernelTester()
397 .mr(1)
398 .nr(8)
399 .kr(4)
400 .sr(1)
401 .m(m)
402 .n(n)
403 .k(k)
404 .cm_stride(11)
405 .iterations(1)
406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
407 }
408 }
409 }
410 }
411
412 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, a_offset) {
413 TEST_REQUIRES_ARM_NEON_DOT;
414 for (size_t k = 1; k <= 40; k += 9) {
415 GemmMicrokernelTester()
416 .mr(1)
417 .nr(8)
418 .kr(4)
419 .sr(1)
420 .m(1)
421 .n(8)
422 .k(k)
423 .ks(3)
424 .a_offset(43)
425 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
426 }
427 }
428
429 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, zero) {
430 TEST_REQUIRES_ARM_NEON_DOT;
431 for (uint32_t mz = 0; mz < 1; mz++) {
432 for (size_t k = 1; k <= 40; k += 9) {
433 GemmMicrokernelTester()
434 .mr(1)
435 .nr(8)
436 .kr(4)
437 .sr(1)
438 .m(1)
439 .n(8)
440 .k(k)
441 .ks(3)
442 .a_offset(43)
443 .zero_index(mz)
444 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
445 }
446 }
447 }
448
449 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
450 TEST_REQUIRES_ARM_NEON_DOT;
451 GemmMicrokernelTester()
452 .mr(1)
453 .nr(8)
454 .kr(4)
455 .sr(1)
456 .m(1)
457 .n(8)
458 .k(8)
459 .qmin(128)
460 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
461 }
462
463 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
464 TEST_REQUIRES_ARM_NEON_DOT;
465 GemmMicrokernelTester()
466 .mr(1)
467 .nr(8)
468 .kr(4)
469 .sr(1)
470 .m(1)
471 .n(8)
472 .k(8)
473 .qmax(128)
474 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
475 }
476
477 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
478 TEST_REQUIRES_ARM_NEON_DOT;
479 GemmMicrokernelTester()
480 .mr(1)
481 .nr(8)
482 .kr(4)
483 .sr(1)
484 .m(1)
485 .n(8)
486 .k(8)
487 .cm_stride(11)
488 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
489 }
490
491 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_a_zero_point) {
492 TEST_REQUIRES_ARM_NEON_DOT;
493 for (size_t k = 1; k <= 40; k += 9) {
494 GemmMicrokernelTester()
495 .mr(1)
496 .nr(8)
497 .kr(4)
498 .sr(1)
499 .m(1)
500 .n(8)
501 .k(k)
502 .a_zero_point(0)
503 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
504 }
505 }
506
507 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_b_zero_point) {
508 TEST_REQUIRES_ARM_NEON_DOT;
509 for (size_t k = 1; k <= 40; k += 9) {
510 GemmMicrokernelTester()
511 .mr(1)
512 .nr(8)
513 .kr(4)
514 .sr(1)
515 .m(1)
516 .n(8)
517 .k(k)
518 .b_zero_point(0)
519 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
520 }
521 }
522
523 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_zero_point) {
524 TEST_REQUIRES_ARM_NEON_DOT;
525 for (size_t k = 1; k <= 40; k += 9) {
526 GemmMicrokernelTester()
527 .mr(1)
528 .nr(8)
529 .kr(4)
530 .sr(1)
531 .m(1)
532 .n(8)
533 .k(k)
534 .a_zero_point(0)
535 .b_zero_point(0)
536 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
537 }
538 }
539#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
540
541
542#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
543 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8) {
544 TEST_REQUIRES_ARM_NEON_DOT;
545 GemmMicrokernelTester()
546 .mr(4)
547 .nr(8)
548 .kr(4)
549 .sr(1)
550 .m(4)
551 .n(8)
552 .k(8)
553 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
554 }
555
556 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cn) {
557 TEST_REQUIRES_ARM_NEON_DOT;
558 GemmMicrokernelTester()
559 .mr(4)
560 .nr(8)
561 .kr(4)
562 .sr(1)
563 .m(4)
564 .n(8)
565 .k(8)
566 .cn_stride(11)
567 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
568 }
569
570 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile) {
571 TEST_REQUIRES_ARM_NEON_DOT;
572 for (uint32_t m = 1; m <= 4; m++) {
573 for (uint32_t n = 1; n <= 8; n++) {
574 GemmMicrokernelTester()
575 .mr(4)
576 .nr(8)
577 .kr(4)
578 .sr(1)
579 .m(m)
580 .n(n)
581 .k(8)
582 .iterations(1)
583 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
584 }
585 }
586 }
587
588 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_m) {
589 TEST_REQUIRES_ARM_NEON_DOT;
590 for (uint32_t m = 1; m <= 4; m++) {
591 GemmMicrokernelTester()
592 .mr(4)
593 .nr(8)
594 .kr(4)
595 .sr(1)
596 .m(m)
597 .n(8)
598 .k(8)
599 .iterations(1)
600 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
601 }
602 }
603
604 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_n) {
605 TEST_REQUIRES_ARM_NEON_DOT;
606 for (uint32_t n = 1; n <= 8; n++) {
607 GemmMicrokernelTester()
608 .mr(4)
609 .nr(8)
610 .kr(4)
611 .sr(1)
612 .m(4)
613 .n(n)
614 .k(8)
615 .iterations(1)
616 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
617 }
618 }
619
620 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8) {
621 TEST_REQUIRES_ARM_NEON_DOT;
622 for (size_t k = 1; k < 8; k++) {
623 GemmMicrokernelTester()
624 .mr(4)
625 .nr(8)
626 .kr(4)
627 .sr(1)
628 .m(4)
629 .n(8)
630 .k(k)
631 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
632 }
633 }
634
635 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8_subtile) {
636 TEST_REQUIRES_ARM_NEON_DOT;
637 for (size_t k = 1; k < 8; k++) {
638 for (uint32_t m = 1; m <= 4; m++) {
639 for (uint32_t n = 1; n <= 8; n++) {
640 GemmMicrokernelTester()
641 .mr(4)
642 .nr(8)
643 .kr(4)
644 .sr(1)
645 .m(m)
646 .n(n)
647 .k(k)
648 .iterations(1)
649 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
650 }
651 }
652 }
653 }
654
655 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8) {
656 TEST_REQUIRES_ARM_NEON_DOT;
657 for (size_t k = 9; k < 16; k++) {
658 GemmMicrokernelTester()
659 .mr(4)
660 .nr(8)
661 .kr(4)
662 .sr(1)
663 .m(4)
664 .n(8)
665 .k(k)
666 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
667 }
668 }
669
670 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8_subtile) {
671 TEST_REQUIRES_ARM_NEON_DOT;
672 for (size_t k = 9; k < 16; k++) {
673 for (uint32_t m = 1; m <= 4; m++) {
674 for (uint32_t n = 1; n <= 8; n++) {
675 GemmMicrokernelTester()
676 .mr(4)
677 .nr(8)
678 .kr(4)
679 .sr(1)
680 .m(m)
681 .n(n)
682 .k(k)
683 .iterations(1)
684 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
685 }
686 }
687 }
688 }
689
690 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8) {
691 TEST_REQUIRES_ARM_NEON_DOT;
692 for (size_t k = 16; k <= 80; k += 8) {
693 GemmMicrokernelTester()
694 .mr(4)
695 .nr(8)
696 .kr(4)
697 .sr(1)
698 .m(4)
699 .n(8)
700 .k(k)
701 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
702 }
703 }
704
705 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8_subtile) {
706 TEST_REQUIRES_ARM_NEON_DOT;
707 for (size_t k = 16; k <= 80; k += 8) {
708 for (uint32_t m = 1; m <= 4; m++) {
709 for (uint32_t n = 1; n <= 8; n++) {
710 GemmMicrokernelTester()
711 .mr(4)
712 .nr(8)
713 .kr(4)
714 .sr(1)
715 .m(m)
716 .n(n)
717 .k(k)
718 .iterations(1)
719 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
720 }
721 }
722 }
723 }
724
725 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8) {
726 TEST_REQUIRES_ARM_NEON_DOT;
727 for (uint32_t n = 9; n < 16; n++) {
728 for (size_t k = 1; k <= 40; k += 9) {
729 GemmMicrokernelTester()
730 .mr(4)
731 .nr(8)
732 .kr(4)
733 .sr(1)
734 .m(4)
735 .n(8)
736 .k(k)
737 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
738 }
739 }
740 }
741
742 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_strided_cn) {
743 TEST_REQUIRES_ARM_NEON_DOT;
744 for (uint32_t n = 9; n < 16; n++) {
745 for (size_t k = 1; k <= 40; k += 9) {
746 GemmMicrokernelTester()
747 .mr(4)
748 .nr(8)
749 .kr(4)
750 .sr(1)
751 .m(4)
752 .n(8)
753 .k(k)
754 .cn_stride(11)
755 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
756 }
757 }
758 }
759
760 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_subtile) {
761 TEST_REQUIRES_ARM_NEON_DOT;
762 for (uint32_t n = 9; n < 16; n++) {
763 for (size_t k = 1; k <= 40; k += 9) {
764 for (uint32_t m = 1; m <= 4; m++) {
765 GemmMicrokernelTester()
766 .mr(4)
767 .nr(8)
768 .kr(4)
769 .sr(1)
770 .m(m)
771 .n(n)
772 .k(k)
773 .iterations(1)
774 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
775 }
776 }
777 }
778 }
779
780 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8) {
781 TEST_REQUIRES_ARM_NEON_DOT;
782 for (uint32_t n = 16; n <= 24; n += 8) {
783 for (size_t k = 1; k <= 40; k += 9) {
784 GemmMicrokernelTester()
785 .mr(4)
786 .nr(8)
787 .kr(4)
788 .sr(1)
789 .m(4)
790 .n(8)
791 .k(k)
792 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
793 }
794 }
795 }
796
797 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_strided_cn) {
798 TEST_REQUIRES_ARM_NEON_DOT;
799 for (uint32_t n = 16; n <= 24; n += 8) {
800 for (size_t k = 1; k <= 40; k += 9) {
801 GemmMicrokernelTester()
802 .mr(4)
803 .nr(8)
804 .kr(4)
805 .sr(1)
806 .m(4)
807 .n(n)
808 .k(k)
809 .cn_stride(11)
810 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
811 }
812 }
813 }
814
815 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_subtile) {
816 TEST_REQUIRES_ARM_NEON_DOT;
817 for (uint32_t n = 16; n <= 24; n += 8) {
818 for (size_t k = 1; k <= 40; k += 9) {
819 for (uint32_t m = 1; m <= 4; m++) {
820 GemmMicrokernelTester()
821 .mr(4)
822 .nr(8)
823 .kr(4)
824 .sr(1)
825 .m(m)
826 .n(n)
827 .k(k)
828 .iterations(1)
829 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
830 }
831 }
832 }
833 }
834
835 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel) {
836 TEST_REQUIRES_ARM_NEON_DOT;
837 for (size_t k = 1; k <= 40; k += 9) {
838 GemmMicrokernelTester()
839 .mr(4)
840 .nr(8)
841 .kr(4)
842 .sr(1)
843 .m(4)
844 .n(8)
845 .k(k)
846 .ks(3)
847 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
848 }
849 }
850
851 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel_subtile) {
852 TEST_REQUIRES_ARM_NEON_DOT;
853 for (size_t k = 1; k <= 40; k += 9) {
854 for (uint32_t m = 1; m <= 4; m++) {
855 for (uint32_t n = 1; n <= 8; n++) {
856 GemmMicrokernelTester()
857 .mr(4)
858 .nr(8)
859 .kr(4)
860 .sr(1)
861 .m(m)
862 .n(n)
863 .k(k)
864 .ks(3)
865 .iterations(1)
866 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
867 }
868 }
869 }
870 }
871
872 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_small_kernel) {
873 TEST_REQUIRES_ARM_NEON_DOT;
874 for (uint32_t n = 9; n < 16; n++) {
875 for (size_t k = 1; k <= 40; k += 9) {
876 GemmMicrokernelTester()
877 .mr(4)
878 .nr(8)
879 .kr(4)
880 .sr(1)
881 .m(4)
882 .n(8)
883 .k(k)
884 .ks(3)
885 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
886 }
887 }
888 }
889
890 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_small_kernel) {
891 TEST_REQUIRES_ARM_NEON_DOT;
892 for (uint32_t n = 16; n <= 24; n += 8) {
893 for (size_t k = 1; k <= 40; k += 9) {
894 GemmMicrokernelTester()
895 .mr(4)
896 .nr(8)
897 .kr(4)
898 .sr(1)
899 .m(4)
900 .n(8)
901 .k(k)
902 .ks(3)
903 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
904 }
905 }
906 }
907
908 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm_subtile) {
909 TEST_REQUIRES_ARM_NEON_DOT;
910 for (size_t k = 1; k <= 40; k += 9) {
911 for (uint32_t m = 1; m <= 4; m++) {
912 for (uint32_t n = 1; n <= 8; n++) {
913 GemmMicrokernelTester()
914 .mr(4)
915 .nr(8)
916 .kr(4)
917 .sr(1)
918 .m(m)
919 .n(n)
920 .k(k)
921 .cm_stride(11)
922 .iterations(1)
923 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
924 }
925 }
926 }
927 }
928
929 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, a_offset) {
930 TEST_REQUIRES_ARM_NEON_DOT;
931 for (size_t k = 1; k <= 40; k += 9) {
932 GemmMicrokernelTester()
933 .mr(4)
934 .nr(8)
935 .kr(4)
936 .sr(1)
937 .m(4)
938 .n(8)
939 .k(k)
940 .ks(3)
941 .a_offset(163)
942 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
943 }
944 }
945
946 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, zero) {
947 TEST_REQUIRES_ARM_NEON_DOT;
948 for (uint32_t mz = 0; mz < 4; mz++) {
949 for (size_t k = 1; k <= 40; k += 9) {
950 GemmMicrokernelTester()
951 .mr(4)
952 .nr(8)
953 .kr(4)
954 .sr(1)
955 .m(4)
956 .n(8)
957 .k(k)
958 .ks(3)
959 .a_offset(163)
960 .zero_index(mz)
961 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
962 }
963 }
964 }
965
966 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmin) {
967 TEST_REQUIRES_ARM_NEON_DOT;
968 GemmMicrokernelTester()
969 .mr(4)
970 .nr(8)
971 .kr(4)
972 .sr(1)
973 .m(4)
974 .n(8)
975 .k(8)
976 .qmin(128)
977 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
978 }
979
980 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmax) {
981 TEST_REQUIRES_ARM_NEON_DOT;
982 GemmMicrokernelTester()
983 .mr(4)
984 .nr(8)
985 .kr(4)
986 .sr(1)
987 .m(4)
988 .n(8)
989 .k(8)
990 .qmax(128)
991 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
992 }
993
994 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm) {
995 TEST_REQUIRES_ARM_NEON_DOT;
996 GemmMicrokernelTester()
997 .mr(4)
998 .nr(8)
999 .kr(4)
1000 .sr(1)
1001 .m(4)
1002 .n(8)
1003 .k(8)
1004 .cm_stride(11)
1005 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1006 }
1007
1008 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_a_zero_point) {
1009 TEST_REQUIRES_ARM_NEON_DOT;
1010 for (size_t k = 1; k <= 40; k += 9) {
1011 GemmMicrokernelTester()
1012 .mr(4)
1013 .nr(8)
1014 .kr(4)
1015 .sr(1)
1016 .m(4)
1017 .n(8)
1018 .k(k)
1019 .a_zero_point(0)
1020 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1021 }
1022 }
1023
1024 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_b_zero_point) {
1025 TEST_REQUIRES_ARM_NEON_DOT;
1026 for (size_t k = 1; k <= 40; k += 9) {
1027 GemmMicrokernelTester()
1028 .mr(4)
1029 .nr(8)
1030 .kr(4)
1031 .sr(1)
1032 .m(4)
1033 .n(8)
1034 .k(k)
1035 .b_zero_point(0)
1036 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1037 }
1038 }
1039
1040 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_zero_point) {
1041 TEST_REQUIRES_ARM_NEON_DOT;
1042 for (size_t k = 1; k <= 40; k += 9) {
1043 GemmMicrokernelTester()
1044 .mr(4)
1045 .nr(8)
1046 .kr(4)
1047 .sr(1)
1048 .m(4)
1049 .n(8)
1050 .k(k)
1051 .a_zero_point(0)
1052 .b_zero_point(0)
1053 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1054 }
1055 }
1056#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
1057
1058
1059#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
1060 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) {
1061 TEST_REQUIRES_ARM_NEON_DOT;
1062 GemmMicrokernelTester()
1063 .mr(6)
1064 .nr(8)
1065 .kr(4)
1066 .sr(1)
1067 .m(6)
1068 .n(8)
1069 .k(8)
1070 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1071 }
1072
1073 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) {
1074 TEST_REQUIRES_ARM_NEON_DOT;
1075 GemmMicrokernelTester()
1076 .mr(6)
1077 .nr(8)
1078 .kr(4)
1079 .sr(1)
1080 .m(6)
1081 .n(8)
1082 .k(8)
1083 .cn_stride(11)
1084 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1085 }
1086
1087 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) {
1088 TEST_REQUIRES_ARM_NEON_DOT;
1089 for (uint32_t m = 1; m <= 6; m++) {
1090 for (uint32_t n = 1; n <= 8; n++) {
1091 GemmMicrokernelTester()
1092 .mr(6)
1093 .nr(8)
1094 .kr(4)
1095 .sr(1)
1096 .m(m)
1097 .n(n)
1098 .k(8)
1099 .iterations(1)
1100 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1101 }
1102 }
1103 }
1104
1105 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) {
1106 TEST_REQUIRES_ARM_NEON_DOT;
1107 for (uint32_t m = 1; m <= 6; m++) {
1108 GemmMicrokernelTester()
1109 .mr(6)
1110 .nr(8)
1111 .kr(4)
1112 .sr(1)
1113 .m(m)
1114 .n(8)
1115 .k(8)
1116 .iterations(1)
1117 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1118 }
1119 }
1120
1121 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) {
1122 TEST_REQUIRES_ARM_NEON_DOT;
1123 for (uint32_t n = 1; n <= 8; n++) {
1124 GemmMicrokernelTester()
1125 .mr(6)
1126 .nr(8)
1127 .kr(4)
1128 .sr(1)
1129 .m(6)
1130 .n(n)
1131 .k(8)
1132 .iterations(1)
1133 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1134 }
1135 }
1136
1137 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) {
1138 TEST_REQUIRES_ARM_NEON_DOT;
1139 for (size_t k = 1; k < 8; k++) {
1140 GemmMicrokernelTester()
1141 .mr(6)
1142 .nr(8)
1143 .kr(4)
1144 .sr(1)
1145 .m(6)
1146 .n(8)
1147 .k(k)
1148 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1149 }
1150 }
1151
1152 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) {
1153 TEST_REQUIRES_ARM_NEON_DOT;
1154 for (size_t k = 1; k < 8; k++) {
1155 for (uint32_t m = 1; m <= 6; m++) {
1156 for (uint32_t n = 1; n <= 8; n++) {
1157 GemmMicrokernelTester()
1158 .mr(6)
1159 .nr(8)
1160 .kr(4)
1161 .sr(1)
1162 .m(m)
1163 .n(n)
1164 .k(k)
1165 .iterations(1)
1166 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1167 }
1168 }
1169 }
1170 }
1171
1172 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) {
1173 TEST_REQUIRES_ARM_NEON_DOT;
1174 for (size_t k = 9; k < 16; k++) {
1175 GemmMicrokernelTester()
1176 .mr(6)
1177 .nr(8)
1178 .kr(4)
1179 .sr(1)
1180 .m(6)
1181 .n(8)
1182 .k(k)
1183 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1184 }
1185 }
1186
1187 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) {
1188 TEST_REQUIRES_ARM_NEON_DOT;
1189 for (size_t k = 9; k < 16; k++) {
1190 for (uint32_t m = 1; m <= 6; m++) {
1191 for (uint32_t n = 1; n <= 8; n++) {
1192 GemmMicrokernelTester()
1193 .mr(6)
1194 .nr(8)
1195 .kr(4)
1196 .sr(1)
1197 .m(m)
1198 .n(n)
1199 .k(k)
1200 .iterations(1)
1201 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1202 }
1203 }
1204 }
1205 }
1206
1207 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) {
1208 TEST_REQUIRES_ARM_NEON_DOT;
1209 for (size_t k = 16; k <= 80; k += 8) {
1210 GemmMicrokernelTester()
1211 .mr(6)
1212 .nr(8)
1213 .kr(4)
1214 .sr(1)
1215 .m(6)
1216 .n(8)
1217 .k(k)
1218 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1219 }
1220 }
1221
1222 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) {
1223 TEST_REQUIRES_ARM_NEON_DOT;
1224 for (size_t k = 16; k <= 80; k += 8) {
1225 for (uint32_t m = 1; m <= 6; m++) {
1226 for (uint32_t n = 1; n <= 8; n++) {
1227 GemmMicrokernelTester()
1228 .mr(6)
1229 .nr(8)
1230 .kr(4)
1231 .sr(1)
1232 .m(m)
1233 .n(n)
1234 .k(k)
1235 .iterations(1)
1236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1237 }
1238 }
1239 }
1240 }
1241
1242 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) {
1243 TEST_REQUIRES_ARM_NEON_DOT;
1244 for (uint32_t n = 9; n < 16; n++) {
1245 for (size_t k = 1; k <= 40; k += 9) {
1246 GemmMicrokernelTester()
1247 .mr(6)
1248 .nr(8)
1249 .kr(4)
1250 .sr(1)
1251 .m(6)
1252 .n(8)
1253 .k(k)
1254 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1255 }
1256 }
1257 }
1258
1259 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) {
1260 TEST_REQUIRES_ARM_NEON_DOT;
1261 for (uint32_t n = 9; n < 16; n++) {
1262 for (size_t k = 1; k <= 40; k += 9) {
1263 GemmMicrokernelTester()
1264 .mr(6)
1265 .nr(8)
1266 .kr(4)
1267 .sr(1)
1268 .m(6)
1269 .n(8)
1270 .k(k)
1271 .cn_stride(11)
1272 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1273 }
1274 }
1275 }
1276
1277 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) {
1278 TEST_REQUIRES_ARM_NEON_DOT;
1279 for (uint32_t n = 9; n < 16; n++) {
1280 for (size_t k = 1; k <= 40; k += 9) {
1281 for (uint32_t m = 1; m <= 6; m++) {
1282 GemmMicrokernelTester()
1283 .mr(6)
1284 .nr(8)
1285 .kr(4)
1286 .sr(1)
1287 .m(m)
1288 .n(n)
1289 .k(k)
1290 .iterations(1)
1291 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1292 }
1293 }
1294 }
1295 }
1296
1297 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) {
1298 TEST_REQUIRES_ARM_NEON_DOT;
1299 for (uint32_t n = 16; n <= 24; n += 8) {
1300 for (size_t k = 1; k <= 40; k += 9) {
1301 GemmMicrokernelTester()
1302 .mr(6)
1303 .nr(8)
1304 .kr(4)
1305 .sr(1)
1306 .m(6)
1307 .n(8)
1308 .k(k)
1309 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1310 }
1311 }
1312 }
1313
1314 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
1315 TEST_REQUIRES_ARM_NEON_DOT;
1316 for (uint32_t n = 16; n <= 24; n += 8) {
1317 for (size_t k = 1; k <= 40; k += 9) {
1318 GemmMicrokernelTester()
1319 .mr(6)
1320 .nr(8)
1321 .kr(4)
1322 .sr(1)
1323 .m(6)
1324 .n(n)
1325 .k(k)
1326 .cn_stride(11)
1327 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1328 }
1329 }
1330 }
1331
1332 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
1333 TEST_REQUIRES_ARM_NEON_DOT;
1334 for (uint32_t n = 16; n <= 24; n += 8) {
1335 for (size_t k = 1; k <= 40; k += 9) {
1336 for (uint32_t m = 1; m <= 6; m++) {
1337 GemmMicrokernelTester()
1338 .mr(6)
1339 .nr(8)
1340 .kr(4)
1341 .sr(1)
1342 .m(m)
1343 .n(n)
1344 .k(k)
1345 .iterations(1)
1346 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1347 }
1348 }
1349 }
1350 }
1351
1352 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel) {
1353 TEST_REQUIRES_ARM_NEON_DOT;
1354 for (size_t k = 1; k <= 40; k += 9) {
1355 GemmMicrokernelTester()
1356 .mr(6)
1357 .nr(8)
1358 .kr(4)
1359 .sr(1)
1360 .m(6)
1361 .n(8)
1362 .k(k)
1363 .ks(3)
1364 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1365 }
1366 }
1367
1368 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, small_kernel_subtile) {
1369 TEST_REQUIRES_ARM_NEON_DOT;
1370 for (size_t k = 1; k <= 40; k += 9) {
1371 for (uint32_t m = 1; m <= 6; m++) {
1372 for (uint32_t n = 1; n <= 8; n++) {
1373 GemmMicrokernelTester()
1374 .mr(6)
1375 .nr(8)
1376 .kr(4)
1377 .sr(1)
1378 .m(m)
1379 .n(n)
1380 .k(k)
1381 .ks(3)
1382 .iterations(1)
1383 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1384 }
1385 }
1386 }
1387 }
1388
1389 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_small_kernel) {
1390 TEST_REQUIRES_ARM_NEON_DOT;
1391 for (uint32_t n = 9; n < 16; n++) {
1392 for (size_t k = 1; k <= 40; k += 9) {
1393 GemmMicrokernelTester()
1394 .mr(6)
1395 .nr(8)
1396 .kr(4)
1397 .sr(1)
1398 .m(6)
1399 .n(8)
1400 .k(k)
1401 .ks(3)
1402 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1403 }
1404 }
1405 }
1406
1407 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_small_kernel) {
1408 TEST_REQUIRES_ARM_NEON_DOT;
1409 for (uint32_t n = 16; n <= 24; n += 8) {
1410 for (size_t k = 1; k <= 40; k += 9) {
1411 GemmMicrokernelTester()
1412 .mr(6)
1413 .nr(8)
1414 .kr(4)
1415 .sr(1)
1416 .m(6)
1417 .n(8)
1418 .k(k)
1419 .ks(3)
1420 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1421 }
1422 }
1423 }
1424
1425 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
1426 TEST_REQUIRES_ARM_NEON_DOT;
1427 for (size_t k = 1; k <= 40; k += 9) {
1428 for (uint32_t m = 1; m <= 6; m++) {
1429 for (uint32_t n = 1; n <= 8; n++) {
1430 GemmMicrokernelTester()
1431 .mr(6)
1432 .nr(8)
1433 .kr(4)
1434 .sr(1)
1435 .m(m)
1436 .n(n)
1437 .k(k)
1438 .cm_stride(11)
1439 .iterations(1)
1440 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1441 }
1442 }
1443 }
1444 }
1445
1446 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, a_offset) {
1447 TEST_REQUIRES_ARM_NEON_DOT;
1448 for (size_t k = 1; k <= 40; k += 9) {
1449 GemmMicrokernelTester()
1450 .mr(6)
1451 .nr(8)
1452 .kr(4)
1453 .sr(1)
1454 .m(6)
1455 .n(8)
1456 .k(k)
1457 .ks(3)
1458 .a_offset(251)
1459 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1460 }
1461 }
1462
1463 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, zero) {
1464 TEST_REQUIRES_ARM_NEON_DOT;
1465 for (uint32_t mz = 0; mz < 6; mz++) {
1466 for (size_t k = 1; k <= 40; k += 9) {
1467 GemmMicrokernelTester()
1468 .mr(6)
1469 .nr(8)
1470 .kr(4)
1471 .sr(1)
1472 .m(6)
1473 .n(8)
1474 .k(k)
1475 .ks(3)
1476 .a_offset(251)
1477 .zero_index(mz)
1478 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1479 }
1480 }
1481 }
1482
1483 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
1484 TEST_REQUIRES_ARM_NEON_DOT;
1485 GemmMicrokernelTester()
1486 .mr(6)
1487 .nr(8)
1488 .kr(4)
1489 .sr(1)
1490 .m(6)
1491 .n(8)
1492 .k(8)
1493 .qmin(128)
1494 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1495 }
1496
1497 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
1498 TEST_REQUIRES_ARM_NEON_DOT;
1499 GemmMicrokernelTester()
1500 .mr(6)
1501 .nr(8)
1502 .kr(4)
1503 .sr(1)
1504 .m(6)
1505 .n(8)
1506 .k(8)
1507 .qmax(128)
1508 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1509 }
1510
1511 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) {
1512 TEST_REQUIRES_ARM_NEON_DOT;
1513 GemmMicrokernelTester()
1514 .mr(6)
1515 .nr(8)
1516 .kr(4)
1517 .sr(1)
1518 .m(6)
1519 .n(8)
1520 .k(8)
1521 .cm_stride(11)
1522 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1523 }
1524
1525 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_a_zero_point) {
1526 TEST_REQUIRES_ARM_NEON_DOT;
1527 for (size_t k = 1; k <= 40; k += 9) {
1528 GemmMicrokernelTester()
1529 .mr(6)
1530 .nr(8)
1531 .kr(4)
1532 .sr(1)
1533 .m(6)
1534 .n(8)
1535 .k(k)
1536 .a_zero_point(0)
1537 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1538 }
1539 }
1540
1541 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_b_zero_point) {
1542 TEST_REQUIRES_ARM_NEON_DOT;
1543 for (size_t k = 1; k <= 40; k += 9) {
1544 GemmMicrokernelTester()
1545 .mr(6)
1546 .nr(8)
1547 .kr(4)
1548 .sr(1)
1549 .m(6)
1550 .n(8)
1551 .k(k)
1552 .b_zero_point(0)
1553 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1554 }
1555 }
1556
1557 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_zero_point) {
1558 TEST_REQUIRES_ARM_NEON_DOT;
1559 for (size_t k = 1; k <= 40; k += 9) {
1560 GemmMicrokernelTester()
1561 .mr(6)
1562 .nr(8)
1563 .kr(4)
1564 .sr(1)
1565 .m(6)
1566 .n(8)
1567 .k(k)
1568 .a_zero_point(0)
1569 .b_zero_point(0)
1570 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1571 }
1572 }
1573#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
1574
1575
1576#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
1577 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8) {
1578 TEST_REQUIRES_ARM_NEON_DOT;
1579 GemmMicrokernelTester()
1580 .mr(8)
1581 .nr(8)
1582 .kr(4)
1583 .sr(1)
1584 .m(8)
1585 .n(8)
1586 .k(8)
1587 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1588 }
1589
1590 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cn) {
1591 TEST_REQUIRES_ARM_NEON_DOT;
1592 GemmMicrokernelTester()
1593 .mr(8)
1594 .nr(8)
1595 .kr(4)
1596 .sr(1)
1597 .m(8)
1598 .n(8)
1599 .k(8)
1600 .cn_stride(11)
1601 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1602 }
1603
1604 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile) {
1605 TEST_REQUIRES_ARM_NEON_DOT;
1606 for (uint32_t m = 1; m <= 8; m++) {
1607 for (uint32_t n = 1; n <= 8; n++) {
1608 GemmMicrokernelTester()
1609 .mr(8)
1610 .nr(8)
1611 .kr(4)
1612 .sr(1)
1613 .m(m)
1614 .n(n)
1615 .k(8)
1616 .iterations(1)
1617 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1618 }
1619 }
1620 }
1621
1622 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_m) {
1623 TEST_REQUIRES_ARM_NEON_DOT;
1624 for (uint32_t m = 1; m <= 8; m++) {
1625 GemmMicrokernelTester()
1626 .mr(8)
1627 .nr(8)
1628 .kr(4)
1629 .sr(1)
1630 .m(m)
1631 .n(8)
1632 .k(8)
1633 .iterations(1)
1634 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1635 }
1636 }
1637
1638 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_n) {
1639 TEST_REQUIRES_ARM_NEON_DOT;
1640 for (uint32_t n = 1; n <= 8; n++) {
1641 GemmMicrokernelTester()
1642 .mr(8)
1643 .nr(8)
1644 .kr(4)
1645 .sr(1)
1646 .m(8)
1647 .n(n)
1648 .k(8)
1649 .iterations(1)
1650 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1651 }
1652 }
1653
1654 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8) {
1655 TEST_REQUIRES_ARM_NEON_DOT;
1656 for (size_t k = 1; k < 8; k++) {
1657 GemmMicrokernelTester()
1658 .mr(8)
1659 .nr(8)
1660 .kr(4)
1661 .sr(1)
1662 .m(8)
1663 .n(8)
1664 .k(k)
1665 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1666 }
1667 }
1668
1669 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_subtile) {
1670 TEST_REQUIRES_ARM_NEON_DOT;
1671 for (size_t k = 1; k < 8; k++) {
1672 for (uint32_t m = 1; m <= 8; m++) {
1673 for (uint32_t n = 1; n <= 8; n++) {
1674 GemmMicrokernelTester()
1675 .mr(8)
1676 .nr(8)
1677 .kr(4)
1678 .sr(1)
1679 .m(m)
1680 .n(n)
1681 .k(k)
1682 .iterations(1)
1683 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1684 }
1685 }
1686 }
1687 }
1688
1689 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8) {
1690 TEST_REQUIRES_ARM_NEON_DOT;
1691 for (size_t k = 9; k < 16; k++) {
1692 GemmMicrokernelTester()
1693 .mr(8)
1694 .nr(8)
1695 .kr(4)
1696 .sr(1)
1697 .m(8)
1698 .n(8)
1699 .k(k)
1700 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1701 }
1702 }
1703
1704 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_subtile) {
1705 TEST_REQUIRES_ARM_NEON_DOT;
1706 for (size_t k = 9; k < 16; k++) {
1707 for (uint32_t m = 1; m <= 8; m++) {
1708 for (uint32_t n = 1; n <= 8; n++) {
1709 GemmMicrokernelTester()
1710 .mr(8)
1711 .nr(8)
1712 .kr(4)
1713 .sr(1)
1714 .m(m)
1715 .n(n)
1716 .k(k)
1717 .iterations(1)
1718 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1719 }
1720 }
1721 }
1722 }
1723
1724 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8) {
1725 TEST_REQUIRES_ARM_NEON_DOT;
1726 for (size_t k = 16; k <= 80; k += 8) {
1727 GemmMicrokernelTester()
1728 .mr(8)
1729 .nr(8)
1730 .kr(4)
1731 .sr(1)
1732 .m(8)
1733 .n(8)
1734 .k(k)
1735 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1736 }
1737 }
1738
1739 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_subtile) {
1740 TEST_REQUIRES_ARM_NEON_DOT;
1741 for (size_t k = 16; k <= 80; k += 8) {
1742 for (uint32_t m = 1; m <= 8; m++) {
1743 for (uint32_t n = 1; n <= 8; n++) {
1744 GemmMicrokernelTester()
1745 .mr(8)
1746 .nr(8)
1747 .kr(4)
1748 .sr(1)
1749 .m(m)
1750 .n(n)
1751 .k(k)
1752 .iterations(1)
1753 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1754 }
1755 }
1756 }
1757 }
1758
1759 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8) {
1760 TEST_REQUIRES_ARM_NEON_DOT;
1761 for (uint32_t n = 9; n < 16; n++) {
1762 for (size_t k = 1; k <= 40; k += 9) {
1763 GemmMicrokernelTester()
1764 .mr(8)
1765 .nr(8)
1766 .kr(4)
1767 .sr(1)
1768 .m(8)
1769 .n(8)
1770 .k(k)
1771 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1772 }
1773 }
1774 }
1775
1776 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_cn) {
1777 TEST_REQUIRES_ARM_NEON_DOT;
1778 for (uint32_t n = 9; n < 16; n++) {
1779 for (size_t k = 1; k <= 40; k += 9) {
1780 GemmMicrokernelTester()
1781 .mr(8)
1782 .nr(8)
1783 .kr(4)
1784 .sr(1)
1785 .m(8)
1786 .n(8)
1787 .k(k)
1788 .cn_stride(11)
1789 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1790 }
1791 }
1792 }
1793
1794 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_subtile) {
1795 TEST_REQUIRES_ARM_NEON_DOT;
1796 for (uint32_t n = 9; n < 16; n++) {
1797 for (size_t k = 1; k <= 40; k += 9) {
1798 for (uint32_t m = 1; m <= 8; m++) {
1799 GemmMicrokernelTester()
1800 .mr(8)
1801 .nr(8)
1802 .kr(4)
1803 .sr(1)
1804 .m(m)
1805 .n(n)
1806 .k(k)
1807 .iterations(1)
1808 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1809 }
1810 }
1811 }
1812 }
1813
1814 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8) {
1815 TEST_REQUIRES_ARM_NEON_DOT;
1816 for (uint32_t n = 16; n <= 24; n += 8) {
1817 for (size_t k = 1; k <= 40; k += 9) {
1818 GemmMicrokernelTester()
1819 .mr(8)
1820 .nr(8)
1821 .kr(4)
1822 .sr(1)
1823 .m(8)
1824 .n(8)
1825 .k(k)
1826 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1827 }
1828 }
1829 }
1830
1831 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_cn) {
1832 TEST_REQUIRES_ARM_NEON_DOT;
1833 for (uint32_t n = 16; n <= 24; n += 8) {
1834 for (size_t k = 1; k <= 40; k += 9) {
1835 GemmMicrokernelTester()
1836 .mr(8)
1837 .nr(8)
1838 .kr(4)
1839 .sr(1)
1840 .m(8)
1841 .n(n)
1842 .k(k)
1843 .cn_stride(11)
1844 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1845 }
1846 }
1847 }
1848
1849 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_subtile) {
1850 TEST_REQUIRES_ARM_NEON_DOT;
1851 for (uint32_t n = 16; n <= 24; n += 8) {
1852 for (size_t k = 1; k <= 40; k += 9) {
1853 for (uint32_t m = 1; m <= 8; m++) {
1854 GemmMicrokernelTester()
1855 .mr(8)
1856 .nr(8)
1857 .kr(4)
1858 .sr(1)
1859 .m(m)
1860 .n(n)
1861 .k(k)
1862 .iterations(1)
1863 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1864 }
1865 }
1866 }
1867 }
1868
1869 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel) {
1870 TEST_REQUIRES_ARM_NEON_DOT;
1871 for (size_t k = 1; k <= 40; k += 9) {
1872 GemmMicrokernelTester()
1873 .mr(8)
1874 .nr(8)
1875 .kr(4)
1876 .sr(1)
1877 .m(8)
1878 .n(8)
1879 .k(k)
1880 .ks(3)
1881 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1882 }
1883 }
1884
1885 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, small_kernel_subtile) {
1886 TEST_REQUIRES_ARM_NEON_DOT;
1887 for (size_t k = 1; k <= 40; k += 9) {
1888 for (uint32_t m = 1; m <= 8; m++) {
1889 for (uint32_t n = 1; n <= 8; n++) {
1890 GemmMicrokernelTester()
1891 .mr(8)
1892 .nr(8)
1893 .kr(4)
1894 .sr(1)
1895 .m(m)
1896 .n(n)
1897 .k(k)
1898 .ks(3)
1899 .iterations(1)
1900 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1901 }
1902 }
1903 }
1904 }
1905
1906 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_small_kernel) {
1907 TEST_REQUIRES_ARM_NEON_DOT;
1908 for (uint32_t n = 9; n < 16; n++) {
1909 for (size_t k = 1; k <= 40; k += 9) {
1910 GemmMicrokernelTester()
1911 .mr(8)
1912 .nr(8)
1913 .kr(4)
1914 .sr(1)
1915 .m(8)
1916 .n(8)
1917 .k(k)
1918 .ks(3)
1919 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1920 }
1921 }
1922 }
1923
1924 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_small_kernel) {
1925 TEST_REQUIRES_ARM_NEON_DOT;
1926 for (uint32_t n = 16; n <= 24; n += 8) {
1927 for (size_t k = 1; k <= 40; k += 9) {
1928 GemmMicrokernelTester()
1929 .mr(8)
1930 .nr(8)
1931 .kr(4)
1932 .sr(1)
1933 .m(8)
1934 .n(8)
1935 .k(k)
1936 .ks(3)
1937 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1938 }
1939 }
1940 }
1941
1942 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm_subtile) {
1943 TEST_REQUIRES_ARM_NEON_DOT;
1944 for (size_t k = 1; k <= 40; k += 9) {
1945 for (uint32_t m = 1; m <= 8; m++) {
1946 for (uint32_t n = 1; n <= 8; n++) {
1947 GemmMicrokernelTester()
1948 .mr(8)
1949 .nr(8)
1950 .kr(4)
1951 .sr(1)
1952 .m(m)
1953 .n(n)
1954 .k(k)
1955 .cm_stride(11)
1956 .iterations(1)
1957 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1958 }
1959 }
1960 }
1961 }
1962
1963 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, a_offset) {
1964 TEST_REQUIRES_ARM_NEON_DOT;
1965 for (size_t k = 1; k <= 40; k += 9) {
1966 GemmMicrokernelTester()
1967 .mr(8)
1968 .nr(8)
1969 .kr(4)
1970 .sr(1)
1971 .m(8)
1972 .n(8)
1973 .k(k)
1974 .ks(3)
1975 .a_offset(331)
1976 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1977 }
1978 }
1979
1980 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, zero) {
1981 TEST_REQUIRES_ARM_NEON_DOT;
1982 for (uint32_t mz = 0; mz < 8; mz++) {
1983 for (size_t k = 1; k <= 40; k += 9) {
1984 GemmMicrokernelTester()
1985 .mr(8)
1986 .nr(8)
1987 .kr(4)
1988 .sr(1)
1989 .m(8)
1990 .n(8)
1991 .k(k)
1992 .ks(3)
1993 .a_offset(331)
1994 .zero_index(mz)
1995 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
1996 }
1997 }
1998 }
1999
2000 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmin) {
2001 TEST_REQUIRES_ARM_NEON_DOT;
2002 GemmMicrokernelTester()
2003 .mr(8)
2004 .nr(8)
2005 .kr(4)
2006 .sr(1)
2007 .m(8)
2008 .n(8)
2009 .k(8)
2010 .qmin(128)
2011 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2012 }
2013
2014 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmax) {
2015 TEST_REQUIRES_ARM_NEON_DOT;
2016 GemmMicrokernelTester()
2017 .mr(8)
2018 .nr(8)
2019 .kr(4)
2020 .sr(1)
2021 .m(8)
2022 .n(8)
2023 .k(8)
2024 .qmax(128)
2025 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2026 }
2027
2028 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm) {
2029 TEST_REQUIRES_ARM_NEON_DOT;
2030 GemmMicrokernelTester()
2031 .mr(8)
2032 .nr(8)
2033 .kr(4)
2034 .sr(1)
2035 .m(8)
2036 .n(8)
2037 .k(8)
2038 .cm_stride(11)
2039 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2040 }
2041
2042 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_a_zero_point) {
2043 TEST_REQUIRES_ARM_NEON_DOT;
2044 for (size_t k = 1; k <= 40; k += 9) {
2045 GemmMicrokernelTester()
2046 .mr(8)
2047 .nr(8)
2048 .kr(4)
2049 .sr(1)
2050 .m(8)
2051 .n(8)
2052 .k(k)
2053 .a_zero_point(0)
2054 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2055 }
2056 }
2057
2058 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_b_zero_point) {
2059 TEST_REQUIRES_ARM_NEON_DOT;
2060 for (size_t k = 1; k <= 40; k += 9) {
2061 GemmMicrokernelTester()
2062 .mr(8)
2063 .nr(8)
2064 .kr(4)
2065 .sr(1)
2066 .m(8)
2067 .n(8)
2068 .k(k)
2069 .b_zero_point(0)
2070 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2071 }
2072 }
2073
2074 TEST(QU8_IGEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_zero_point) {
2075 TEST_REQUIRES_ARM_NEON_DOT;
2076 for (size_t k = 1; k <= 40; k += 9) {
2077 GemmMicrokernelTester()
2078 .mr(8)
2079 .nr(8)
2080 .kr(4)
2081 .sr(1)
2082 .m(8)
2083 .n(8)
2084 .k(k)
2085 .a_zero_point(0)
2086 .b_zero_point(0)
2087 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2088 }
2089 }
2090#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2091
2092
2093#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2094 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8) {
2095 TEST_REQUIRES_ARM_NEON_DOT;
2096 GemmMicrokernelTester()
2097 .mr(1)
2098 .nr(16)
2099 .kr(4)
2100 .sr(1)
2101 .m(1)
2102 .n(16)
2103 .k(8)
2104 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2105 }
2106
2107 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cn) {
2108 TEST_REQUIRES_ARM_NEON_DOT;
2109 GemmMicrokernelTester()
2110 .mr(1)
2111 .nr(16)
2112 .kr(4)
2113 .sr(1)
2114 .m(1)
2115 .n(16)
2116 .k(8)
2117 .cn_stride(19)
2118 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2119 }
2120
2121 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile) {
2122 TEST_REQUIRES_ARM_NEON_DOT;
2123 for (uint32_t m = 1; m <= 1; m++) {
2124 for (uint32_t n = 1; n <= 16; n++) {
2125 GemmMicrokernelTester()
2126 .mr(1)
2127 .nr(16)
2128 .kr(4)
2129 .sr(1)
2130 .m(m)
2131 .n(n)
2132 .k(8)
2133 .iterations(1)
2134 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2135 }
2136 }
2137 }
2138
2139 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_m) {
2140 TEST_REQUIRES_ARM_NEON_DOT;
2141 for (uint32_t m = 1; m <= 1; m++) {
2142 GemmMicrokernelTester()
2143 .mr(1)
2144 .nr(16)
2145 .kr(4)
2146 .sr(1)
2147 .m(m)
2148 .n(16)
2149 .k(8)
2150 .iterations(1)
2151 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2152 }
2153 }
2154
2155 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_n) {
2156 TEST_REQUIRES_ARM_NEON_DOT;
2157 for (uint32_t n = 1; n <= 16; n++) {
2158 GemmMicrokernelTester()
2159 .mr(1)
2160 .nr(16)
2161 .kr(4)
2162 .sr(1)
2163 .m(1)
2164 .n(n)
2165 .k(8)
2166 .iterations(1)
2167 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2168 }
2169 }
2170
2171 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8) {
2172 TEST_REQUIRES_ARM_NEON_DOT;
2173 for (size_t k = 1; k < 8; k++) {
2174 GemmMicrokernelTester()
2175 .mr(1)
2176 .nr(16)
2177 .kr(4)
2178 .sr(1)
2179 .m(1)
2180 .n(16)
2181 .k(k)
2182 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2183 }
2184 }
2185
2186 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8_subtile) {
2187 TEST_REQUIRES_ARM_NEON_DOT;
2188 for (size_t k = 1; k < 8; k++) {
2189 for (uint32_t m = 1; m <= 1; m++) {
2190 for (uint32_t n = 1; n <= 16; n++) {
2191 GemmMicrokernelTester()
2192 .mr(1)
2193 .nr(16)
2194 .kr(4)
2195 .sr(1)
2196 .m(m)
2197 .n(n)
2198 .k(k)
2199 .iterations(1)
2200 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2201 }
2202 }
2203 }
2204 }
2205
2206 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8) {
2207 TEST_REQUIRES_ARM_NEON_DOT;
2208 for (size_t k = 9; k < 16; k++) {
2209 GemmMicrokernelTester()
2210 .mr(1)
2211 .nr(16)
2212 .kr(4)
2213 .sr(1)
2214 .m(1)
2215 .n(16)
2216 .k(k)
2217 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2218 }
2219 }
2220
2221 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8_subtile) {
2222 TEST_REQUIRES_ARM_NEON_DOT;
2223 for (size_t k = 9; k < 16; k++) {
2224 for (uint32_t m = 1; m <= 1; m++) {
2225 for (uint32_t n = 1; n <= 16; n++) {
2226 GemmMicrokernelTester()
2227 .mr(1)
2228 .nr(16)
2229 .kr(4)
2230 .sr(1)
2231 .m(m)
2232 .n(n)
2233 .k(k)
2234 .iterations(1)
2235 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2236 }
2237 }
2238 }
2239 }
2240
2241 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8) {
2242 TEST_REQUIRES_ARM_NEON_DOT;
2243 for (size_t k = 16; k <= 80; k += 8) {
2244 GemmMicrokernelTester()
2245 .mr(1)
2246 .nr(16)
2247 .kr(4)
2248 .sr(1)
2249 .m(1)
2250 .n(16)
2251 .k(k)
2252 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2253 }
2254 }
2255
2256 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8_subtile) {
2257 TEST_REQUIRES_ARM_NEON_DOT;
2258 for (size_t k = 16; k <= 80; k += 8) {
2259 for (uint32_t m = 1; m <= 1; m++) {
2260 for (uint32_t n = 1; n <= 16; n++) {
2261 GemmMicrokernelTester()
2262 .mr(1)
2263 .nr(16)
2264 .kr(4)
2265 .sr(1)
2266 .m(m)
2267 .n(n)
2268 .k(k)
2269 .iterations(1)
2270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2271 }
2272 }
2273 }
2274 }
2275
2276 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16) {
2277 TEST_REQUIRES_ARM_NEON_DOT;
2278 for (uint32_t n = 17; n < 32; n++) {
2279 for (size_t k = 1; k <= 40; k += 9) {
2280 GemmMicrokernelTester()
2281 .mr(1)
2282 .nr(16)
2283 .kr(4)
2284 .sr(1)
2285 .m(1)
2286 .n(16)
2287 .k(k)
2288 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2289 }
2290 }
2291 }
2292
2293 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_strided_cn) {
2294 TEST_REQUIRES_ARM_NEON_DOT;
2295 for (uint32_t n = 17; n < 32; n++) {
2296 for (size_t k = 1; k <= 40; k += 9) {
2297 GemmMicrokernelTester()
2298 .mr(1)
2299 .nr(16)
2300 .kr(4)
2301 .sr(1)
2302 .m(1)
2303 .n(16)
2304 .k(k)
2305 .cn_stride(19)
2306 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2307 }
2308 }
2309 }
2310
2311 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_subtile) {
2312 TEST_REQUIRES_ARM_NEON_DOT;
2313 for (uint32_t n = 17; n < 32; n++) {
2314 for (size_t k = 1; k <= 40; k += 9) {
2315 for (uint32_t m = 1; m <= 1; m++) {
2316 GemmMicrokernelTester()
2317 .mr(1)
2318 .nr(16)
2319 .kr(4)
2320 .sr(1)
2321 .m(m)
2322 .n(n)
2323 .k(k)
2324 .iterations(1)
2325 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2326 }
2327 }
2328 }
2329 }
2330
2331 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16) {
2332 TEST_REQUIRES_ARM_NEON_DOT;
2333 for (uint32_t n = 32; n <= 48; n += 16) {
2334 for (size_t k = 1; k <= 40; k += 9) {
2335 GemmMicrokernelTester()
2336 .mr(1)
2337 .nr(16)
2338 .kr(4)
2339 .sr(1)
2340 .m(1)
2341 .n(16)
2342 .k(k)
2343 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2344 }
2345 }
2346 }
2347
2348 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_strided_cn) {
2349 TEST_REQUIRES_ARM_NEON_DOT;
2350 for (uint32_t n = 32; n <= 48; n += 16) {
2351 for (size_t k = 1; k <= 40; k += 9) {
2352 GemmMicrokernelTester()
2353 .mr(1)
2354 .nr(16)
2355 .kr(4)
2356 .sr(1)
2357 .m(1)
2358 .n(n)
2359 .k(k)
2360 .cn_stride(19)
2361 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2362 }
2363 }
2364 }
2365
2366 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_subtile) {
2367 TEST_REQUIRES_ARM_NEON_DOT;
2368 for (uint32_t n = 32; n <= 48; n += 16) {
2369 for (size_t k = 1; k <= 40; k += 9) {
2370 for (uint32_t m = 1; m <= 1; m++) {
2371 GemmMicrokernelTester()
2372 .mr(1)
2373 .nr(16)
2374 .kr(4)
2375 .sr(1)
2376 .m(m)
2377 .n(n)
2378 .k(k)
2379 .iterations(1)
2380 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2381 }
2382 }
2383 }
2384 }
2385
2386 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel) {
2387 TEST_REQUIRES_ARM_NEON_DOT;
2388 for (size_t k = 1; k <= 40; k += 9) {
2389 GemmMicrokernelTester()
2390 .mr(1)
2391 .nr(16)
2392 .kr(4)
2393 .sr(1)
2394 .m(1)
2395 .n(16)
2396 .k(k)
2397 .ks(3)
2398 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2399 }
2400 }
2401
2402 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel_subtile) {
2403 TEST_REQUIRES_ARM_NEON_DOT;
2404 for (size_t k = 1; k <= 40; k += 9) {
2405 for (uint32_t m = 1; m <= 1; m++) {
2406 for (uint32_t n = 1; n <= 16; n++) {
2407 GemmMicrokernelTester()
2408 .mr(1)
2409 .nr(16)
2410 .kr(4)
2411 .sr(1)
2412 .m(m)
2413 .n(n)
2414 .k(k)
2415 .ks(3)
2416 .iterations(1)
2417 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2418 }
2419 }
2420 }
2421 }
2422
2423 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_small_kernel) {
2424 TEST_REQUIRES_ARM_NEON_DOT;
2425 for (uint32_t n = 17; n < 32; n++) {
2426 for (size_t k = 1; k <= 40; k += 9) {
2427 GemmMicrokernelTester()
2428 .mr(1)
2429 .nr(16)
2430 .kr(4)
2431 .sr(1)
2432 .m(1)
2433 .n(16)
2434 .k(k)
2435 .ks(3)
2436 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2437 }
2438 }
2439 }
2440
2441 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_small_kernel) {
2442 TEST_REQUIRES_ARM_NEON_DOT;
2443 for (uint32_t n = 32; n <= 48; n += 16) {
2444 for (size_t k = 1; k <= 40; k += 9) {
2445 GemmMicrokernelTester()
2446 .mr(1)
2447 .nr(16)
2448 .kr(4)
2449 .sr(1)
2450 .m(1)
2451 .n(16)
2452 .k(k)
2453 .ks(3)
2454 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2455 }
2456 }
2457 }
2458
2459 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm_subtile) {
2460 TEST_REQUIRES_ARM_NEON_DOT;
2461 for (size_t k = 1; k <= 40; k += 9) {
2462 for (uint32_t m = 1; m <= 1; m++) {
2463 for (uint32_t n = 1; n <= 16; n++) {
2464 GemmMicrokernelTester()
2465 .mr(1)
2466 .nr(16)
2467 .kr(4)
2468 .sr(1)
2469 .m(m)
2470 .n(n)
2471 .k(k)
2472 .cm_stride(19)
2473 .iterations(1)
2474 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2475 }
2476 }
2477 }
2478 }
2479
2480 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, a_offset) {
2481 TEST_REQUIRES_ARM_NEON_DOT;
2482 for (size_t k = 1; k <= 40; k += 9) {
2483 GemmMicrokernelTester()
2484 .mr(1)
2485 .nr(16)
2486 .kr(4)
2487 .sr(1)
2488 .m(1)
2489 .n(16)
2490 .k(k)
2491 .ks(3)
2492 .a_offset(43)
2493 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2494 }
2495 }
2496
2497 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, zero) {
2498 TEST_REQUIRES_ARM_NEON_DOT;
2499 for (uint32_t mz = 0; mz < 1; mz++) {
2500 for (size_t k = 1; k <= 40; k += 9) {
2501 GemmMicrokernelTester()
2502 .mr(1)
2503 .nr(16)
2504 .kr(4)
2505 .sr(1)
2506 .m(1)
2507 .n(16)
2508 .k(k)
2509 .ks(3)
2510 .a_offset(43)
2511 .zero_index(mz)
2512 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2513 }
2514 }
2515 }
2516
2517 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmin) {
2518 TEST_REQUIRES_ARM_NEON_DOT;
2519 GemmMicrokernelTester()
2520 .mr(1)
2521 .nr(16)
2522 .kr(4)
2523 .sr(1)
2524 .m(1)
2525 .n(16)
2526 .k(8)
2527 .qmin(128)
2528 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2529 }
2530
2531 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmax) {
2532 TEST_REQUIRES_ARM_NEON_DOT;
2533 GemmMicrokernelTester()
2534 .mr(1)
2535 .nr(16)
2536 .kr(4)
2537 .sr(1)
2538 .m(1)
2539 .n(16)
2540 .k(8)
2541 .qmax(128)
2542 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2543 }
2544
2545 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm) {
2546 TEST_REQUIRES_ARM_NEON_DOT;
2547 GemmMicrokernelTester()
2548 .mr(1)
2549 .nr(16)
2550 .kr(4)
2551 .sr(1)
2552 .m(1)
2553 .n(16)
2554 .k(8)
2555 .cm_stride(19)
2556 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2557 }
2558
2559 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_a_zero_point) {
2560 TEST_REQUIRES_ARM_NEON_DOT;
2561 for (size_t k = 1; k <= 40; k += 9) {
2562 GemmMicrokernelTester()
2563 .mr(1)
2564 .nr(16)
2565 .kr(4)
2566 .sr(1)
2567 .m(1)
2568 .n(16)
2569 .k(k)
2570 .a_zero_point(0)
2571 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2572 }
2573 }
2574
2575 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_b_zero_point) {
2576 TEST_REQUIRES_ARM_NEON_DOT;
2577 for (size_t k = 1; k <= 40; k += 9) {
2578 GemmMicrokernelTester()
2579 .mr(1)
2580 .nr(16)
2581 .kr(4)
2582 .sr(1)
2583 .m(1)
2584 .n(16)
2585 .k(k)
2586 .b_zero_point(0)
2587 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2588 }
2589 }
2590
2591 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_zero_point) {
2592 TEST_REQUIRES_ARM_NEON_DOT;
2593 for (size_t k = 1; k <= 40; k += 9) {
2594 GemmMicrokernelTester()
2595 .mr(1)
2596 .nr(16)
2597 .kr(4)
2598 .sr(1)
2599 .m(1)
2600 .n(16)
2601 .k(k)
2602 .a_zero_point(0)
2603 .b_zero_point(0)
2604 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2605 }
2606 }
2607#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2608
2609
2610#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
2611 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) {
2612 TEST_REQUIRES_ARM_NEON_DOT;
2613 GemmMicrokernelTester()
2614 .mr(4)
2615 .nr(16)
2616 .kr(4)
2617 .sr(1)
2618 .m(4)
2619 .n(16)
2620 .k(8)
2621 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2622 }
2623
2624 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) {
2625 TEST_REQUIRES_ARM_NEON_DOT;
2626 GemmMicrokernelTester()
2627 .mr(4)
2628 .nr(16)
2629 .kr(4)
2630 .sr(1)
2631 .m(4)
2632 .n(16)
2633 .k(8)
2634 .cn_stride(19)
2635 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2636 }
2637
2638 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) {
2639 TEST_REQUIRES_ARM_NEON_DOT;
2640 for (uint32_t m = 1; m <= 4; m++) {
2641 for (uint32_t n = 1; n <= 16; n++) {
2642 GemmMicrokernelTester()
2643 .mr(4)
2644 .nr(16)
2645 .kr(4)
2646 .sr(1)
2647 .m(m)
2648 .n(n)
2649 .k(8)
2650 .iterations(1)
2651 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2652 }
2653 }
2654 }
2655
2656 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) {
2657 TEST_REQUIRES_ARM_NEON_DOT;
2658 for (uint32_t m = 1; m <= 4; m++) {
2659 GemmMicrokernelTester()
2660 .mr(4)
2661 .nr(16)
2662 .kr(4)
2663 .sr(1)
2664 .m(m)
2665 .n(16)
2666 .k(8)
2667 .iterations(1)
2668 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2669 }
2670 }
2671
2672 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) {
2673 TEST_REQUIRES_ARM_NEON_DOT;
2674 for (uint32_t n = 1; n <= 16; n++) {
2675 GemmMicrokernelTester()
2676 .mr(4)
2677 .nr(16)
2678 .kr(4)
2679 .sr(1)
2680 .m(4)
2681 .n(n)
2682 .k(8)
2683 .iterations(1)
2684 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2685 }
2686 }
2687
2688 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) {
2689 TEST_REQUIRES_ARM_NEON_DOT;
2690 for (size_t k = 1; k < 8; k++) {
2691 GemmMicrokernelTester()
2692 .mr(4)
2693 .nr(16)
2694 .kr(4)
2695 .sr(1)
2696 .m(4)
2697 .n(16)
2698 .k(k)
2699 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2700 }
2701 }
2702
2703 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) {
2704 TEST_REQUIRES_ARM_NEON_DOT;
2705 for (size_t k = 1; k < 8; k++) {
2706 for (uint32_t m = 1; m <= 4; m++) {
2707 for (uint32_t n = 1; n <= 16; n++) {
2708 GemmMicrokernelTester()
2709 .mr(4)
2710 .nr(16)
2711 .kr(4)
2712 .sr(1)
2713 .m(m)
2714 .n(n)
2715 .k(k)
2716 .iterations(1)
2717 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2718 }
2719 }
2720 }
2721 }
2722
2723 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) {
2724 TEST_REQUIRES_ARM_NEON_DOT;
2725 for (size_t k = 9; k < 16; k++) {
2726 GemmMicrokernelTester()
2727 .mr(4)
2728 .nr(16)
2729 .kr(4)
2730 .sr(1)
2731 .m(4)
2732 .n(16)
2733 .k(k)
2734 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2735 }
2736 }
2737
2738 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) {
2739 TEST_REQUIRES_ARM_NEON_DOT;
2740 for (size_t k = 9; k < 16; k++) {
2741 for (uint32_t m = 1; m <= 4; m++) {
2742 for (uint32_t n = 1; n <= 16; n++) {
2743 GemmMicrokernelTester()
2744 .mr(4)
2745 .nr(16)
2746 .kr(4)
2747 .sr(1)
2748 .m(m)
2749 .n(n)
2750 .k(k)
2751 .iterations(1)
2752 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2753 }
2754 }
2755 }
2756 }
2757
2758 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) {
2759 TEST_REQUIRES_ARM_NEON_DOT;
2760 for (size_t k = 16; k <= 80; k += 8) {
2761 GemmMicrokernelTester()
2762 .mr(4)
2763 .nr(16)
2764 .kr(4)
2765 .sr(1)
2766 .m(4)
2767 .n(16)
2768 .k(k)
2769 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2770 }
2771 }
2772
2773 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) {
2774 TEST_REQUIRES_ARM_NEON_DOT;
2775 for (size_t k = 16; k <= 80; k += 8) {
2776 for (uint32_t m = 1; m <= 4; m++) {
2777 for (uint32_t n = 1; n <= 16; n++) {
2778 GemmMicrokernelTester()
2779 .mr(4)
2780 .nr(16)
2781 .kr(4)
2782 .sr(1)
2783 .m(m)
2784 .n(n)
2785 .k(k)
2786 .iterations(1)
2787 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2788 }
2789 }
2790 }
2791 }
2792
2793 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) {
2794 TEST_REQUIRES_ARM_NEON_DOT;
2795 for (uint32_t n = 17; n < 32; n++) {
2796 for (size_t k = 1; k <= 40; k += 9) {
2797 GemmMicrokernelTester()
2798 .mr(4)
2799 .nr(16)
2800 .kr(4)
2801 .sr(1)
2802 .m(4)
2803 .n(16)
2804 .k(k)
2805 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2806 }
2807 }
2808 }
2809
2810 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) {
2811 TEST_REQUIRES_ARM_NEON_DOT;
2812 for (uint32_t n = 17; n < 32; n++) {
2813 for (size_t k = 1; k <= 40; k += 9) {
2814 GemmMicrokernelTester()
2815 .mr(4)
2816 .nr(16)
2817 .kr(4)
2818 .sr(1)
2819 .m(4)
2820 .n(16)
2821 .k(k)
2822 .cn_stride(19)
2823 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2824 }
2825 }
2826 }
2827
2828 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) {
2829 TEST_REQUIRES_ARM_NEON_DOT;
2830 for (uint32_t n = 17; n < 32; n++) {
2831 for (size_t k = 1; k <= 40; k += 9) {
2832 for (uint32_t m = 1; m <= 4; m++) {
2833 GemmMicrokernelTester()
2834 .mr(4)
2835 .nr(16)
2836 .kr(4)
2837 .sr(1)
2838 .m(m)
2839 .n(n)
2840 .k(k)
2841 .iterations(1)
2842 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2843 }
2844 }
2845 }
2846 }
2847
2848 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) {
2849 TEST_REQUIRES_ARM_NEON_DOT;
2850 for (uint32_t n = 32; n <= 48; n += 16) {
2851 for (size_t k = 1; k <= 40; k += 9) {
2852 GemmMicrokernelTester()
2853 .mr(4)
2854 .nr(16)
2855 .kr(4)
2856 .sr(1)
2857 .m(4)
2858 .n(16)
2859 .k(k)
2860 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2861 }
2862 }
2863 }
2864
2865 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) {
2866 TEST_REQUIRES_ARM_NEON_DOT;
2867 for (uint32_t n = 32; n <= 48; n += 16) {
2868 for (size_t k = 1; k <= 40; k += 9) {
2869 GemmMicrokernelTester()
2870 .mr(4)
2871 .nr(16)
2872 .kr(4)
2873 .sr(1)
2874 .m(4)
2875 .n(n)
2876 .k(k)
2877 .cn_stride(19)
2878 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2879 }
2880 }
2881 }
2882
2883 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) {
2884 TEST_REQUIRES_ARM_NEON_DOT;
2885 for (uint32_t n = 32; n <= 48; n += 16) {
2886 for (size_t k = 1; k <= 40; k += 9) {
2887 for (uint32_t m = 1; m <= 4; m++) {
2888 GemmMicrokernelTester()
2889 .mr(4)
2890 .nr(16)
2891 .kr(4)
2892 .sr(1)
2893 .m(m)
2894 .n(n)
2895 .k(k)
2896 .iterations(1)
2897 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2898 }
2899 }
2900 }
2901 }
2902
2903 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel) {
2904 TEST_REQUIRES_ARM_NEON_DOT;
2905 for (size_t k = 1; k <= 40; k += 9) {
2906 GemmMicrokernelTester()
2907 .mr(4)
2908 .nr(16)
2909 .kr(4)
2910 .sr(1)
2911 .m(4)
2912 .n(16)
2913 .k(k)
2914 .ks(3)
2915 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2916 }
2917 }
2918
2919 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, small_kernel_subtile) {
2920 TEST_REQUIRES_ARM_NEON_DOT;
2921 for (size_t k = 1; k <= 40; k += 9) {
2922 for (uint32_t m = 1; m <= 4; m++) {
2923 for (uint32_t n = 1; n <= 16; n++) {
2924 GemmMicrokernelTester()
2925 .mr(4)
2926 .nr(16)
2927 .kr(4)
2928 .sr(1)
2929 .m(m)
2930 .n(n)
2931 .k(k)
2932 .ks(3)
2933 .iterations(1)
2934 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2935 }
2936 }
2937 }
2938 }
2939
2940 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_small_kernel) {
2941 TEST_REQUIRES_ARM_NEON_DOT;
2942 for (uint32_t n = 17; n < 32; n++) {
2943 for (size_t k = 1; k <= 40; k += 9) {
2944 GemmMicrokernelTester()
2945 .mr(4)
2946 .nr(16)
2947 .kr(4)
2948 .sr(1)
2949 .m(4)
2950 .n(16)
2951 .k(k)
2952 .ks(3)
2953 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2954 }
2955 }
2956 }
2957
2958 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_small_kernel) {
2959 TEST_REQUIRES_ARM_NEON_DOT;
2960 for (uint32_t n = 32; n <= 48; n += 16) {
2961 for (size_t k = 1; k <= 40; k += 9) {
2962 GemmMicrokernelTester()
2963 .mr(4)
2964 .nr(16)
2965 .kr(4)
2966 .sr(1)
2967 .m(4)
2968 .n(16)
2969 .k(k)
2970 .ks(3)
2971 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2972 }
2973 }
2974 }
2975
2976 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) {
2977 TEST_REQUIRES_ARM_NEON_DOT;
2978 for (size_t k = 1; k <= 40; k += 9) {
2979 for (uint32_t m = 1; m <= 4; m++) {
2980 for (uint32_t n = 1; n <= 16; n++) {
2981 GemmMicrokernelTester()
2982 .mr(4)
2983 .nr(16)
2984 .kr(4)
2985 .sr(1)
2986 .m(m)
2987 .n(n)
2988 .k(k)
2989 .cm_stride(19)
2990 .iterations(1)
2991 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
2992 }
2993 }
2994 }
2995 }
2996
2997 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, a_offset) {
2998 TEST_REQUIRES_ARM_NEON_DOT;
2999 for (size_t k = 1; k <= 40; k += 9) {
3000 GemmMicrokernelTester()
3001 .mr(4)
3002 .nr(16)
3003 .kr(4)
3004 .sr(1)
3005 .m(4)
3006 .n(16)
3007 .k(k)
3008 .ks(3)
3009 .a_offset(163)
3010 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3011 }
3012 }
3013
3014 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, zero) {
3015 TEST_REQUIRES_ARM_NEON_DOT;
3016 for (uint32_t mz = 0; mz < 4; mz++) {
3017 for (size_t k = 1; k <= 40; k += 9) {
3018 GemmMicrokernelTester()
3019 .mr(4)
3020 .nr(16)
3021 .kr(4)
3022 .sr(1)
3023 .m(4)
3024 .n(16)
3025 .k(k)
3026 .ks(3)
3027 .a_offset(163)
3028 .zero_index(mz)
3029 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3030 }
3031 }
3032 }
3033
3034 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) {
3035 TEST_REQUIRES_ARM_NEON_DOT;
3036 GemmMicrokernelTester()
3037 .mr(4)
3038 .nr(16)
3039 .kr(4)
3040 .sr(1)
3041 .m(4)
3042 .n(16)
3043 .k(8)
3044 .qmin(128)
3045 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3046 }
3047
3048 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) {
3049 TEST_REQUIRES_ARM_NEON_DOT;
3050 GemmMicrokernelTester()
3051 .mr(4)
3052 .nr(16)
3053 .kr(4)
3054 .sr(1)
3055 .m(4)
3056 .n(16)
3057 .k(8)
3058 .qmax(128)
3059 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3060 }
3061
3062 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) {
3063 TEST_REQUIRES_ARM_NEON_DOT;
3064 GemmMicrokernelTester()
3065 .mr(4)
3066 .nr(16)
3067 .kr(4)
3068 .sr(1)
3069 .m(4)
3070 .n(16)
3071 .k(8)
3072 .cm_stride(19)
3073 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3074 }
3075
3076 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_a_zero_point) {
3077 TEST_REQUIRES_ARM_NEON_DOT;
3078 for (size_t k = 1; k <= 40; k += 9) {
3079 GemmMicrokernelTester()
3080 .mr(4)
3081 .nr(16)
3082 .kr(4)
3083 .sr(1)
3084 .m(4)
3085 .n(16)
3086 .k(k)
3087 .a_zero_point(0)
3088 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3089 }
3090 }
3091
3092 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_b_zero_point) {
3093 TEST_REQUIRES_ARM_NEON_DOT;
3094 for (size_t k = 1; k <= 40; k += 9) {
3095 GemmMicrokernelTester()
3096 .mr(4)
3097 .nr(16)
3098 .kr(4)
3099 .sr(1)
3100 .m(4)
3101 .n(16)
3102 .k(k)
3103 .b_zero_point(0)
3104 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3105 }
3106 }
3107
3108 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_zero_point) {
3109 TEST_REQUIRES_ARM_NEON_DOT;
3110 for (size_t k = 1; k <= 40; k += 9) {
3111 GemmMicrokernelTester()
3112 .mr(4)
3113 .nr(16)
3114 .kr(4)
3115 .sr(1)
3116 .m(4)
3117 .n(16)
3118 .k(k)
3119 .a_zero_point(0)
3120 .b_zero_point(0)
3121 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3122 }
3123 }
3124#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
3125
3126
3127#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
3128 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8) {
3129 TEST_REQUIRES_ARM_NEON_DOT;
3130 GemmMicrokernelTester()
3131 .mr(6)
3132 .nr(16)
3133 .kr(4)
3134 .sr(1)
3135 .m(6)
3136 .n(16)
3137 .k(8)
3138 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3139 }
3140
3141 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cn) {
3142 TEST_REQUIRES_ARM_NEON_DOT;
3143 GemmMicrokernelTester()
3144 .mr(6)
3145 .nr(16)
3146 .kr(4)
3147 .sr(1)
3148 .m(6)
3149 .n(16)
3150 .k(8)
3151 .cn_stride(19)
3152 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3153 }
3154
3155 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile) {
3156 TEST_REQUIRES_ARM_NEON_DOT;
3157 for (uint32_t m = 1; m <= 6; m++) {
3158 for (uint32_t n = 1; n <= 16; n++) {
3159 GemmMicrokernelTester()
3160 .mr(6)
3161 .nr(16)
3162 .kr(4)
3163 .sr(1)
3164 .m(m)
3165 .n(n)
3166 .k(8)
3167 .iterations(1)
3168 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3169 }
3170 }
3171 }
3172
3173 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_m) {
3174 TEST_REQUIRES_ARM_NEON_DOT;
3175 for (uint32_t m = 1; m <= 6; m++) {
3176 GemmMicrokernelTester()
3177 .mr(6)
3178 .nr(16)
3179 .kr(4)
3180 .sr(1)
3181 .m(m)
3182 .n(16)
3183 .k(8)
3184 .iterations(1)
3185 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3186 }
3187 }
3188
3189 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_n) {
3190 TEST_REQUIRES_ARM_NEON_DOT;
3191 for (uint32_t n = 1; n <= 16; n++) {
3192 GemmMicrokernelTester()
3193 .mr(6)
3194 .nr(16)
3195 .kr(4)
3196 .sr(1)
3197 .m(6)
3198 .n(n)
3199 .k(8)
3200 .iterations(1)
3201 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3202 }
3203 }
3204
3205 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8) {
3206 TEST_REQUIRES_ARM_NEON_DOT;
3207 for (size_t k = 1; k < 8; k++) {
3208 GemmMicrokernelTester()
3209 .mr(6)
3210 .nr(16)
3211 .kr(4)
3212 .sr(1)
3213 .m(6)
3214 .n(16)
3215 .k(k)
3216 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3217 }
3218 }
3219
3220 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_subtile) {
3221 TEST_REQUIRES_ARM_NEON_DOT;
3222 for (size_t k = 1; k < 8; k++) {
3223 for (uint32_t m = 1; m <= 6; m++) {
3224 for (uint32_t n = 1; n <= 16; n++) {
3225 GemmMicrokernelTester()
3226 .mr(6)
3227 .nr(16)
3228 .kr(4)
3229 .sr(1)
3230 .m(m)
3231 .n(n)
3232 .k(k)
3233 .iterations(1)
3234 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3235 }
3236 }
3237 }
3238 }
3239
3240 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8) {
3241 TEST_REQUIRES_ARM_NEON_DOT;
3242 for (size_t k = 9; k < 16; k++) {
3243 GemmMicrokernelTester()
3244 .mr(6)
3245 .nr(16)
3246 .kr(4)
3247 .sr(1)
3248 .m(6)
3249 .n(16)
3250 .k(k)
3251 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3252 }
3253 }
3254
3255 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_subtile) {
3256 TEST_REQUIRES_ARM_NEON_DOT;
3257 for (size_t k = 9; k < 16; k++) {
3258 for (uint32_t m = 1; m <= 6; m++) {
3259 for (uint32_t n = 1; n <= 16; n++) {
3260 GemmMicrokernelTester()
3261 .mr(6)
3262 .nr(16)
3263 .kr(4)
3264 .sr(1)
3265 .m(m)
3266 .n(n)
3267 .k(k)
3268 .iterations(1)
3269 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3270 }
3271 }
3272 }
3273 }
3274
3275 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8) {
3276 TEST_REQUIRES_ARM_NEON_DOT;
3277 for (size_t k = 16; k <= 80; k += 8) {
3278 GemmMicrokernelTester()
3279 .mr(6)
3280 .nr(16)
3281 .kr(4)
3282 .sr(1)
3283 .m(6)
3284 .n(16)
3285 .k(k)
3286 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3287 }
3288 }
3289
3290 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_subtile) {
3291 TEST_REQUIRES_ARM_NEON_DOT;
3292 for (size_t k = 16; k <= 80; k += 8) {
3293 for (uint32_t m = 1; m <= 6; m++) {
3294 for (uint32_t n = 1; n <= 16; n++) {
3295 GemmMicrokernelTester()
3296 .mr(6)
3297 .nr(16)
3298 .kr(4)
3299 .sr(1)
3300 .m(m)
3301 .n(n)
3302 .k(k)
3303 .iterations(1)
3304 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3305 }
3306 }
3307 }
3308 }
3309
3310 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16) {
3311 TEST_REQUIRES_ARM_NEON_DOT;
3312 for (uint32_t n = 17; n < 32; n++) {
3313 for (size_t k = 1; k <= 40; k += 9) {
3314 GemmMicrokernelTester()
3315 .mr(6)
3316 .nr(16)
3317 .kr(4)
3318 .sr(1)
3319 .m(6)
3320 .n(16)
3321 .k(k)
3322 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3323 }
3324 }
3325 }
3326
3327 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_cn) {
3328 TEST_REQUIRES_ARM_NEON_DOT;
3329 for (uint32_t n = 17; n < 32; n++) {
3330 for (size_t k = 1; k <= 40; k += 9) {
3331 GemmMicrokernelTester()
3332 .mr(6)
3333 .nr(16)
3334 .kr(4)
3335 .sr(1)
3336 .m(6)
3337 .n(16)
3338 .k(k)
3339 .cn_stride(19)
3340 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3341 }
3342 }
3343 }
3344
3345 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_subtile) {
3346 TEST_REQUIRES_ARM_NEON_DOT;
3347 for (uint32_t n = 17; n < 32; n++) {
3348 for (size_t k = 1; k <= 40; k += 9) {
3349 for (uint32_t m = 1; m <= 6; m++) {
3350 GemmMicrokernelTester()
3351 .mr(6)
3352 .nr(16)
3353 .kr(4)
3354 .sr(1)
3355 .m(m)
3356 .n(n)
3357 .k(k)
3358 .iterations(1)
3359 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3360 }
3361 }
3362 }
3363 }
3364
3365 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16) {
3366 TEST_REQUIRES_ARM_NEON_DOT;
3367 for (uint32_t n = 32; n <= 48; n += 16) {
3368 for (size_t k = 1; k <= 40; k += 9) {
3369 GemmMicrokernelTester()
3370 .mr(6)
3371 .nr(16)
3372 .kr(4)
3373 .sr(1)
3374 .m(6)
3375 .n(16)
3376 .k(k)
3377 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3378 }
3379 }
3380 }
3381
3382 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_cn) {
3383 TEST_REQUIRES_ARM_NEON_DOT;
3384 for (uint32_t n = 32; n <= 48; n += 16) {
3385 for (size_t k = 1; k <= 40; k += 9) {
3386 GemmMicrokernelTester()
3387 .mr(6)
3388 .nr(16)
3389 .kr(4)
3390 .sr(1)
3391 .m(6)
3392 .n(n)
3393 .k(k)
3394 .cn_stride(19)
3395 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3396 }
3397 }
3398 }
3399
3400 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_subtile) {
3401 TEST_REQUIRES_ARM_NEON_DOT;
3402 for (uint32_t n = 32; n <= 48; n += 16) {
3403 for (size_t k = 1; k <= 40; k += 9) {
3404 for (uint32_t m = 1; m <= 6; m++) {
3405 GemmMicrokernelTester()
3406 .mr(6)
3407 .nr(16)
3408 .kr(4)
3409 .sr(1)
3410 .m(m)
3411 .n(n)
3412 .k(k)
3413 .iterations(1)
3414 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3415 }
3416 }
3417 }
3418 }
3419
3420 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel) {
3421 TEST_REQUIRES_ARM_NEON_DOT;
3422 for (size_t k = 1; k <= 40; k += 9) {
3423 GemmMicrokernelTester()
3424 .mr(6)
3425 .nr(16)
3426 .kr(4)
3427 .sr(1)
3428 .m(6)
3429 .n(16)
3430 .k(k)
3431 .ks(3)
3432 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3433 }
3434 }
3435
3436 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel_subtile) {
3437 TEST_REQUIRES_ARM_NEON_DOT;
3438 for (size_t k = 1; k <= 40; k += 9) {
3439 for (uint32_t m = 1; m <= 6; m++) {
3440 for (uint32_t n = 1; n <= 16; n++) {
3441 GemmMicrokernelTester()
3442 .mr(6)
3443 .nr(16)
3444 .kr(4)
3445 .sr(1)
3446 .m(m)
3447 .n(n)
3448 .k(k)
3449 .ks(3)
3450 .iterations(1)
3451 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3452 }
3453 }
3454 }
3455 }
3456
3457 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_small_kernel) {
3458 TEST_REQUIRES_ARM_NEON_DOT;
3459 for (uint32_t n = 17; n < 32; n++) {
3460 for (size_t k = 1; k <= 40; k += 9) {
3461 GemmMicrokernelTester()
3462 .mr(6)
3463 .nr(16)
3464 .kr(4)
3465 .sr(1)
3466 .m(6)
3467 .n(16)
3468 .k(k)
3469 .ks(3)
3470 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3471 }
3472 }
3473 }
3474
3475 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_small_kernel) {
3476 TEST_REQUIRES_ARM_NEON_DOT;
3477 for (uint32_t n = 32; n <= 48; n += 16) {
3478 for (size_t k = 1; k <= 40; k += 9) {
3479 GemmMicrokernelTester()
3480 .mr(6)
3481 .nr(16)
3482 .kr(4)
3483 .sr(1)
3484 .m(6)
3485 .n(16)
3486 .k(k)
3487 .ks(3)
3488 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3489 }
3490 }
3491 }
3492
3493 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm_subtile) {
3494 TEST_REQUIRES_ARM_NEON_DOT;
3495 for (size_t k = 1; k <= 40; k += 9) {
3496 for (uint32_t m = 1; m <= 6; m++) {
3497 for (uint32_t n = 1; n <= 16; n++) {
3498 GemmMicrokernelTester()
3499 .mr(6)
3500 .nr(16)
3501 .kr(4)
3502 .sr(1)
3503 .m(m)
3504 .n(n)
3505 .k(k)
3506 .cm_stride(19)
3507 .iterations(1)
3508 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3509 }
3510 }
3511 }
3512 }
3513
3514 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, a_offset) {
3515 TEST_REQUIRES_ARM_NEON_DOT;
3516 for (size_t k = 1; k <= 40; k += 9) {
3517 GemmMicrokernelTester()
3518 .mr(6)
3519 .nr(16)
3520 .kr(4)
3521 .sr(1)
3522 .m(6)
3523 .n(16)
3524 .k(k)
3525 .ks(3)
3526 .a_offset(251)
3527 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3528 }
3529 }
3530
3531 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, zero) {
3532 TEST_REQUIRES_ARM_NEON_DOT;
3533 for (uint32_t mz = 0; mz < 6; mz++) {
3534 for (size_t k = 1; k <= 40; k += 9) {
3535 GemmMicrokernelTester()
3536 .mr(6)
3537 .nr(16)
3538 .kr(4)
3539 .sr(1)
3540 .m(6)
3541 .n(16)
3542 .k(k)
3543 .ks(3)
3544 .a_offset(251)
3545 .zero_index(mz)
3546 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3547 }
3548 }
3549 }
3550
3551 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmin) {
3552 TEST_REQUIRES_ARM_NEON_DOT;
3553 GemmMicrokernelTester()
3554 .mr(6)
3555 .nr(16)
3556 .kr(4)
3557 .sr(1)
3558 .m(6)
3559 .n(16)
3560 .k(8)
3561 .qmin(128)
3562 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3563 }
3564
3565 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmax) {
3566 TEST_REQUIRES_ARM_NEON_DOT;
3567 GemmMicrokernelTester()
3568 .mr(6)
3569 .nr(16)
3570 .kr(4)
3571 .sr(1)
3572 .m(6)
3573 .n(16)
3574 .k(8)
3575 .qmax(128)
3576 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3577 }
3578
3579 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm) {
3580 TEST_REQUIRES_ARM_NEON_DOT;
3581 GemmMicrokernelTester()
3582 .mr(6)
3583 .nr(16)
3584 .kr(4)
3585 .sr(1)
3586 .m(6)
3587 .n(16)
3588 .k(8)
3589 .cm_stride(19)
3590 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3591 }
3592
3593 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_a_zero_point) {
3594 TEST_REQUIRES_ARM_NEON_DOT;
3595 for (size_t k = 1; k <= 40; k += 9) {
3596 GemmMicrokernelTester()
3597 .mr(6)
3598 .nr(16)
3599 .kr(4)
3600 .sr(1)
3601 .m(6)
3602 .n(16)
3603 .k(k)
3604 .a_zero_point(0)
3605 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3606 }
3607 }
3608
3609 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_b_zero_point) {
3610 TEST_REQUIRES_ARM_NEON_DOT;
3611 for (size_t k = 1; k <= 40; k += 9) {
3612 GemmMicrokernelTester()
3613 .mr(6)
3614 .nr(16)
3615 .kr(4)
3616 .sr(1)
3617 .m(6)
3618 .n(16)
3619 .k(k)
3620 .b_zero_point(0)
3621 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3622 }
3623 }
3624
3625 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_zero_point) {
3626 TEST_REQUIRES_ARM_NEON_DOT;
3627 for (size_t k = 1; k <= 40; k += 9) {
3628 GemmMicrokernelTester()
3629 .mr(6)
3630 .nr(16)
3631 .kr(4)
3632 .sr(1)
3633 .m(6)
3634 .n(16)
3635 .k(k)
3636 .a_zero_point(0)
3637 .b_zero_point(0)
3638 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3639 }
3640 }
3641#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
3642
3643
3644#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
3645 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) {
3646 TEST_REQUIRES_ARM_NEON_DOT;
3647 GemmMicrokernelTester()
3648 .mr(8)
3649 .nr(16)
3650 .kr(4)
3651 .sr(1)
3652 .m(8)
3653 .n(16)
3654 .k(8)
3655 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3656 }
3657
3658 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) {
3659 TEST_REQUIRES_ARM_NEON_DOT;
3660 GemmMicrokernelTester()
3661 .mr(8)
3662 .nr(16)
3663 .kr(4)
3664 .sr(1)
3665 .m(8)
3666 .n(16)
3667 .k(8)
3668 .cn_stride(19)
3669 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3670 }
3671
3672 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) {
3673 TEST_REQUIRES_ARM_NEON_DOT;
3674 for (uint32_t m = 1; m <= 8; m++) {
3675 for (uint32_t n = 1; n <= 16; n++) {
3676 GemmMicrokernelTester()
3677 .mr(8)
3678 .nr(16)
3679 .kr(4)
3680 .sr(1)
3681 .m(m)
3682 .n(n)
3683 .k(8)
3684 .iterations(1)
3685 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3686 }
3687 }
3688 }
3689
3690 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) {
3691 TEST_REQUIRES_ARM_NEON_DOT;
3692 for (uint32_t m = 1; m <= 8; m++) {
3693 GemmMicrokernelTester()
3694 .mr(8)
3695 .nr(16)
3696 .kr(4)
3697 .sr(1)
3698 .m(m)
3699 .n(16)
3700 .k(8)
3701 .iterations(1)
3702 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3703 }
3704 }
3705
3706 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) {
3707 TEST_REQUIRES_ARM_NEON_DOT;
3708 for (uint32_t n = 1; n <= 16; n++) {
3709 GemmMicrokernelTester()
3710 .mr(8)
3711 .nr(16)
3712 .kr(4)
3713 .sr(1)
3714 .m(8)
3715 .n(n)
3716 .k(8)
3717 .iterations(1)
3718 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3719 }
3720 }
3721
3722 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) {
3723 TEST_REQUIRES_ARM_NEON_DOT;
3724 for (size_t k = 1; k < 8; k++) {
3725 GemmMicrokernelTester()
3726 .mr(8)
3727 .nr(16)
3728 .kr(4)
3729 .sr(1)
3730 .m(8)
3731 .n(16)
3732 .k(k)
3733 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3734 }
3735 }
3736
3737 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) {
3738 TEST_REQUIRES_ARM_NEON_DOT;
3739 for (size_t k = 1; k < 8; k++) {
3740 for (uint32_t m = 1; m <= 8; m++) {
3741 for (uint32_t n = 1; n <= 16; n++) {
3742 GemmMicrokernelTester()
3743 .mr(8)
3744 .nr(16)
3745 .kr(4)
3746 .sr(1)
3747 .m(m)
3748 .n(n)
3749 .k(k)
3750 .iterations(1)
3751 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3752 }
3753 }
3754 }
3755 }
3756
3757 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) {
3758 TEST_REQUIRES_ARM_NEON_DOT;
3759 for (size_t k = 9; k < 16; k++) {
3760 GemmMicrokernelTester()
3761 .mr(8)
3762 .nr(16)
3763 .kr(4)
3764 .sr(1)
3765 .m(8)
3766 .n(16)
3767 .k(k)
3768 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3769 }
3770 }
3771
3772 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) {
3773 TEST_REQUIRES_ARM_NEON_DOT;
3774 for (size_t k = 9; k < 16; k++) {
3775 for (uint32_t m = 1; m <= 8; m++) {
3776 for (uint32_t n = 1; n <= 16; n++) {
3777 GemmMicrokernelTester()
3778 .mr(8)
3779 .nr(16)
3780 .kr(4)
3781 .sr(1)
3782 .m(m)
3783 .n(n)
3784 .k(k)
3785 .iterations(1)
3786 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3787 }
3788 }
3789 }
3790 }
3791
3792 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) {
3793 TEST_REQUIRES_ARM_NEON_DOT;
3794 for (size_t k = 16; k <= 80; k += 8) {
3795 GemmMicrokernelTester()
3796 .mr(8)
3797 .nr(16)
3798 .kr(4)
3799 .sr(1)
3800 .m(8)
3801 .n(16)
3802 .k(k)
3803 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3804 }
3805 }
3806
3807 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
3808 TEST_REQUIRES_ARM_NEON_DOT;
3809 for (size_t k = 16; k <= 80; k += 8) {
3810 for (uint32_t m = 1; m <= 8; m++) {
3811 for (uint32_t n = 1; n <= 16; n++) {
3812 GemmMicrokernelTester()
3813 .mr(8)
3814 .nr(16)
3815 .kr(4)
3816 .sr(1)
3817 .m(m)
3818 .n(n)
3819 .k(k)
3820 .iterations(1)
3821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3822 }
3823 }
3824 }
3825 }
3826
3827 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
3828 TEST_REQUIRES_ARM_NEON_DOT;
3829 for (uint32_t n = 17; n < 32; n++) {
3830 for (size_t k = 1; k <= 40; k += 9) {
3831 GemmMicrokernelTester()
3832 .mr(8)
3833 .nr(16)
3834 .kr(4)
3835 .sr(1)
3836 .m(8)
3837 .n(16)
3838 .k(k)
3839 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3840 }
3841 }
3842 }
3843
3844 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
3845 TEST_REQUIRES_ARM_NEON_DOT;
3846 for (uint32_t n = 17; n < 32; n++) {
3847 for (size_t k = 1; k <= 40; k += 9) {
3848 GemmMicrokernelTester()
3849 .mr(8)
3850 .nr(16)
3851 .kr(4)
3852 .sr(1)
3853 .m(8)
3854 .n(16)
3855 .k(k)
3856 .cn_stride(19)
3857 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3858 }
3859 }
3860 }
3861
3862 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
3863 TEST_REQUIRES_ARM_NEON_DOT;
3864 for (uint32_t n = 17; n < 32; n++) {
3865 for (size_t k = 1; k <= 40; k += 9) {
3866 for (uint32_t m = 1; m <= 8; m++) {
3867 GemmMicrokernelTester()
3868 .mr(8)
3869 .nr(16)
3870 .kr(4)
3871 .sr(1)
3872 .m(m)
3873 .n(n)
3874 .k(k)
3875 .iterations(1)
3876 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3877 }
3878 }
3879 }
3880 }
3881
3882 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
3883 TEST_REQUIRES_ARM_NEON_DOT;
3884 for (uint32_t n = 32; n <= 48; n += 16) {
3885 for (size_t k = 1; k <= 40; k += 9) {
3886 GemmMicrokernelTester()
3887 .mr(8)
3888 .nr(16)
3889 .kr(4)
3890 .sr(1)
3891 .m(8)
3892 .n(16)
3893 .k(k)
3894 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3895 }
3896 }
3897 }
3898
3899 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) {
3900 TEST_REQUIRES_ARM_NEON_DOT;
3901 for (uint32_t n = 32; n <= 48; n += 16) {
3902 for (size_t k = 1; k <= 40; k += 9) {
3903 GemmMicrokernelTester()
3904 .mr(8)
3905 .nr(16)
3906 .kr(4)
3907 .sr(1)
3908 .m(8)
3909 .n(n)
3910 .k(k)
3911 .cn_stride(19)
3912 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3913 }
3914 }
3915 }
3916
3917 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) {
3918 TEST_REQUIRES_ARM_NEON_DOT;
3919 for (uint32_t n = 32; n <= 48; n += 16) {
3920 for (size_t k = 1; k <= 40; k += 9) {
3921 for (uint32_t m = 1; m <= 8; m++) {
3922 GemmMicrokernelTester()
3923 .mr(8)
3924 .nr(16)
3925 .kr(4)
3926 .sr(1)
3927 .m(m)
3928 .n(n)
3929 .k(k)
3930 .iterations(1)
3931 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3932 }
3933 }
3934 }
3935 }
3936
3937 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel) {
3938 TEST_REQUIRES_ARM_NEON_DOT;
3939 for (size_t k = 1; k <= 40; k += 9) {
3940 GemmMicrokernelTester()
3941 .mr(8)
3942 .nr(16)
3943 .kr(4)
3944 .sr(1)
3945 .m(8)
3946 .n(16)
3947 .k(k)
3948 .ks(3)
3949 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3950 }
3951 }
3952
3953 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, small_kernel_subtile) {
3954 TEST_REQUIRES_ARM_NEON_DOT;
3955 for (size_t k = 1; k <= 40; k += 9) {
3956 for (uint32_t m = 1; m <= 8; m++) {
3957 for (uint32_t n = 1; n <= 16; n++) {
3958 GemmMicrokernelTester()
3959 .mr(8)
3960 .nr(16)
3961 .kr(4)
3962 .sr(1)
3963 .m(m)
3964 .n(n)
3965 .k(k)
3966 .ks(3)
3967 .iterations(1)
3968 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3969 }
3970 }
3971 }
3972 }
3973
3974 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_small_kernel) {
3975 TEST_REQUIRES_ARM_NEON_DOT;
3976 for (uint32_t n = 17; n < 32; n++) {
3977 for (size_t k = 1; k <= 40; k += 9) {
3978 GemmMicrokernelTester()
3979 .mr(8)
3980 .nr(16)
3981 .kr(4)
3982 .sr(1)
3983 .m(8)
3984 .n(16)
3985 .k(k)
3986 .ks(3)
3987 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
3988 }
3989 }
3990 }
3991
3992 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_small_kernel) {
3993 TEST_REQUIRES_ARM_NEON_DOT;
3994 for (uint32_t n = 32; n <= 48; n += 16) {
3995 for (size_t k = 1; k <= 40; k += 9) {
3996 GemmMicrokernelTester()
3997 .mr(8)
3998 .nr(16)
3999 .kr(4)
4000 .sr(1)
4001 .m(8)
4002 .n(16)
4003 .k(k)
4004 .ks(3)
4005 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4006 }
4007 }
4008 }
4009
4010 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) {
4011 TEST_REQUIRES_ARM_NEON_DOT;
4012 for (size_t k = 1; k <= 40; k += 9) {
4013 for (uint32_t m = 1; m <= 8; m++) {
4014 for (uint32_t n = 1; n <= 16; n++) {
4015 GemmMicrokernelTester()
4016 .mr(8)
4017 .nr(16)
4018 .kr(4)
4019 .sr(1)
4020 .m(m)
4021 .n(n)
4022 .k(k)
4023 .cm_stride(19)
4024 .iterations(1)
4025 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4026 }
4027 }
4028 }
4029 }
4030
4031 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, a_offset) {
4032 TEST_REQUIRES_ARM_NEON_DOT;
4033 for (size_t k = 1; k <= 40; k += 9) {
4034 GemmMicrokernelTester()
4035 .mr(8)
4036 .nr(16)
4037 .kr(4)
4038 .sr(1)
4039 .m(8)
4040 .n(16)
4041 .k(k)
4042 .ks(3)
4043 .a_offset(331)
4044 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4045 }
4046 }
4047
4048 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, zero) {
4049 TEST_REQUIRES_ARM_NEON_DOT;
4050 for (uint32_t mz = 0; mz < 8; mz++) {
4051 for (size_t k = 1; k <= 40; k += 9) {
4052 GemmMicrokernelTester()
4053 .mr(8)
4054 .nr(16)
4055 .kr(4)
4056 .sr(1)
4057 .m(8)
4058 .n(16)
4059 .k(k)
4060 .ks(3)
4061 .a_offset(331)
4062 .zero_index(mz)
4063 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4064 }
4065 }
4066 }
4067
4068 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) {
4069 TEST_REQUIRES_ARM_NEON_DOT;
4070 GemmMicrokernelTester()
4071 .mr(8)
4072 .nr(16)
4073 .kr(4)
4074 .sr(1)
4075 .m(8)
4076 .n(16)
4077 .k(8)
4078 .qmin(128)
4079 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4080 }
4081
4082 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) {
4083 TEST_REQUIRES_ARM_NEON_DOT;
4084 GemmMicrokernelTester()
4085 .mr(8)
4086 .nr(16)
4087 .kr(4)
4088 .sr(1)
4089 .m(8)
4090 .n(16)
4091 .k(8)
4092 .qmax(128)
4093 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4094 }
4095
4096 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) {
4097 TEST_REQUIRES_ARM_NEON_DOT;
4098 GemmMicrokernelTester()
4099 .mr(8)
4100 .nr(16)
4101 .kr(4)
4102 .sr(1)
4103 .m(8)
4104 .n(16)
4105 .k(8)
4106 .cm_stride(19)
4107 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4108 }
4109
4110 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_a_zero_point) {
4111 TEST_REQUIRES_ARM_NEON_DOT;
4112 for (size_t k = 1; k <= 40; k += 9) {
4113 GemmMicrokernelTester()
4114 .mr(8)
4115 .nr(16)
4116 .kr(4)
4117 .sr(1)
4118 .m(8)
4119 .n(16)
4120 .k(k)
4121 .a_zero_point(0)
4122 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4123 }
4124 }
4125
4126 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_b_zero_point) {
4127 TEST_REQUIRES_ARM_NEON_DOT;
4128 for (size_t k = 1; k <= 40; k += 9) {
4129 GemmMicrokernelTester()
4130 .mr(8)
4131 .nr(16)
4132 .kr(4)
4133 .sr(1)
4134 .m(8)
4135 .n(16)
4136 .k(k)
4137 .b_zero_point(0)
4138 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4139 }
4140 }
4141
4142 TEST(QU8_IGEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_zero_point) {
4143 TEST_REQUIRES_ARM_NEON_DOT;
4144 for (size_t k = 1; k <= 40; k += 9) {
4145 GemmMicrokernelTester()
4146 .mr(8)
4147 .nr(16)
4148 .kr(4)
4149 .sr(1)
4150 .m(8)
4151 .n(16)
4152 .k(k)
4153 .a_zero_point(0)
4154 .b_zero_point(0)
4155 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4156 }
4157 }
4158#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
4159
4160
Frank Barchard59ed1da2021-08-02 11:34:59 -07004161#if XNN_ARCH_ARM64
Frank Barchardfb3a94f2021-08-02 20:37:06 -07004162 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8) {
4163 TEST_REQUIRES_ARM_NEON;
4164 GemmMicrokernelTester()
4165 .mr(4)
4166 .nr(16)
4167 .kr(1)
4168 .sr(1)
4169 .m(4)
4170 .n(16)
4171 .k(8)
4172 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4173 }
4174
4175 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cn) {
4176 TEST_REQUIRES_ARM_NEON;
4177 GemmMicrokernelTester()
4178 .mr(4)
4179 .nr(16)
4180 .kr(1)
4181 .sr(1)
4182 .m(4)
4183 .n(16)
4184 .k(8)
4185 .cn_stride(19)
4186 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4187 }
4188
4189 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile) {
4190 TEST_REQUIRES_ARM_NEON;
4191 for (uint32_t m = 1; m <= 4; m++) {
4192 for (uint32_t n = 1; n <= 16; n++) {
4193 GemmMicrokernelTester()
4194 .mr(4)
4195 .nr(16)
4196 .kr(1)
4197 .sr(1)
4198 .m(m)
4199 .n(n)
4200 .k(8)
4201 .iterations(1)
4202 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4203 }
4204 }
4205 }
4206
4207 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
4208 TEST_REQUIRES_ARM_NEON;
4209 for (uint32_t m = 1; m <= 4; m++) {
4210 GemmMicrokernelTester()
4211 .mr(4)
4212 .nr(16)
4213 .kr(1)
4214 .sr(1)
4215 .m(m)
4216 .n(16)
4217 .k(8)
4218 .iterations(1)
4219 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4220 }
4221 }
4222
4223 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
4224 TEST_REQUIRES_ARM_NEON;
4225 for (uint32_t n = 1; n <= 16; n++) {
4226 GemmMicrokernelTester()
4227 .mr(4)
4228 .nr(16)
4229 .kr(1)
4230 .sr(1)
4231 .m(4)
4232 .n(n)
4233 .k(8)
4234 .iterations(1)
4235 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4236 }
4237 }
4238
4239 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8) {
4240 TEST_REQUIRES_ARM_NEON;
4241 for (size_t k = 1; k < 8; k++) {
4242 GemmMicrokernelTester()
4243 .mr(4)
4244 .nr(16)
4245 .kr(1)
4246 .sr(1)
4247 .m(4)
4248 .n(16)
4249 .k(k)
4250 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4251 }
4252 }
4253
4254 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8_subtile) {
4255 TEST_REQUIRES_ARM_NEON;
4256 for (size_t k = 1; k < 8; k++) {
4257 for (uint32_t m = 1; m <= 4; m++) {
4258 for (uint32_t n = 1; n <= 16; n++) {
4259 GemmMicrokernelTester()
4260 .mr(4)
4261 .nr(16)
4262 .kr(1)
4263 .sr(1)
4264 .m(m)
4265 .n(n)
4266 .k(k)
4267 .iterations(1)
4268 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4269 }
4270 }
4271 }
4272 }
4273
4274 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8) {
4275 TEST_REQUIRES_ARM_NEON;
4276 for (size_t k = 9; k < 16; k++) {
4277 GemmMicrokernelTester()
4278 .mr(4)
4279 .nr(16)
4280 .kr(1)
4281 .sr(1)
4282 .m(4)
4283 .n(16)
4284 .k(k)
4285 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4286 }
4287 }
4288
4289 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8_subtile) {
4290 TEST_REQUIRES_ARM_NEON;
4291 for (size_t k = 9; k < 16; k++) {
4292 for (uint32_t m = 1; m <= 4; m++) {
4293 for (uint32_t n = 1; n <= 16; n++) {
4294 GemmMicrokernelTester()
4295 .mr(4)
4296 .nr(16)
4297 .kr(1)
4298 .sr(1)
4299 .m(m)
4300 .n(n)
4301 .k(k)
4302 .iterations(1)
4303 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4304 }
4305 }
4306 }
4307 }
4308
4309 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8) {
4310 TEST_REQUIRES_ARM_NEON;
4311 for (size_t k = 16; k <= 80; k += 8) {
4312 GemmMicrokernelTester()
4313 .mr(4)
4314 .nr(16)
4315 .kr(1)
4316 .sr(1)
4317 .m(4)
4318 .n(16)
4319 .k(k)
4320 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4321 }
4322 }
4323
4324 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8_subtile) {
4325 TEST_REQUIRES_ARM_NEON;
4326 for (size_t k = 16; k <= 80; k += 8) {
4327 for (uint32_t m = 1; m <= 4; m++) {
4328 for (uint32_t n = 1; n <= 16; n++) {
4329 GemmMicrokernelTester()
4330 .mr(4)
4331 .nr(16)
4332 .kr(1)
4333 .sr(1)
4334 .m(m)
4335 .n(n)
4336 .k(k)
4337 .iterations(1)
4338 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4339 }
4340 }
4341 }
4342 }
4343
4344 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16) {
4345 TEST_REQUIRES_ARM_NEON;
4346 for (uint32_t n = 17; n < 32; n++) {
4347 for (size_t k = 1; k <= 40; k += 9) {
4348 GemmMicrokernelTester()
4349 .mr(4)
4350 .nr(16)
4351 .kr(1)
4352 .sr(1)
4353 .m(4)
4354 .n(16)
4355 .k(k)
4356 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4357 }
4358 }
4359 }
4360
4361 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_strided_cn) {
4362 TEST_REQUIRES_ARM_NEON;
4363 for (uint32_t n = 17; n < 32; n++) {
4364 for (size_t k = 1; k <= 40; k += 9) {
4365 GemmMicrokernelTester()
4366 .mr(4)
4367 .nr(16)
4368 .kr(1)
4369 .sr(1)
4370 .m(4)
4371 .n(16)
4372 .k(k)
4373 .cn_stride(19)
4374 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4375 }
4376 }
4377 }
4378
4379 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_subtile) {
4380 TEST_REQUIRES_ARM_NEON;
4381 for (uint32_t n = 17; n < 32; n++) {
4382 for (size_t k = 1; k <= 40; k += 9) {
4383 for (uint32_t m = 1; m <= 4; m++) {
4384 GemmMicrokernelTester()
4385 .mr(4)
4386 .nr(16)
4387 .kr(1)
4388 .sr(1)
4389 .m(m)
4390 .n(n)
4391 .k(k)
4392 .iterations(1)
4393 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4394 }
4395 }
4396 }
4397 }
4398
4399 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16) {
4400 TEST_REQUIRES_ARM_NEON;
4401 for (uint32_t n = 32; n <= 48; n += 16) {
4402 for (size_t k = 1; k <= 40; k += 9) {
4403 GemmMicrokernelTester()
4404 .mr(4)
4405 .nr(16)
4406 .kr(1)
4407 .sr(1)
4408 .m(4)
4409 .n(16)
4410 .k(k)
4411 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4412 }
4413 }
4414 }
4415
4416 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_strided_cn) {
4417 TEST_REQUIRES_ARM_NEON;
4418 for (uint32_t n = 32; n <= 48; n += 16) {
4419 for (size_t k = 1; k <= 40; k += 9) {
4420 GemmMicrokernelTester()
4421 .mr(4)
4422 .nr(16)
4423 .kr(1)
4424 .sr(1)
4425 .m(4)
4426 .n(n)
4427 .k(k)
4428 .cn_stride(19)
4429 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4430 }
4431 }
4432 }
4433
4434 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_subtile) {
4435 TEST_REQUIRES_ARM_NEON;
4436 for (uint32_t n = 32; n <= 48; n += 16) {
4437 for (size_t k = 1; k <= 40; k += 9) {
4438 for (uint32_t m = 1; m <= 4; m++) {
4439 GemmMicrokernelTester()
4440 .mr(4)
4441 .nr(16)
4442 .kr(1)
4443 .sr(1)
4444 .m(m)
4445 .n(n)
4446 .k(k)
4447 .iterations(1)
4448 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4449 }
4450 }
4451 }
4452 }
4453
4454 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, small_kernel) {
4455 TEST_REQUIRES_ARM_NEON;
4456 for (size_t k = 1; k <= 40; k += 9) {
4457 GemmMicrokernelTester()
4458 .mr(4)
4459 .nr(16)
4460 .kr(1)
4461 .sr(1)
4462 .m(4)
4463 .n(16)
4464 .k(k)
4465 .ks(3)
4466 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4467 }
4468 }
4469
4470 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, small_kernel_subtile) {
4471 TEST_REQUIRES_ARM_NEON;
4472 for (size_t k = 1; k <= 40; k += 9) {
4473 for (uint32_t m = 1; m <= 4; m++) {
4474 for (uint32_t n = 1; n <= 16; n++) {
4475 GemmMicrokernelTester()
4476 .mr(4)
4477 .nr(16)
4478 .kr(1)
4479 .sr(1)
4480 .m(m)
4481 .n(n)
4482 .k(k)
4483 .ks(3)
4484 .iterations(1)
4485 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4486 }
4487 }
4488 }
4489 }
4490
4491 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_small_kernel) {
4492 TEST_REQUIRES_ARM_NEON;
4493 for (uint32_t n = 17; n < 32; n++) {
4494 for (size_t k = 1; k <= 40; k += 9) {
4495 GemmMicrokernelTester()
4496 .mr(4)
4497 .nr(16)
4498 .kr(1)
4499 .sr(1)
4500 .m(4)
4501 .n(16)
4502 .k(k)
4503 .ks(3)
4504 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4505 }
4506 }
4507 }
4508
4509 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_small_kernel) {
4510 TEST_REQUIRES_ARM_NEON;
4511 for (uint32_t n = 32; n <= 48; n += 16) {
4512 for (size_t k = 1; k <= 40; k += 9) {
4513 GemmMicrokernelTester()
4514 .mr(4)
4515 .nr(16)
4516 .kr(1)
4517 .sr(1)
4518 .m(4)
4519 .n(16)
4520 .k(k)
4521 .ks(3)
4522 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4523 }
4524 }
4525 }
4526
4527 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm_subtile) {
4528 TEST_REQUIRES_ARM_NEON;
4529 for (size_t k = 1; k <= 40; k += 9) {
4530 for (uint32_t m = 1; m <= 4; m++) {
4531 for (uint32_t n = 1; n <= 16; n++) {
4532 GemmMicrokernelTester()
4533 .mr(4)
4534 .nr(16)
4535 .kr(1)
4536 .sr(1)
4537 .m(m)
4538 .n(n)
4539 .k(k)
4540 .cm_stride(19)
4541 .iterations(1)
4542 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4543 }
4544 }
4545 }
4546 }
4547
4548 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, a_offset) {
4549 TEST_REQUIRES_ARM_NEON;
4550 for (size_t k = 1; k <= 40; k += 9) {
4551 GemmMicrokernelTester()
4552 .mr(4)
4553 .nr(16)
4554 .kr(1)
4555 .sr(1)
4556 .m(4)
4557 .n(16)
4558 .k(k)
4559 .ks(3)
4560 .a_offset(163)
4561 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4562 }
4563 }
4564
4565 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, zero) {
4566 TEST_REQUIRES_ARM_NEON;
4567 for (uint32_t mz = 0; mz < 4; mz++) {
4568 for (size_t k = 1; k <= 40; k += 9) {
4569 GemmMicrokernelTester()
4570 .mr(4)
4571 .nr(16)
4572 .kr(1)
4573 .sr(1)
4574 .m(4)
4575 .n(16)
4576 .k(k)
4577 .ks(3)
4578 .a_offset(163)
4579 .zero_index(mz)
4580 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4581 }
4582 }
4583 }
4584
4585 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmin) {
4586 TEST_REQUIRES_ARM_NEON;
4587 GemmMicrokernelTester()
4588 .mr(4)
4589 .nr(16)
4590 .kr(1)
4591 .sr(1)
4592 .m(4)
4593 .n(16)
4594 .k(8)
4595 .qmin(128)
4596 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4597 }
4598
4599 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmax) {
4600 TEST_REQUIRES_ARM_NEON;
4601 GemmMicrokernelTester()
4602 .mr(4)
4603 .nr(16)
4604 .kr(1)
4605 .sr(1)
4606 .m(4)
4607 .n(16)
4608 .k(8)
4609 .qmax(128)
4610 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4611 }
4612
4613 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm) {
4614 TEST_REQUIRES_ARM_NEON;
4615 GemmMicrokernelTester()
4616 .mr(4)
4617 .nr(16)
4618 .kr(1)
4619 .sr(1)
4620 .m(4)
4621 .n(16)
4622 .k(8)
4623 .cm_stride(19)
4624 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4625 }
4626
4627 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_a_zero_point) {
4628 TEST_REQUIRES_ARM_NEON;
4629 for (size_t k = 1; k <= 40; k += 9) {
4630 GemmMicrokernelTester()
4631 .mr(4)
4632 .nr(16)
4633 .kr(1)
4634 .sr(1)
4635 .m(4)
4636 .n(16)
4637 .k(k)
4638 .a_zero_point(0)
4639 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4640 }
4641 }
4642
4643 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_b_zero_point) {
4644 TEST_REQUIRES_ARM_NEON;
4645 for (size_t k = 1; k <= 40; k += 9) {
4646 GemmMicrokernelTester()
4647 .mr(4)
4648 .nr(16)
4649 .kr(1)
4650 .sr(1)
4651 .m(4)
4652 .n(16)
4653 .k(k)
4654 .b_zero_point(0)
4655 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4656 }
4657 }
4658
4659 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_zero_point) {
4660 TEST_REQUIRES_ARM_NEON;
4661 for (size_t k = 1; k <= 40; k += 9) {
4662 GemmMicrokernelTester()
4663 .mr(4)
4664 .nr(16)
4665 .kr(1)
4666 .sr(1)
4667 .m(4)
4668 .n(16)
4669 .k(k)
4670 .a_zero_point(0)
4671 .b_zero_point(0)
4672 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4673 }
4674 }
4675#endif // XNN_ARCH_ARM64
4676
4677
4678#if XNN_ARCH_ARM64
4679 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8) {
4680 TEST_REQUIRES_ARM_NEON;
4681 GemmMicrokernelTester()
4682 .mr(4)
4683 .nr(16)
4684 .kr(1)
4685 .sr(1)
4686 .m(4)
4687 .n(16)
4688 .k(8)
4689 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4690 }
4691
4692 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cn) {
4693 TEST_REQUIRES_ARM_NEON;
4694 GemmMicrokernelTester()
4695 .mr(4)
4696 .nr(16)
4697 .kr(1)
4698 .sr(1)
4699 .m(4)
4700 .n(16)
4701 .k(8)
4702 .cn_stride(19)
4703 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4704 }
4705
4706 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile) {
4707 TEST_REQUIRES_ARM_NEON;
4708 for (uint32_t m = 1; m <= 4; m++) {
4709 for (uint32_t n = 1; n <= 16; n++) {
4710 GemmMicrokernelTester()
4711 .mr(4)
4712 .nr(16)
4713 .kr(1)
4714 .sr(1)
4715 .m(m)
4716 .n(n)
4717 .k(8)
4718 .iterations(1)
4719 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4720 }
4721 }
4722 }
4723
4724 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_m) {
4725 TEST_REQUIRES_ARM_NEON;
4726 for (uint32_t m = 1; m <= 4; m++) {
4727 GemmMicrokernelTester()
4728 .mr(4)
4729 .nr(16)
4730 .kr(1)
4731 .sr(1)
4732 .m(m)
4733 .n(16)
4734 .k(8)
4735 .iterations(1)
4736 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4737 }
4738 }
4739
4740 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_n) {
4741 TEST_REQUIRES_ARM_NEON;
4742 for (uint32_t n = 1; n <= 16; n++) {
4743 GemmMicrokernelTester()
4744 .mr(4)
4745 .nr(16)
4746 .kr(1)
4747 .sr(1)
4748 .m(4)
4749 .n(n)
4750 .k(8)
4751 .iterations(1)
4752 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4753 }
4754 }
4755
4756 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8) {
4757 TEST_REQUIRES_ARM_NEON;
4758 for (size_t k = 1; k < 8; k++) {
4759 GemmMicrokernelTester()
4760 .mr(4)
4761 .nr(16)
4762 .kr(1)
4763 .sr(1)
4764 .m(4)
4765 .n(16)
4766 .k(k)
4767 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4768 }
4769 }
4770
4771 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8_subtile) {
4772 TEST_REQUIRES_ARM_NEON;
4773 for (size_t k = 1; k < 8; k++) {
4774 for (uint32_t m = 1; m <= 4; m++) {
4775 for (uint32_t n = 1; n <= 16; n++) {
4776 GemmMicrokernelTester()
4777 .mr(4)
4778 .nr(16)
4779 .kr(1)
4780 .sr(1)
4781 .m(m)
4782 .n(n)
4783 .k(k)
4784 .iterations(1)
4785 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4786 }
4787 }
4788 }
4789 }
4790
4791 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8) {
4792 TEST_REQUIRES_ARM_NEON;
4793 for (size_t k = 9; k < 16; k++) {
4794 GemmMicrokernelTester()
4795 .mr(4)
4796 .nr(16)
4797 .kr(1)
4798 .sr(1)
4799 .m(4)
4800 .n(16)
4801 .k(k)
4802 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4803 }
4804 }
4805
4806 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8_subtile) {
4807 TEST_REQUIRES_ARM_NEON;
4808 for (size_t k = 9; k < 16; k++) {
4809 for (uint32_t m = 1; m <= 4; m++) {
4810 for (uint32_t n = 1; n <= 16; n++) {
4811 GemmMicrokernelTester()
4812 .mr(4)
4813 .nr(16)
4814 .kr(1)
4815 .sr(1)
4816 .m(m)
4817 .n(n)
4818 .k(k)
4819 .iterations(1)
4820 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4821 }
4822 }
4823 }
4824 }
4825
4826 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8) {
4827 TEST_REQUIRES_ARM_NEON;
4828 for (size_t k = 16; k <= 80; k += 8) {
4829 GemmMicrokernelTester()
4830 .mr(4)
4831 .nr(16)
4832 .kr(1)
4833 .sr(1)
4834 .m(4)
4835 .n(16)
4836 .k(k)
4837 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4838 }
4839 }
4840
4841 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8_subtile) {
4842 TEST_REQUIRES_ARM_NEON;
4843 for (size_t k = 16; k <= 80; k += 8) {
4844 for (uint32_t m = 1; m <= 4; m++) {
4845 for (uint32_t n = 1; n <= 16; n++) {
4846 GemmMicrokernelTester()
4847 .mr(4)
4848 .nr(16)
4849 .kr(1)
4850 .sr(1)
4851 .m(m)
4852 .n(n)
4853 .k(k)
4854 .iterations(1)
4855 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4856 }
4857 }
4858 }
4859 }
4860
4861 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16) {
4862 TEST_REQUIRES_ARM_NEON;
4863 for (uint32_t n = 17; n < 32; n++) {
4864 for (size_t k = 1; k <= 40; k += 9) {
4865 GemmMicrokernelTester()
4866 .mr(4)
4867 .nr(16)
4868 .kr(1)
4869 .sr(1)
4870 .m(4)
4871 .n(16)
4872 .k(k)
4873 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4874 }
4875 }
4876 }
4877
4878 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_strided_cn) {
4879 TEST_REQUIRES_ARM_NEON;
4880 for (uint32_t n = 17; n < 32; n++) {
4881 for (size_t k = 1; k <= 40; k += 9) {
4882 GemmMicrokernelTester()
4883 .mr(4)
4884 .nr(16)
4885 .kr(1)
4886 .sr(1)
4887 .m(4)
4888 .n(16)
4889 .k(k)
4890 .cn_stride(19)
4891 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4892 }
4893 }
4894 }
4895
4896 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_subtile) {
4897 TEST_REQUIRES_ARM_NEON;
4898 for (uint32_t n = 17; n < 32; n++) {
4899 for (size_t k = 1; k <= 40; k += 9) {
4900 for (uint32_t m = 1; m <= 4; m++) {
4901 GemmMicrokernelTester()
4902 .mr(4)
4903 .nr(16)
4904 .kr(1)
4905 .sr(1)
4906 .m(m)
4907 .n(n)
4908 .k(k)
4909 .iterations(1)
4910 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4911 }
4912 }
4913 }
4914 }
4915
4916 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16) {
4917 TEST_REQUIRES_ARM_NEON;
4918 for (uint32_t n = 32; n <= 48; n += 16) {
4919 for (size_t k = 1; k <= 40; k += 9) {
4920 GemmMicrokernelTester()
4921 .mr(4)
4922 .nr(16)
4923 .kr(1)
4924 .sr(1)
4925 .m(4)
4926 .n(16)
4927 .k(k)
4928 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4929 }
4930 }
4931 }
4932
4933 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_strided_cn) {
4934 TEST_REQUIRES_ARM_NEON;
4935 for (uint32_t n = 32; n <= 48; n += 16) {
4936 for (size_t k = 1; k <= 40; k += 9) {
4937 GemmMicrokernelTester()
4938 .mr(4)
4939 .nr(16)
4940 .kr(1)
4941 .sr(1)
4942 .m(4)
4943 .n(n)
4944 .k(k)
4945 .cn_stride(19)
4946 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4947 }
4948 }
4949 }
4950
4951 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_subtile) {
4952 TEST_REQUIRES_ARM_NEON;
4953 for (uint32_t n = 32; n <= 48; n += 16) {
4954 for (size_t k = 1; k <= 40; k += 9) {
4955 for (uint32_t m = 1; m <= 4; m++) {
4956 GemmMicrokernelTester()
4957 .mr(4)
4958 .nr(16)
4959 .kr(1)
4960 .sr(1)
4961 .m(m)
4962 .n(n)
4963 .k(k)
4964 .iterations(1)
4965 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4966 }
4967 }
4968 }
4969 }
4970
4971 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel) {
4972 TEST_REQUIRES_ARM_NEON;
4973 for (size_t k = 1; k <= 40; k += 9) {
4974 GemmMicrokernelTester()
4975 .mr(4)
4976 .nr(16)
4977 .kr(1)
4978 .sr(1)
4979 .m(4)
4980 .n(16)
4981 .k(k)
4982 .ks(3)
4983 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
4984 }
4985 }
4986
4987 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel_subtile) {
4988 TEST_REQUIRES_ARM_NEON;
4989 for (size_t k = 1; k <= 40; k += 9) {
4990 for (uint32_t m = 1; m <= 4; m++) {
4991 for (uint32_t n = 1; n <= 16; n++) {
4992 GemmMicrokernelTester()
4993 .mr(4)
4994 .nr(16)
4995 .kr(1)
4996 .sr(1)
4997 .m(m)
4998 .n(n)
4999 .k(k)
5000 .ks(3)
5001 .iterations(1)
5002 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5003 }
5004 }
5005 }
5006 }
5007
5008 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_small_kernel) {
5009 TEST_REQUIRES_ARM_NEON;
5010 for (uint32_t n = 17; n < 32; n++) {
5011 for (size_t k = 1; k <= 40; k += 9) {
5012 GemmMicrokernelTester()
5013 .mr(4)
5014 .nr(16)
5015 .kr(1)
5016 .sr(1)
5017 .m(4)
5018 .n(16)
5019 .k(k)
5020 .ks(3)
5021 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5022 }
5023 }
5024 }
5025
5026 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_small_kernel) {
5027 TEST_REQUIRES_ARM_NEON;
5028 for (uint32_t n = 32; n <= 48; n += 16) {
5029 for (size_t k = 1; k <= 40; k += 9) {
5030 GemmMicrokernelTester()
5031 .mr(4)
5032 .nr(16)
5033 .kr(1)
5034 .sr(1)
5035 .m(4)
5036 .n(16)
5037 .k(k)
5038 .ks(3)
5039 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5040 }
5041 }
5042 }
5043
5044 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm_subtile) {
5045 TEST_REQUIRES_ARM_NEON;
5046 for (size_t k = 1; k <= 40; k += 9) {
5047 for (uint32_t m = 1; m <= 4; m++) {
5048 for (uint32_t n = 1; n <= 16; n++) {
5049 GemmMicrokernelTester()
5050 .mr(4)
5051 .nr(16)
5052 .kr(1)
5053 .sr(1)
5054 .m(m)
5055 .n(n)
5056 .k(k)
5057 .cm_stride(19)
5058 .iterations(1)
5059 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5060 }
5061 }
5062 }
5063 }
5064
5065 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, a_offset) {
5066 TEST_REQUIRES_ARM_NEON;
5067 for (size_t k = 1; k <= 40; k += 9) {
5068 GemmMicrokernelTester()
5069 .mr(4)
5070 .nr(16)
5071 .kr(1)
5072 .sr(1)
5073 .m(4)
5074 .n(16)
5075 .k(k)
5076 .ks(3)
5077 .a_offset(163)
5078 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5079 }
5080 }
5081
5082 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, zero) {
5083 TEST_REQUIRES_ARM_NEON;
5084 for (uint32_t mz = 0; mz < 4; mz++) {
5085 for (size_t k = 1; k <= 40; k += 9) {
5086 GemmMicrokernelTester()
5087 .mr(4)
5088 .nr(16)
5089 .kr(1)
5090 .sr(1)
5091 .m(4)
5092 .n(16)
5093 .k(k)
5094 .ks(3)
5095 .a_offset(163)
5096 .zero_index(mz)
5097 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5098 }
5099 }
5100 }
5101
5102 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmin) {
5103 TEST_REQUIRES_ARM_NEON;
5104 GemmMicrokernelTester()
5105 .mr(4)
5106 .nr(16)
5107 .kr(1)
5108 .sr(1)
5109 .m(4)
5110 .n(16)
5111 .k(8)
5112 .qmin(128)
5113 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5114 }
5115
5116 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmax) {
5117 TEST_REQUIRES_ARM_NEON;
5118 GemmMicrokernelTester()
5119 .mr(4)
5120 .nr(16)
5121 .kr(1)
5122 .sr(1)
5123 .m(4)
5124 .n(16)
5125 .k(8)
5126 .qmax(128)
5127 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5128 }
5129
5130 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm) {
5131 TEST_REQUIRES_ARM_NEON;
5132 GemmMicrokernelTester()
5133 .mr(4)
5134 .nr(16)
5135 .kr(1)
5136 .sr(1)
5137 .m(4)
5138 .n(16)
5139 .k(8)
5140 .cm_stride(19)
5141 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5142 }
5143
5144 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_a_zero_point) {
5145 TEST_REQUIRES_ARM_NEON;
5146 for (size_t k = 1; k <= 40; k += 9) {
5147 GemmMicrokernelTester()
5148 .mr(4)
5149 .nr(16)
5150 .kr(1)
5151 .sr(1)
5152 .m(4)
5153 .n(16)
5154 .k(k)
5155 .a_zero_point(0)
5156 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5157 }
5158 }
5159
5160 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_b_zero_point) {
5161 TEST_REQUIRES_ARM_NEON;
5162 for (size_t k = 1; k <= 40; k += 9) {
5163 GemmMicrokernelTester()
5164 .mr(4)
5165 .nr(16)
5166 .kr(1)
5167 .sr(1)
5168 .m(4)
5169 .n(16)
5170 .k(k)
5171 .b_zero_point(0)
5172 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5173 }
5174 }
5175
5176 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_zero_point) {
5177 TEST_REQUIRES_ARM_NEON;
5178 for (size_t k = 1; k <= 40; k += 9) {
5179 GemmMicrokernelTester()
5180 .mr(4)
5181 .nr(16)
5182 .kr(1)
5183 .sr(1)
5184 .m(4)
5185 .n(16)
5186 .k(k)
5187 .a_zero_point(0)
5188 .b_zero_point(0)
5189 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5190 }
5191 }
5192#endif // XNN_ARCH_ARM64
5193
5194
5195#if XNN_ARCH_ARM64
Frank Barchard59ed1da2021-08-02 11:34:59 -07005196 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
5197 TEST_REQUIRES_ARM_NEON;
5198 GemmMicrokernelTester()
5199 .mr(4)
5200 .nr(16)
5201 .kr(1)
5202 .sr(1)
5203 .m(4)
5204 .n(16)
5205 .k(8)
5206 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5207 }
5208
5209 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
5210 TEST_REQUIRES_ARM_NEON;
5211 GemmMicrokernelTester()
5212 .mr(4)
5213 .nr(16)
5214 .kr(1)
5215 .sr(1)
5216 .m(4)
5217 .n(16)
5218 .k(8)
5219 .cn_stride(19)
5220 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5221 }
5222
5223 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
5224 TEST_REQUIRES_ARM_NEON;
5225 for (uint32_t m = 1; m <= 4; m++) {
5226 for (uint32_t n = 1; n <= 16; n++) {
5227 GemmMicrokernelTester()
5228 .mr(4)
5229 .nr(16)
5230 .kr(1)
5231 .sr(1)
5232 .m(m)
5233 .n(n)
5234 .k(8)
5235 .iterations(1)
5236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5237 }
5238 }
5239 }
5240
5241 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
5242 TEST_REQUIRES_ARM_NEON;
5243 for (uint32_t m = 1; m <= 4; m++) {
5244 GemmMicrokernelTester()
5245 .mr(4)
5246 .nr(16)
5247 .kr(1)
5248 .sr(1)
5249 .m(m)
5250 .n(16)
5251 .k(8)
5252 .iterations(1)
5253 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5254 }
5255 }
5256
5257 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
5258 TEST_REQUIRES_ARM_NEON;
5259 for (uint32_t n = 1; n <= 16; n++) {
5260 GemmMicrokernelTester()
5261 .mr(4)
5262 .nr(16)
5263 .kr(1)
5264 .sr(1)
5265 .m(4)
5266 .n(n)
5267 .k(8)
5268 .iterations(1)
5269 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5270 }
5271 }
5272
5273 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
5274 TEST_REQUIRES_ARM_NEON;
5275 for (size_t k = 1; k < 8; k++) {
5276 GemmMicrokernelTester()
5277 .mr(4)
5278 .nr(16)
5279 .kr(1)
5280 .sr(1)
5281 .m(4)
5282 .n(16)
5283 .k(k)
5284 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5285 }
5286 }
5287
5288 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
5289 TEST_REQUIRES_ARM_NEON;
5290 for (size_t k = 1; k < 8; k++) {
5291 for (uint32_t m = 1; m <= 4; m++) {
5292 for (uint32_t n = 1; n <= 16; n++) {
5293 GemmMicrokernelTester()
5294 .mr(4)
5295 .nr(16)
5296 .kr(1)
5297 .sr(1)
5298 .m(m)
5299 .n(n)
5300 .k(k)
5301 .iterations(1)
5302 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5303 }
5304 }
5305 }
5306 }
5307
5308 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
5309 TEST_REQUIRES_ARM_NEON;
5310 for (size_t k = 9; k < 16; k++) {
5311 GemmMicrokernelTester()
5312 .mr(4)
5313 .nr(16)
5314 .kr(1)
5315 .sr(1)
5316 .m(4)
5317 .n(16)
5318 .k(k)
5319 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5320 }
5321 }
5322
5323 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
5324 TEST_REQUIRES_ARM_NEON;
5325 for (size_t k = 9; k < 16; k++) {
5326 for (uint32_t m = 1; m <= 4; m++) {
5327 for (uint32_t n = 1; n <= 16; n++) {
5328 GemmMicrokernelTester()
5329 .mr(4)
5330 .nr(16)
5331 .kr(1)
5332 .sr(1)
5333 .m(m)
5334 .n(n)
5335 .k(k)
5336 .iterations(1)
5337 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5338 }
5339 }
5340 }
5341 }
5342
5343 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
5344 TEST_REQUIRES_ARM_NEON;
5345 for (size_t k = 16; k <= 80; k += 8) {
5346 GemmMicrokernelTester()
5347 .mr(4)
5348 .nr(16)
5349 .kr(1)
5350 .sr(1)
5351 .m(4)
5352 .n(16)
5353 .k(k)
5354 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5355 }
5356 }
5357
5358 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
5359 TEST_REQUIRES_ARM_NEON;
5360 for (size_t k = 16; k <= 80; k += 8) {
5361 for (uint32_t m = 1; m <= 4; m++) {
5362 for (uint32_t n = 1; n <= 16; n++) {
5363 GemmMicrokernelTester()
5364 .mr(4)
5365 .nr(16)
5366 .kr(1)
5367 .sr(1)
5368 .m(m)
5369 .n(n)
5370 .k(k)
5371 .iterations(1)
5372 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5373 }
5374 }
5375 }
5376 }
5377
5378 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
5379 TEST_REQUIRES_ARM_NEON;
5380 for (uint32_t n = 17; n < 32; n++) {
5381 for (size_t k = 1; k <= 40; k += 9) {
5382 GemmMicrokernelTester()
5383 .mr(4)
5384 .nr(16)
5385 .kr(1)
5386 .sr(1)
5387 .m(4)
5388 .n(16)
5389 .k(k)
5390 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5391 }
5392 }
5393 }
5394
5395 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
5396 TEST_REQUIRES_ARM_NEON;
5397 for (uint32_t n = 17; n < 32; n++) {
5398 for (size_t k = 1; k <= 40; k += 9) {
5399 GemmMicrokernelTester()
5400 .mr(4)
5401 .nr(16)
5402 .kr(1)
5403 .sr(1)
5404 .m(4)
5405 .n(16)
5406 .k(k)
5407 .cn_stride(19)
5408 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5409 }
5410 }
5411 }
5412
5413 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
5414 TEST_REQUIRES_ARM_NEON;
5415 for (uint32_t n = 17; n < 32; n++) {
5416 for (size_t k = 1; k <= 40; k += 9) {
5417 for (uint32_t m = 1; m <= 4; m++) {
5418 GemmMicrokernelTester()
5419 .mr(4)
5420 .nr(16)
5421 .kr(1)
5422 .sr(1)
5423 .m(m)
5424 .n(n)
5425 .k(k)
5426 .iterations(1)
5427 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5428 }
5429 }
5430 }
5431 }
5432
5433 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
5434 TEST_REQUIRES_ARM_NEON;
5435 for (uint32_t n = 32; n <= 48; n += 16) {
5436 for (size_t k = 1; k <= 40; k += 9) {
5437 GemmMicrokernelTester()
5438 .mr(4)
5439 .nr(16)
5440 .kr(1)
5441 .sr(1)
5442 .m(4)
5443 .n(16)
5444 .k(k)
5445 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5446 }
5447 }
5448 }
5449
5450 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
5451 TEST_REQUIRES_ARM_NEON;
5452 for (uint32_t n = 32; n <= 48; n += 16) {
5453 for (size_t k = 1; k <= 40; k += 9) {
5454 GemmMicrokernelTester()
5455 .mr(4)
5456 .nr(16)
5457 .kr(1)
5458 .sr(1)
5459 .m(4)
5460 .n(n)
5461 .k(k)
5462 .cn_stride(19)
5463 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5464 }
5465 }
5466 }
5467
5468 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
5469 TEST_REQUIRES_ARM_NEON;
5470 for (uint32_t n = 32; n <= 48; n += 16) {
5471 for (size_t k = 1; k <= 40; k += 9) {
5472 for (uint32_t m = 1; m <= 4; m++) {
5473 GemmMicrokernelTester()
5474 .mr(4)
5475 .nr(16)
5476 .kr(1)
5477 .sr(1)
5478 .m(m)
5479 .n(n)
5480 .k(k)
5481 .iterations(1)
5482 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5483 }
5484 }
5485 }
5486 }
5487
5488 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
5489 TEST_REQUIRES_ARM_NEON;
5490 for (size_t k = 1; k <= 40; k += 9) {
5491 GemmMicrokernelTester()
5492 .mr(4)
5493 .nr(16)
5494 .kr(1)
5495 .sr(1)
5496 .m(4)
5497 .n(16)
5498 .k(k)
5499 .ks(3)
5500 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5501 }
5502 }
5503
5504 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
5505 TEST_REQUIRES_ARM_NEON;
5506 for (size_t k = 1; k <= 40; k += 9) {
5507 for (uint32_t m = 1; m <= 4; m++) {
5508 for (uint32_t n = 1; n <= 16; n++) {
5509 GemmMicrokernelTester()
5510 .mr(4)
5511 .nr(16)
5512 .kr(1)
5513 .sr(1)
5514 .m(m)
5515 .n(n)
5516 .k(k)
5517 .ks(3)
5518 .iterations(1)
5519 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5520 }
5521 }
5522 }
5523 }
5524
5525 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
5526 TEST_REQUIRES_ARM_NEON;
5527 for (uint32_t n = 17; n < 32; n++) {
5528 for (size_t k = 1; k <= 40; k += 9) {
5529 GemmMicrokernelTester()
5530 .mr(4)
5531 .nr(16)
5532 .kr(1)
5533 .sr(1)
5534 .m(4)
5535 .n(16)
5536 .k(k)
5537 .ks(3)
5538 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5539 }
5540 }
5541 }
5542
5543 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
5544 TEST_REQUIRES_ARM_NEON;
5545 for (uint32_t n = 32; n <= 48; n += 16) {
5546 for (size_t k = 1; k <= 40; k += 9) {
5547 GemmMicrokernelTester()
5548 .mr(4)
5549 .nr(16)
5550 .kr(1)
5551 .sr(1)
5552 .m(4)
5553 .n(16)
5554 .k(k)
5555 .ks(3)
5556 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5557 }
5558 }
5559 }
5560
5561 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
5562 TEST_REQUIRES_ARM_NEON;
5563 for (size_t k = 1; k <= 40; k += 9) {
5564 for (uint32_t m = 1; m <= 4; m++) {
5565 for (uint32_t n = 1; n <= 16; n++) {
5566 GemmMicrokernelTester()
5567 .mr(4)
5568 .nr(16)
5569 .kr(1)
5570 .sr(1)
5571 .m(m)
5572 .n(n)
5573 .k(k)
5574 .cm_stride(19)
5575 .iterations(1)
5576 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5577 }
5578 }
5579 }
5580 }
5581
5582 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
5583 TEST_REQUIRES_ARM_NEON;
5584 for (size_t k = 1; k <= 40; k += 9) {
5585 GemmMicrokernelTester()
5586 .mr(4)
5587 .nr(16)
5588 .kr(1)
5589 .sr(1)
5590 .m(4)
5591 .n(16)
5592 .k(k)
5593 .ks(3)
5594 .a_offset(163)
5595 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5596 }
5597 }
5598
5599 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
5600 TEST_REQUIRES_ARM_NEON;
5601 for (uint32_t mz = 0; mz < 4; mz++) {
5602 for (size_t k = 1; k <= 40; k += 9) {
5603 GemmMicrokernelTester()
5604 .mr(4)
5605 .nr(16)
5606 .kr(1)
5607 .sr(1)
5608 .m(4)
5609 .n(16)
5610 .k(k)
5611 .ks(3)
5612 .a_offset(163)
5613 .zero_index(mz)
5614 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5615 }
5616 }
5617 }
5618
5619 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
5620 TEST_REQUIRES_ARM_NEON;
5621 GemmMicrokernelTester()
5622 .mr(4)
5623 .nr(16)
5624 .kr(1)
5625 .sr(1)
5626 .m(4)
5627 .n(16)
5628 .k(8)
5629 .qmin(128)
5630 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5631 }
5632
5633 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
5634 TEST_REQUIRES_ARM_NEON;
5635 GemmMicrokernelTester()
5636 .mr(4)
5637 .nr(16)
5638 .kr(1)
5639 .sr(1)
5640 .m(4)
5641 .n(16)
5642 .k(8)
5643 .qmax(128)
5644 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5645 }
5646
5647 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
5648 TEST_REQUIRES_ARM_NEON;
5649 GemmMicrokernelTester()
5650 .mr(4)
5651 .nr(16)
5652 .kr(1)
5653 .sr(1)
5654 .m(4)
5655 .n(16)
5656 .k(8)
5657 .cm_stride(19)
5658 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5659 }
5660
5661 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_a_zero_point) {
5662 TEST_REQUIRES_ARM_NEON;
5663 for (size_t k = 1; k <= 40; k += 9) {
5664 GemmMicrokernelTester()
5665 .mr(4)
5666 .nr(16)
5667 .kr(1)
5668 .sr(1)
5669 .m(4)
5670 .n(16)
5671 .k(k)
5672 .a_zero_point(0)
5673 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5674 }
5675 }
5676
5677 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_b_zero_point) {
5678 TEST_REQUIRES_ARM_NEON;
5679 for (size_t k = 1; k <= 40; k += 9) {
5680 GemmMicrokernelTester()
5681 .mr(4)
5682 .nr(16)
5683 .kr(1)
5684 .sr(1)
5685 .m(4)
5686 .n(16)
5687 .k(k)
5688 .b_zero_point(0)
5689 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5690 }
5691 }
5692
5693 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_zero_point) {
5694 TEST_REQUIRES_ARM_NEON;
5695 for (size_t k = 1; k <= 40; k += 9) {
5696 GemmMicrokernelTester()
5697 .mr(4)
5698 .nr(16)
5699 .kr(1)
5700 .sr(1)
5701 .m(4)
5702 .n(16)
5703 .k(k)
5704 .a_zero_point(0)
5705 .b_zero_point(0)
5706 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5707 }
5708 }
5709#endif // XNN_ARCH_ARM64
5710
5711
5712#if XNN_ARCH_ARM64
5713 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
5714 TEST_REQUIRES_ARM_NEON;
5715 GemmMicrokernelTester()
5716 .mr(4)
5717 .nr(16)
5718 .kr(1)
5719 .sr(1)
5720 .m(4)
5721 .n(16)
5722 .k(8)
5723 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5724 }
5725
5726 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
5727 TEST_REQUIRES_ARM_NEON;
5728 GemmMicrokernelTester()
5729 .mr(4)
5730 .nr(16)
5731 .kr(1)
5732 .sr(1)
5733 .m(4)
5734 .n(16)
5735 .k(8)
5736 .cn_stride(19)
5737 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5738 }
5739
5740 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
5741 TEST_REQUIRES_ARM_NEON;
5742 for (uint32_t m = 1; m <= 4; m++) {
5743 for (uint32_t n = 1; n <= 16; n++) {
5744 GemmMicrokernelTester()
5745 .mr(4)
5746 .nr(16)
5747 .kr(1)
5748 .sr(1)
5749 .m(m)
5750 .n(n)
5751 .k(8)
5752 .iterations(1)
5753 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5754 }
5755 }
5756 }
5757
5758 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
5759 TEST_REQUIRES_ARM_NEON;
5760 for (uint32_t m = 1; m <= 4; m++) {
5761 GemmMicrokernelTester()
5762 .mr(4)
5763 .nr(16)
5764 .kr(1)
5765 .sr(1)
5766 .m(m)
5767 .n(16)
5768 .k(8)
5769 .iterations(1)
5770 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5771 }
5772 }
5773
5774 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
5775 TEST_REQUIRES_ARM_NEON;
5776 for (uint32_t n = 1; n <= 16; n++) {
5777 GemmMicrokernelTester()
5778 .mr(4)
5779 .nr(16)
5780 .kr(1)
5781 .sr(1)
5782 .m(4)
5783 .n(n)
5784 .k(8)
5785 .iterations(1)
5786 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5787 }
5788 }
5789
5790 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
5791 TEST_REQUIRES_ARM_NEON;
5792 for (size_t k = 1; k < 8; k++) {
5793 GemmMicrokernelTester()
5794 .mr(4)
5795 .nr(16)
5796 .kr(1)
5797 .sr(1)
5798 .m(4)
5799 .n(16)
5800 .k(k)
5801 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5802 }
5803 }
5804
5805 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
5806 TEST_REQUIRES_ARM_NEON;
5807 for (size_t k = 1; k < 8; k++) {
5808 for (uint32_t m = 1; m <= 4; m++) {
5809 for (uint32_t n = 1; n <= 16; n++) {
5810 GemmMicrokernelTester()
5811 .mr(4)
5812 .nr(16)
5813 .kr(1)
5814 .sr(1)
5815 .m(m)
5816 .n(n)
5817 .k(k)
5818 .iterations(1)
5819 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5820 }
5821 }
5822 }
5823 }
5824
5825 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
5826 TEST_REQUIRES_ARM_NEON;
5827 for (size_t k = 9; k < 16; k++) {
5828 GemmMicrokernelTester()
5829 .mr(4)
5830 .nr(16)
5831 .kr(1)
5832 .sr(1)
5833 .m(4)
5834 .n(16)
5835 .k(k)
5836 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5837 }
5838 }
5839
5840 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
5841 TEST_REQUIRES_ARM_NEON;
5842 for (size_t k = 9; k < 16; k++) {
5843 for (uint32_t m = 1; m <= 4; m++) {
5844 for (uint32_t n = 1; n <= 16; n++) {
5845 GemmMicrokernelTester()
5846 .mr(4)
5847 .nr(16)
5848 .kr(1)
5849 .sr(1)
5850 .m(m)
5851 .n(n)
5852 .k(k)
5853 .iterations(1)
5854 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5855 }
5856 }
5857 }
5858 }
5859
5860 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
5861 TEST_REQUIRES_ARM_NEON;
5862 for (size_t k = 16; k <= 80; k += 8) {
5863 GemmMicrokernelTester()
5864 .mr(4)
5865 .nr(16)
5866 .kr(1)
5867 .sr(1)
5868 .m(4)
5869 .n(16)
5870 .k(k)
5871 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5872 }
5873 }
5874
5875 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
5876 TEST_REQUIRES_ARM_NEON;
5877 for (size_t k = 16; k <= 80; k += 8) {
5878 for (uint32_t m = 1; m <= 4; m++) {
5879 for (uint32_t n = 1; n <= 16; n++) {
5880 GemmMicrokernelTester()
5881 .mr(4)
5882 .nr(16)
5883 .kr(1)
5884 .sr(1)
5885 .m(m)
5886 .n(n)
5887 .k(k)
5888 .iterations(1)
5889 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5890 }
5891 }
5892 }
5893 }
5894
5895 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
5896 TEST_REQUIRES_ARM_NEON;
5897 for (uint32_t n = 17; n < 32; n++) {
5898 for (size_t k = 1; k <= 40; k += 9) {
5899 GemmMicrokernelTester()
5900 .mr(4)
5901 .nr(16)
5902 .kr(1)
5903 .sr(1)
5904 .m(4)
5905 .n(16)
5906 .k(k)
5907 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5908 }
5909 }
5910 }
5911
5912 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
5913 TEST_REQUIRES_ARM_NEON;
5914 for (uint32_t n = 17; n < 32; n++) {
5915 for (size_t k = 1; k <= 40; k += 9) {
5916 GemmMicrokernelTester()
5917 .mr(4)
5918 .nr(16)
5919 .kr(1)
5920 .sr(1)
5921 .m(4)
5922 .n(16)
5923 .k(k)
5924 .cn_stride(19)
5925 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5926 }
5927 }
5928 }
5929
5930 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
5931 TEST_REQUIRES_ARM_NEON;
5932 for (uint32_t n = 17; n < 32; n++) {
5933 for (size_t k = 1; k <= 40; k += 9) {
5934 for (uint32_t m = 1; m <= 4; m++) {
5935 GemmMicrokernelTester()
5936 .mr(4)
5937 .nr(16)
5938 .kr(1)
5939 .sr(1)
5940 .m(m)
5941 .n(n)
5942 .k(k)
5943 .iterations(1)
5944 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5945 }
5946 }
5947 }
5948 }
5949
5950 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
5951 TEST_REQUIRES_ARM_NEON;
5952 for (uint32_t n = 32; n <= 48; n += 16) {
5953 for (size_t k = 1; k <= 40; k += 9) {
5954 GemmMicrokernelTester()
5955 .mr(4)
5956 .nr(16)
5957 .kr(1)
5958 .sr(1)
5959 .m(4)
5960 .n(16)
5961 .k(k)
5962 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5963 }
5964 }
5965 }
5966
5967 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
5968 TEST_REQUIRES_ARM_NEON;
5969 for (uint32_t n = 32; n <= 48; n += 16) {
5970 for (size_t k = 1; k <= 40; k += 9) {
5971 GemmMicrokernelTester()
5972 .mr(4)
5973 .nr(16)
5974 .kr(1)
5975 .sr(1)
5976 .m(4)
5977 .n(n)
5978 .k(k)
5979 .cn_stride(19)
5980 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
5981 }
5982 }
5983 }
5984
5985 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
5986 TEST_REQUIRES_ARM_NEON;
5987 for (uint32_t n = 32; n <= 48; n += 16) {
5988 for (size_t k = 1; k <= 40; k += 9) {
5989 for (uint32_t m = 1; m <= 4; m++) {
5990 GemmMicrokernelTester()
5991 .mr(4)
5992 .nr(16)
5993 .kr(1)
5994 .sr(1)
5995 .m(m)
5996 .n(n)
5997 .k(k)
5998 .iterations(1)
5999 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6000 }
6001 }
6002 }
6003 }
6004
6005 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel) {
6006 TEST_REQUIRES_ARM_NEON;
6007 for (size_t k = 1; k <= 40; k += 9) {
6008 GemmMicrokernelTester()
6009 .mr(4)
6010 .nr(16)
6011 .kr(1)
6012 .sr(1)
6013 .m(4)
6014 .n(16)
6015 .k(k)
6016 .ks(3)
6017 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6018 }
6019 }
6020
6021 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, small_kernel_subtile) {
6022 TEST_REQUIRES_ARM_NEON;
6023 for (size_t k = 1; k <= 40; k += 9) {
6024 for (uint32_t m = 1; m <= 4; m++) {
6025 for (uint32_t n = 1; n <= 16; n++) {
6026 GemmMicrokernelTester()
6027 .mr(4)
6028 .nr(16)
6029 .kr(1)
6030 .sr(1)
6031 .m(m)
6032 .n(n)
6033 .k(k)
6034 .ks(3)
6035 .iterations(1)
6036 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6037 }
6038 }
6039 }
6040 }
6041
6042 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_small_kernel) {
6043 TEST_REQUIRES_ARM_NEON;
6044 for (uint32_t n = 17; n < 32; n++) {
6045 for (size_t k = 1; k <= 40; k += 9) {
6046 GemmMicrokernelTester()
6047 .mr(4)
6048 .nr(16)
6049 .kr(1)
6050 .sr(1)
6051 .m(4)
6052 .n(16)
6053 .k(k)
6054 .ks(3)
6055 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6056 }
6057 }
6058 }
6059
6060 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_small_kernel) {
6061 TEST_REQUIRES_ARM_NEON;
6062 for (uint32_t n = 32; n <= 48; n += 16) {
6063 for (size_t k = 1; k <= 40; k += 9) {
6064 GemmMicrokernelTester()
6065 .mr(4)
6066 .nr(16)
6067 .kr(1)
6068 .sr(1)
6069 .m(4)
6070 .n(16)
6071 .k(k)
6072 .ks(3)
6073 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6074 }
6075 }
6076 }
6077
6078 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
6079 TEST_REQUIRES_ARM_NEON;
6080 for (size_t k = 1; k <= 40; k += 9) {
6081 for (uint32_t m = 1; m <= 4; m++) {
6082 for (uint32_t n = 1; n <= 16; n++) {
6083 GemmMicrokernelTester()
6084 .mr(4)
6085 .nr(16)
6086 .kr(1)
6087 .sr(1)
6088 .m(m)
6089 .n(n)
6090 .k(k)
6091 .cm_stride(19)
6092 .iterations(1)
6093 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6094 }
6095 }
6096 }
6097 }
6098
6099 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, a_offset) {
6100 TEST_REQUIRES_ARM_NEON;
6101 for (size_t k = 1; k <= 40; k += 9) {
6102 GemmMicrokernelTester()
6103 .mr(4)
6104 .nr(16)
6105 .kr(1)
6106 .sr(1)
6107 .m(4)
6108 .n(16)
6109 .k(k)
6110 .ks(3)
6111 .a_offset(163)
6112 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6113 }
6114 }
6115
6116 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, zero) {
6117 TEST_REQUIRES_ARM_NEON;
6118 for (uint32_t mz = 0; mz < 4; mz++) {
6119 for (size_t k = 1; k <= 40; k += 9) {
6120 GemmMicrokernelTester()
6121 .mr(4)
6122 .nr(16)
6123 .kr(1)
6124 .sr(1)
6125 .m(4)
6126 .n(16)
6127 .k(k)
6128 .ks(3)
6129 .a_offset(163)
6130 .zero_index(mz)
6131 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6132 }
6133 }
6134 }
6135
6136 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
6137 TEST_REQUIRES_ARM_NEON;
6138 GemmMicrokernelTester()
6139 .mr(4)
6140 .nr(16)
6141 .kr(1)
6142 .sr(1)
6143 .m(4)
6144 .n(16)
6145 .k(8)
6146 .qmin(128)
6147 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6148 }
6149
6150 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
6151 TEST_REQUIRES_ARM_NEON;
6152 GemmMicrokernelTester()
6153 .mr(4)
6154 .nr(16)
6155 .kr(1)
6156 .sr(1)
6157 .m(4)
6158 .n(16)
6159 .k(8)
6160 .qmax(128)
6161 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6162 }
6163
6164 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
6165 TEST_REQUIRES_ARM_NEON;
6166 GemmMicrokernelTester()
6167 .mr(4)
6168 .nr(16)
6169 .kr(1)
6170 .sr(1)
6171 .m(4)
6172 .n(16)
6173 .k(8)
6174 .cm_stride(19)
6175 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6176 }
6177
6178 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_a_zero_point) {
6179 TEST_REQUIRES_ARM_NEON;
6180 for (size_t k = 1; k <= 40; k += 9) {
6181 GemmMicrokernelTester()
6182 .mr(4)
6183 .nr(16)
6184 .kr(1)
6185 .sr(1)
6186 .m(4)
6187 .n(16)
6188 .k(k)
6189 .a_zero_point(0)
6190 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6191 }
6192 }
6193
6194 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_b_zero_point) {
6195 TEST_REQUIRES_ARM_NEON;
6196 for (size_t k = 1; k <= 40; k += 9) {
6197 GemmMicrokernelTester()
6198 .mr(4)
6199 .nr(16)
6200 .kr(1)
6201 .sr(1)
6202 .m(4)
6203 .n(16)
6204 .k(k)
6205 .b_zero_point(0)
6206 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6207 }
6208 }
6209
6210 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_zero_point) {
6211 TEST_REQUIRES_ARM_NEON;
6212 for (size_t k = 1; k <= 40; k += 9) {
6213 GemmMicrokernelTester()
6214 .mr(4)
6215 .nr(16)
6216 .kr(1)
6217 .sr(1)
6218 .m(4)
6219 .n(16)
6220 .k(k)
6221 .a_zero_point(0)
6222 .b_zero_point(0)
6223 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6224 }
6225 }
6226#endif // XNN_ARCH_ARM64
6227
6228
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6230 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
6231 TEST_REQUIRES_ARM_NEON;
6232 GemmMicrokernelTester()
6233 .mr(1)
6234 .nr(8)
6235 .kr(1)
6236 .sr(1)
6237 .m(1)
6238 .n(8)
6239 .k(8)
6240 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6241 }
6242
6243 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cn) {
6244 TEST_REQUIRES_ARM_NEON;
6245 GemmMicrokernelTester()
6246 .mr(1)
6247 .nr(8)
6248 .kr(1)
6249 .sr(1)
6250 .m(1)
6251 .n(8)
6252 .k(8)
6253 .cn_stride(11)
6254 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6255 }
6256
6257 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
6258 TEST_REQUIRES_ARM_NEON;
6259 for (uint32_t m = 1; m <= 1; m++) {
6260 for (uint32_t n = 1; n <= 8; n++) {
6261 GemmMicrokernelTester()
6262 .mr(1)
6263 .nr(8)
6264 .kr(1)
6265 .sr(1)
6266 .m(m)
6267 .n(n)
6268 .k(8)
6269 .iterations(1)
6270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6271 }
6272 }
6273 }
6274
6275 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
6276 TEST_REQUIRES_ARM_NEON;
6277 for (uint32_t m = 1; m <= 1; m++) {
6278 GemmMicrokernelTester()
6279 .mr(1)
6280 .nr(8)
6281 .kr(1)
6282 .sr(1)
6283 .m(m)
6284 .n(8)
6285 .k(8)
6286 .iterations(1)
6287 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6288 }
6289 }
6290
6291 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
6292 TEST_REQUIRES_ARM_NEON;
6293 for (uint32_t n = 1; n <= 8; n++) {
6294 GemmMicrokernelTester()
6295 .mr(1)
6296 .nr(8)
6297 .kr(1)
6298 .sr(1)
6299 .m(1)
6300 .n(n)
6301 .k(8)
6302 .iterations(1)
6303 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6304 }
6305 }
6306
6307 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8) {
6308 TEST_REQUIRES_ARM_NEON;
6309 for (size_t k = 1; k < 8; k++) {
6310 GemmMicrokernelTester()
6311 .mr(1)
6312 .nr(8)
6313 .kr(1)
6314 .sr(1)
6315 .m(1)
6316 .n(8)
6317 .k(k)
6318 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6319 }
6320 }
6321
6322 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
6323 TEST_REQUIRES_ARM_NEON;
6324 for (size_t k = 1; k < 8; k++) {
6325 for (uint32_t m = 1; m <= 1; m++) {
6326 for (uint32_t n = 1; n <= 8; n++) {
6327 GemmMicrokernelTester()
6328 .mr(1)
6329 .nr(8)
6330 .kr(1)
6331 .sr(1)
6332 .m(m)
6333 .n(n)
6334 .k(k)
6335 .iterations(1)
6336 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6337 }
6338 }
6339 }
6340 }
6341
6342 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8) {
6343 TEST_REQUIRES_ARM_NEON;
6344 for (size_t k = 9; k < 16; k++) {
6345 GemmMicrokernelTester()
6346 .mr(1)
6347 .nr(8)
6348 .kr(1)
6349 .sr(1)
6350 .m(1)
6351 .n(8)
6352 .k(k)
6353 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6354 }
6355 }
6356
6357 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
6358 TEST_REQUIRES_ARM_NEON;
6359 for (size_t k = 9; k < 16; k++) {
6360 for (uint32_t m = 1; m <= 1; m++) {
6361 for (uint32_t n = 1; n <= 8; n++) {
6362 GemmMicrokernelTester()
6363 .mr(1)
6364 .nr(8)
6365 .kr(1)
6366 .sr(1)
6367 .m(m)
6368 .n(n)
6369 .k(k)
6370 .iterations(1)
6371 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6372 }
6373 }
6374 }
6375 }
6376
6377 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8) {
6378 TEST_REQUIRES_ARM_NEON;
6379 for (size_t k = 16; k <= 80; k += 8) {
6380 GemmMicrokernelTester()
6381 .mr(1)
6382 .nr(8)
6383 .kr(1)
6384 .sr(1)
6385 .m(1)
6386 .n(8)
6387 .k(k)
6388 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6389 }
6390 }
6391
6392 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
6393 TEST_REQUIRES_ARM_NEON;
6394 for (size_t k = 16; k <= 80; k += 8) {
6395 for (uint32_t m = 1; m <= 1; m++) {
6396 for (uint32_t n = 1; n <= 8; n++) {
6397 GemmMicrokernelTester()
6398 .mr(1)
6399 .nr(8)
6400 .kr(1)
6401 .sr(1)
6402 .m(m)
6403 .n(n)
6404 .k(k)
6405 .iterations(1)
6406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6407 }
6408 }
6409 }
6410 }
6411
6412 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8) {
6413 TEST_REQUIRES_ARM_NEON;
6414 for (uint32_t n = 9; n < 16; n++) {
6415 for (size_t k = 1; k <= 40; k += 9) {
6416 GemmMicrokernelTester()
6417 .mr(1)
6418 .nr(8)
6419 .kr(1)
6420 .sr(1)
6421 .m(1)
6422 .n(8)
6423 .k(k)
6424 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6425 }
6426 }
6427 }
6428
6429 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
6430 TEST_REQUIRES_ARM_NEON;
6431 for (uint32_t n = 9; n < 16; n++) {
6432 for (size_t k = 1; k <= 40; k += 9) {
6433 GemmMicrokernelTester()
6434 .mr(1)
6435 .nr(8)
6436 .kr(1)
6437 .sr(1)
6438 .m(1)
6439 .n(8)
6440 .k(k)
6441 .cn_stride(11)
6442 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6443 }
6444 }
6445 }
6446
6447 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
6448 TEST_REQUIRES_ARM_NEON;
6449 for (uint32_t n = 9; n < 16; n++) {
6450 for (size_t k = 1; k <= 40; k += 9) {
6451 for (uint32_t m = 1; m <= 1; m++) {
6452 GemmMicrokernelTester()
6453 .mr(1)
6454 .nr(8)
6455 .kr(1)
6456 .sr(1)
6457 .m(m)
6458 .n(n)
6459 .k(k)
6460 .iterations(1)
6461 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6462 }
6463 }
6464 }
6465 }
6466
6467 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8) {
6468 TEST_REQUIRES_ARM_NEON;
6469 for (uint32_t n = 16; n <= 24; n += 8) {
6470 for (size_t k = 1; k <= 40; k += 9) {
6471 GemmMicrokernelTester()
6472 .mr(1)
6473 .nr(8)
6474 .kr(1)
6475 .sr(1)
6476 .m(1)
6477 .n(8)
6478 .k(k)
6479 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6480 }
6481 }
6482 }
6483
6484 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
6485 TEST_REQUIRES_ARM_NEON;
6486 for (uint32_t n = 16; n <= 24; n += 8) {
6487 for (size_t k = 1; k <= 40; k += 9) {
6488 GemmMicrokernelTester()
6489 .mr(1)
6490 .nr(8)
6491 .kr(1)
6492 .sr(1)
6493 .m(1)
6494 .n(n)
6495 .k(k)
6496 .cn_stride(11)
6497 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6498 }
6499 }
6500 }
6501
6502 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
6503 TEST_REQUIRES_ARM_NEON;
6504 for (uint32_t n = 16; n <= 24; n += 8) {
6505 for (size_t k = 1; k <= 40; k += 9) {
6506 for (uint32_t m = 1; m <= 1; m++) {
6507 GemmMicrokernelTester()
6508 .mr(1)
6509 .nr(8)
6510 .kr(1)
6511 .sr(1)
6512 .m(m)
6513 .n(n)
6514 .k(k)
6515 .iterations(1)
6516 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6517 }
6518 }
6519 }
6520 }
6521
6522 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel) {
6523 TEST_REQUIRES_ARM_NEON;
6524 for (size_t k = 1; k <= 40; k += 9) {
6525 GemmMicrokernelTester()
6526 .mr(1)
6527 .nr(8)
6528 .kr(1)
6529 .sr(1)
6530 .m(1)
6531 .n(8)
6532 .k(k)
6533 .ks(3)
6534 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6535 }
6536 }
6537
6538 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, small_kernel_subtile) {
6539 TEST_REQUIRES_ARM_NEON;
6540 for (size_t k = 1; k <= 40; k += 9) {
6541 for (uint32_t m = 1; m <= 1; m++) {
6542 for (uint32_t n = 1; n <= 8; n++) {
6543 GemmMicrokernelTester()
6544 .mr(1)
6545 .nr(8)
6546 .kr(1)
6547 .sr(1)
6548 .m(m)
6549 .n(n)
6550 .k(k)
6551 .ks(3)
6552 .iterations(1)
6553 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6554 }
6555 }
6556 }
6557 }
6558
6559 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
6560 TEST_REQUIRES_ARM_NEON;
6561 for (uint32_t n = 9; n < 16; n++) {
6562 for (size_t k = 1; k <= 40; k += 9) {
6563 GemmMicrokernelTester()
6564 .mr(1)
6565 .nr(8)
6566 .kr(1)
6567 .sr(1)
6568 .m(1)
6569 .n(8)
6570 .k(k)
6571 .ks(3)
6572 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6573 }
6574 }
6575 }
6576
6577 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
6578 TEST_REQUIRES_ARM_NEON;
6579 for (uint32_t n = 16; n <= 24; n += 8) {
6580 for (size_t k = 1; k <= 40; k += 9) {
6581 GemmMicrokernelTester()
6582 .mr(1)
6583 .nr(8)
6584 .kr(1)
6585 .sr(1)
6586 .m(1)
6587 .n(8)
6588 .k(k)
6589 .ks(3)
6590 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6591 }
6592 }
6593 }
6594
6595 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
6596 TEST_REQUIRES_ARM_NEON;
6597 for (size_t k = 1; k <= 40; k += 9) {
6598 for (uint32_t m = 1; m <= 1; m++) {
6599 for (uint32_t n = 1; n <= 8; n++) {
6600 GemmMicrokernelTester()
6601 .mr(1)
6602 .nr(8)
6603 .kr(1)
6604 .sr(1)
6605 .m(m)
6606 .n(n)
6607 .k(k)
6608 .cm_stride(11)
6609 .iterations(1)
6610 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6611 }
6612 }
6613 }
6614 }
6615
6616 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, a_offset) {
6617 TEST_REQUIRES_ARM_NEON;
6618 for (size_t k = 1; k <= 40; k += 9) {
6619 GemmMicrokernelTester()
6620 .mr(1)
6621 .nr(8)
6622 .kr(1)
6623 .sr(1)
6624 .m(1)
6625 .n(8)
6626 .k(k)
6627 .ks(3)
6628 .a_offset(43)
6629 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6630 }
6631 }
6632
6633 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, zero) {
6634 TEST_REQUIRES_ARM_NEON;
6635 for (uint32_t mz = 0; mz < 1; mz++) {
6636 for (size_t k = 1; k <= 40; k += 9) {
6637 GemmMicrokernelTester()
6638 .mr(1)
6639 .nr(8)
6640 .kr(1)
6641 .sr(1)
6642 .m(1)
6643 .n(8)
6644 .k(k)
6645 .ks(3)
6646 .a_offset(43)
6647 .zero_index(mz)
6648 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6649 }
6650 }
6651 }
6652
6653 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmin) {
6654 TEST_REQUIRES_ARM_NEON;
6655 GemmMicrokernelTester()
6656 .mr(1)
6657 .nr(8)
6658 .kr(1)
6659 .sr(1)
6660 .m(1)
6661 .n(8)
6662 .k(8)
6663 .qmin(128)
6664 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6665 }
6666
6667 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmax) {
6668 TEST_REQUIRES_ARM_NEON;
6669 GemmMicrokernelTester()
6670 .mr(1)
6671 .nr(8)
6672 .kr(1)
6673 .sr(1)
6674 .m(1)
6675 .n(8)
6676 .k(8)
6677 .qmax(128)
6678 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6679 }
6680
6681 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm) {
6682 TEST_REQUIRES_ARM_NEON;
6683 GemmMicrokernelTester()
6684 .mr(1)
6685 .nr(8)
6686 .kr(1)
6687 .sr(1)
6688 .m(1)
6689 .n(8)
6690 .k(8)
6691 .cm_stride(11)
6692 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6693 }
6694
6695 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_a_zero_point) {
6696 TEST_REQUIRES_ARM_NEON;
6697 for (size_t k = 1; k <= 40; k += 9) {
6698 GemmMicrokernelTester()
6699 .mr(1)
6700 .nr(8)
6701 .kr(1)
6702 .sr(1)
6703 .m(1)
6704 .n(8)
6705 .k(k)
6706 .a_zero_point(0)
6707 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6708 }
6709 }
6710
6711 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_b_zero_point) {
6712 TEST_REQUIRES_ARM_NEON;
6713 for (size_t k = 1; k <= 40; k += 9) {
6714 GemmMicrokernelTester()
6715 .mr(1)
6716 .nr(8)
6717 .kr(1)
6718 .sr(1)
6719 .m(1)
6720 .n(8)
6721 .k(k)
6722 .b_zero_point(0)
6723 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6724 }
6725 }
6726
6727 TEST(QU8_IGEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_zero_point) {
6728 TEST_REQUIRES_ARM_NEON;
6729 for (size_t k = 1; k <= 40; k += 9) {
6730 GemmMicrokernelTester()
6731 .mr(1)
6732 .nr(8)
6733 .kr(1)
6734 .sr(1)
6735 .m(1)
6736 .n(8)
6737 .k(k)
6738 .a_zero_point(0)
6739 .b_zero_point(0)
6740 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6741 }
6742 }
6743#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6744
6745
6746#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6747 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) {
6748 TEST_REQUIRES_ARM_NEON;
6749 GemmMicrokernelTester()
6750 .mr(4)
6751 .nr(8)
6752 .kr(1)
6753 .sr(1)
6754 .m(4)
6755 .n(8)
6756 .k(8)
6757 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6758 }
6759
6760 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) {
6761 TEST_REQUIRES_ARM_NEON;
6762 GemmMicrokernelTester()
6763 .mr(4)
6764 .nr(8)
6765 .kr(1)
6766 .sr(1)
6767 .m(4)
6768 .n(8)
6769 .k(8)
6770 .cn_stride(11)
6771 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6772 }
6773
6774 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
6775 TEST_REQUIRES_ARM_NEON;
6776 for (uint32_t m = 1; m <= 4; m++) {
6777 for (uint32_t n = 1; n <= 8; n++) {
6778 GemmMicrokernelTester()
6779 .mr(4)
6780 .nr(8)
6781 .kr(1)
6782 .sr(1)
6783 .m(m)
6784 .n(n)
6785 .k(8)
6786 .iterations(1)
6787 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6788 }
6789 }
6790 }
6791
6792 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
6793 TEST_REQUIRES_ARM_NEON;
6794 for (uint32_t m = 1; m <= 4; m++) {
6795 GemmMicrokernelTester()
6796 .mr(4)
6797 .nr(8)
6798 .kr(1)
6799 .sr(1)
6800 .m(m)
6801 .n(8)
6802 .k(8)
6803 .iterations(1)
6804 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6805 }
6806 }
6807
6808 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
6809 TEST_REQUIRES_ARM_NEON;
6810 for (uint32_t n = 1; n <= 8; n++) {
6811 GemmMicrokernelTester()
6812 .mr(4)
6813 .nr(8)
6814 .kr(1)
6815 .sr(1)
6816 .m(4)
6817 .n(n)
6818 .k(8)
6819 .iterations(1)
6820 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6821 }
6822 }
6823
6824 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) {
6825 TEST_REQUIRES_ARM_NEON;
6826 for (size_t k = 1; k < 8; k++) {
6827 GemmMicrokernelTester()
6828 .mr(4)
6829 .nr(8)
6830 .kr(1)
6831 .sr(1)
6832 .m(4)
6833 .n(8)
6834 .k(k)
6835 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6836 }
6837 }
6838
6839 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
6840 TEST_REQUIRES_ARM_NEON;
6841 for (size_t k = 1; k < 8; k++) {
6842 for (uint32_t m = 1; m <= 4; m++) {
6843 for (uint32_t n = 1; n <= 8; n++) {
6844 GemmMicrokernelTester()
6845 .mr(4)
6846 .nr(8)
6847 .kr(1)
6848 .sr(1)
6849 .m(m)
6850 .n(n)
6851 .k(k)
6852 .iterations(1)
6853 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6854 }
6855 }
6856 }
6857 }
6858
6859 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) {
6860 TEST_REQUIRES_ARM_NEON;
6861 for (size_t k = 9; k < 16; k++) {
6862 GemmMicrokernelTester()
6863 .mr(4)
6864 .nr(8)
6865 .kr(1)
6866 .sr(1)
6867 .m(4)
6868 .n(8)
6869 .k(k)
6870 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6871 }
6872 }
6873
6874 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
6875 TEST_REQUIRES_ARM_NEON;
6876 for (size_t k = 9; k < 16; k++) {
6877 for (uint32_t m = 1; m <= 4; m++) {
6878 for (uint32_t n = 1; n <= 8; n++) {
6879 GemmMicrokernelTester()
6880 .mr(4)
6881 .nr(8)
6882 .kr(1)
6883 .sr(1)
6884 .m(m)
6885 .n(n)
6886 .k(k)
6887 .iterations(1)
6888 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6889 }
6890 }
6891 }
6892 }
6893
6894 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) {
6895 TEST_REQUIRES_ARM_NEON;
6896 for (size_t k = 16; k <= 80; k += 8) {
6897 GemmMicrokernelTester()
6898 .mr(4)
6899 .nr(8)
6900 .kr(1)
6901 .sr(1)
6902 .m(4)
6903 .n(8)
6904 .k(k)
6905 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6906 }
6907 }
6908
6909 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
6910 TEST_REQUIRES_ARM_NEON;
6911 for (size_t k = 16; k <= 80; k += 8) {
6912 for (uint32_t m = 1; m <= 4; m++) {
6913 for (uint32_t n = 1; n <= 8; n++) {
6914 GemmMicrokernelTester()
6915 .mr(4)
6916 .nr(8)
6917 .kr(1)
6918 .sr(1)
6919 .m(m)
6920 .n(n)
6921 .k(k)
6922 .iterations(1)
6923 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6924 }
6925 }
6926 }
6927 }
6928
6929 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) {
6930 TEST_REQUIRES_ARM_NEON;
6931 for (uint32_t n = 9; n < 16; n++) {
6932 for (size_t k = 1; k <= 40; k += 9) {
6933 GemmMicrokernelTester()
6934 .mr(4)
6935 .nr(8)
6936 .kr(1)
6937 .sr(1)
6938 .m(4)
6939 .n(8)
6940 .k(k)
6941 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6942 }
6943 }
6944 }
6945
6946 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
6947 TEST_REQUIRES_ARM_NEON;
6948 for (uint32_t n = 9; n < 16; n++) {
6949 for (size_t k = 1; k <= 40; k += 9) {
6950 GemmMicrokernelTester()
6951 .mr(4)
6952 .nr(8)
6953 .kr(1)
6954 .sr(1)
6955 .m(4)
6956 .n(8)
6957 .k(k)
6958 .cn_stride(11)
6959 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6960 }
6961 }
6962 }
6963
6964 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
6965 TEST_REQUIRES_ARM_NEON;
6966 for (uint32_t n = 9; n < 16; n++) {
6967 for (size_t k = 1; k <= 40; k += 9) {
6968 for (uint32_t m = 1; m <= 4; m++) {
6969 GemmMicrokernelTester()
6970 .mr(4)
6971 .nr(8)
6972 .kr(1)
6973 .sr(1)
6974 .m(m)
6975 .n(n)
6976 .k(k)
6977 .iterations(1)
6978 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6979 }
6980 }
6981 }
6982 }
6983
6984 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) {
6985 TEST_REQUIRES_ARM_NEON;
6986 for (uint32_t n = 16; n <= 24; n += 8) {
6987 for (size_t k = 1; k <= 40; k += 9) {
6988 GemmMicrokernelTester()
6989 .mr(4)
6990 .nr(8)
6991 .kr(1)
6992 .sr(1)
6993 .m(4)
6994 .n(8)
6995 .k(k)
6996 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
6997 }
6998 }
6999 }
7000
7001 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
7002 TEST_REQUIRES_ARM_NEON;
7003 for (uint32_t n = 16; n <= 24; n += 8) {
7004 for (size_t k = 1; k <= 40; k += 9) {
7005 GemmMicrokernelTester()
7006 .mr(4)
7007 .nr(8)
7008 .kr(1)
7009 .sr(1)
7010 .m(4)
7011 .n(n)
7012 .k(k)
7013 .cn_stride(11)
7014 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7015 }
7016 }
7017 }
7018
7019 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
7020 TEST_REQUIRES_ARM_NEON;
7021 for (uint32_t n = 16; n <= 24; n += 8) {
7022 for (size_t k = 1; k <= 40; k += 9) {
7023 for (uint32_t m = 1; m <= 4; m++) {
7024 GemmMicrokernelTester()
7025 .mr(4)
7026 .nr(8)
7027 .kr(1)
7028 .sr(1)
7029 .m(m)
7030 .n(n)
7031 .k(k)
7032 .iterations(1)
7033 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7034 }
7035 }
7036 }
7037 }
7038
7039 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel) {
7040 TEST_REQUIRES_ARM_NEON;
7041 for (size_t k = 1; k <= 40; k += 9) {
7042 GemmMicrokernelTester()
7043 .mr(4)
7044 .nr(8)
7045 .kr(1)
7046 .sr(1)
7047 .m(4)
7048 .n(8)
7049 .k(k)
7050 .ks(3)
7051 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7052 }
7053 }
7054
7055 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, small_kernel_subtile) {
7056 TEST_REQUIRES_ARM_NEON;
7057 for (size_t k = 1; k <= 40; k += 9) {
7058 for (uint32_t m = 1; m <= 4; m++) {
7059 for (uint32_t n = 1; n <= 8; n++) {
7060 GemmMicrokernelTester()
7061 .mr(4)
7062 .nr(8)
7063 .kr(1)
7064 .sr(1)
7065 .m(m)
7066 .n(n)
7067 .k(k)
7068 .ks(3)
7069 .iterations(1)
7070 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7071 }
7072 }
7073 }
7074 }
7075
7076 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
7077 TEST_REQUIRES_ARM_NEON;
7078 for (uint32_t n = 9; n < 16; n++) {
7079 for (size_t k = 1; k <= 40; k += 9) {
7080 GemmMicrokernelTester()
7081 .mr(4)
7082 .nr(8)
7083 .kr(1)
7084 .sr(1)
7085 .m(4)
7086 .n(8)
7087 .k(k)
7088 .ks(3)
7089 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7090 }
7091 }
7092 }
7093
7094 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
7095 TEST_REQUIRES_ARM_NEON;
7096 for (uint32_t n = 16; n <= 24; n += 8) {
7097 for (size_t k = 1; k <= 40; k += 9) {
7098 GemmMicrokernelTester()
7099 .mr(4)
7100 .nr(8)
7101 .kr(1)
7102 .sr(1)
7103 .m(4)
7104 .n(8)
7105 .k(k)
7106 .ks(3)
7107 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7108 }
7109 }
7110 }
7111
7112 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
7113 TEST_REQUIRES_ARM_NEON;
7114 for (size_t k = 1; k <= 40; k += 9) {
7115 for (uint32_t m = 1; m <= 4; m++) {
7116 for (uint32_t n = 1; n <= 8; n++) {
7117 GemmMicrokernelTester()
7118 .mr(4)
7119 .nr(8)
7120 .kr(1)
7121 .sr(1)
7122 .m(m)
7123 .n(n)
7124 .k(k)
7125 .cm_stride(11)
7126 .iterations(1)
7127 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7128 }
7129 }
7130 }
7131 }
7132
7133 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, a_offset) {
7134 TEST_REQUIRES_ARM_NEON;
7135 for (size_t k = 1; k <= 40; k += 9) {
7136 GemmMicrokernelTester()
7137 .mr(4)
7138 .nr(8)
7139 .kr(1)
7140 .sr(1)
7141 .m(4)
7142 .n(8)
7143 .k(k)
7144 .ks(3)
7145 .a_offset(163)
7146 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7147 }
7148 }
7149
7150 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, zero) {
7151 TEST_REQUIRES_ARM_NEON;
7152 for (uint32_t mz = 0; mz < 4; mz++) {
7153 for (size_t k = 1; k <= 40; k += 9) {
7154 GemmMicrokernelTester()
7155 .mr(4)
7156 .nr(8)
7157 .kr(1)
7158 .sr(1)
7159 .m(4)
7160 .n(8)
7161 .k(k)
7162 .ks(3)
7163 .a_offset(163)
7164 .zero_index(mz)
7165 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7166 }
7167 }
7168 }
7169
7170 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) {
7171 TEST_REQUIRES_ARM_NEON;
7172 GemmMicrokernelTester()
7173 .mr(4)
7174 .nr(8)
7175 .kr(1)
7176 .sr(1)
7177 .m(4)
7178 .n(8)
7179 .k(8)
7180 .qmin(128)
7181 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7182 }
7183
7184 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) {
7185 TEST_REQUIRES_ARM_NEON;
7186 GemmMicrokernelTester()
7187 .mr(4)
7188 .nr(8)
7189 .kr(1)
7190 .sr(1)
7191 .m(4)
7192 .n(8)
7193 .k(8)
7194 .qmax(128)
7195 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7196 }
7197
7198 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) {
7199 TEST_REQUIRES_ARM_NEON;
7200 GemmMicrokernelTester()
7201 .mr(4)
7202 .nr(8)
7203 .kr(1)
7204 .sr(1)
7205 .m(4)
7206 .n(8)
7207 .k(8)
7208 .cm_stride(11)
7209 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7210 }
7211
7212 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_a_zero_point) {
7213 TEST_REQUIRES_ARM_NEON;
7214 for (size_t k = 1; k <= 40; k += 9) {
7215 GemmMicrokernelTester()
7216 .mr(4)
7217 .nr(8)
7218 .kr(1)
7219 .sr(1)
7220 .m(4)
7221 .n(8)
7222 .k(k)
7223 .a_zero_point(0)
7224 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7225 }
7226 }
7227
7228 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_b_zero_point) {
7229 TEST_REQUIRES_ARM_NEON;
7230 for (size_t k = 1; k <= 40; k += 9) {
7231 GemmMicrokernelTester()
7232 .mr(4)
7233 .nr(8)
7234 .kr(1)
7235 .sr(1)
7236 .m(4)
7237 .n(8)
7238 .k(k)
7239 .b_zero_point(0)
7240 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7241 }
7242 }
7243
7244 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_zero_point) {
7245 TEST_REQUIRES_ARM_NEON;
7246 for (size_t k = 1; k <= 40; k += 9) {
7247 GemmMicrokernelTester()
7248 .mr(4)
7249 .nr(8)
7250 .kr(1)
7251 .sr(1)
7252 .m(4)
7253 .n(8)
7254 .k(k)
7255 .a_zero_point(0)
7256 .b_zero_point(0)
7257 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7258 }
7259 }
7260#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7261
7262
7263#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7264 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) {
7265 TEST_REQUIRES_ARM_NEON;
7266 GemmMicrokernelTester()
7267 .mr(1)
7268 .nr(16)
7269 .kr(1)
7270 .sr(1)
7271 .m(1)
7272 .n(16)
7273 .k(8)
7274 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7275 }
7276
7277 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cn) {
7278 TEST_REQUIRES_ARM_NEON;
7279 GemmMicrokernelTester()
7280 .mr(1)
7281 .nr(16)
7282 .kr(1)
7283 .sr(1)
7284 .m(1)
7285 .n(16)
7286 .k(8)
7287 .cn_stride(19)
7288 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7289 }
7290
7291 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
7292 TEST_REQUIRES_ARM_NEON;
7293 for (uint32_t m = 1; m <= 1; m++) {
7294 for (uint32_t n = 1; n <= 16; n++) {
7295 GemmMicrokernelTester()
7296 .mr(1)
7297 .nr(16)
7298 .kr(1)
7299 .sr(1)
7300 .m(m)
7301 .n(n)
7302 .k(8)
7303 .iterations(1)
7304 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7305 }
7306 }
7307 }
7308
7309 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
7310 TEST_REQUIRES_ARM_NEON;
7311 for (uint32_t m = 1; m <= 1; m++) {
7312 GemmMicrokernelTester()
7313 .mr(1)
7314 .nr(16)
7315 .kr(1)
7316 .sr(1)
7317 .m(m)
7318 .n(16)
7319 .k(8)
7320 .iterations(1)
7321 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7322 }
7323 }
7324
7325 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
7326 TEST_REQUIRES_ARM_NEON;
7327 for (uint32_t n = 1; n <= 16; n++) {
7328 GemmMicrokernelTester()
7329 .mr(1)
7330 .nr(16)
7331 .kr(1)
7332 .sr(1)
7333 .m(1)
7334 .n(n)
7335 .k(8)
7336 .iterations(1)
7337 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7338 }
7339 }
7340
7341 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8) {
7342 TEST_REQUIRES_ARM_NEON;
7343 for (size_t k = 1; k < 8; k++) {
7344 GemmMicrokernelTester()
7345 .mr(1)
7346 .nr(16)
7347 .kr(1)
7348 .sr(1)
7349 .m(1)
7350 .n(16)
7351 .k(k)
7352 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7353 }
7354 }
7355
7356 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
7357 TEST_REQUIRES_ARM_NEON;
7358 for (size_t k = 1; k < 8; k++) {
7359 for (uint32_t m = 1; m <= 1; m++) {
7360 for (uint32_t n = 1; n <= 16; n++) {
7361 GemmMicrokernelTester()
7362 .mr(1)
7363 .nr(16)
7364 .kr(1)
7365 .sr(1)
7366 .m(m)
7367 .n(n)
7368 .k(k)
7369 .iterations(1)
7370 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7371 }
7372 }
7373 }
7374 }
7375
7376 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8) {
7377 TEST_REQUIRES_ARM_NEON;
7378 for (size_t k = 9; k < 16; k++) {
7379 GemmMicrokernelTester()
7380 .mr(1)
7381 .nr(16)
7382 .kr(1)
7383 .sr(1)
7384 .m(1)
7385 .n(16)
7386 .k(k)
7387 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7388 }
7389 }
7390
7391 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
7392 TEST_REQUIRES_ARM_NEON;
7393 for (size_t k = 9; k < 16; k++) {
7394 for (uint32_t m = 1; m <= 1; m++) {
7395 for (uint32_t n = 1; n <= 16; n++) {
7396 GemmMicrokernelTester()
7397 .mr(1)
7398 .nr(16)
7399 .kr(1)
7400 .sr(1)
7401 .m(m)
7402 .n(n)
7403 .k(k)
7404 .iterations(1)
7405 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7406 }
7407 }
7408 }
7409 }
7410
7411 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8) {
7412 TEST_REQUIRES_ARM_NEON;
7413 for (size_t k = 16; k <= 80; k += 8) {
7414 GemmMicrokernelTester()
7415 .mr(1)
7416 .nr(16)
7417 .kr(1)
7418 .sr(1)
7419 .m(1)
7420 .n(16)
7421 .k(k)
7422 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7423 }
7424 }
7425
7426 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
7427 TEST_REQUIRES_ARM_NEON;
7428 for (size_t k = 16; k <= 80; k += 8) {
7429 for (uint32_t m = 1; m <= 1; m++) {
7430 for (uint32_t n = 1; n <= 16; n++) {
7431 GemmMicrokernelTester()
7432 .mr(1)
7433 .nr(16)
7434 .kr(1)
7435 .sr(1)
7436 .m(m)
7437 .n(n)
7438 .k(k)
7439 .iterations(1)
7440 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7441 }
7442 }
7443 }
7444 }
7445
7446 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16) {
7447 TEST_REQUIRES_ARM_NEON;
7448 for (uint32_t n = 17; n < 32; n++) {
7449 for (size_t k = 1; k <= 40; k += 9) {
7450 GemmMicrokernelTester()
7451 .mr(1)
7452 .nr(16)
7453 .kr(1)
7454 .sr(1)
7455 .m(1)
7456 .n(16)
7457 .k(k)
7458 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7459 }
7460 }
7461 }
7462
7463 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
7464 TEST_REQUIRES_ARM_NEON;
7465 for (uint32_t n = 17; n < 32; n++) {
7466 for (size_t k = 1; k <= 40; k += 9) {
7467 GemmMicrokernelTester()
7468 .mr(1)
7469 .nr(16)
7470 .kr(1)
7471 .sr(1)
7472 .m(1)
7473 .n(16)
7474 .k(k)
7475 .cn_stride(19)
7476 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7477 }
7478 }
7479 }
7480
7481 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
7482 TEST_REQUIRES_ARM_NEON;
7483 for (uint32_t n = 17; n < 32; n++) {
7484 for (size_t k = 1; k <= 40; k += 9) {
7485 for (uint32_t m = 1; m <= 1; m++) {
7486 GemmMicrokernelTester()
7487 .mr(1)
7488 .nr(16)
7489 .kr(1)
7490 .sr(1)
7491 .m(m)
7492 .n(n)
7493 .k(k)
7494 .iterations(1)
7495 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7496 }
7497 }
7498 }
7499 }
7500
7501 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16) {
7502 TEST_REQUIRES_ARM_NEON;
7503 for (uint32_t n = 32; n <= 48; n += 16) {
7504 for (size_t k = 1; k <= 40; k += 9) {
7505 GemmMicrokernelTester()
7506 .mr(1)
7507 .nr(16)
7508 .kr(1)
7509 .sr(1)
7510 .m(1)
7511 .n(16)
7512 .k(k)
7513 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7514 }
7515 }
7516 }
7517
7518 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
7519 TEST_REQUIRES_ARM_NEON;
7520 for (uint32_t n = 32; n <= 48; n += 16) {
7521 for (size_t k = 1; k <= 40; k += 9) {
7522 GemmMicrokernelTester()
7523 .mr(1)
7524 .nr(16)
7525 .kr(1)
7526 .sr(1)
7527 .m(1)
7528 .n(n)
7529 .k(k)
7530 .cn_stride(19)
7531 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7532 }
7533 }
7534 }
7535
7536 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
7537 TEST_REQUIRES_ARM_NEON;
7538 for (uint32_t n = 32; n <= 48; n += 16) {
7539 for (size_t k = 1; k <= 40; k += 9) {
7540 for (uint32_t m = 1; m <= 1; m++) {
7541 GemmMicrokernelTester()
7542 .mr(1)
7543 .nr(16)
7544 .kr(1)
7545 .sr(1)
7546 .m(m)
7547 .n(n)
7548 .k(k)
7549 .iterations(1)
7550 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7551 }
7552 }
7553 }
7554 }
7555
7556 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel) {
7557 TEST_REQUIRES_ARM_NEON;
7558 for (size_t k = 1; k <= 40; k += 9) {
7559 GemmMicrokernelTester()
7560 .mr(1)
7561 .nr(16)
7562 .kr(1)
7563 .sr(1)
7564 .m(1)
7565 .n(16)
7566 .k(k)
7567 .ks(3)
7568 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7569 }
7570 }
7571
7572 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, small_kernel_subtile) {
7573 TEST_REQUIRES_ARM_NEON;
7574 for (size_t k = 1; k <= 40; k += 9) {
7575 for (uint32_t m = 1; m <= 1; m++) {
7576 for (uint32_t n = 1; n <= 16; n++) {
7577 GemmMicrokernelTester()
7578 .mr(1)
7579 .nr(16)
7580 .kr(1)
7581 .sr(1)
7582 .m(m)
7583 .n(n)
7584 .k(k)
7585 .ks(3)
7586 .iterations(1)
7587 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7588 }
7589 }
7590 }
7591 }
7592
7593 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
7594 TEST_REQUIRES_ARM_NEON;
7595 for (uint32_t n = 17; n < 32; n++) {
7596 for (size_t k = 1; k <= 40; k += 9) {
7597 GemmMicrokernelTester()
7598 .mr(1)
7599 .nr(16)
7600 .kr(1)
7601 .sr(1)
7602 .m(1)
7603 .n(16)
7604 .k(k)
7605 .ks(3)
7606 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7607 }
7608 }
7609 }
7610
7611 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
7612 TEST_REQUIRES_ARM_NEON;
7613 for (uint32_t n = 32; n <= 48; n += 16) {
7614 for (size_t k = 1; k <= 40; k += 9) {
7615 GemmMicrokernelTester()
7616 .mr(1)
7617 .nr(16)
7618 .kr(1)
7619 .sr(1)
7620 .m(1)
7621 .n(16)
7622 .k(k)
7623 .ks(3)
7624 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7625 }
7626 }
7627 }
7628
7629 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
7630 TEST_REQUIRES_ARM_NEON;
7631 for (size_t k = 1; k <= 40; k += 9) {
7632 for (uint32_t m = 1; m <= 1; m++) {
7633 for (uint32_t n = 1; n <= 16; n++) {
7634 GemmMicrokernelTester()
7635 .mr(1)
7636 .nr(16)
7637 .kr(1)
7638 .sr(1)
7639 .m(m)
7640 .n(n)
7641 .k(k)
7642 .cm_stride(19)
7643 .iterations(1)
7644 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7645 }
7646 }
7647 }
7648 }
7649
7650 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, a_offset) {
7651 TEST_REQUIRES_ARM_NEON;
7652 for (size_t k = 1; k <= 40; k += 9) {
7653 GemmMicrokernelTester()
7654 .mr(1)
7655 .nr(16)
7656 .kr(1)
7657 .sr(1)
7658 .m(1)
7659 .n(16)
7660 .k(k)
7661 .ks(3)
7662 .a_offset(43)
7663 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7664 }
7665 }
7666
7667 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, zero) {
7668 TEST_REQUIRES_ARM_NEON;
7669 for (uint32_t mz = 0; mz < 1; mz++) {
7670 for (size_t k = 1; k <= 40; k += 9) {
7671 GemmMicrokernelTester()
7672 .mr(1)
7673 .nr(16)
7674 .kr(1)
7675 .sr(1)
7676 .m(1)
7677 .n(16)
7678 .k(k)
7679 .ks(3)
7680 .a_offset(43)
7681 .zero_index(mz)
7682 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7683 }
7684 }
7685 }
7686
7687 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmin) {
7688 TEST_REQUIRES_ARM_NEON;
7689 GemmMicrokernelTester()
7690 .mr(1)
7691 .nr(16)
7692 .kr(1)
7693 .sr(1)
7694 .m(1)
7695 .n(16)
7696 .k(8)
7697 .qmin(128)
7698 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7699 }
7700
7701 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmax) {
7702 TEST_REQUIRES_ARM_NEON;
7703 GemmMicrokernelTester()
7704 .mr(1)
7705 .nr(16)
7706 .kr(1)
7707 .sr(1)
7708 .m(1)
7709 .n(16)
7710 .k(8)
7711 .qmax(128)
7712 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7713 }
7714
7715 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm) {
7716 TEST_REQUIRES_ARM_NEON;
7717 GemmMicrokernelTester()
7718 .mr(1)
7719 .nr(16)
7720 .kr(1)
7721 .sr(1)
7722 .m(1)
7723 .n(16)
7724 .k(8)
7725 .cm_stride(19)
7726 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7727 }
7728
7729 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_a_zero_point) {
7730 TEST_REQUIRES_ARM_NEON;
7731 for (size_t k = 1; k <= 40; k += 9) {
7732 GemmMicrokernelTester()
7733 .mr(1)
7734 .nr(16)
7735 .kr(1)
7736 .sr(1)
7737 .m(1)
7738 .n(16)
7739 .k(k)
7740 .a_zero_point(0)
7741 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7742 }
7743 }
7744
7745 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_b_zero_point) {
7746 TEST_REQUIRES_ARM_NEON;
7747 for (size_t k = 1; k <= 40; k += 9) {
7748 GemmMicrokernelTester()
7749 .mr(1)
7750 .nr(16)
7751 .kr(1)
7752 .sr(1)
7753 .m(1)
7754 .n(16)
7755 .k(k)
7756 .b_zero_point(0)
7757 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7758 }
7759 }
7760
7761 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, no_zero_point) {
7762 TEST_REQUIRES_ARM_NEON;
7763 for (size_t k = 1; k <= 40; k += 9) {
7764 GemmMicrokernelTester()
7765 .mr(1)
7766 .nr(16)
7767 .kr(1)
7768 .sr(1)
7769 .m(1)
7770 .n(16)
7771 .k(k)
7772 .a_zero_point(0)
7773 .b_zero_point(0)
7774 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7775 }
7776 }
7777#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7778
7779
7780#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7781 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) {
7782 TEST_REQUIRES_ARM_NEON;
7783 GemmMicrokernelTester()
7784 .mr(4)
7785 .nr(16)
7786 .kr(1)
7787 .sr(1)
7788 .m(4)
7789 .n(16)
7790 .k(8)
7791 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7792 }
7793
7794 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) {
7795 TEST_REQUIRES_ARM_NEON;
7796 GemmMicrokernelTester()
7797 .mr(4)
7798 .nr(16)
7799 .kr(1)
7800 .sr(1)
7801 .m(4)
7802 .n(16)
7803 .k(8)
7804 .cn_stride(19)
7805 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7806 }
7807
7808 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
7809 TEST_REQUIRES_ARM_NEON;
7810 for (uint32_t m = 1; m <= 4; m++) {
7811 for (uint32_t n = 1; n <= 16; n++) {
7812 GemmMicrokernelTester()
7813 .mr(4)
7814 .nr(16)
7815 .kr(1)
7816 .sr(1)
7817 .m(m)
7818 .n(n)
7819 .k(8)
7820 .iterations(1)
7821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7822 }
7823 }
7824 }
7825
7826 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
7827 TEST_REQUIRES_ARM_NEON;
7828 for (uint32_t m = 1; m <= 4; m++) {
7829 GemmMicrokernelTester()
7830 .mr(4)
7831 .nr(16)
7832 .kr(1)
7833 .sr(1)
7834 .m(m)
7835 .n(16)
7836 .k(8)
7837 .iterations(1)
7838 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7839 }
7840 }
7841
7842 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
7843 TEST_REQUIRES_ARM_NEON;
7844 for (uint32_t n = 1; n <= 16; n++) {
7845 GemmMicrokernelTester()
7846 .mr(4)
7847 .nr(16)
7848 .kr(1)
7849 .sr(1)
7850 .m(4)
7851 .n(n)
7852 .k(8)
7853 .iterations(1)
7854 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7855 }
7856 }
7857
7858 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) {
7859 TEST_REQUIRES_ARM_NEON;
7860 for (size_t k = 1; k < 8; k++) {
7861 GemmMicrokernelTester()
7862 .mr(4)
7863 .nr(16)
7864 .kr(1)
7865 .sr(1)
7866 .m(4)
7867 .n(16)
7868 .k(k)
7869 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7870 }
7871 }
7872
7873 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
7874 TEST_REQUIRES_ARM_NEON;
7875 for (size_t k = 1; k < 8; k++) {
7876 for (uint32_t m = 1; m <= 4; m++) {
7877 for (uint32_t n = 1; n <= 16; n++) {
7878 GemmMicrokernelTester()
7879 .mr(4)
7880 .nr(16)
7881 .kr(1)
7882 .sr(1)
7883 .m(m)
7884 .n(n)
7885 .k(k)
7886 .iterations(1)
7887 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7888 }
7889 }
7890 }
7891 }
7892
7893 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) {
7894 TEST_REQUIRES_ARM_NEON;
7895 for (size_t k = 9; k < 16; k++) {
7896 GemmMicrokernelTester()
7897 .mr(4)
7898 .nr(16)
7899 .kr(1)
7900 .sr(1)
7901 .m(4)
7902 .n(16)
7903 .k(k)
7904 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7905 }
7906 }
7907
7908 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
7909 TEST_REQUIRES_ARM_NEON;
7910 for (size_t k = 9; k < 16; k++) {
7911 for (uint32_t m = 1; m <= 4; m++) {
7912 for (uint32_t n = 1; n <= 16; n++) {
7913 GemmMicrokernelTester()
7914 .mr(4)
7915 .nr(16)
7916 .kr(1)
7917 .sr(1)
7918 .m(m)
7919 .n(n)
7920 .k(k)
7921 .iterations(1)
7922 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7923 }
7924 }
7925 }
7926 }
7927
7928 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) {
7929 TEST_REQUIRES_ARM_NEON;
7930 for (size_t k = 16; k <= 80; k += 8) {
7931 GemmMicrokernelTester()
7932 .mr(4)
7933 .nr(16)
7934 .kr(1)
7935 .sr(1)
7936 .m(4)
7937 .n(16)
7938 .k(k)
7939 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7940 }
7941 }
7942
7943 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
7944 TEST_REQUIRES_ARM_NEON;
7945 for (size_t k = 16; k <= 80; k += 8) {
7946 for (uint32_t m = 1; m <= 4; m++) {
7947 for (uint32_t n = 1; n <= 16; n++) {
7948 GemmMicrokernelTester()
7949 .mr(4)
7950 .nr(16)
7951 .kr(1)
7952 .sr(1)
7953 .m(m)
7954 .n(n)
7955 .k(k)
7956 .iterations(1)
7957 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7958 }
7959 }
7960 }
7961 }
7962
7963 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
7964 TEST_REQUIRES_ARM_NEON;
7965 for (uint32_t n = 17; n < 32; n++) {
7966 for (size_t k = 1; k <= 40; k += 9) {
7967 GemmMicrokernelTester()
7968 .mr(4)
7969 .nr(16)
7970 .kr(1)
7971 .sr(1)
7972 .m(4)
7973 .n(16)
7974 .k(k)
7975 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7976 }
7977 }
7978 }
7979
7980 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
7981 TEST_REQUIRES_ARM_NEON;
7982 for (uint32_t n = 17; n < 32; n++) {
7983 for (size_t k = 1; k <= 40; k += 9) {
7984 GemmMicrokernelTester()
7985 .mr(4)
7986 .nr(16)
7987 .kr(1)
7988 .sr(1)
7989 .m(4)
7990 .n(16)
7991 .k(k)
7992 .cn_stride(19)
7993 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
7994 }
7995 }
7996 }
7997
7998 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
7999 TEST_REQUIRES_ARM_NEON;
8000 for (uint32_t n = 17; n < 32; n++) {
8001 for (size_t k = 1; k <= 40; k += 9) {
8002 for (uint32_t m = 1; m <= 4; m++) {
8003 GemmMicrokernelTester()
8004 .mr(4)
8005 .nr(16)
8006 .kr(1)
8007 .sr(1)
8008 .m(m)
8009 .n(n)
8010 .k(k)
8011 .iterations(1)
8012 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8013 }
8014 }
8015 }
8016 }
8017
8018 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
8019 TEST_REQUIRES_ARM_NEON;
8020 for (uint32_t n = 32; n <= 48; n += 16) {
8021 for (size_t k = 1; k <= 40; k += 9) {
8022 GemmMicrokernelTester()
8023 .mr(4)
8024 .nr(16)
8025 .kr(1)
8026 .sr(1)
8027 .m(4)
8028 .n(16)
8029 .k(k)
8030 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8031 }
8032 }
8033 }
8034
8035 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
8036 TEST_REQUIRES_ARM_NEON;
8037 for (uint32_t n = 32; n <= 48; n += 16) {
8038 for (size_t k = 1; k <= 40; k += 9) {
8039 GemmMicrokernelTester()
8040 .mr(4)
8041 .nr(16)
8042 .kr(1)
8043 .sr(1)
8044 .m(4)
8045 .n(n)
8046 .k(k)
8047 .cn_stride(19)
8048 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8049 }
8050 }
8051 }
8052
8053 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
8054 TEST_REQUIRES_ARM_NEON;
8055 for (uint32_t n = 32; n <= 48; n += 16) {
8056 for (size_t k = 1; k <= 40; k += 9) {
8057 for (uint32_t m = 1; m <= 4; m++) {
8058 GemmMicrokernelTester()
8059 .mr(4)
8060 .nr(16)
8061 .kr(1)
8062 .sr(1)
8063 .m(m)
8064 .n(n)
8065 .k(k)
8066 .iterations(1)
8067 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8068 }
8069 }
8070 }
8071 }
8072
8073 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel) {
8074 TEST_REQUIRES_ARM_NEON;
8075 for (size_t k = 1; k <= 40; k += 9) {
8076 GemmMicrokernelTester()
8077 .mr(4)
8078 .nr(16)
8079 .kr(1)
8080 .sr(1)
8081 .m(4)
8082 .n(16)
8083 .k(k)
8084 .ks(3)
8085 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8086 }
8087 }
8088
8089 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
8090 TEST_REQUIRES_ARM_NEON;
8091 for (size_t k = 1; k <= 40; k += 9) {
8092 for (uint32_t m = 1; m <= 4; m++) {
8093 for (uint32_t n = 1; n <= 16; n++) {
8094 GemmMicrokernelTester()
8095 .mr(4)
8096 .nr(16)
8097 .kr(1)
8098 .sr(1)
8099 .m(m)
8100 .n(n)
8101 .k(k)
8102 .ks(3)
8103 .iterations(1)
8104 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8105 }
8106 }
8107 }
8108 }
8109
8110 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
8111 TEST_REQUIRES_ARM_NEON;
8112 for (uint32_t n = 17; n < 32; n++) {
8113 for (size_t k = 1; k <= 40; k += 9) {
8114 GemmMicrokernelTester()
8115 .mr(4)
8116 .nr(16)
8117 .kr(1)
8118 .sr(1)
8119 .m(4)
8120 .n(16)
8121 .k(k)
8122 .ks(3)
8123 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8124 }
8125 }
8126 }
8127
8128 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
8129 TEST_REQUIRES_ARM_NEON;
8130 for (uint32_t n = 32; n <= 48; n += 16) {
8131 for (size_t k = 1; k <= 40; k += 9) {
8132 GemmMicrokernelTester()
8133 .mr(4)
8134 .nr(16)
8135 .kr(1)
8136 .sr(1)
8137 .m(4)
8138 .n(16)
8139 .k(k)
8140 .ks(3)
8141 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8142 }
8143 }
8144 }
8145
8146 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
8147 TEST_REQUIRES_ARM_NEON;
8148 for (size_t k = 1; k <= 40; k += 9) {
8149 for (uint32_t m = 1; m <= 4; m++) {
8150 for (uint32_t n = 1; n <= 16; n++) {
8151 GemmMicrokernelTester()
8152 .mr(4)
8153 .nr(16)
8154 .kr(1)
8155 .sr(1)
8156 .m(m)
8157 .n(n)
8158 .k(k)
8159 .cm_stride(19)
8160 .iterations(1)
8161 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8162 }
8163 }
8164 }
8165 }
8166
8167 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, a_offset) {
8168 TEST_REQUIRES_ARM_NEON;
8169 for (size_t k = 1; k <= 40; k += 9) {
8170 GemmMicrokernelTester()
8171 .mr(4)
8172 .nr(16)
8173 .kr(1)
8174 .sr(1)
8175 .m(4)
8176 .n(16)
8177 .k(k)
8178 .ks(3)
8179 .a_offset(163)
8180 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8181 }
8182 }
8183
8184 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, zero) {
8185 TEST_REQUIRES_ARM_NEON;
8186 for (uint32_t mz = 0; mz < 4; mz++) {
8187 for (size_t k = 1; k <= 40; k += 9) {
8188 GemmMicrokernelTester()
8189 .mr(4)
8190 .nr(16)
8191 .kr(1)
8192 .sr(1)
8193 .m(4)
8194 .n(16)
8195 .k(k)
8196 .ks(3)
8197 .a_offset(163)
8198 .zero_index(mz)
8199 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8200 }
8201 }
8202 }
8203
8204 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) {
8205 TEST_REQUIRES_ARM_NEON;
8206 GemmMicrokernelTester()
8207 .mr(4)
8208 .nr(16)
8209 .kr(1)
8210 .sr(1)
8211 .m(4)
8212 .n(16)
8213 .k(8)
8214 .qmin(128)
8215 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8216 }
8217
8218 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) {
8219 TEST_REQUIRES_ARM_NEON;
8220 GemmMicrokernelTester()
8221 .mr(4)
8222 .nr(16)
8223 .kr(1)
8224 .sr(1)
8225 .m(4)
8226 .n(16)
8227 .k(8)
8228 .qmax(128)
8229 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8230 }
8231
8232 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) {
8233 TEST_REQUIRES_ARM_NEON;
8234 GemmMicrokernelTester()
8235 .mr(4)
8236 .nr(16)
8237 .kr(1)
8238 .sr(1)
8239 .m(4)
8240 .n(16)
8241 .k(8)
8242 .cm_stride(19)
8243 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8244 }
8245
8246 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_a_zero_point) {
8247 TEST_REQUIRES_ARM_NEON;
8248 for (size_t k = 1; k <= 40; k += 9) {
8249 GemmMicrokernelTester()
8250 .mr(4)
8251 .nr(16)
8252 .kr(1)
8253 .sr(1)
8254 .m(4)
8255 .n(16)
8256 .k(k)
8257 .a_zero_point(0)
8258 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8259 }
8260 }
8261
8262 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_b_zero_point) {
8263 TEST_REQUIRES_ARM_NEON;
8264 for (size_t k = 1; k <= 40; k += 9) {
8265 GemmMicrokernelTester()
8266 .mr(4)
8267 .nr(16)
8268 .kr(1)
8269 .sr(1)
8270 .m(4)
8271 .n(16)
8272 .k(k)
8273 .b_zero_point(0)
8274 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8275 }
8276 }
8277
8278 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_zero_point) {
8279 TEST_REQUIRES_ARM_NEON;
8280 for (size_t k = 1; k <= 40; k += 9) {
8281 GemmMicrokernelTester()
8282 .mr(4)
8283 .nr(16)
8284 .kr(1)
8285 .sr(1)
8286 .m(4)
8287 .n(16)
8288 .k(k)
8289 .a_zero_point(0)
8290 .b_zero_point(0)
8291 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_init_qu8_requantization_rndnu_params, xnn_qu8_requantize_rndnu);
8292 }
8293 }
8294#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64