blob: b8ce63472bed4c34a42d3e4a689e12481c60e978 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qu8-igemm-minmax-rndnu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barchard901845c2022-01-19 01:45:22 -080026#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
27 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
28 TEST_REQUIRES_ARM_NEON;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(4)
35 .n(8)
36 .k(8)
37 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
38 }
39
40 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
41 TEST_REQUIRES_ARM_NEON;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(4)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
51 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
52 }
53
54 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
55 TEST_REQUIRES_ARM_NEON;
56 for (uint32_t n = 1; n <= 8; n++) {
57 for (uint32_t m = 1; m <= 4; m++) {
58 GemmMicrokernelTester()
59 .mr(4)
60 .nr(8)
61 .kr(1)
62 .sr(1)
63 .m(m)
64 .n(n)
65 .k(8)
66 .iterations(1)
67 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
68 }
69 }
70 }
71
72 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
73 TEST_REQUIRES_ARM_NEON;
74 for (uint32_t m = 1; m <= 4; m++) {
75 GemmMicrokernelTester()
76 .mr(4)
77 .nr(8)
78 .kr(1)
79 .sr(1)
80 .m(m)
81 .n(8)
82 .k(8)
83 .iterations(1)
84 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
85 }
86 }
87
88 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
89 TEST_REQUIRES_ARM_NEON;
90 for (uint32_t n = 1; n <= 8; n++) {
91 GemmMicrokernelTester()
92 .mr(4)
93 .nr(8)
94 .kr(1)
95 .sr(1)
96 .m(4)
97 .n(n)
98 .k(8)
99 .iterations(1)
100 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
101 }
102 }
103
104 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
105 TEST_REQUIRES_ARM_NEON;
106 for (size_t k = 1; k < 8; k++) {
107 GemmMicrokernelTester()
108 .mr(4)
109 .nr(8)
110 .kr(1)
111 .sr(1)
112 .m(4)
113 .n(8)
114 .k(k)
115 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
116 }
117 }
118
119 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
120 TEST_REQUIRES_ARM_NEON;
121 for (size_t k = 1; k < 8; k++) {
122 for (uint32_t n = 1; n <= 8; n++) {
123 for (uint32_t m = 1; m <= 4; m++) {
124 GemmMicrokernelTester()
125 .mr(4)
126 .nr(8)
127 .kr(1)
128 .sr(1)
129 .m(m)
130 .n(n)
131 .k(k)
132 .iterations(1)
133 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
134 }
135 }
136 }
137 }
138
139 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
140 TEST_REQUIRES_ARM_NEON;
141 for (size_t k = 9; k < 16; k++) {
142 GemmMicrokernelTester()
143 .mr(4)
144 .nr(8)
145 .kr(1)
146 .sr(1)
147 .m(4)
148 .n(8)
149 .k(k)
150 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
151 }
152 }
153
154 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
155 TEST_REQUIRES_ARM_NEON;
156 for (size_t k = 9; k < 16; k++) {
157 for (uint32_t n = 1; n <= 8; n++) {
158 for (uint32_t m = 1; m <= 4; m++) {
159 GemmMicrokernelTester()
160 .mr(4)
161 .nr(8)
162 .kr(1)
163 .sr(1)
164 .m(m)
165 .n(n)
166 .k(k)
167 .iterations(1)
168 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
169 }
170 }
171 }
172 }
173
174 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
175 TEST_REQUIRES_ARM_NEON;
176 for (size_t k = 16; k <= 80; k += 8) {
177 GemmMicrokernelTester()
178 .mr(4)
179 .nr(8)
180 .kr(1)
181 .sr(1)
182 .m(4)
183 .n(8)
184 .k(k)
185 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
186 }
187 }
188
189 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
190 TEST_REQUIRES_ARM_NEON;
191 for (size_t k = 16; k <= 80; k += 8) {
192 for (uint32_t n = 1; n <= 8; n++) {
193 for (uint32_t m = 1; m <= 4; m++) {
194 GemmMicrokernelTester()
195 .mr(4)
196 .nr(8)
197 .kr(1)
198 .sr(1)
199 .m(m)
200 .n(n)
201 .k(k)
202 .iterations(1)
203 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
204 }
205 }
206 }
207 }
208
209 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
210 TEST_REQUIRES_ARM_NEON;
211 for (uint32_t n = 9; n < 16; n++) {
212 for (size_t k = 1; k <= 40; k += 9) {
213 GemmMicrokernelTester()
214 .mr(4)
215 .nr(8)
216 .kr(1)
217 .sr(1)
218 .m(4)
219 .n(n)
220 .k(k)
221 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
222 }
223 }
224 }
225
226 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
227 TEST_REQUIRES_ARM_NEON;
228 for (uint32_t n = 9; n < 16; n++) {
229 for (size_t k = 1; k <= 40; k += 9) {
230 GemmMicrokernelTester()
231 .mr(4)
232 .nr(8)
233 .kr(1)
234 .sr(1)
235 .m(4)
236 .n(n)
237 .k(k)
238 .cn_stride(11)
239 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
240 }
241 }
242 }
243
244 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
245 TEST_REQUIRES_ARM_NEON;
246 for (uint32_t n = 9; n < 16; n++) {
247 for (size_t k = 1; k <= 40; k += 9) {
248 for (uint32_t m = 1; m <= 4; m++) {
249 GemmMicrokernelTester()
250 .mr(4)
251 .nr(8)
252 .kr(1)
253 .sr(1)
254 .m(m)
255 .n(n)
256 .k(k)
257 .iterations(1)
258 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
259 }
260 }
261 }
262 }
263
264 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
265 TEST_REQUIRES_ARM_NEON;
266 for (uint32_t n = 16; n <= 24; n += 8) {
267 for (size_t k = 1; k <= 40; k += 9) {
268 GemmMicrokernelTester()
269 .mr(4)
270 .nr(8)
271 .kr(1)
272 .sr(1)
273 .m(4)
274 .n(n)
275 .k(k)
276 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
277 }
278 }
279 }
280
281 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
282 TEST_REQUIRES_ARM_NEON;
283 for (uint32_t n = 16; n <= 24; n += 8) {
284 for (size_t k = 1; k <= 40; k += 9) {
285 GemmMicrokernelTester()
286 .mr(4)
287 .nr(8)
288 .kr(1)
289 .sr(1)
290 .m(4)
291 .n(n)
292 .k(k)
293 .cn_stride(11)
294 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
295 }
296 }
297 }
298
299 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
300 TEST_REQUIRES_ARM_NEON;
301 for (uint32_t n = 16; n <= 24; n += 8) {
302 for (size_t k = 1; k <= 40; k += 9) {
303 for (uint32_t m = 1; m <= 4; m++) {
304 GemmMicrokernelTester()
305 .mr(4)
306 .nr(8)
307 .kr(1)
308 .sr(1)
309 .m(m)
310 .n(n)
311 .k(k)
312 .iterations(1)
313 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
314 }
315 }
316 }
317 }
318
319 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
320 TEST_REQUIRES_ARM_NEON;
321 for (size_t k = 1; k <= 40; k += 9) {
322 GemmMicrokernelTester()
323 .mr(4)
324 .nr(8)
325 .kr(1)
326 .sr(1)
327 .m(4)
328 .n(8)
329 .k(k)
330 .ks(3)
331 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
332 }
333 }
334
335 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
336 TEST_REQUIRES_ARM_NEON;
337 for (size_t k = 1; k <= 40; k += 9) {
338 for (uint32_t n = 1; n <= 8; n++) {
339 for (uint32_t m = 1; m <= 4; m++) {
340 GemmMicrokernelTester()
341 .mr(4)
342 .nr(8)
343 .kr(1)
344 .sr(1)
345 .m(m)
346 .n(n)
347 .k(k)
348 .ks(3)
349 .iterations(1)
350 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
351 }
352 }
353 }
354 }
355
356 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_small_kernel) {
357 TEST_REQUIRES_ARM_NEON;
358 for (uint32_t n = 9; n < 16; n++) {
359 for (size_t k = 1; k <= 40; k += 9) {
360 GemmMicrokernelTester()
361 .mr(4)
362 .nr(8)
363 .kr(1)
364 .sr(1)
365 .m(4)
366 .n(n)
367 .k(k)
368 .ks(3)
369 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
370 }
371 }
372 }
373
374 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_small_kernel) {
375 TEST_REQUIRES_ARM_NEON;
376 for (uint32_t n = 16; n <= 24; n += 8) {
377 for (size_t k = 1; k <= 40; k += 9) {
378 GemmMicrokernelTester()
379 .mr(4)
380 .nr(8)
381 .kr(1)
382 .sr(1)
383 .m(4)
384 .n(n)
385 .k(k)
386 .ks(3)
387 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
388 }
389 }
390 }
391
392 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
393 TEST_REQUIRES_ARM_NEON;
394 for (size_t k = 1; k <= 40; k += 9) {
395 for (uint32_t n = 1; n <= 8; n++) {
396 for (uint32_t m = 1; m <= 4; m++) {
397 GemmMicrokernelTester()
398 .mr(4)
399 .nr(8)
400 .kr(1)
401 .sr(1)
402 .m(m)
403 .n(n)
404 .k(k)
405 .cm_stride(11)
406 .iterations(1)
407 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
408 }
409 }
410 }
411 }
412
413 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
414 TEST_REQUIRES_ARM_NEON;
415 for (size_t k = 1; k <= 40; k += 9) {
416 GemmMicrokernelTester()
417 .mr(4)
418 .nr(8)
419 .kr(1)
420 .sr(1)
421 .m(4)
422 .n(8)
423 .k(k)
424 .ks(3)
425 .a_offset(163)
426 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
427 }
428 }
429
430 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, zero) {
431 TEST_REQUIRES_ARM_NEON;
432 for (size_t k = 1; k <= 40; k += 9) {
433 for (uint32_t mz = 0; mz < 4; mz++) {
434 GemmMicrokernelTester()
435 .mr(4)
436 .nr(8)
437 .kr(1)
438 .sr(1)
439 .m(4)
440 .n(8)
441 .k(k)
442 .ks(3)
443 .a_offset(163)
444 .zero_index(mz)
445 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
446 }
447 }
448 }
449
450 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
451 TEST_REQUIRES_ARM_NEON;
452 GemmMicrokernelTester()
453 .mr(4)
454 .nr(8)
455 .kr(1)
456 .sr(1)
457 .m(4)
458 .n(8)
459 .k(8)
460 .qmin(128)
461 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
462 }
463
464 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
465 TEST_REQUIRES_ARM_NEON;
466 GemmMicrokernelTester()
467 .mr(4)
468 .nr(8)
469 .kr(1)
470 .sr(1)
471 .m(4)
472 .n(8)
473 .k(8)
474 .qmax(128)
475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
476 }
477
478 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
479 TEST_REQUIRES_ARM_NEON;
480 GemmMicrokernelTester()
481 .mr(4)
482 .nr(8)
483 .kr(1)
484 .sr(1)
485 .m(4)
486 .n(8)
487 .k(8)
488 .cm_stride(11)
489 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
490 }
491
492 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) {
493 TEST_REQUIRES_ARM_NEON;
494 for (size_t k = 1; k <= 40; k += 9) {
495 GemmMicrokernelTester()
496 .mr(4)
497 .nr(8)
498 .kr(1)
499 .sr(1)
500 .m(4)
501 .n(8)
502 .k(k)
503 .a_zero_point(0)
504 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
505 }
506 }
507
508 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) {
509 TEST_REQUIRES_ARM_NEON;
510 for (size_t k = 1; k <= 40; k += 9) {
511 GemmMicrokernelTester()
512 .mr(4)
513 .nr(8)
514 .kr(1)
515 .sr(1)
516 .m(4)
517 .n(8)
518 .k(k)
519 .b_zero_point(0)
520 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
521 }
522 }
523
524 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) {
525 TEST_REQUIRES_ARM_NEON;
526 for (size_t k = 1; k <= 40; k += 9) {
527 GemmMicrokernelTester()
528 .mr(4)
529 .nr(8)
530 .kr(1)
531 .sr(1)
532 .m(4)
533 .n(8)
534 .k(k)
535 .a_zero_point(0)
536 .b_zero_point(0)
537 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
538 }
539 }
540#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
541
542
543#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
544 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
545 TEST_REQUIRES_ARM_NEON;
546 GemmMicrokernelTester()
547 .mr(4)
548 .nr(8)
549 .kr(1)
550 .sr(1)
551 .m(4)
552 .n(8)
553 .k(8)
554 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
555 }
556
557 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
558 TEST_REQUIRES_ARM_NEON;
559 GemmMicrokernelTester()
560 .mr(4)
561 .nr(8)
562 .kr(1)
563 .sr(1)
564 .m(4)
565 .n(8)
566 .k(8)
567 .cn_stride(11)
568 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
569 }
570
571 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
572 TEST_REQUIRES_ARM_NEON;
573 for (uint32_t n = 1; n <= 8; n++) {
574 for (uint32_t m = 1; m <= 4; m++) {
575 GemmMicrokernelTester()
576 .mr(4)
577 .nr(8)
578 .kr(1)
579 .sr(1)
580 .m(m)
581 .n(n)
582 .k(8)
583 .iterations(1)
584 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
585 }
586 }
587 }
588
589 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
590 TEST_REQUIRES_ARM_NEON;
591 for (uint32_t m = 1; m <= 4; m++) {
592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(8)
595 .kr(1)
596 .sr(1)
597 .m(m)
598 .n(8)
599 .k(8)
600 .iterations(1)
601 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
602 }
603 }
604
605 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
606 TEST_REQUIRES_ARM_NEON;
607 for (uint32_t n = 1; n <= 8; n++) {
608 GemmMicrokernelTester()
609 .mr(4)
610 .nr(8)
611 .kr(1)
612 .sr(1)
613 .m(4)
614 .n(n)
615 .k(8)
616 .iterations(1)
617 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
618 }
619 }
620
621 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
622 TEST_REQUIRES_ARM_NEON;
623 for (size_t k = 1; k < 8; k++) {
624 GemmMicrokernelTester()
625 .mr(4)
626 .nr(8)
627 .kr(1)
628 .sr(1)
629 .m(4)
630 .n(8)
631 .k(k)
632 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
633 }
634 }
635
636 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
637 TEST_REQUIRES_ARM_NEON;
638 for (size_t k = 1; k < 8; k++) {
639 for (uint32_t n = 1; n <= 8; n++) {
640 for (uint32_t m = 1; m <= 4; m++) {
641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(8)
644 .kr(1)
645 .sr(1)
646 .m(m)
647 .n(n)
648 .k(k)
649 .iterations(1)
650 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
651 }
652 }
653 }
654 }
655
656 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
657 TEST_REQUIRES_ARM_NEON;
658 for (size_t k = 9; k < 16; k++) {
659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(8)
662 .kr(1)
663 .sr(1)
664 .m(4)
665 .n(8)
666 .k(k)
667 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
668 }
669 }
670
671 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
672 TEST_REQUIRES_ARM_NEON;
673 for (size_t k = 9; k < 16; k++) {
674 for (uint32_t n = 1; n <= 8; n++) {
675 for (uint32_t m = 1; m <= 4; m++) {
676 GemmMicrokernelTester()
677 .mr(4)
678 .nr(8)
679 .kr(1)
680 .sr(1)
681 .m(m)
682 .n(n)
683 .k(k)
684 .iterations(1)
685 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
686 }
687 }
688 }
689 }
690
691 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
692 TEST_REQUIRES_ARM_NEON;
693 for (size_t k = 16; k <= 80; k += 8) {
694 GemmMicrokernelTester()
695 .mr(4)
696 .nr(8)
697 .kr(1)
698 .sr(1)
699 .m(4)
700 .n(8)
701 .k(k)
702 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
703 }
704 }
705
706 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
707 TEST_REQUIRES_ARM_NEON;
708 for (size_t k = 16; k <= 80; k += 8) {
709 for (uint32_t n = 1; n <= 8; n++) {
710 for (uint32_t m = 1; m <= 4; m++) {
711 GemmMicrokernelTester()
712 .mr(4)
713 .nr(8)
714 .kr(1)
715 .sr(1)
716 .m(m)
717 .n(n)
718 .k(k)
719 .iterations(1)
720 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
721 }
722 }
723 }
724 }
725
726 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
727 TEST_REQUIRES_ARM_NEON;
728 for (uint32_t n = 9; n < 16; n++) {
729 for (size_t k = 1; k <= 40; k += 9) {
730 GemmMicrokernelTester()
731 .mr(4)
732 .nr(8)
733 .kr(1)
734 .sr(1)
735 .m(4)
736 .n(n)
737 .k(k)
738 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
739 }
740 }
741 }
742
743 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
744 TEST_REQUIRES_ARM_NEON;
745 for (uint32_t n = 9; n < 16; n++) {
746 for (size_t k = 1; k <= 40; k += 9) {
747 GemmMicrokernelTester()
748 .mr(4)
749 .nr(8)
750 .kr(1)
751 .sr(1)
752 .m(4)
753 .n(n)
754 .k(k)
755 .cn_stride(11)
756 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
757 }
758 }
759 }
760
761 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
762 TEST_REQUIRES_ARM_NEON;
763 for (uint32_t n = 9; n < 16; n++) {
764 for (size_t k = 1; k <= 40; k += 9) {
765 for (uint32_t m = 1; m <= 4; m++) {
766 GemmMicrokernelTester()
767 .mr(4)
768 .nr(8)
769 .kr(1)
770 .sr(1)
771 .m(m)
772 .n(n)
773 .k(k)
774 .iterations(1)
775 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
776 }
777 }
778 }
779 }
780
781 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
782 TEST_REQUIRES_ARM_NEON;
783 for (uint32_t n = 16; n <= 24; n += 8) {
784 for (size_t k = 1; k <= 40; k += 9) {
785 GemmMicrokernelTester()
786 .mr(4)
787 .nr(8)
788 .kr(1)
789 .sr(1)
790 .m(4)
791 .n(n)
792 .k(k)
793 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
794 }
795 }
796 }
797
798 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
799 TEST_REQUIRES_ARM_NEON;
800 for (uint32_t n = 16; n <= 24; n += 8) {
801 for (size_t k = 1; k <= 40; k += 9) {
802 GemmMicrokernelTester()
803 .mr(4)
804 .nr(8)
805 .kr(1)
806 .sr(1)
807 .m(4)
808 .n(n)
809 .k(k)
810 .cn_stride(11)
811 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
812 }
813 }
814 }
815
816 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
817 TEST_REQUIRES_ARM_NEON;
818 for (uint32_t n = 16; n <= 24; n += 8) {
819 for (size_t k = 1; k <= 40; k += 9) {
820 for (uint32_t m = 1; m <= 4; m++) {
821 GemmMicrokernelTester()
822 .mr(4)
823 .nr(8)
824 .kr(1)
825 .sr(1)
826 .m(m)
827 .n(n)
828 .k(k)
829 .iterations(1)
830 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
831 }
832 }
833 }
834 }
835
836 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel) {
837 TEST_REQUIRES_ARM_NEON;
838 for (size_t k = 1; k <= 40; k += 9) {
839 GemmMicrokernelTester()
840 .mr(4)
841 .nr(8)
842 .kr(1)
843 .sr(1)
844 .m(4)
845 .n(8)
846 .k(k)
847 .ks(3)
848 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
849 }
850 }
851
852 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
853 TEST_REQUIRES_ARM_NEON;
854 for (size_t k = 1; k <= 40; k += 9) {
855 for (uint32_t n = 1; n <= 8; n++) {
856 for (uint32_t m = 1; m <= 4; m++) {
857 GemmMicrokernelTester()
858 .mr(4)
859 .nr(8)
860 .kr(1)
861 .sr(1)
862 .m(m)
863 .n(n)
864 .k(k)
865 .ks(3)
866 .iterations(1)
867 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
868 }
869 }
870 }
871 }
872
873 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_small_kernel) {
874 TEST_REQUIRES_ARM_NEON;
875 for (uint32_t n = 9; n < 16; n++) {
876 for (size_t k = 1; k <= 40; k += 9) {
877 GemmMicrokernelTester()
878 .mr(4)
879 .nr(8)
880 .kr(1)
881 .sr(1)
882 .m(4)
883 .n(n)
884 .k(k)
885 .ks(3)
886 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
887 }
888 }
889 }
890
891 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_small_kernel) {
892 TEST_REQUIRES_ARM_NEON;
893 for (uint32_t n = 16; n <= 24; n += 8) {
894 for (size_t k = 1; k <= 40; k += 9) {
895 GemmMicrokernelTester()
896 .mr(4)
897 .nr(8)
898 .kr(1)
899 .sr(1)
900 .m(4)
901 .n(n)
902 .k(k)
903 .ks(3)
904 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
905 }
906 }
907 }
908
909 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
910 TEST_REQUIRES_ARM_NEON;
911 for (size_t k = 1; k <= 40; k += 9) {
912 for (uint32_t n = 1; n <= 8; n++) {
913 for (uint32_t m = 1; m <= 4; m++) {
914 GemmMicrokernelTester()
915 .mr(4)
916 .nr(8)
917 .kr(1)
918 .sr(1)
919 .m(m)
920 .n(n)
921 .k(k)
922 .cm_stride(11)
923 .iterations(1)
924 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
925 }
926 }
927 }
928 }
929
930 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, a_offset) {
931 TEST_REQUIRES_ARM_NEON;
932 for (size_t k = 1; k <= 40; k += 9) {
933 GemmMicrokernelTester()
934 .mr(4)
935 .nr(8)
936 .kr(1)
937 .sr(1)
938 .m(4)
939 .n(8)
940 .k(k)
941 .ks(3)
942 .a_offset(163)
943 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
944 }
945 }
946
947 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, zero) {
948 TEST_REQUIRES_ARM_NEON;
949 for (size_t k = 1; k <= 40; k += 9) {
950 for (uint32_t mz = 0; mz < 4; mz++) {
951 GemmMicrokernelTester()
952 .mr(4)
953 .nr(8)
954 .kr(1)
955 .sr(1)
956 .m(4)
957 .n(8)
958 .k(k)
959 .ks(3)
960 .a_offset(163)
961 .zero_index(mz)
962 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
963 }
964 }
965 }
966
967 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
968 TEST_REQUIRES_ARM_NEON;
969 GemmMicrokernelTester()
970 .mr(4)
971 .nr(8)
972 .kr(1)
973 .sr(1)
974 .m(4)
975 .n(8)
976 .k(8)
977 .qmin(128)
978 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
979 }
980
981 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
982 TEST_REQUIRES_ARM_NEON;
983 GemmMicrokernelTester()
984 .mr(4)
985 .nr(8)
986 .kr(1)
987 .sr(1)
988 .m(4)
989 .n(8)
990 .k(8)
991 .qmax(128)
992 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
993 }
994
995 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
996 TEST_REQUIRES_ARM_NEON;
997 GemmMicrokernelTester()
998 .mr(4)
999 .nr(8)
1000 .kr(1)
1001 .sr(1)
1002 .m(4)
1003 .n(8)
1004 .k(8)
1005 .cm_stride(11)
1006 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
1007 }
1008
1009 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_a_zero_point) {
1010 TEST_REQUIRES_ARM_NEON;
1011 for (size_t k = 1; k <= 40; k += 9) {
1012 GemmMicrokernelTester()
1013 .mr(4)
1014 .nr(8)
1015 .kr(1)
1016 .sr(1)
1017 .m(4)
1018 .n(8)
1019 .k(k)
1020 .a_zero_point(0)
1021 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
1022 }
1023 }
1024
1025 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_b_zero_point) {
1026 TEST_REQUIRES_ARM_NEON;
1027 for (size_t k = 1; k <= 40; k += 9) {
1028 GemmMicrokernelTester()
1029 .mr(4)
1030 .nr(8)
1031 .kr(1)
1032 .sr(1)
1033 .m(4)
1034 .n(8)
1035 .k(k)
1036 .b_zero_point(0)
1037 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
1038 }
1039 }
1040
1041 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_zero_point) {
1042 TEST_REQUIRES_ARM_NEON;
1043 for (size_t k = 1; k <= 40; k += 9) {
1044 GemmMicrokernelTester()
1045 .mr(4)
1046 .nr(8)
1047 .kr(1)
1048 .sr(1)
1049 .m(4)
1050 .n(8)
1051 .k(k)
1052 .a_zero_point(0)
1053 .b_zero_point(0)
1054 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
1055 }
1056 }
1057#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1058
1059
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001060#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1061 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8) {
1062 TEST_REQUIRES_ARM_NEON;
1063 GemmMicrokernelTester()
1064 .mr(2)
1065 .nr(8)
1066 .kr(1)
1067 .sr(1)
1068 .m(2)
1069 .n(8)
1070 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001071 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001072 }
1073
1074 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cn) {
1075 TEST_REQUIRES_ARM_NEON;
1076 GemmMicrokernelTester()
1077 .mr(2)
1078 .nr(8)
1079 .kr(1)
1080 .sr(1)
1081 .m(2)
1082 .n(8)
1083 .k(8)
1084 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001085 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001086 }
1087
1088 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile) {
1089 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001090 for (uint32_t n = 1; n <= 8; n++) {
1091 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001092 GemmMicrokernelTester()
1093 .mr(2)
1094 .nr(8)
1095 .kr(1)
1096 .sr(1)
1097 .m(m)
1098 .n(n)
1099 .k(8)
1100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001101 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001102 }
1103 }
1104 }
1105
1106 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
1107 TEST_REQUIRES_ARM_NEON;
1108 for (uint32_t m = 1; m <= 2; m++) {
1109 GemmMicrokernelTester()
1110 .mr(2)
1111 .nr(8)
1112 .kr(1)
1113 .sr(1)
1114 .m(m)
1115 .n(8)
1116 .k(8)
1117 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001118 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001119 }
1120 }
1121
1122 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
1123 TEST_REQUIRES_ARM_NEON;
1124 for (uint32_t n = 1; n <= 8; n++) {
1125 GemmMicrokernelTester()
1126 .mr(2)
1127 .nr(8)
1128 .kr(1)
1129 .sr(1)
1130 .m(2)
1131 .n(n)
1132 .k(8)
1133 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001134 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001135 }
1136 }
1137
1138 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_lt_8) {
1139 TEST_REQUIRES_ARM_NEON;
1140 for (size_t k = 1; k < 8; k++) {
1141 GemmMicrokernelTester()
1142 .mr(2)
1143 .nr(8)
1144 .kr(1)
1145 .sr(1)
1146 .m(2)
1147 .n(8)
1148 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001149 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001150 }
1151 }
1152
1153 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_lt_8_subtile) {
1154 TEST_REQUIRES_ARM_NEON;
1155 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001156 for (uint32_t n = 1; n <= 8; n++) {
1157 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001158 GemmMicrokernelTester()
1159 .mr(2)
1160 .nr(8)
1161 .kr(1)
1162 .sr(1)
1163 .m(m)
1164 .n(n)
1165 .k(k)
1166 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001167 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001168 }
1169 }
1170 }
1171 }
1172
1173 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_gt_8) {
1174 TEST_REQUIRES_ARM_NEON;
1175 for (size_t k = 9; k < 16; k++) {
1176 GemmMicrokernelTester()
1177 .mr(2)
1178 .nr(8)
1179 .kr(1)
1180 .sr(1)
1181 .m(2)
1182 .n(8)
1183 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001184 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001185 }
1186 }
1187
1188 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_gt_8_subtile) {
1189 TEST_REQUIRES_ARM_NEON;
1190 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001191 for (uint32_t n = 1; n <= 8; n++) {
1192 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001193 GemmMicrokernelTester()
1194 .mr(2)
1195 .nr(8)
1196 .kr(1)
1197 .sr(1)
1198 .m(m)
1199 .n(n)
1200 .k(k)
1201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001202 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001203 }
1204 }
1205 }
1206 }
1207
1208 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_div_8) {
1209 TEST_REQUIRES_ARM_NEON;
1210 for (size_t k = 16; k <= 80; k += 8) {
1211 GemmMicrokernelTester()
1212 .mr(2)
1213 .nr(8)
1214 .kr(1)
1215 .sr(1)
1216 .m(2)
1217 .n(8)
1218 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001219 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001220 }
1221 }
1222
1223 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, k_div_8_subtile) {
1224 TEST_REQUIRES_ARM_NEON;
1225 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001226 for (uint32_t n = 1; n <= 8; n++) {
1227 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001228 GemmMicrokernelTester()
1229 .mr(2)
1230 .nr(8)
1231 .kr(1)
1232 .sr(1)
1233 .m(m)
1234 .n(n)
1235 .k(k)
1236 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001237 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001238 }
1239 }
1240 }
1241 }
1242
1243 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8) {
1244 TEST_REQUIRES_ARM_NEON;
1245 for (uint32_t n = 9; n < 16; n++) {
1246 for (size_t k = 1; k <= 40; k += 9) {
1247 GemmMicrokernelTester()
1248 .mr(2)
1249 .nr(8)
1250 .kr(1)
1251 .sr(1)
1252 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001253 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001254 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001255 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001256 }
1257 }
1258 }
1259
1260 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
1261 TEST_REQUIRES_ARM_NEON;
1262 for (uint32_t n = 9; n < 16; n++) {
1263 for (size_t k = 1; k <= 40; k += 9) {
1264 GemmMicrokernelTester()
1265 .mr(2)
1266 .nr(8)
1267 .kr(1)
1268 .sr(1)
1269 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001270 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001271 .k(k)
1272 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001273 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001274 }
1275 }
1276 }
1277
1278 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_subtile) {
1279 TEST_REQUIRES_ARM_NEON;
1280 for (uint32_t n = 9; n < 16; n++) {
1281 for (size_t k = 1; k <= 40; k += 9) {
1282 for (uint32_t m = 1; m <= 2; m++) {
1283 GemmMicrokernelTester()
1284 .mr(2)
1285 .nr(8)
1286 .kr(1)
1287 .sr(1)
1288 .m(m)
1289 .n(n)
1290 .k(k)
1291 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001292 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001293 }
1294 }
1295 }
1296 }
1297
1298 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8) {
1299 TEST_REQUIRES_ARM_NEON;
1300 for (uint32_t n = 16; n <= 24; n += 8) {
1301 for (size_t k = 1; k <= 40; k += 9) {
1302 GemmMicrokernelTester()
1303 .mr(2)
1304 .nr(8)
1305 .kr(1)
1306 .sr(1)
1307 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001308 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001309 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001310 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001311 }
1312 }
1313 }
1314
1315 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
1316 TEST_REQUIRES_ARM_NEON;
1317 for (uint32_t n = 16; n <= 24; n += 8) {
1318 for (size_t k = 1; k <= 40; k += 9) {
1319 GemmMicrokernelTester()
1320 .mr(2)
1321 .nr(8)
1322 .kr(1)
1323 .sr(1)
1324 .m(2)
1325 .n(n)
1326 .k(k)
1327 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001328 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001329 }
1330 }
1331 }
1332
1333 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_subtile) {
1334 TEST_REQUIRES_ARM_NEON;
1335 for (uint32_t n = 16; n <= 24; n += 8) {
1336 for (size_t k = 1; k <= 40; k += 9) {
1337 for (uint32_t m = 1; m <= 2; m++) {
1338 GemmMicrokernelTester()
1339 .mr(2)
1340 .nr(8)
1341 .kr(1)
1342 .sr(1)
1343 .m(m)
1344 .n(n)
1345 .k(k)
1346 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001347 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001348 }
1349 }
1350 }
1351 }
1352
1353 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, small_kernel) {
1354 TEST_REQUIRES_ARM_NEON;
1355 for (size_t k = 1; k <= 40; k += 9) {
1356 GemmMicrokernelTester()
1357 .mr(2)
1358 .nr(8)
1359 .kr(1)
1360 .sr(1)
1361 .m(2)
1362 .n(8)
1363 .k(k)
1364 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001365 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001366 }
1367 }
1368
1369 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, small_kernel_subtile) {
1370 TEST_REQUIRES_ARM_NEON;
1371 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001372 for (uint32_t n = 1; n <= 8; n++) {
1373 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001374 GemmMicrokernelTester()
1375 .mr(2)
1376 .nr(8)
1377 .kr(1)
1378 .sr(1)
1379 .m(m)
1380 .n(n)
1381 .k(k)
1382 .ks(3)
1383 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001384 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001385 }
1386 }
1387 }
1388 }
1389
1390 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
1391 TEST_REQUIRES_ARM_NEON;
1392 for (uint32_t n = 9; n < 16; n++) {
1393 for (size_t k = 1; k <= 40; k += 9) {
1394 GemmMicrokernelTester()
1395 .mr(2)
1396 .nr(8)
1397 .kr(1)
1398 .sr(1)
1399 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001400 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001401 .k(k)
1402 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001403 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001404 }
1405 }
1406 }
1407
1408 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
1409 TEST_REQUIRES_ARM_NEON;
1410 for (uint32_t n = 16; n <= 24; n += 8) {
1411 for (size_t k = 1; k <= 40; k += 9) {
1412 GemmMicrokernelTester()
1413 .mr(2)
1414 .nr(8)
1415 .kr(1)
1416 .sr(1)
1417 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001418 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001419 .k(k)
1420 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001421 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001422 }
1423 }
1424 }
1425
1426 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cm_subtile) {
1427 TEST_REQUIRES_ARM_NEON;
1428 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001429 for (uint32_t n = 1; n <= 8; n++) {
1430 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001431 GemmMicrokernelTester()
1432 .mr(2)
1433 .nr(8)
1434 .kr(1)
1435 .sr(1)
1436 .m(m)
1437 .n(n)
1438 .k(k)
1439 .cm_stride(11)
1440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001441 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001442 }
1443 }
1444 }
1445 }
1446
1447 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, a_offset) {
1448 TEST_REQUIRES_ARM_NEON;
1449 for (size_t k = 1; k <= 40; k += 9) {
1450 GemmMicrokernelTester()
1451 .mr(2)
1452 .nr(8)
1453 .kr(1)
1454 .sr(1)
1455 .m(2)
1456 .n(8)
1457 .k(k)
1458 .ks(3)
1459 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001460 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001461 }
1462 }
1463
1464 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, zero) {
1465 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001466 for (size_t k = 1; k <= 40; k += 9) {
1467 for (uint32_t mz = 0; mz < 2; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001468 GemmMicrokernelTester()
1469 .mr(2)
1470 .nr(8)
1471 .kr(1)
1472 .sr(1)
1473 .m(2)
1474 .n(8)
1475 .k(k)
1476 .ks(3)
1477 .a_offset(83)
1478 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001479 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001480 }
1481 }
1482 }
1483
1484 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, qmin) {
1485 TEST_REQUIRES_ARM_NEON;
1486 GemmMicrokernelTester()
1487 .mr(2)
1488 .nr(8)
1489 .kr(1)
1490 .sr(1)
1491 .m(2)
1492 .n(8)
1493 .k(8)
1494 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001495 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001496 }
1497
1498 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, qmax) {
1499 TEST_REQUIRES_ARM_NEON;
1500 GemmMicrokernelTester()
1501 .mr(2)
1502 .nr(8)
1503 .kr(1)
1504 .sr(1)
1505 .m(2)
1506 .n(8)
1507 .k(8)
1508 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001509 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001510 }
1511
1512 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, strided_cm) {
1513 TEST_REQUIRES_ARM_NEON;
1514 GemmMicrokernelTester()
1515 .mr(2)
1516 .nr(8)
1517 .kr(1)
1518 .sr(1)
1519 .m(2)
1520 .n(8)
1521 .k(8)
1522 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001523 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001524 }
1525
1526 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_a_zero_point) {
1527 TEST_REQUIRES_ARM_NEON;
1528 for (size_t k = 1; k <= 40; k += 9) {
1529 GemmMicrokernelTester()
1530 .mr(2)
1531 .nr(8)
1532 .kr(1)
1533 .sr(1)
1534 .m(2)
1535 .n(8)
1536 .k(k)
1537 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001538 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001539 }
1540 }
1541
1542 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_b_zero_point) {
1543 TEST_REQUIRES_ARM_NEON;
1544 for (size_t k = 1; k <= 40; k += 9) {
1545 GemmMicrokernelTester()
1546 .mr(2)
1547 .nr(8)
1548 .kr(1)
1549 .sr(1)
1550 .m(2)
1551 .n(8)
1552 .k(k)
1553 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001554 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001555 }
1556 }
1557
1558 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8__NEON_MLAL_LANE, no_zero_point) {
1559 TEST_REQUIRES_ARM_NEON;
1560 for (size_t k = 1; k <= 40; k += 9) {
1561 GemmMicrokernelTester()
1562 .mr(2)
1563 .nr(8)
1564 .kr(1)
1565 .sr(1)
1566 .m(2)
1567 .n(8)
1568 .k(k)
1569 .a_zero_point(0)
1570 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001571 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001572 }
1573 }
1574#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1575
1576
1577#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1578 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8) {
1579 TEST_REQUIRES_ARM_NEON;
1580 GemmMicrokernelTester()
1581 .mr(3)
1582 .nr(8)
1583 .kr(1)
1584 .sr(1)
1585 .m(3)
1586 .n(8)
1587 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001588 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001589 }
1590
1591 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cn) {
1592 TEST_REQUIRES_ARM_NEON;
1593 GemmMicrokernelTester()
1594 .mr(3)
1595 .nr(8)
1596 .kr(1)
1597 .sr(1)
1598 .m(3)
1599 .n(8)
1600 .k(8)
1601 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001602 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001603 }
1604
1605 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile) {
1606 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001607 for (uint32_t n = 1; n <= 8; n++) {
1608 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001609 GemmMicrokernelTester()
1610 .mr(3)
1611 .nr(8)
1612 .kr(1)
1613 .sr(1)
1614 .m(m)
1615 .n(n)
1616 .k(8)
1617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001618 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001619 }
1620 }
1621 }
1622
1623 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
1624 TEST_REQUIRES_ARM_NEON;
1625 for (uint32_t m = 1; m <= 3; m++) {
1626 GemmMicrokernelTester()
1627 .mr(3)
1628 .nr(8)
1629 .kr(1)
1630 .sr(1)
1631 .m(m)
1632 .n(8)
1633 .k(8)
1634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001635 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001636 }
1637 }
1638
1639 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
1640 TEST_REQUIRES_ARM_NEON;
1641 for (uint32_t n = 1; n <= 8; n++) {
1642 GemmMicrokernelTester()
1643 .mr(3)
1644 .nr(8)
1645 .kr(1)
1646 .sr(1)
1647 .m(3)
1648 .n(n)
1649 .k(8)
1650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001652 }
1653 }
1654
1655 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_lt_8) {
1656 TEST_REQUIRES_ARM_NEON;
1657 for (size_t k = 1; k < 8; k++) {
1658 GemmMicrokernelTester()
1659 .mr(3)
1660 .nr(8)
1661 .kr(1)
1662 .sr(1)
1663 .m(3)
1664 .n(8)
1665 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001666 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001667 }
1668 }
1669
1670 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_lt_8_subtile) {
1671 TEST_REQUIRES_ARM_NEON;
1672 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001673 for (uint32_t n = 1; n <= 8; n++) {
1674 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001675 GemmMicrokernelTester()
1676 .mr(3)
1677 .nr(8)
1678 .kr(1)
1679 .sr(1)
1680 .m(m)
1681 .n(n)
1682 .k(k)
1683 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001684 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001685 }
1686 }
1687 }
1688 }
1689
1690 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_gt_8) {
1691 TEST_REQUIRES_ARM_NEON;
1692 for (size_t k = 9; k < 16; k++) {
1693 GemmMicrokernelTester()
1694 .mr(3)
1695 .nr(8)
1696 .kr(1)
1697 .sr(1)
1698 .m(3)
1699 .n(8)
1700 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001701 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001702 }
1703 }
1704
1705 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_gt_8_subtile) {
1706 TEST_REQUIRES_ARM_NEON;
1707 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001708 for (uint32_t n = 1; n <= 8; n++) {
1709 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001710 GemmMicrokernelTester()
1711 .mr(3)
1712 .nr(8)
1713 .kr(1)
1714 .sr(1)
1715 .m(m)
1716 .n(n)
1717 .k(k)
1718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001719 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001720 }
1721 }
1722 }
1723 }
1724
1725 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_div_8) {
1726 TEST_REQUIRES_ARM_NEON;
1727 for (size_t k = 16; k <= 80; k += 8) {
1728 GemmMicrokernelTester()
1729 .mr(3)
1730 .nr(8)
1731 .kr(1)
1732 .sr(1)
1733 .m(3)
1734 .n(8)
1735 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001736 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001737 }
1738 }
1739
1740 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, k_div_8_subtile) {
1741 TEST_REQUIRES_ARM_NEON;
1742 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001743 for (uint32_t n = 1; n <= 8; n++) {
1744 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001745 GemmMicrokernelTester()
1746 .mr(3)
1747 .nr(8)
1748 .kr(1)
1749 .sr(1)
1750 .m(m)
1751 .n(n)
1752 .k(k)
1753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001754 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001755 }
1756 }
1757 }
1758 }
1759
1760 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8) {
1761 TEST_REQUIRES_ARM_NEON;
1762 for (uint32_t n = 9; n < 16; n++) {
1763 for (size_t k = 1; k <= 40; k += 9) {
1764 GemmMicrokernelTester()
1765 .mr(3)
1766 .nr(8)
1767 .kr(1)
1768 .sr(1)
1769 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001770 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001772 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001773 }
1774 }
1775 }
1776
1777 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
1778 TEST_REQUIRES_ARM_NEON;
1779 for (uint32_t n = 9; n < 16; n++) {
1780 for (size_t k = 1; k <= 40; k += 9) {
1781 GemmMicrokernelTester()
1782 .mr(3)
1783 .nr(8)
1784 .kr(1)
1785 .sr(1)
1786 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001787 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001788 .k(k)
1789 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001790 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001791 }
1792 }
1793 }
1794
1795 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_subtile) {
1796 TEST_REQUIRES_ARM_NEON;
1797 for (uint32_t n = 9; n < 16; n++) {
1798 for (size_t k = 1; k <= 40; k += 9) {
1799 for (uint32_t m = 1; m <= 3; m++) {
1800 GemmMicrokernelTester()
1801 .mr(3)
1802 .nr(8)
1803 .kr(1)
1804 .sr(1)
1805 .m(m)
1806 .n(n)
1807 .k(k)
1808 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001809 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001810 }
1811 }
1812 }
1813 }
1814
1815 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8) {
1816 TEST_REQUIRES_ARM_NEON;
1817 for (uint32_t n = 16; n <= 24; n += 8) {
1818 for (size_t k = 1; k <= 40; k += 9) {
1819 GemmMicrokernelTester()
1820 .mr(3)
1821 .nr(8)
1822 .kr(1)
1823 .sr(1)
1824 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001825 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001826 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001827 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001828 }
1829 }
1830 }
1831
1832 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
1833 TEST_REQUIRES_ARM_NEON;
1834 for (uint32_t n = 16; n <= 24; n += 8) {
1835 for (size_t k = 1; k <= 40; k += 9) {
1836 GemmMicrokernelTester()
1837 .mr(3)
1838 .nr(8)
1839 .kr(1)
1840 .sr(1)
1841 .m(3)
1842 .n(n)
1843 .k(k)
1844 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001845 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001846 }
1847 }
1848 }
1849
1850 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_subtile) {
1851 TEST_REQUIRES_ARM_NEON;
1852 for (uint32_t n = 16; n <= 24; n += 8) {
1853 for (size_t k = 1; k <= 40; k += 9) {
1854 for (uint32_t m = 1; m <= 3; m++) {
1855 GemmMicrokernelTester()
1856 .mr(3)
1857 .nr(8)
1858 .kr(1)
1859 .sr(1)
1860 .m(m)
1861 .n(n)
1862 .k(k)
1863 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001864 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001865 }
1866 }
1867 }
1868 }
1869
1870 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, small_kernel) {
1871 TEST_REQUIRES_ARM_NEON;
1872 for (size_t k = 1; k <= 40; k += 9) {
1873 GemmMicrokernelTester()
1874 .mr(3)
1875 .nr(8)
1876 .kr(1)
1877 .sr(1)
1878 .m(3)
1879 .n(8)
1880 .k(k)
1881 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001882 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001883 }
1884 }
1885
1886 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, small_kernel_subtile) {
1887 TEST_REQUIRES_ARM_NEON;
1888 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001889 for (uint32_t n = 1; n <= 8; n++) {
1890 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001891 GemmMicrokernelTester()
1892 .mr(3)
1893 .nr(8)
1894 .kr(1)
1895 .sr(1)
1896 .m(m)
1897 .n(n)
1898 .k(k)
1899 .ks(3)
1900 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001901 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001902 }
1903 }
1904 }
1905 }
1906
1907 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
1908 TEST_REQUIRES_ARM_NEON;
1909 for (uint32_t n = 9; n < 16; n++) {
1910 for (size_t k = 1; k <= 40; k += 9) {
1911 GemmMicrokernelTester()
1912 .mr(3)
1913 .nr(8)
1914 .kr(1)
1915 .sr(1)
1916 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001917 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001918 .k(k)
1919 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001920 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001921 }
1922 }
1923 }
1924
1925 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
1926 TEST_REQUIRES_ARM_NEON;
1927 for (uint32_t n = 16; n <= 24; n += 8) {
1928 for (size_t k = 1; k <= 40; k += 9) {
1929 GemmMicrokernelTester()
1930 .mr(3)
1931 .nr(8)
1932 .kr(1)
1933 .sr(1)
1934 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001935 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001936 .k(k)
1937 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08001938 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001939 }
1940 }
1941 }
1942
1943 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cm_subtile) {
1944 TEST_REQUIRES_ARM_NEON;
1945 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001946 for (uint32_t n = 1; n <= 8; n++) {
1947 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001948 GemmMicrokernelTester()
1949 .mr(3)
1950 .nr(8)
1951 .kr(1)
1952 .sr(1)
1953 .m(m)
1954 .n(n)
1955 .k(k)
1956 .cm_stride(11)
1957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001958 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001959 }
1960 }
1961 }
1962 }
1963
1964 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, a_offset) {
1965 TEST_REQUIRES_ARM_NEON;
1966 for (size_t k = 1; k <= 40; k += 9) {
1967 GemmMicrokernelTester()
1968 .mr(3)
1969 .nr(8)
1970 .kr(1)
1971 .sr(1)
1972 .m(3)
1973 .n(8)
1974 .k(k)
1975 .ks(3)
1976 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08001977 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001978 }
1979 }
1980
1981 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, zero) {
1982 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001983 for (size_t k = 1; k <= 40; k += 9) {
1984 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001985 GemmMicrokernelTester()
1986 .mr(3)
1987 .nr(8)
1988 .kr(1)
1989 .sr(1)
1990 .m(3)
1991 .n(8)
1992 .k(k)
1993 .ks(3)
1994 .a_offset(127)
1995 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08001996 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001997 }
1998 }
1999 }
2000
2001 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, qmin) {
2002 TEST_REQUIRES_ARM_NEON;
2003 GemmMicrokernelTester()
2004 .mr(3)
2005 .nr(8)
2006 .kr(1)
2007 .sr(1)
2008 .m(3)
2009 .n(8)
2010 .k(8)
2011 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002012 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002013 }
2014
2015 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, qmax) {
2016 TEST_REQUIRES_ARM_NEON;
2017 GemmMicrokernelTester()
2018 .mr(3)
2019 .nr(8)
2020 .kr(1)
2021 .sr(1)
2022 .m(3)
2023 .n(8)
2024 .k(8)
2025 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002026 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002027 }
2028
2029 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, strided_cm) {
2030 TEST_REQUIRES_ARM_NEON;
2031 GemmMicrokernelTester()
2032 .mr(3)
2033 .nr(8)
2034 .kr(1)
2035 .sr(1)
2036 .m(3)
2037 .n(8)
2038 .k(8)
2039 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002040 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002041 }
2042
2043 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_a_zero_point) {
2044 TEST_REQUIRES_ARM_NEON;
2045 for (size_t k = 1; k <= 40; k += 9) {
2046 GemmMicrokernelTester()
2047 .mr(3)
2048 .nr(8)
2049 .kr(1)
2050 .sr(1)
2051 .m(3)
2052 .n(8)
2053 .k(k)
2054 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002055 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002056 }
2057 }
2058
2059 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_b_zero_point) {
2060 TEST_REQUIRES_ARM_NEON;
2061 for (size_t k = 1; k <= 40; k += 9) {
2062 GemmMicrokernelTester()
2063 .mr(3)
2064 .nr(8)
2065 .kr(1)
2066 .sr(1)
2067 .m(3)
2068 .n(8)
2069 .k(k)
2070 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002072 }
2073 }
2074
2075 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8__NEON_MLAL_LANE, no_zero_point) {
2076 TEST_REQUIRES_ARM_NEON;
2077 for (size_t k = 1; k <= 40; k += 9) {
2078 GemmMicrokernelTester()
2079 .mr(3)
2080 .nr(8)
2081 .kr(1)
2082 .sr(1)
2083 .m(3)
2084 .n(8)
2085 .k(k)
2086 .a_zero_point(0)
2087 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002088 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002089 }
2090 }
2091#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2092
2093
2094#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2095 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8) {
2096 TEST_REQUIRES_ARM_NEON;
2097 GemmMicrokernelTester()
2098 .mr(6)
2099 .nr(8)
2100 .kr(1)
2101 .sr(1)
2102 .m(6)
2103 .n(8)
2104 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002105 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002106 }
2107
2108 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cn) {
2109 TEST_REQUIRES_ARM_NEON;
2110 GemmMicrokernelTester()
2111 .mr(6)
2112 .nr(8)
2113 .kr(1)
2114 .sr(1)
2115 .m(6)
2116 .n(8)
2117 .k(8)
2118 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002119 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002120 }
2121
2122 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile) {
2123 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002124 for (uint32_t n = 1; n <= 8; n++) {
2125 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002126 GemmMicrokernelTester()
2127 .mr(6)
2128 .nr(8)
2129 .kr(1)
2130 .sr(1)
2131 .m(m)
2132 .n(n)
2133 .k(8)
2134 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002135 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002136 }
2137 }
2138 }
2139
2140 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
2141 TEST_REQUIRES_ARM_NEON;
2142 for (uint32_t m = 1; m <= 6; m++) {
2143 GemmMicrokernelTester()
2144 .mr(6)
2145 .nr(8)
2146 .kr(1)
2147 .sr(1)
2148 .m(m)
2149 .n(8)
2150 .k(8)
2151 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002152 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002153 }
2154 }
2155
2156 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
2157 TEST_REQUIRES_ARM_NEON;
2158 for (uint32_t n = 1; n <= 8; n++) {
2159 GemmMicrokernelTester()
2160 .mr(6)
2161 .nr(8)
2162 .kr(1)
2163 .sr(1)
2164 .m(6)
2165 .n(n)
2166 .k(8)
2167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002168 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002169 }
2170 }
2171
2172 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8) {
2173 TEST_REQUIRES_ARM_NEON;
2174 for (size_t k = 1; k < 8; k++) {
2175 GemmMicrokernelTester()
2176 .mr(6)
2177 .nr(8)
2178 .kr(1)
2179 .sr(1)
2180 .m(6)
2181 .n(8)
2182 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002183 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002184 }
2185 }
2186
2187 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_lt_8_subtile) {
2188 TEST_REQUIRES_ARM_NEON;
2189 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002190 for (uint32_t n = 1; n <= 8; n++) {
2191 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002192 GemmMicrokernelTester()
2193 .mr(6)
2194 .nr(8)
2195 .kr(1)
2196 .sr(1)
2197 .m(m)
2198 .n(n)
2199 .k(k)
2200 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002201 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002202 }
2203 }
2204 }
2205 }
2206
2207 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8) {
2208 TEST_REQUIRES_ARM_NEON;
2209 for (size_t k = 9; k < 16; k++) {
2210 GemmMicrokernelTester()
2211 .mr(6)
2212 .nr(8)
2213 .kr(1)
2214 .sr(1)
2215 .m(6)
2216 .n(8)
2217 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002218 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002219 }
2220 }
2221
2222 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_gt_8_subtile) {
2223 TEST_REQUIRES_ARM_NEON;
2224 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002225 for (uint32_t n = 1; n <= 8; n++) {
2226 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002227 GemmMicrokernelTester()
2228 .mr(6)
2229 .nr(8)
2230 .kr(1)
2231 .sr(1)
2232 .m(m)
2233 .n(n)
2234 .k(k)
2235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002237 }
2238 }
2239 }
2240 }
2241
2242 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8) {
2243 TEST_REQUIRES_ARM_NEON;
2244 for (size_t k = 16; k <= 80; k += 8) {
2245 GemmMicrokernelTester()
2246 .mr(6)
2247 .nr(8)
2248 .kr(1)
2249 .sr(1)
2250 .m(6)
2251 .n(8)
2252 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002253 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002254 }
2255 }
2256
2257 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, k_div_8_subtile) {
2258 TEST_REQUIRES_ARM_NEON;
2259 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002260 for (uint32_t n = 1; n <= 8; n++) {
2261 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002262 GemmMicrokernelTester()
2263 .mr(6)
2264 .nr(8)
2265 .kr(1)
2266 .sr(1)
2267 .m(m)
2268 .n(n)
2269 .k(k)
2270 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002271 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002272 }
2273 }
2274 }
2275 }
2276
2277 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8) {
2278 TEST_REQUIRES_ARM_NEON;
2279 for (uint32_t n = 9; n < 16; n++) {
2280 for (size_t k = 1; k <= 40; k += 9) {
2281 GemmMicrokernelTester()
2282 .mr(6)
2283 .nr(8)
2284 .kr(1)
2285 .sr(1)
2286 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002287 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002288 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002289 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002290 }
2291 }
2292 }
2293
2294 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
2295 TEST_REQUIRES_ARM_NEON;
2296 for (uint32_t n = 9; n < 16; n++) {
2297 for (size_t k = 1; k <= 40; k += 9) {
2298 GemmMicrokernelTester()
2299 .mr(6)
2300 .nr(8)
2301 .kr(1)
2302 .sr(1)
2303 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002304 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002305 .k(k)
2306 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002307 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002308 }
2309 }
2310 }
2311
2312 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_subtile) {
2313 TEST_REQUIRES_ARM_NEON;
2314 for (uint32_t n = 9; n < 16; n++) {
2315 for (size_t k = 1; k <= 40; k += 9) {
2316 for (uint32_t m = 1; m <= 6; m++) {
2317 GemmMicrokernelTester()
2318 .mr(6)
2319 .nr(8)
2320 .kr(1)
2321 .sr(1)
2322 .m(m)
2323 .n(n)
2324 .k(k)
2325 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002326 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002327 }
2328 }
2329 }
2330 }
2331
2332 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8) {
2333 TEST_REQUIRES_ARM_NEON;
2334 for (uint32_t n = 16; n <= 24; n += 8) {
2335 for (size_t k = 1; k <= 40; k += 9) {
2336 GemmMicrokernelTester()
2337 .mr(6)
2338 .nr(8)
2339 .kr(1)
2340 .sr(1)
2341 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002342 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002343 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002344 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002345 }
2346 }
2347 }
2348
2349 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
2350 TEST_REQUIRES_ARM_NEON;
2351 for (uint32_t n = 16; n <= 24; n += 8) {
2352 for (size_t k = 1; k <= 40; k += 9) {
2353 GemmMicrokernelTester()
2354 .mr(6)
2355 .nr(8)
2356 .kr(1)
2357 .sr(1)
2358 .m(6)
2359 .n(n)
2360 .k(k)
2361 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002362 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002363 }
2364 }
2365 }
2366
2367 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_subtile) {
2368 TEST_REQUIRES_ARM_NEON;
2369 for (uint32_t n = 16; n <= 24; n += 8) {
2370 for (size_t k = 1; k <= 40; k += 9) {
2371 for (uint32_t m = 1; m <= 6; m++) {
2372 GemmMicrokernelTester()
2373 .mr(6)
2374 .nr(8)
2375 .kr(1)
2376 .sr(1)
2377 .m(m)
2378 .n(n)
2379 .k(k)
2380 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002381 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002382 }
2383 }
2384 }
2385 }
2386
2387 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel) {
2388 TEST_REQUIRES_ARM_NEON;
2389 for (size_t k = 1; k <= 40; k += 9) {
2390 GemmMicrokernelTester()
2391 .mr(6)
2392 .nr(8)
2393 .kr(1)
2394 .sr(1)
2395 .m(6)
2396 .n(8)
2397 .k(k)
2398 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002399 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002400 }
2401 }
2402
2403 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, small_kernel_subtile) {
2404 TEST_REQUIRES_ARM_NEON;
2405 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002406 for (uint32_t n = 1; n <= 8; n++) {
2407 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002408 GemmMicrokernelTester()
2409 .mr(6)
2410 .nr(8)
2411 .kr(1)
2412 .sr(1)
2413 .m(m)
2414 .n(n)
2415 .k(k)
2416 .ks(3)
2417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002418 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002419 }
2420 }
2421 }
2422 }
2423
2424 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_gt_8_small_kernel) {
2425 TEST_REQUIRES_ARM_NEON;
2426 for (uint32_t n = 9; n < 16; n++) {
2427 for (size_t k = 1; k <= 40; k += 9) {
2428 GemmMicrokernelTester()
2429 .mr(6)
2430 .nr(8)
2431 .kr(1)
2432 .sr(1)
2433 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002434 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002435 .k(k)
2436 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002437 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002438 }
2439 }
2440 }
2441
2442 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, n_div_8_small_kernel) {
2443 TEST_REQUIRES_ARM_NEON;
2444 for (uint32_t n = 16; n <= 24; n += 8) {
2445 for (size_t k = 1; k <= 40; k += 9) {
2446 GemmMicrokernelTester()
2447 .mr(6)
2448 .nr(8)
2449 .kr(1)
2450 .sr(1)
2451 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002452 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002453 .k(k)
2454 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002455 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002456 }
2457 }
2458 }
2459
2460 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm_subtile) {
2461 TEST_REQUIRES_ARM_NEON;
2462 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002463 for (uint32_t n = 1; n <= 8; n++) {
2464 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002465 GemmMicrokernelTester()
2466 .mr(6)
2467 .nr(8)
2468 .kr(1)
2469 .sr(1)
2470 .m(m)
2471 .n(n)
2472 .k(k)
2473 .cm_stride(11)
2474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002476 }
2477 }
2478 }
2479 }
2480
2481 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, a_offset) {
2482 TEST_REQUIRES_ARM_NEON;
2483 for (size_t k = 1; k <= 40; k += 9) {
2484 GemmMicrokernelTester()
2485 .mr(6)
2486 .nr(8)
2487 .kr(1)
2488 .sr(1)
2489 .m(6)
2490 .n(8)
2491 .k(k)
2492 .ks(3)
2493 .a_offset(251)
Marat Dukhan50323b82022-01-11 00:12:01 -08002494 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002495 }
2496 }
2497
2498 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, zero) {
2499 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002500 for (size_t k = 1; k <= 40; k += 9) {
2501 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002502 GemmMicrokernelTester()
2503 .mr(6)
2504 .nr(8)
2505 .kr(1)
2506 .sr(1)
2507 .m(6)
2508 .n(8)
2509 .k(k)
2510 .ks(3)
2511 .a_offset(251)
2512 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08002513 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002514 }
2515 }
2516 }
2517
2518 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmin) {
2519 TEST_REQUIRES_ARM_NEON;
2520 GemmMicrokernelTester()
2521 .mr(6)
2522 .nr(8)
2523 .kr(1)
2524 .sr(1)
2525 .m(6)
2526 .n(8)
2527 .k(8)
2528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002529 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002530 }
2531
2532 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, qmax) {
2533 TEST_REQUIRES_ARM_NEON;
2534 GemmMicrokernelTester()
2535 .mr(6)
2536 .nr(8)
2537 .kr(1)
2538 .sr(1)
2539 .m(6)
2540 .n(8)
2541 .k(8)
2542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002543 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002544 }
2545
2546 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, strided_cm) {
2547 TEST_REQUIRES_ARM_NEON;
2548 GemmMicrokernelTester()
2549 .mr(6)
2550 .nr(8)
2551 .kr(1)
2552 .sr(1)
2553 .m(6)
2554 .n(8)
2555 .k(8)
2556 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002557 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002558 }
2559
2560 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_a_zero_point) {
2561 TEST_REQUIRES_ARM_NEON;
2562 for (size_t k = 1; k <= 40; k += 9) {
2563 GemmMicrokernelTester()
2564 .mr(6)
2565 .nr(8)
2566 .kr(1)
2567 .sr(1)
2568 .m(6)
2569 .n(8)
2570 .k(k)
2571 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002572 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002573 }
2574 }
2575
2576 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_b_zero_point) {
2577 TEST_REQUIRES_ARM_NEON;
2578 for (size_t k = 1; k <= 40; k += 9) {
2579 GemmMicrokernelTester()
2580 .mr(6)
2581 .nr(8)
2582 .kr(1)
2583 .sr(1)
2584 .m(6)
2585 .n(8)
2586 .k(k)
2587 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002588 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002589 }
2590 }
2591
2592 TEST(QU8_IGEMM_MINMAX_RNDNU_6X8__NEON_MLAL_LANE, no_zero_point) {
2593 TEST_REQUIRES_ARM_NEON;
2594 for (size_t k = 1; k <= 40; k += 9) {
2595 GemmMicrokernelTester()
2596 .mr(6)
2597 .nr(8)
2598 .kr(1)
2599 .sr(1)
2600 .m(6)
2601 .n(8)
2602 .k(k)
2603 .a_zero_point(0)
2604 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002605 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002606 }
2607 }
2608#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2609
2610
2611#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2612 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) {
2613 TEST_REQUIRES_ARM_NEON;
2614 GemmMicrokernelTester()
2615 .mr(4)
2616 .nr(16)
2617 .kr(1)
2618 .sr(1)
2619 .m(4)
2620 .n(16)
2621 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002622 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002623 }
2624
2625 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) {
2626 TEST_REQUIRES_ARM_NEON;
2627 GemmMicrokernelTester()
2628 .mr(4)
2629 .nr(16)
2630 .kr(1)
2631 .sr(1)
2632 .m(4)
2633 .n(16)
2634 .k(8)
2635 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002636 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002637 }
2638
2639 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
2640 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002641 for (uint32_t n = 1; n <= 16; n++) {
2642 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002643 GemmMicrokernelTester()
2644 .mr(4)
2645 .nr(16)
2646 .kr(1)
2647 .sr(1)
2648 .m(m)
2649 .n(n)
2650 .k(8)
2651 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002652 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002653 }
2654 }
2655 }
2656
2657 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
2658 TEST_REQUIRES_ARM_NEON;
2659 for (uint32_t m = 1; m <= 4; m++) {
2660 GemmMicrokernelTester()
2661 .mr(4)
2662 .nr(16)
2663 .kr(1)
2664 .sr(1)
2665 .m(m)
2666 .n(16)
2667 .k(8)
2668 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002669 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002670 }
2671 }
2672
2673 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
2674 TEST_REQUIRES_ARM_NEON;
2675 for (uint32_t n = 1; n <= 16; n++) {
2676 GemmMicrokernelTester()
2677 .mr(4)
2678 .nr(16)
2679 .kr(1)
2680 .sr(1)
2681 .m(4)
2682 .n(n)
2683 .k(8)
2684 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002685 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002686 }
2687 }
2688
2689 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) {
2690 TEST_REQUIRES_ARM_NEON;
2691 for (size_t k = 1; k < 8; k++) {
2692 GemmMicrokernelTester()
2693 .mr(4)
2694 .nr(16)
2695 .kr(1)
2696 .sr(1)
2697 .m(4)
2698 .n(16)
2699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002700 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002701 }
2702 }
2703
2704 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
2705 TEST_REQUIRES_ARM_NEON;
2706 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002707 for (uint32_t n = 1; n <= 16; n++) {
2708 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002709 GemmMicrokernelTester()
2710 .mr(4)
2711 .nr(16)
2712 .kr(1)
2713 .sr(1)
2714 .m(m)
2715 .n(n)
2716 .k(k)
2717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002718 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002719 }
2720 }
2721 }
2722 }
2723
2724 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) {
2725 TEST_REQUIRES_ARM_NEON;
2726 for (size_t k = 9; k < 16; k++) {
2727 GemmMicrokernelTester()
2728 .mr(4)
2729 .nr(16)
2730 .kr(1)
2731 .sr(1)
2732 .m(4)
2733 .n(16)
2734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002735 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002736 }
2737 }
2738
2739 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
2740 TEST_REQUIRES_ARM_NEON;
2741 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002742 for (uint32_t n = 1; n <= 16; n++) {
2743 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002744 GemmMicrokernelTester()
2745 .mr(4)
2746 .nr(16)
2747 .kr(1)
2748 .sr(1)
2749 .m(m)
2750 .n(n)
2751 .k(k)
2752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002753 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002754 }
2755 }
2756 }
2757 }
2758
2759 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) {
2760 TEST_REQUIRES_ARM_NEON;
2761 for (size_t k = 16; k <= 80; k += 8) {
2762 GemmMicrokernelTester()
2763 .mr(4)
2764 .nr(16)
2765 .kr(1)
2766 .sr(1)
2767 .m(4)
2768 .n(16)
2769 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002770 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002771 }
2772 }
2773
2774 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
2775 TEST_REQUIRES_ARM_NEON;
2776 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002777 for (uint32_t n = 1; n <= 16; n++) {
2778 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002779 GemmMicrokernelTester()
2780 .mr(4)
2781 .nr(16)
2782 .kr(1)
2783 .sr(1)
2784 .m(m)
2785 .n(n)
2786 .k(k)
2787 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002788 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002789 }
2790 }
2791 }
2792 }
2793
2794 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
2795 TEST_REQUIRES_ARM_NEON;
2796 for (uint32_t n = 17; n < 32; n++) {
2797 for (size_t k = 1; k <= 40; k += 9) {
2798 GemmMicrokernelTester()
2799 .mr(4)
2800 .nr(16)
2801 .kr(1)
2802 .sr(1)
2803 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002804 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002805 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002806 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002807 }
2808 }
2809 }
2810
2811 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
2812 TEST_REQUIRES_ARM_NEON;
2813 for (uint32_t n = 17; n < 32; n++) {
2814 for (size_t k = 1; k <= 40; k += 9) {
2815 GemmMicrokernelTester()
2816 .mr(4)
2817 .nr(16)
2818 .kr(1)
2819 .sr(1)
2820 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002821 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002822 .k(k)
2823 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002824 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002825 }
2826 }
2827 }
2828
2829 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
2830 TEST_REQUIRES_ARM_NEON;
2831 for (uint32_t n = 17; n < 32; n++) {
2832 for (size_t k = 1; k <= 40; k += 9) {
2833 for (uint32_t m = 1; m <= 4; m++) {
2834 GemmMicrokernelTester()
2835 .mr(4)
2836 .nr(16)
2837 .kr(1)
2838 .sr(1)
2839 .m(m)
2840 .n(n)
2841 .k(k)
2842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002843 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002844 }
2845 }
2846 }
2847 }
2848
2849 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
2850 TEST_REQUIRES_ARM_NEON;
2851 for (uint32_t n = 32; n <= 48; n += 16) {
2852 for (size_t k = 1; k <= 40; k += 9) {
2853 GemmMicrokernelTester()
2854 .mr(4)
2855 .nr(16)
2856 .kr(1)
2857 .sr(1)
2858 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002859 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002860 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002861 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002862 }
2863 }
2864 }
2865
2866 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
2867 TEST_REQUIRES_ARM_NEON;
2868 for (uint32_t n = 32; n <= 48; n += 16) {
2869 for (size_t k = 1; k <= 40; k += 9) {
2870 GemmMicrokernelTester()
2871 .mr(4)
2872 .nr(16)
2873 .kr(1)
2874 .sr(1)
2875 .m(4)
2876 .n(n)
2877 .k(k)
2878 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002879 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002880 }
2881 }
2882 }
2883
2884 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
2885 TEST_REQUIRES_ARM_NEON;
2886 for (uint32_t n = 32; n <= 48; n += 16) {
2887 for (size_t k = 1; k <= 40; k += 9) {
2888 for (uint32_t m = 1; m <= 4; m++) {
2889 GemmMicrokernelTester()
2890 .mr(4)
2891 .nr(16)
2892 .kr(1)
2893 .sr(1)
2894 .m(m)
2895 .n(n)
2896 .k(k)
2897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002898 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002899 }
2900 }
2901 }
2902 }
2903
2904 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel) {
2905 TEST_REQUIRES_ARM_NEON;
2906 for (size_t k = 1; k <= 40; k += 9) {
2907 GemmMicrokernelTester()
2908 .mr(4)
2909 .nr(16)
2910 .kr(1)
2911 .sr(1)
2912 .m(4)
2913 .n(16)
2914 .k(k)
2915 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002916 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002917 }
2918 }
2919
2920 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, small_kernel_subtile) {
2921 TEST_REQUIRES_ARM_NEON;
2922 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002923 for (uint32_t n = 1; n <= 16; n++) {
2924 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002925 GemmMicrokernelTester()
2926 .mr(4)
2927 .nr(16)
2928 .kr(1)
2929 .sr(1)
2930 .m(m)
2931 .n(n)
2932 .k(k)
2933 .ks(3)
2934 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002935 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002936 }
2937 }
2938 }
2939 }
2940
2941 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
2942 TEST_REQUIRES_ARM_NEON;
2943 for (uint32_t n = 17; n < 32; n++) {
2944 for (size_t k = 1; k <= 40; k += 9) {
2945 GemmMicrokernelTester()
2946 .mr(4)
2947 .nr(16)
2948 .kr(1)
2949 .sr(1)
2950 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002951 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002952 .k(k)
2953 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002954 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002955 }
2956 }
2957 }
2958
2959 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
2960 TEST_REQUIRES_ARM_NEON;
2961 for (uint32_t n = 32; n <= 48; n += 16) {
2962 for (size_t k = 1; k <= 40; k += 9) {
2963 GemmMicrokernelTester()
2964 .mr(4)
2965 .nr(16)
2966 .kr(1)
2967 .sr(1)
2968 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002969 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002970 .k(k)
2971 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08002972 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002973 }
2974 }
2975 }
2976
2977 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
2978 TEST_REQUIRES_ARM_NEON;
2979 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002980 for (uint32_t n = 1; n <= 16; n++) {
2981 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002982 GemmMicrokernelTester()
2983 .mr(4)
2984 .nr(16)
2985 .kr(1)
2986 .sr(1)
2987 .m(m)
2988 .n(n)
2989 .k(k)
2990 .cm_stride(19)
2991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002992 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002993 }
2994 }
2995 }
2996 }
2997
2998 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, a_offset) {
2999 TEST_REQUIRES_ARM_NEON;
3000 for (size_t k = 1; k <= 40; k += 9) {
3001 GemmMicrokernelTester()
3002 .mr(4)
3003 .nr(16)
3004 .kr(1)
3005 .sr(1)
3006 .m(4)
3007 .n(16)
3008 .k(k)
3009 .ks(3)
3010 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08003011 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003012 }
3013 }
3014
3015 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, zero) {
3016 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003017 for (size_t k = 1; k <= 40; k += 9) {
3018 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003019 GemmMicrokernelTester()
3020 .mr(4)
3021 .nr(16)
3022 .kr(1)
3023 .sr(1)
3024 .m(4)
3025 .n(16)
3026 .k(k)
3027 .ks(3)
3028 .a_offset(163)
3029 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003030 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003031 }
3032 }
3033 }
3034
3035 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) {
3036 TEST_REQUIRES_ARM_NEON;
3037 GemmMicrokernelTester()
3038 .mr(4)
3039 .nr(16)
3040 .kr(1)
3041 .sr(1)
3042 .m(4)
3043 .n(16)
3044 .k(8)
3045 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003046 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003047 }
3048
3049 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) {
3050 TEST_REQUIRES_ARM_NEON;
3051 GemmMicrokernelTester()
3052 .mr(4)
3053 .nr(16)
3054 .kr(1)
3055 .sr(1)
3056 .m(4)
3057 .n(16)
3058 .k(8)
3059 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003060 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003061 }
3062
3063 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) {
3064 TEST_REQUIRES_ARM_NEON;
3065 GemmMicrokernelTester()
3066 .mr(4)
3067 .nr(16)
3068 .kr(1)
3069 .sr(1)
3070 .m(4)
3071 .n(16)
3072 .k(8)
3073 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003075 }
3076
3077 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_a_zero_point) {
3078 TEST_REQUIRES_ARM_NEON;
3079 for (size_t k = 1; k <= 40; k += 9) {
3080 GemmMicrokernelTester()
3081 .mr(4)
3082 .nr(16)
3083 .kr(1)
3084 .sr(1)
3085 .m(4)
3086 .n(16)
3087 .k(k)
3088 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003089 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003090 }
3091 }
3092
3093 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_b_zero_point) {
3094 TEST_REQUIRES_ARM_NEON;
3095 for (size_t k = 1; k <= 40; k += 9) {
3096 GemmMicrokernelTester()
3097 .mr(4)
3098 .nr(16)
3099 .kr(1)
3100 .sr(1)
3101 .m(4)
3102 .n(16)
3103 .k(k)
3104 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003105 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003106 }
3107 }
3108
3109 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_zero_point) {
3110 TEST_REQUIRES_ARM_NEON;
3111 for (size_t k = 1; k <= 40; k += 9) {
3112 GemmMicrokernelTester()
3113 .mr(4)
3114 .nr(16)
3115 .kr(1)
3116 .sr(1)
3117 .m(4)
3118 .n(16)
3119 .k(k)
3120 .a_zero_point(0)
3121 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003122 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003123 }
3124 }
3125#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3126
3127
3128#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3129 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) {
3130 TEST_REQUIRES_ARM_NEON;
3131 GemmMicrokernelTester()
3132 .mr(6)
3133 .nr(16)
3134 .kr(1)
3135 .sr(1)
3136 .m(6)
3137 .n(16)
3138 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003139 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003140 }
3141
3142 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) {
3143 TEST_REQUIRES_ARM_NEON;
3144 GemmMicrokernelTester()
3145 .mr(6)
3146 .nr(16)
3147 .kr(1)
3148 .sr(1)
3149 .m(6)
3150 .n(16)
3151 .k(8)
3152 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003153 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003154 }
3155
3156 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
3157 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003158 for (uint32_t n = 1; n <= 16; n++) {
3159 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003160 GemmMicrokernelTester()
3161 .mr(6)
3162 .nr(16)
3163 .kr(1)
3164 .sr(1)
3165 .m(m)
3166 .n(n)
3167 .k(8)
3168 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003169 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003170 }
3171 }
3172 }
3173
3174 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
3175 TEST_REQUIRES_ARM_NEON;
3176 for (uint32_t m = 1; m <= 6; m++) {
3177 GemmMicrokernelTester()
3178 .mr(6)
3179 .nr(16)
3180 .kr(1)
3181 .sr(1)
3182 .m(m)
3183 .n(16)
3184 .k(8)
3185 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003186 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003187 }
3188 }
3189
3190 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
3191 TEST_REQUIRES_ARM_NEON;
3192 for (uint32_t n = 1; n <= 16; n++) {
3193 GemmMicrokernelTester()
3194 .mr(6)
3195 .nr(16)
3196 .kr(1)
3197 .sr(1)
3198 .m(6)
3199 .n(n)
3200 .k(8)
3201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003202 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003203 }
3204 }
3205
3206 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) {
3207 TEST_REQUIRES_ARM_NEON;
3208 for (size_t k = 1; k < 8; k++) {
3209 GemmMicrokernelTester()
3210 .mr(6)
3211 .nr(16)
3212 .kr(1)
3213 .sr(1)
3214 .m(6)
3215 .n(16)
3216 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003217 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003218 }
3219 }
3220
3221 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
3222 TEST_REQUIRES_ARM_NEON;
3223 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003224 for (uint32_t n = 1; n <= 16; n++) {
3225 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003226 GemmMicrokernelTester()
3227 .mr(6)
3228 .nr(16)
3229 .kr(1)
3230 .sr(1)
3231 .m(m)
3232 .n(n)
3233 .k(k)
3234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003235 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003236 }
3237 }
3238 }
3239 }
3240
3241 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) {
3242 TEST_REQUIRES_ARM_NEON;
3243 for (size_t k = 9; k < 16; k++) {
3244 GemmMicrokernelTester()
3245 .mr(6)
3246 .nr(16)
3247 .kr(1)
3248 .sr(1)
3249 .m(6)
3250 .n(16)
3251 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003252 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003253 }
3254 }
3255
3256 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
3257 TEST_REQUIRES_ARM_NEON;
3258 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003259 for (uint32_t n = 1; n <= 16; n++) {
3260 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003261 GemmMicrokernelTester()
3262 .mr(6)
3263 .nr(16)
3264 .kr(1)
3265 .sr(1)
3266 .m(m)
3267 .n(n)
3268 .k(k)
3269 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003271 }
3272 }
3273 }
3274 }
3275
3276 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) {
3277 TEST_REQUIRES_ARM_NEON;
3278 for (size_t k = 16; k <= 80; k += 8) {
3279 GemmMicrokernelTester()
3280 .mr(6)
3281 .nr(16)
3282 .kr(1)
3283 .sr(1)
3284 .m(6)
3285 .n(16)
3286 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003287 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003288 }
3289 }
3290
3291 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
3292 TEST_REQUIRES_ARM_NEON;
3293 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003294 for (uint32_t n = 1; n <= 16; n++) {
3295 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003296 GemmMicrokernelTester()
3297 .mr(6)
3298 .nr(16)
3299 .kr(1)
3300 .sr(1)
3301 .m(m)
3302 .n(n)
3303 .k(k)
3304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003305 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003306 }
3307 }
3308 }
3309 }
3310
3311 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) {
3312 TEST_REQUIRES_ARM_NEON;
3313 for (uint32_t n = 17; n < 32; n++) {
3314 for (size_t k = 1; k <= 40; k += 9) {
3315 GemmMicrokernelTester()
3316 .mr(6)
3317 .nr(16)
3318 .kr(1)
3319 .sr(1)
3320 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003321 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003322 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003323 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003324 }
3325 }
3326 }
3327
3328 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
3329 TEST_REQUIRES_ARM_NEON;
3330 for (uint32_t n = 17; n < 32; n++) {
3331 for (size_t k = 1; k <= 40; k += 9) {
3332 GemmMicrokernelTester()
3333 .mr(6)
3334 .nr(16)
3335 .kr(1)
3336 .sr(1)
3337 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003338 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003339 .k(k)
3340 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003341 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003342 }
3343 }
3344 }
3345
3346 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
3347 TEST_REQUIRES_ARM_NEON;
3348 for (uint32_t n = 17; n < 32; n++) {
3349 for (size_t k = 1; k <= 40; k += 9) {
3350 for (uint32_t m = 1; m <= 6; m++) {
3351 GemmMicrokernelTester()
3352 .mr(6)
3353 .nr(16)
3354 .kr(1)
3355 .sr(1)
3356 .m(m)
3357 .n(n)
3358 .k(k)
3359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003360 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003361 }
3362 }
3363 }
3364 }
3365
3366 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) {
3367 TEST_REQUIRES_ARM_NEON;
3368 for (uint32_t n = 32; n <= 48; n += 16) {
3369 for (size_t k = 1; k <= 40; k += 9) {
3370 GemmMicrokernelTester()
3371 .mr(6)
3372 .nr(16)
3373 .kr(1)
3374 .sr(1)
3375 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003376 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003377 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003378 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003379 }
3380 }
3381 }
3382
3383 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
3384 TEST_REQUIRES_ARM_NEON;
3385 for (uint32_t n = 32; n <= 48; n += 16) {
3386 for (size_t k = 1; k <= 40; k += 9) {
3387 GemmMicrokernelTester()
3388 .mr(6)
3389 .nr(16)
3390 .kr(1)
3391 .sr(1)
3392 .m(6)
3393 .n(n)
3394 .k(k)
3395 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003396 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003397 }
3398 }
3399 }
3400
3401 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
3402 TEST_REQUIRES_ARM_NEON;
3403 for (uint32_t n = 32; n <= 48; n += 16) {
3404 for (size_t k = 1; k <= 40; k += 9) {
3405 for (uint32_t m = 1; m <= 6; m++) {
3406 GemmMicrokernelTester()
3407 .mr(6)
3408 .nr(16)
3409 .kr(1)
3410 .sr(1)
3411 .m(m)
3412 .n(n)
3413 .k(k)
3414 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003415 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003416 }
3417 }
3418 }
3419 }
3420
3421 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel) {
3422 TEST_REQUIRES_ARM_NEON;
3423 for (size_t k = 1; k <= 40; k += 9) {
3424 GemmMicrokernelTester()
3425 .mr(6)
3426 .nr(16)
3427 .kr(1)
3428 .sr(1)
3429 .m(6)
3430 .n(16)
3431 .k(k)
3432 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003433 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003434 }
3435 }
3436
3437 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, small_kernel_subtile) {
3438 TEST_REQUIRES_ARM_NEON;
3439 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003440 for (uint32_t n = 1; n <= 16; n++) {
3441 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003442 GemmMicrokernelTester()
3443 .mr(6)
3444 .nr(16)
3445 .kr(1)
3446 .sr(1)
3447 .m(m)
3448 .n(n)
3449 .k(k)
3450 .ks(3)
3451 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003452 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003453 }
3454 }
3455 }
3456 }
3457
3458 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_small_kernel) {
3459 TEST_REQUIRES_ARM_NEON;
3460 for (uint32_t n = 17; n < 32; n++) {
3461 for (size_t k = 1; k <= 40; k += 9) {
3462 GemmMicrokernelTester()
3463 .mr(6)
3464 .nr(16)
3465 .kr(1)
3466 .sr(1)
3467 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003468 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003469 .k(k)
3470 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003471 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003472 }
3473 }
3474 }
3475
3476 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_small_kernel) {
3477 TEST_REQUIRES_ARM_NEON;
3478 for (uint32_t n = 32; n <= 48; n += 16) {
3479 for (size_t k = 1; k <= 40; k += 9) {
3480 GemmMicrokernelTester()
3481 .mr(6)
3482 .nr(16)
3483 .kr(1)
3484 .sr(1)
3485 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003486 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003487 .k(k)
3488 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003489 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003490 }
3491 }
3492 }
3493
3494 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
3495 TEST_REQUIRES_ARM_NEON;
3496 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003497 for (uint32_t n = 1; n <= 16; n++) {
3498 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003499 GemmMicrokernelTester()
3500 .mr(6)
3501 .nr(16)
3502 .kr(1)
3503 .sr(1)
3504 .m(m)
3505 .n(n)
3506 .k(k)
3507 .cm_stride(19)
3508 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003509 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003510 }
3511 }
3512 }
3513 }
3514
3515 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, a_offset) {
3516 TEST_REQUIRES_ARM_NEON;
3517 for (size_t k = 1; k <= 40; k += 9) {
3518 GemmMicrokernelTester()
3519 .mr(6)
3520 .nr(16)
3521 .kr(1)
3522 .sr(1)
3523 .m(6)
3524 .n(16)
3525 .k(k)
3526 .ks(3)
3527 .a_offset(251)
Marat Dukhan50323b82022-01-11 00:12:01 -08003528 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003529 }
3530 }
3531
3532 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, zero) {
3533 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003534 for (size_t k = 1; k <= 40; k += 9) {
3535 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003536 GemmMicrokernelTester()
3537 .mr(6)
3538 .nr(16)
3539 .kr(1)
3540 .sr(1)
3541 .m(6)
3542 .n(16)
3543 .k(k)
3544 .ks(3)
3545 .a_offset(251)
3546 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08003547 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003548 }
3549 }
3550 }
3551
3552 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) {
3553 TEST_REQUIRES_ARM_NEON;
3554 GemmMicrokernelTester()
3555 .mr(6)
3556 .nr(16)
3557 .kr(1)
3558 .sr(1)
3559 .m(6)
3560 .n(16)
3561 .k(8)
3562 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003563 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003564 }
3565
3566 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) {
3567 TEST_REQUIRES_ARM_NEON;
3568 GemmMicrokernelTester()
3569 .mr(6)
3570 .nr(16)
3571 .kr(1)
3572 .sr(1)
3573 .m(6)
3574 .n(16)
3575 .k(8)
3576 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003577 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003578 }
3579
3580 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) {
3581 TEST_REQUIRES_ARM_NEON;
3582 GemmMicrokernelTester()
3583 .mr(6)
3584 .nr(16)
3585 .kr(1)
3586 .sr(1)
3587 .m(6)
3588 .n(16)
3589 .k(8)
3590 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003591 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003592 }
3593
3594 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_a_zero_point) {
3595 TEST_REQUIRES_ARM_NEON;
3596 for (size_t k = 1; k <= 40; k += 9) {
3597 GemmMicrokernelTester()
3598 .mr(6)
3599 .nr(16)
3600 .kr(1)
3601 .sr(1)
3602 .m(6)
3603 .n(16)
3604 .k(k)
3605 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003606 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003607 }
3608 }
3609
3610 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_b_zero_point) {
3611 TEST_REQUIRES_ARM_NEON;
3612 for (size_t k = 1; k <= 40; k += 9) {
3613 GemmMicrokernelTester()
3614 .mr(6)
3615 .nr(16)
3616 .kr(1)
3617 .sr(1)
3618 .m(6)
3619 .n(16)
3620 .k(k)
3621 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003622 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003623 }
3624 }
3625
3626 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_zero_point) {
3627 TEST_REQUIRES_ARM_NEON;
3628 for (size_t k = 1; k <= 40; k += 9) {
3629 GemmMicrokernelTester()
3630 .mr(6)
3631 .nr(16)
3632 .kr(1)
3633 .sr(1)
3634 .m(6)
3635 .n(16)
3636 .k(k)
3637 .a_zero_point(0)
3638 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003639 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003640 }
3641 }
3642#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3643
3644
3645#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3646 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8) {
3647 TEST_REQUIRES_ARM_NEON_DOT;
3648 GemmMicrokernelTester()
3649 .mr(4)
3650 .nr(8)
3651 .kr(4)
3652 .sr(1)
3653 .m(4)
3654 .n(8)
3655 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003656 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003657 }
3658
3659 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
3660 TEST_REQUIRES_ARM_NEON_DOT;
3661 GemmMicrokernelTester()
3662 .mr(4)
3663 .nr(8)
3664 .kr(4)
3665 .sr(1)
3666 .m(4)
3667 .n(8)
3668 .k(8)
3669 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003670 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003671 }
3672
3673 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
3674 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003675 for (uint32_t n = 1; n <= 8; n++) {
3676 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003677 GemmMicrokernelTester()
3678 .mr(4)
3679 .nr(8)
3680 .kr(4)
3681 .sr(1)
3682 .m(m)
3683 .n(n)
3684 .k(8)
3685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003686 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003687 }
3688 }
3689 }
3690
3691 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
3692 TEST_REQUIRES_ARM_NEON_DOT;
3693 for (uint32_t m = 1; m <= 4; m++) {
3694 GemmMicrokernelTester()
3695 .mr(4)
3696 .nr(8)
3697 .kr(4)
3698 .sr(1)
3699 .m(m)
3700 .n(8)
3701 .k(8)
3702 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003703 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003704 }
3705 }
3706
3707 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
3708 TEST_REQUIRES_ARM_NEON_DOT;
3709 for (uint32_t n = 1; n <= 8; n++) {
3710 GemmMicrokernelTester()
3711 .mr(4)
3712 .nr(8)
3713 .kr(4)
3714 .sr(1)
3715 .m(4)
3716 .n(n)
3717 .k(8)
3718 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003719 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003720 }
3721 }
3722
3723 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8) {
3724 TEST_REQUIRES_ARM_NEON_DOT;
3725 for (size_t k = 1; k < 8; k++) {
3726 GemmMicrokernelTester()
3727 .mr(4)
3728 .nr(8)
3729 .kr(4)
3730 .sr(1)
3731 .m(4)
3732 .n(8)
3733 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003734 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003735 }
3736 }
3737
3738 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
3739 TEST_REQUIRES_ARM_NEON_DOT;
3740 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003741 for (uint32_t n = 1; n <= 8; n++) {
3742 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003743 GemmMicrokernelTester()
3744 .mr(4)
3745 .nr(8)
3746 .kr(4)
3747 .sr(1)
3748 .m(m)
3749 .n(n)
3750 .k(k)
3751 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003752 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003753 }
3754 }
3755 }
3756 }
3757
3758 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8) {
3759 TEST_REQUIRES_ARM_NEON_DOT;
3760 for (size_t k = 9; k < 16; k++) {
3761 GemmMicrokernelTester()
3762 .mr(4)
3763 .nr(8)
3764 .kr(4)
3765 .sr(1)
3766 .m(4)
3767 .n(8)
3768 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003769 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003770 }
3771 }
3772
3773 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
3774 TEST_REQUIRES_ARM_NEON_DOT;
3775 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003776 for (uint32_t n = 1; n <= 8; n++) {
3777 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003778 GemmMicrokernelTester()
3779 .mr(4)
3780 .nr(8)
3781 .kr(4)
3782 .sr(1)
3783 .m(m)
3784 .n(n)
3785 .k(k)
3786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003787 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003788 }
3789 }
3790 }
3791 }
3792
3793 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8) {
3794 TEST_REQUIRES_ARM_NEON_DOT;
3795 for (size_t k = 16; k <= 80; k += 8) {
3796 GemmMicrokernelTester()
3797 .mr(4)
3798 .nr(8)
3799 .kr(4)
3800 .sr(1)
3801 .m(4)
3802 .n(8)
3803 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003804 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003805 }
3806 }
3807
3808 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8_subtile) {
3809 TEST_REQUIRES_ARM_NEON_DOT;
3810 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003811 for (uint32_t n = 1; n <= 8; n++) {
3812 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003813 GemmMicrokernelTester()
3814 .mr(4)
3815 .nr(8)
3816 .kr(4)
3817 .sr(1)
3818 .m(m)
3819 .n(n)
3820 .k(k)
3821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003822 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003823 }
3824 }
3825 }
3826 }
3827
3828 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8) {
3829 TEST_REQUIRES_ARM_NEON_DOT;
3830 for (uint32_t n = 9; n < 16; n++) {
3831 for (size_t k = 1; k <= 40; k += 9) {
3832 GemmMicrokernelTester()
3833 .mr(4)
3834 .nr(8)
3835 .kr(4)
3836 .sr(1)
3837 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003838 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003839 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003840 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003841 }
3842 }
3843 }
3844
3845 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
3846 TEST_REQUIRES_ARM_NEON_DOT;
3847 for (uint32_t n = 9; n < 16; n++) {
3848 for (size_t k = 1; k <= 40; k += 9) {
3849 GemmMicrokernelTester()
3850 .mr(4)
3851 .nr(8)
3852 .kr(4)
3853 .sr(1)
3854 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003855 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003856 .k(k)
3857 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003858 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003859 }
3860 }
3861 }
3862
3863 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
3864 TEST_REQUIRES_ARM_NEON_DOT;
3865 for (uint32_t n = 9; n < 16; n++) {
3866 for (size_t k = 1; k <= 40; k += 9) {
3867 for (uint32_t m = 1; m <= 4; m++) {
3868 GemmMicrokernelTester()
3869 .mr(4)
3870 .nr(8)
3871 .kr(4)
3872 .sr(1)
3873 .m(m)
3874 .n(n)
3875 .k(k)
3876 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003877 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003878 }
3879 }
3880 }
3881 }
3882
3883 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8) {
3884 TEST_REQUIRES_ARM_NEON_DOT;
3885 for (uint32_t n = 16; n <= 24; n += 8) {
3886 for (size_t k = 1; k <= 40; k += 9) {
3887 GemmMicrokernelTester()
3888 .mr(4)
3889 .nr(8)
3890 .kr(4)
3891 .sr(1)
3892 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003893 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003894 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003895 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003896 }
3897 }
3898 }
3899
3900 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
3901 TEST_REQUIRES_ARM_NEON_DOT;
3902 for (uint32_t n = 16; n <= 24; n += 8) {
3903 for (size_t k = 1; k <= 40; k += 9) {
3904 GemmMicrokernelTester()
3905 .mr(4)
3906 .nr(8)
3907 .kr(4)
3908 .sr(1)
3909 .m(4)
3910 .n(n)
3911 .k(k)
3912 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003913 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003914 }
3915 }
3916 }
3917
3918 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_subtile) {
3919 TEST_REQUIRES_ARM_NEON_DOT;
3920 for (uint32_t n = 16; n <= 24; n += 8) {
3921 for (size_t k = 1; k <= 40; k += 9) {
3922 for (uint32_t m = 1; m <= 4; m++) {
3923 GemmMicrokernelTester()
3924 .mr(4)
3925 .nr(8)
3926 .kr(4)
3927 .sr(1)
3928 .m(m)
3929 .n(n)
3930 .k(k)
3931 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003932 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003933 }
3934 }
3935 }
3936 }
3937
3938 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel) {
3939 TEST_REQUIRES_ARM_NEON_DOT;
3940 for (size_t k = 1; k <= 40; k += 9) {
3941 GemmMicrokernelTester()
3942 .mr(4)
3943 .nr(8)
3944 .kr(4)
3945 .sr(1)
3946 .m(4)
3947 .n(8)
3948 .k(k)
3949 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003950 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003951 }
3952 }
3953
3954 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, small_kernel_subtile) {
3955 TEST_REQUIRES_ARM_NEON_DOT;
3956 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003957 for (uint32_t n = 1; n <= 8; n++) {
3958 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003959 GemmMicrokernelTester()
3960 .mr(4)
3961 .nr(8)
3962 .kr(4)
3963 .sr(1)
3964 .m(m)
3965 .n(n)
3966 .k(k)
3967 .ks(3)
3968 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003969 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003970 }
3971 }
3972 }
3973 }
3974
3975 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_small_kernel) {
3976 TEST_REQUIRES_ARM_NEON_DOT;
3977 for (uint32_t n = 9; n < 16; n++) {
3978 for (size_t k = 1; k <= 40; k += 9) {
3979 GemmMicrokernelTester()
3980 .mr(4)
3981 .nr(8)
3982 .kr(4)
3983 .sr(1)
3984 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003985 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003986 .k(k)
3987 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08003988 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003989 }
3990 }
3991 }
3992
3993 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_small_kernel) {
3994 TEST_REQUIRES_ARM_NEON_DOT;
3995 for (uint32_t n = 16; n <= 24; n += 8) {
3996 for (size_t k = 1; k <= 40; k += 9) {
3997 GemmMicrokernelTester()
3998 .mr(4)
3999 .nr(8)
4000 .kr(4)
4001 .sr(1)
4002 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004003 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004004 .k(k)
4005 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004006 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004007 }
4008 }
4009 }
4010
4011 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
4012 TEST_REQUIRES_ARM_NEON_DOT;
4013 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004014 for (uint32_t n = 1; n <= 8; n++) {
4015 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004016 GemmMicrokernelTester()
4017 .mr(4)
4018 .nr(8)
4019 .kr(4)
4020 .sr(1)
4021 .m(m)
4022 .n(n)
4023 .k(k)
4024 .cm_stride(11)
4025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004026 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004027 }
4028 }
4029 }
4030 }
4031
4032 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, a_offset) {
4033 TEST_REQUIRES_ARM_NEON_DOT;
4034 for (size_t k = 1; k <= 40; k += 9) {
4035 GemmMicrokernelTester()
4036 .mr(4)
4037 .nr(8)
4038 .kr(4)
4039 .sr(1)
4040 .m(4)
4041 .n(8)
4042 .k(k)
4043 .ks(3)
4044 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004045 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004046 }
4047 }
4048
4049 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, zero) {
4050 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004051 for (size_t k = 1; k <= 40; k += 9) {
4052 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004053 GemmMicrokernelTester()
4054 .mr(4)
4055 .nr(8)
4056 .kr(4)
4057 .sr(1)
4058 .m(4)
4059 .n(8)
4060 .k(k)
4061 .ks(3)
4062 .a_offset(163)
4063 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004064 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004065 }
4066 }
4067 }
4068
4069 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
4070 TEST_REQUIRES_ARM_NEON_DOT;
4071 GemmMicrokernelTester()
4072 .mr(4)
4073 .nr(8)
4074 .kr(4)
4075 .sr(1)
4076 .m(4)
4077 .n(8)
4078 .k(8)
4079 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004080 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004081 }
4082
4083 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
4084 TEST_REQUIRES_ARM_NEON_DOT;
4085 GemmMicrokernelTester()
4086 .mr(4)
4087 .nr(8)
4088 .kr(4)
4089 .sr(1)
4090 .m(4)
4091 .n(8)
4092 .k(8)
4093 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004094 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004095 }
4096
4097 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
4098 TEST_REQUIRES_ARM_NEON_DOT;
4099 GemmMicrokernelTester()
4100 .mr(4)
4101 .nr(8)
4102 .kr(4)
4103 .sr(1)
4104 .m(4)
4105 .n(8)
4106 .k(8)
4107 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004108 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004109 }
4110
4111 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
4112 TEST_REQUIRES_ARM_NEON_DOT;
4113 for (size_t k = 1; k <= 40; k += 9) {
4114 GemmMicrokernelTester()
4115 .mr(4)
4116 .nr(8)
4117 .kr(4)
4118 .sr(1)
4119 .m(4)
4120 .n(8)
4121 .k(k)
4122 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004123 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004124 }
4125 }
4126
4127 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
4128 TEST_REQUIRES_ARM_NEON_DOT;
4129 for (size_t k = 1; k <= 40; k += 9) {
4130 GemmMicrokernelTester()
4131 .mr(4)
4132 .nr(8)
4133 .kr(4)
4134 .sr(1)
4135 .m(4)
4136 .n(8)
4137 .k(k)
4138 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004139 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004140 }
4141 }
4142
4143 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
4144 TEST_REQUIRES_ARM_NEON_DOT;
4145 for (size_t k = 1; k <= 40; k += 9) {
4146 GemmMicrokernelTester()
4147 .mr(4)
4148 .nr(8)
4149 .kr(4)
4150 .sr(1)
4151 .m(4)
4152 .n(8)
4153 .k(k)
4154 .a_zero_point(0)
4155 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004156 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004157 }
4158 }
4159#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4160
4161
4162#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4163 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16) {
4164 TEST_REQUIRES_ARM_NEON_DOT;
4165 GemmMicrokernelTester()
4166 .mr(4)
4167 .nr(8)
4168 .kr(4)
4169 .sr(1)
4170 .m(4)
4171 .n(8)
4172 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004173 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004174 }
4175
4176 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cn) {
4177 TEST_REQUIRES_ARM_NEON_DOT;
4178 GemmMicrokernelTester()
4179 .mr(4)
4180 .nr(8)
4181 .kr(4)
4182 .sr(1)
4183 .m(4)
4184 .n(8)
4185 .k(16)
4186 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004187 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004188 }
4189
4190 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile) {
4191 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004192 for (uint32_t n = 1; n <= 8; n++) {
4193 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004194 GemmMicrokernelTester()
4195 .mr(4)
4196 .nr(8)
4197 .kr(4)
4198 .sr(1)
4199 .m(m)
4200 .n(n)
4201 .k(16)
4202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004203 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004204 }
4205 }
4206 }
4207
4208 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_m) {
4209 TEST_REQUIRES_ARM_NEON_DOT;
4210 for (uint32_t m = 1; m <= 4; m++) {
4211 GemmMicrokernelTester()
4212 .mr(4)
4213 .nr(8)
4214 .kr(4)
4215 .sr(1)
4216 .m(m)
4217 .n(8)
4218 .k(16)
4219 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004220 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004221 }
4222 }
4223
4224 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_eq_16_subtile_n) {
4225 TEST_REQUIRES_ARM_NEON_DOT;
4226 for (uint32_t n = 1; n <= 8; n++) {
4227 GemmMicrokernelTester()
4228 .mr(4)
4229 .nr(8)
4230 .kr(4)
4231 .sr(1)
4232 .m(4)
4233 .n(n)
4234 .k(16)
4235 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004236 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004237 }
4238 }
4239
4240 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_lt_16) {
4241 TEST_REQUIRES_ARM_NEON_DOT;
4242 for (size_t k = 1; k < 16; k++) {
4243 GemmMicrokernelTester()
4244 .mr(4)
4245 .nr(8)
4246 .kr(4)
4247 .sr(1)
4248 .m(4)
4249 .n(8)
4250 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004251 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004252 }
4253 }
4254
4255 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_lt_16_subtile) {
4256 TEST_REQUIRES_ARM_NEON_DOT;
4257 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004258 for (uint32_t n = 1; n <= 8; n++) {
4259 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004260 GemmMicrokernelTester()
4261 .mr(4)
4262 .nr(8)
4263 .kr(4)
4264 .sr(1)
4265 .m(m)
4266 .n(n)
4267 .k(k)
4268 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004269 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004270 }
4271 }
4272 }
4273 }
4274
4275 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_gt_16) {
4276 TEST_REQUIRES_ARM_NEON_DOT;
4277 for (size_t k = 17; k < 32; k++) {
4278 GemmMicrokernelTester()
4279 .mr(4)
4280 .nr(8)
4281 .kr(4)
4282 .sr(1)
4283 .m(4)
4284 .n(8)
4285 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004286 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004287 }
4288 }
4289
4290 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_gt_16_subtile) {
4291 TEST_REQUIRES_ARM_NEON_DOT;
4292 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004293 for (uint32_t n = 1; n <= 8; n++) {
4294 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004295 GemmMicrokernelTester()
4296 .mr(4)
4297 .nr(8)
4298 .kr(4)
4299 .sr(1)
4300 .m(m)
4301 .n(n)
4302 .k(k)
4303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004304 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004305 }
4306 }
4307 }
4308 }
4309
4310 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_div_16) {
4311 TEST_REQUIRES_ARM_NEON_DOT;
4312 for (size_t k = 32; k <= 160; k += 16) {
4313 GemmMicrokernelTester()
4314 .mr(4)
4315 .nr(8)
4316 .kr(4)
4317 .sr(1)
4318 .m(4)
4319 .n(8)
4320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004321 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004322 }
4323 }
4324
4325 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, k_div_16_subtile) {
4326 TEST_REQUIRES_ARM_NEON_DOT;
4327 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004328 for (uint32_t n = 1; n <= 8; n++) {
4329 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004330 GemmMicrokernelTester()
4331 .mr(4)
4332 .nr(8)
4333 .kr(4)
4334 .sr(1)
4335 .m(m)
4336 .n(n)
4337 .k(k)
4338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004339 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004340 }
4341 }
4342 }
4343 }
4344
4345 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8) {
4346 TEST_REQUIRES_ARM_NEON_DOT;
4347 for (uint32_t n = 9; n < 16; n++) {
4348 for (size_t k = 1; k <= 80; k += 17) {
4349 GemmMicrokernelTester()
4350 .mr(4)
4351 .nr(8)
4352 .kr(4)
4353 .sr(1)
4354 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004355 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004356 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004357 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004358 }
4359 }
4360 }
4361
4362 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_strided_cn) {
4363 TEST_REQUIRES_ARM_NEON_DOT;
4364 for (uint32_t n = 9; n < 16; n++) {
4365 for (size_t k = 1; k <= 80; k += 17) {
4366 GemmMicrokernelTester()
4367 .mr(4)
4368 .nr(8)
4369 .kr(4)
4370 .sr(1)
4371 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004372 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004373 .k(k)
4374 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004375 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004376 }
4377 }
4378 }
4379
4380 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_subtile) {
4381 TEST_REQUIRES_ARM_NEON_DOT;
4382 for (uint32_t n = 9; n < 16; n++) {
4383 for (size_t k = 1; k <= 80; k += 17) {
4384 for (uint32_t m = 1; m <= 4; m++) {
4385 GemmMicrokernelTester()
4386 .mr(4)
4387 .nr(8)
4388 .kr(4)
4389 .sr(1)
4390 .m(m)
4391 .n(n)
4392 .k(k)
4393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004394 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004395 }
4396 }
4397 }
4398 }
4399
4400 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8) {
4401 TEST_REQUIRES_ARM_NEON_DOT;
4402 for (uint32_t n = 16; n <= 24; n += 8) {
4403 for (size_t k = 1; k <= 80; k += 17) {
4404 GemmMicrokernelTester()
4405 .mr(4)
4406 .nr(8)
4407 .kr(4)
4408 .sr(1)
4409 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004410 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004411 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004412 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004413 }
4414 }
4415 }
4416
4417 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_strided_cn) {
4418 TEST_REQUIRES_ARM_NEON_DOT;
4419 for (uint32_t n = 16; n <= 24; n += 8) {
4420 for (size_t k = 1; k <= 80; k += 17) {
4421 GemmMicrokernelTester()
4422 .mr(4)
4423 .nr(8)
4424 .kr(4)
4425 .sr(1)
4426 .m(4)
4427 .n(n)
4428 .k(k)
4429 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004430 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004431 }
4432 }
4433 }
4434
4435 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_subtile) {
4436 TEST_REQUIRES_ARM_NEON_DOT;
4437 for (uint32_t n = 16; n <= 24; n += 8) {
4438 for (size_t k = 1; k <= 80; k += 17) {
4439 for (uint32_t m = 1; m <= 4; m++) {
4440 GemmMicrokernelTester()
4441 .mr(4)
4442 .nr(8)
4443 .kr(4)
4444 .sr(1)
4445 .m(m)
4446 .n(n)
4447 .k(k)
4448 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004449 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004450 }
4451 }
4452 }
4453 }
4454
4455 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, small_kernel) {
4456 TEST_REQUIRES_ARM_NEON_DOT;
4457 for (size_t k = 1; k <= 80; k += 17) {
4458 GemmMicrokernelTester()
4459 .mr(4)
4460 .nr(8)
4461 .kr(4)
4462 .sr(1)
4463 .m(4)
4464 .n(8)
4465 .k(k)
4466 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004467 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004468 }
4469 }
4470
4471 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, small_kernel_subtile) {
4472 TEST_REQUIRES_ARM_NEON_DOT;
4473 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004474 for (uint32_t n = 1; n <= 8; n++) {
4475 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004476 GemmMicrokernelTester()
4477 .mr(4)
4478 .nr(8)
4479 .kr(4)
4480 .sr(1)
4481 .m(m)
4482 .n(n)
4483 .k(k)
4484 .ks(3)
4485 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004486 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004487 }
4488 }
4489 }
4490 }
4491
4492 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_gt_8_small_kernel) {
4493 TEST_REQUIRES_ARM_NEON_DOT;
4494 for (uint32_t n = 9; n < 16; n++) {
4495 for (size_t k = 1; k <= 80; k += 17) {
4496 GemmMicrokernelTester()
4497 .mr(4)
4498 .nr(8)
4499 .kr(4)
4500 .sr(1)
4501 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004502 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004503 .k(k)
4504 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004505 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004506 }
4507 }
4508 }
4509
4510 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, n_div_8_small_kernel) {
4511 TEST_REQUIRES_ARM_NEON_DOT;
4512 for (uint32_t n = 16; n <= 24; n += 8) {
4513 for (size_t k = 1; k <= 80; k += 17) {
4514 GemmMicrokernelTester()
4515 .mr(4)
4516 .nr(8)
4517 .kr(4)
4518 .sr(1)
4519 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004520 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004521 .k(k)
4522 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004523 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004524 }
4525 }
4526 }
4527
4528 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cm_subtile) {
4529 TEST_REQUIRES_ARM_NEON_DOT;
4530 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004531 for (uint32_t n = 1; n <= 8; n++) {
4532 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004533 GemmMicrokernelTester()
4534 .mr(4)
4535 .nr(8)
4536 .kr(4)
4537 .sr(1)
4538 .m(m)
4539 .n(n)
4540 .k(k)
4541 .cm_stride(11)
4542 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004543 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004544 }
4545 }
4546 }
4547 }
4548
4549 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, a_offset) {
4550 TEST_REQUIRES_ARM_NEON_DOT;
4551 for (size_t k = 1; k <= 80; k += 17) {
4552 GemmMicrokernelTester()
4553 .mr(4)
4554 .nr(8)
4555 .kr(4)
4556 .sr(1)
4557 .m(4)
4558 .n(8)
4559 .k(k)
4560 .ks(3)
4561 .a_offset(331)
Marat Dukhan50323b82022-01-11 00:12:01 -08004562 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004563 }
4564 }
4565
4566 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, zero) {
4567 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004568 for (size_t k = 1; k <= 80; k += 17) {
4569 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004570 GemmMicrokernelTester()
4571 .mr(4)
4572 .nr(8)
4573 .kr(4)
4574 .sr(1)
4575 .m(4)
4576 .n(8)
4577 .k(k)
4578 .ks(3)
4579 .a_offset(331)
4580 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004582 }
4583 }
4584 }
4585
4586 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, qmin) {
4587 TEST_REQUIRES_ARM_NEON_DOT;
4588 GemmMicrokernelTester()
4589 .mr(4)
4590 .nr(8)
4591 .kr(4)
4592 .sr(1)
4593 .m(4)
4594 .n(8)
4595 .k(16)
4596 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004597 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004598 }
4599
4600 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, qmax) {
4601 TEST_REQUIRES_ARM_NEON_DOT;
4602 GemmMicrokernelTester()
4603 .mr(4)
4604 .nr(8)
4605 .kr(4)
4606 .sr(1)
4607 .m(4)
4608 .n(8)
4609 .k(16)
4610 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004611 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004612 }
4613
4614 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, strided_cm) {
4615 TEST_REQUIRES_ARM_NEON_DOT;
4616 GemmMicrokernelTester()
4617 .mr(4)
4618 .nr(8)
4619 .kr(4)
4620 .sr(1)
4621 .m(4)
4622 .n(8)
4623 .k(16)
4624 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004625 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004626 }
4627
4628 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_a_zero_point) {
4629 TEST_REQUIRES_ARM_NEON_DOT;
4630 for (size_t k = 1; k <= 80; k += 17) {
4631 GemmMicrokernelTester()
4632 .mr(4)
4633 .nr(8)
4634 .kr(4)
4635 .sr(1)
4636 .m(4)
4637 .n(8)
4638 .k(k)
4639 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004640 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004641 }
4642 }
4643
4644 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_b_zero_point) {
4645 TEST_REQUIRES_ARM_NEON_DOT;
4646 for (size_t k = 1; k <= 80; k += 17) {
4647 GemmMicrokernelTester()
4648 .mr(4)
4649 .nr(8)
4650 .kr(4)
4651 .sr(1)
4652 .m(4)
4653 .n(8)
4654 .k(k)
4655 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004656 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004657 }
4658 }
4659
4660 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_LD128, no_zero_point) {
4661 TEST_REQUIRES_ARM_NEON_DOT;
4662 for (size_t k = 1; k <= 80; k += 17) {
4663 GemmMicrokernelTester()
4664 .mr(4)
4665 .nr(8)
4666 .kr(4)
4667 .sr(1)
4668 .m(4)
4669 .n(8)
4670 .k(k)
4671 .a_zero_point(0)
4672 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004673 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_ld128, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004674 }
4675 }
4676#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
4677
4678
4679#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
4680 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8) {
4681 TEST_REQUIRES_ARM_NEON_DOT;
4682 GemmMicrokernelTester()
4683 .mr(2)
4684 .nr(8)
4685 .kr(4)
4686 .sr(1)
4687 .m(2)
4688 .n(8)
4689 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004690 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004691 }
4692
4693 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cn) {
4694 TEST_REQUIRES_ARM_NEON_DOT;
4695 GemmMicrokernelTester()
4696 .mr(2)
4697 .nr(8)
4698 .kr(4)
4699 .sr(1)
4700 .m(2)
4701 .n(8)
4702 .k(8)
4703 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004704 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004705 }
4706
4707 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile) {
4708 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004709 for (uint32_t n = 1; n <= 8; n++) {
4710 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004711 GemmMicrokernelTester()
4712 .mr(2)
4713 .nr(8)
4714 .kr(4)
4715 .sr(1)
4716 .m(m)
4717 .n(n)
4718 .k(8)
4719 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004720 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004721 }
4722 }
4723 }
4724
4725 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile_m) {
4726 TEST_REQUIRES_ARM_NEON_DOT;
4727 for (uint32_t m = 1; m <= 2; m++) {
4728 GemmMicrokernelTester()
4729 .mr(2)
4730 .nr(8)
4731 .kr(4)
4732 .sr(1)
4733 .m(m)
4734 .n(8)
4735 .k(8)
4736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004737 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004738 }
4739 }
4740
4741 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_eq_8_subtile_n) {
4742 TEST_REQUIRES_ARM_NEON_DOT;
4743 for (uint32_t n = 1; n <= 8; n++) {
4744 GemmMicrokernelTester()
4745 .mr(2)
4746 .nr(8)
4747 .kr(4)
4748 .sr(1)
4749 .m(2)
4750 .n(n)
4751 .k(8)
4752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004753 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004754 }
4755 }
4756
4757 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_lt_8) {
4758 TEST_REQUIRES_ARM_NEON_DOT;
4759 for (size_t k = 1; k < 8; k++) {
4760 GemmMicrokernelTester()
4761 .mr(2)
4762 .nr(8)
4763 .kr(4)
4764 .sr(1)
4765 .m(2)
4766 .n(8)
4767 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004768 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004769 }
4770 }
4771
4772 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_lt_8_subtile) {
4773 TEST_REQUIRES_ARM_NEON_DOT;
4774 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004775 for (uint32_t n = 1; n <= 8; n++) {
4776 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004777 GemmMicrokernelTester()
4778 .mr(2)
4779 .nr(8)
4780 .kr(4)
4781 .sr(1)
4782 .m(m)
4783 .n(n)
4784 .k(k)
4785 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004786 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004787 }
4788 }
4789 }
4790 }
4791
4792 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_gt_8) {
4793 TEST_REQUIRES_ARM_NEON_DOT;
4794 for (size_t k = 9; k < 16; k++) {
4795 GemmMicrokernelTester()
4796 .mr(2)
4797 .nr(8)
4798 .kr(4)
4799 .sr(1)
4800 .m(2)
4801 .n(8)
4802 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004803 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004804 }
4805 }
4806
4807 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_gt_8_subtile) {
4808 TEST_REQUIRES_ARM_NEON_DOT;
4809 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004810 for (uint32_t n = 1; n <= 8; n++) {
4811 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004812 GemmMicrokernelTester()
4813 .mr(2)
4814 .nr(8)
4815 .kr(4)
4816 .sr(1)
4817 .m(m)
4818 .n(n)
4819 .k(k)
4820 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004822 }
4823 }
4824 }
4825 }
4826
4827 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_div_8) {
4828 TEST_REQUIRES_ARM_NEON_DOT;
4829 for (size_t k = 16; k <= 80; k += 8) {
4830 GemmMicrokernelTester()
4831 .mr(2)
4832 .nr(8)
4833 .kr(4)
4834 .sr(1)
4835 .m(2)
4836 .n(8)
4837 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004838 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004839 }
4840 }
4841
4842 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, k_div_8_subtile) {
4843 TEST_REQUIRES_ARM_NEON_DOT;
4844 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004845 for (uint32_t n = 1; n <= 8; n++) {
4846 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004847 GemmMicrokernelTester()
4848 .mr(2)
4849 .nr(8)
4850 .kr(4)
4851 .sr(1)
4852 .m(m)
4853 .n(n)
4854 .k(k)
4855 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004856 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004857 }
4858 }
4859 }
4860 }
4861
4862 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8) {
4863 TEST_REQUIRES_ARM_NEON_DOT;
4864 for (uint32_t n = 9; n < 16; n++) {
4865 for (size_t k = 1; k <= 40; k += 9) {
4866 GemmMicrokernelTester()
4867 .mr(2)
4868 .nr(8)
4869 .kr(4)
4870 .sr(1)
4871 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004872 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004873 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004874 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004875 }
4876 }
4877 }
4878
4879 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_strided_cn) {
4880 TEST_REQUIRES_ARM_NEON_DOT;
4881 for (uint32_t n = 9; n < 16; n++) {
4882 for (size_t k = 1; k <= 40; k += 9) {
4883 GemmMicrokernelTester()
4884 .mr(2)
4885 .nr(8)
4886 .kr(4)
4887 .sr(1)
4888 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004889 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004890 .k(k)
4891 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004892 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004893 }
4894 }
4895 }
4896
4897 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_subtile) {
4898 TEST_REQUIRES_ARM_NEON_DOT;
4899 for (uint32_t n = 9; n < 16; n++) {
4900 for (size_t k = 1; k <= 40; k += 9) {
4901 for (uint32_t m = 1; m <= 2; m++) {
4902 GemmMicrokernelTester()
4903 .mr(2)
4904 .nr(8)
4905 .kr(4)
4906 .sr(1)
4907 .m(m)
4908 .n(n)
4909 .k(k)
4910 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004911 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004912 }
4913 }
4914 }
4915 }
4916
4917 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8) {
4918 TEST_REQUIRES_ARM_NEON_DOT;
4919 for (uint32_t n = 16; n <= 24; n += 8) {
4920 for (size_t k = 1; k <= 40; k += 9) {
4921 GemmMicrokernelTester()
4922 .mr(2)
4923 .nr(8)
4924 .kr(4)
4925 .sr(1)
4926 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004927 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004928 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004929 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004930 }
4931 }
4932 }
4933
4934 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_strided_cn) {
4935 TEST_REQUIRES_ARM_NEON_DOT;
4936 for (uint32_t n = 16; n <= 24; n += 8) {
4937 for (size_t k = 1; k <= 40; k += 9) {
4938 GemmMicrokernelTester()
4939 .mr(2)
4940 .nr(8)
4941 .kr(4)
4942 .sr(1)
4943 .m(2)
4944 .n(n)
4945 .k(k)
4946 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004947 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004948 }
4949 }
4950 }
4951
4952 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_subtile) {
4953 TEST_REQUIRES_ARM_NEON_DOT;
4954 for (uint32_t n = 16; n <= 24; n += 8) {
4955 for (size_t k = 1; k <= 40; k += 9) {
4956 for (uint32_t m = 1; m <= 2; m++) {
4957 GemmMicrokernelTester()
4958 .mr(2)
4959 .nr(8)
4960 .kr(4)
4961 .sr(1)
4962 .m(m)
4963 .n(n)
4964 .k(k)
4965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004966 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004967 }
4968 }
4969 }
4970 }
4971
4972 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, small_kernel) {
4973 TEST_REQUIRES_ARM_NEON_DOT;
4974 for (size_t k = 1; k <= 40; k += 9) {
4975 GemmMicrokernelTester()
4976 .mr(2)
4977 .nr(8)
4978 .kr(4)
4979 .sr(1)
4980 .m(2)
4981 .n(8)
4982 .k(k)
4983 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08004984 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004985 }
4986 }
4987
4988 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, small_kernel_subtile) {
4989 TEST_REQUIRES_ARM_NEON_DOT;
4990 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004991 for (uint32_t n = 1; n <= 8; n++) {
4992 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004993 GemmMicrokernelTester()
4994 .mr(2)
4995 .nr(8)
4996 .kr(4)
4997 .sr(1)
4998 .m(m)
4999 .n(n)
5000 .k(k)
5001 .ks(3)
5002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005003 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005004 }
5005 }
5006 }
5007 }
5008
5009 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_gt_8_small_kernel) {
5010 TEST_REQUIRES_ARM_NEON_DOT;
5011 for (uint32_t n = 9; n < 16; n++) {
5012 for (size_t k = 1; k <= 40; k += 9) {
5013 GemmMicrokernelTester()
5014 .mr(2)
5015 .nr(8)
5016 .kr(4)
5017 .sr(1)
5018 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005019 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005020 .k(k)
5021 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005022 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005023 }
5024 }
5025 }
5026
5027 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, n_div_8_small_kernel) {
5028 TEST_REQUIRES_ARM_NEON_DOT;
5029 for (uint32_t n = 16; n <= 24; n += 8) {
5030 for (size_t k = 1; k <= 40; k += 9) {
5031 GemmMicrokernelTester()
5032 .mr(2)
5033 .nr(8)
5034 .kr(4)
5035 .sr(1)
5036 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005037 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005038 .k(k)
5039 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005040 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005041 }
5042 }
5043 }
5044
5045 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cm_subtile) {
5046 TEST_REQUIRES_ARM_NEON_DOT;
5047 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005048 for (uint32_t n = 1; n <= 8; n++) {
5049 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005050 GemmMicrokernelTester()
5051 .mr(2)
5052 .nr(8)
5053 .kr(4)
5054 .sr(1)
5055 .m(m)
5056 .n(n)
5057 .k(k)
5058 .cm_stride(11)
5059 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005060 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005061 }
5062 }
5063 }
5064 }
5065
5066 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, a_offset) {
5067 TEST_REQUIRES_ARM_NEON_DOT;
5068 for (size_t k = 1; k <= 40; k += 9) {
5069 GemmMicrokernelTester()
5070 .mr(2)
5071 .nr(8)
5072 .kr(4)
5073 .sr(1)
5074 .m(2)
5075 .n(8)
5076 .k(k)
5077 .ks(3)
5078 .a_offset(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005079 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005080 }
5081 }
5082
5083 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, zero) {
5084 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005085 for (size_t k = 1; k <= 40; k += 9) {
5086 for (uint32_t mz = 0; mz < 2; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005087 GemmMicrokernelTester()
5088 .mr(2)
5089 .nr(8)
5090 .kr(4)
5091 .sr(1)
5092 .m(2)
5093 .n(8)
5094 .k(k)
5095 .ks(3)
5096 .a_offset(83)
5097 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005098 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005099 }
5100 }
5101 }
5102
5103 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, qmin) {
5104 TEST_REQUIRES_ARM_NEON_DOT;
5105 GemmMicrokernelTester()
5106 .mr(2)
5107 .nr(8)
5108 .kr(4)
5109 .sr(1)
5110 .m(2)
5111 .n(8)
5112 .k(8)
5113 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005114 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005115 }
5116
5117 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, qmax) {
5118 TEST_REQUIRES_ARM_NEON_DOT;
5119 GemmMicrokernelTester()
5120 .mr(2)
5121 .nr(8)
5122 .kr(4)
5123 .sr(1)
5124 .m(2)
5125 .n(8)
5126 .k(8)
5127 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005128 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005129 }
5130
5131 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, strided_cm) {
5132 TEST_REQUIRES_ARM_NEON_DOT;
5133 GemmMicrokernelTester()
5134 .mr(2)
5135 .nr(8)
5136 .kr(4)
5137 .sr(1)
5138 .m(2)
5139 .n(8)
5140 .k(8)
5141 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005142 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005143 }
5144
5145 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_a_zero_point) {
5146 TEST_REQUIRES_ARM_NEON_DOT;
5147 for (size_t k = 1; k <= 40; k += 9) {
5148 GemmMicrokernelTester()
5149 .mr(2)
5150 .nr(8)
5151 .kr(4)
5152 .sr(1)
5153 .m(2)
5154 .n(8)
5155 .k(k)
5156 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005157 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005158 }
5159 }
5160
5161 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_b_zero_point) {
5162 TEST_REQUIRES_ARM_NEON_DOT;
5163 for (size_t k = 1; k <= 40; k += 9) {
5164 GemmMicrokernelTester()
5165 .mr(2)
5166 .nr(8)
5167 .kr(4)
5168 .sr(1)
5169 .m(2)
5170 .n(8)
5171 .k(k)
5172 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005173 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005174 }
5175 }
5176
5177 TEST(QU8_IGEMM_MINMAX_RNDNU_2X8C4__NEONDOT, no_zero_point) {
5178 TEST_REQUIRES_ARM_NEON_DOT;
5179 for (size_t k = 1; k <= 40; k += 9) {
5180 GemmMicrokernelTester()
5181 .mr(2)
5182 .nr(8)
5183 .kr(4)
5184 .sr(1)
5185 .m(2)
5186 .n(8)
5187 .k(k)
5188 .a_zero_point(0)
5189 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005190 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_2x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005191 }
5192 }
5193#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5194
5195
5196#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5197 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8) {
5198 TEST_REQUIRES_ARM_NEON_DOT;
5199 GemmMicrokernelTester()
5200 .mr(3)
5201 .nr(8)
5202 .kr(4)
5203 .sr(1)
5204 .m(3)
5205 .n(8)
5206 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005207 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005208 }
5209
5210 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cn) {
5211 TEST_REQUIRES_ARM_NEON_DOT;
5212 GemmMicrokernelTester()
5213 .mr(3)
5214 .nr(8)
5215 .kr(4)
5216 .sr(1)
5217 .m(3)
5218 .n(8)
5219 .k(8)
5220 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005221 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005222 }
5223
5224 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile) {
5225 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005226 for (uint32_t n = 1; n <= 8; n++) {
5227 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005228 GemmMicrokernelTester()
5229 .mr(3)
5230 .nr(8)
5231 .kr(4)
5232 .sr(1)
5233 .m(m)
5234 .n(n)
5235 .k(8)
5236 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005237 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005238 }
5239 }
5240 }
5241
5242 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile_m) {
5243 TEST_REQUIRES_ARM_NEON_DOT;
5244 for (uint32_t m = 1; m <= 3; m++) {
5245 GemmMicrokernelTester()
5246 .mr(3)
5247 .nr(8)
5248 .kr(4)
5249 .sr(1)
5250 .m(m)
5251 .n(8)
5252 .k(8)
5253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005254 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005255 }
5256 }
5257
5258 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_eq_8_subtile_n) {
5259 TEST_REQUIRES_ARM_NEON_DOT;
5260 for (uint32_t n = 1; n <= 8; n++) {
5261 GemmMicrokernelTester()
5262 .mr(3)
5263 .nr(8)
5264 .kr(4)
5265 .sr(1)
5266 .m(3)
5267 .n(n)
5268 .k(8)
5269 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005270 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005271 }
5272 }
5273
5274 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_lt_8) {
5275 TEST_REQUIRES_ARM_NEON_DOT;
5276 for (size_t k = 1; k < 8; k++) {
5277 GemmMicrokernelTester()
5278 .mr(3)
5279 .nr(8)
5280 .kr(4)
5281 .sr(1)
5282 .m(3)
5283 .n(8)
5284 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005285 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005286 }
5287 }
5288
5289 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_lt_8_subtile) {
5290 TEST_REQUIRES_ARM_NEON_DOT;
5291 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005292 for (uint32_t n = 1; n <= 8; n++) {
5293 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005294 GemmMicrokernelTester()
5295 .mr(3)
5296 .nr(8)
5297 .kr(4)
5298 .sr(1)
5299 .m(m)
5300 .n(n)
5301 .k(k)
5302 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005303 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005304 }
5305 }
5306 }
5307 }
5308
5309 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_gt_8) {
5310 TEST_REQUIRES_ARM_NEON_DOT;
5311 for (size_t k = 9; k < 16; k++) {
5312 GemmMicrokernelTester()
5313 .mr(3)
5314 .nr(8)
5315 .kr(4)
5316 .sr(1)
5317 .m(3)
5318 .n(8)
5319 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005320 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005321 }
5322 }
5323
5324 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_gt_8_subtile) {
5325 TEST_REQUIRES_ARM_NEON_DOT;
5326 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005327 for (uint32_t n = 1; n <= 8; n++) {
5328 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005329 GemmMicrokernelTester()
5330 .mr(3)
5331 .nr(8)
5332 .kr(4)
5333 .sr(1)
5334 .m(m)
5335 .n(n)
5336 .k(k)
5337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005338 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005339 }
5340 }
5341 }
5342 }
5343
5344 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_div_8) {
5345 TEST_REQUIRES_ARM_NEON_DOT;
5346 for (size_t k = 16; k <= 80; k += 8) {
5347 GemmMicrokernelTester()
5348 .mr(3)
5349 .nr(8)
5350 .kr(4)
5351 .sr(1)
5352 .m(3)
5353 .n(8)
5354 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005355 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005356 }
5357 }
5358
5359 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, k_div_8_subtile) {
5360 TEST_REQUIRES_ARM_NEON_DOT;
5361 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005362 for (uint32_t n = 1; n <= 8; n++) {
5363 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005364 GemmMicrokernelTester()
5365 .mr(3)
5366 .nr(8)
5367 .kr(4)
5368 .sr(1)
5369 .m(m)
5370 .n(n)
5371 .k(k)
5372 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005373 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005374 }
5375 }
5376 }
5377 }
5378
5379 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8) {
5380 TEST_REQUIRES_ARM_NEON_DOT;
5381 for (uint32_t n = 9; n < 16; n++) {
5382 for (size_t k = 1; k <= 40; k += 9) {
5383 GemmMicrokernelTester()
5384 .mr(3)
5385 .nr(8)
5386 .kr(4)
5387 .sr(1)
5388 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005389 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005390 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005391 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005392 }
5393 }
5394 }
5395
5396 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_strided_cn) {
5397 TEST_REQUIRES_ARM_NEON_DOT;
5398 for (uint32_t n = 9; n < 16; n++) {
5399 for (size_t k = 1; k <= 40; k += 9) {
5400 GemmMicrokernelTester()
5401 .mr(3)
5402 .nr(8)
5403 .kr(4)
5404 .sr(1)
5405 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005406 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005407 .k(k)
5408 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005409 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005410 }
5411 }
5412 }
5413
5414 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_subtile) {
5415 TEST_REQUIRES_ARM_NEON_DOT;
5416 for (uint32_t n = 9; n < 16; n++) {
5417 for (size_t k = 1; k <= 40; k += 9) {
5418 for (uint32_t m = 1; m <= 3; m++) {
5419 GemmMicrokernelTester()
5420 .mr(3)
5421 .nr(8)
5422 .kr(4)
5423 .sr(1)
5424 .m(m)
5425 .n(n)
5426 .k(k)
5427 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005428 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005429 }
5430 }
5431 }
5432 }
5433
5434 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8) {
5435 TEST_REQUIRES_ARM_NEON_DOT;
5436 for (uint32_t n = 16; n <= 24; n += 8) {
5437 for (size_t k = 1; k <= 40; k += 9) {
5438 GemmMicrokernelTester()
5439 .mr(3)
5440 .nr(8)
5441 .kr(4)
5442 .sr(1)
5443 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005444 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005445 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005446 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005447 }
5448 }
5449 }
5450
5451 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_strided_cn) {
5452 TEST_REQUIRES_ARM_NEON_DOT;
5453 for (uint32_t n = 16; n <= 24; n += 8) {
5454 for (size_t k = 1; k <= 40; k += 9) {
5455 GemmMicrokernelTester()
5456 .mr(3)
5457 .nr(8)
5458 .kr(4)
5459 .sr(1)
5460 .m(3)
5461 .n(n)
5462 .k(k)
5463 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005464 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005465 }
5466 }
5467 }
5468
5469 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_subtile) {
5470 TEST_REQUIRES_ARM_NEON_DOT;
5471 for (uint32_t n = 16; n <= 24; n += 8) {
5472 for (size_t k = 1; k <= 40; k += 9) {
5473 for (uint32_t m = 1; m <= 3; m++) {
5474 GemmMicrokernelTester()
5475 .mr(3)
5476 .nr(8)
5477 .kr(4)
5478 .sr(1)
5479 .m(m)
5480 .n(n)
5481 .k(k)
5482 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005483 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005484 }
5485 }
5486 }
5487 }
5488
5489 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, small_kernel) {
5490 TEST_REQUIRES_ARM_NEON_DOT;
5491 for (size_t k = 1; k <= 40; k += 9) {
5492 GemmMicrokernelTester()
5493 .mr(3)
5494 .nr(8)
5495 .kr(4)
5496 .sr(1)
5497 .m(3)
5498 .n(8)
5499 .k(k)
5500 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005501 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005502 }
5503 }
5504
5505 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, small_kernel_subtile) {
5506 TEST_REQUIRES_ARM_NEON_DOT;
5507 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005508 for (uint32_t n = 1; n <= 8; n++) {
5509 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005510 GemmMicrokernelTester()
5511 .mr(3)
5512 .nr(8)
5513 .kr(4)
5514 .sr(1)
5515 .m(m)
5516 .n(n)
5517 .k(k)
5518 .ks(3)
5519 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005520 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005521 }
5522 }
5523 }
5524 }
5525
5526 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_gt_8_small_kernel) {
5527 TEST_REQUIRES_ARM_NEON_DOT;
5528 for (uint32_t n = 9; n < 16; n++) {
5529 for (size_t k = 1; k <= 40; k += 9) {
5530 GemmMicrokernelTester()
5531 .mr(3)
5532 .nr(8)
5533 .kr(4)
5534 .sr(1)
5535 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005536 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005537 .k(k)
5538 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005539 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005540 }
5541 }
5542 }
5543
5544 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, n_div_8_small_kernel) {
5545 TEST_REQUIRES_ARM_NEON_DOT;
5546 for (uint32_t n = 16; n <= 24; n += 8) {
5547 for (size_t k = 1; k <= 40; k += 9) {
5548 GemmMicrokernelTester()
5549 .mr(3)
5550 .nr(8)
5551 .kr(4)
5552 .sr(1)
5553 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005554 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005555 .k(k)
5556 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08005557 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005558 }
5559 }
5560 }
5561
5562 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cm_subtile) {
5563 TEST_REQUIRES_ARM_NEON_DOT;
5564 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005565 for (uint32_t n = 1; n <= 8; n++) {
5566 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005567 GemmMicrokernelTester()
5568 .mr(3)
5569 .nr(8)
5570 .kr(4)
5571 .sr(1)
5572 .m(m)
5573 .n(n)
5574 .k(k)
5575 .cm_stride(11)
5576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005577 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005578 }
5579 }
5580 }
5581 }
5582
5583 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, a_offset) {
5584 TEST_REQUIRES_ARM_NEON_DOT;
5585 for (size_t k = 1; k <= 40; k += 9) {
5586 GemmMicrokernelTester()
5587 .mr(3)
5588 .nr(8)
5589 .kr(4)
5590 .sr(1)
5591 .m(3)
5592 .n(8)
5593 .k(k)
5594 .ks(3)
5595 .a_offset(127)
Marat Dukhan50323b82022-01-11 00:12:01 -08005596 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005597 }
5598 }
5599
5600 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, zero) {
5601 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005602 for (size_t k = 1; k <= 40; k += 9) {
5603 for (uint32_t mz = 0; mz < 3; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005604 GemmMicrokernelTester()
5605 .mr(3)
5606 .nr(8)
5607 .kr(4)
5608 .sr(1)
5609 .m(3)
5610 .n(8)
5611 .k(k)
5612 .ks(3)
5613 .a_offset(127)
5614 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08005615 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005616 }
5617 }
5618 }
5619
5620 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, qmin) {
5621 TEST_REQUIRES_ARM_NEON_DOT;
5622 GemmMicrokernelTester()
5623 .mr(3)
5624 .nr(8)
5625 .kr(4)
5626 .sr(1)
5627 .m(3)
5628 .n(8)
5629 .k(8)
5630 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005631 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005632 }
5633
5634 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, qmax) {
5635 TEST_REQUIRES_ARM_NEON_DOT;
5636 GemmMicrokernelTester()
5637 .mr(3)
5638 .nr(8)
5639 .kr(4)
5640 .sr(1)
5641 .m(3)
5642 .n(8)
5643 .k(8)
5644 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005645 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005646 }
5647
5648 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, strided_cm) {
5649 TEST_REQUIRES_ARM_NEON_DOT;
5650 GemmMicrokernelTester()
5651 .mr(3)
5652 .nr(8)
5653 .kr(4)
5654 .sr(1)
5655 .m(3)
5656 .n(8)
5657 .k(8)
5658 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005659 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005660 }
5661
5662 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_a_zero_point) {
5663 TEST_REQUIRES_ARM_NEON_DOT;
5664 for (size_t k = 1; k <= 40; k += 9) {
5665 GemmMicrokernelTester()
5666 .mr(3)
5667 .nr(8)
5668 .kr(4)
5669 .sr(1)
5670 .m(3)
5671 .n(8)
5672 .k(k)
5673 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005674 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005675 }
5676 }
5677
5678 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_b_zero_point) {
5679 TEST_REQUIRES_ARM_NEON_DOT;
5680 for (size_t k = 1; k <= 40; k += 9) {
5681 GemmMicrokernelTester()
5682 .mr(3)
5683 .nr(8)
5684 .kr(4)
5685 .sr(1)
5686 .m(3)
5687 .n(8)
5688 .k(k)
5689 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005690 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005691 }
5692 }
5693
5694 TEST(QU8_IGEMM_MINMAX_RNDNU_3X8C4__NEONDOT, no_zero_point) {
5695 TEST_REQUIRES_ARM_NEON_DOT;
5696 for (size_t k = 1; k <= 40; k += 9) {
5697 GemmMicrokernelTester()
5698 .mr(3)
5699 .nr(8)
5700 .kr(4)
5701 .sr(1)
5702 .m(3)
5703 .n(8)
5704 .k(k)
5705 .a_zero_point(0)
5706 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005707 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_3x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005708 }
5709 }
5710#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5711
5712
5713#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5714 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8) {
5715 TEST_REQUIRES_ARM_NEON_DOT;
5716 GemmMicrokernelTester()
5717 .mr(4)
5718 .nr(8)
5719 .kr(4)
5720 .sr(1)
5721 .m(4)
5722 .n(8)
5723 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005724 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005725 }
5726
5727 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cn) {
5728 TEST_REQUIRES_ARM_NEON_DOT;
5729 GemmMicrokernelTester()
5730 .mr(4)
5731 .nr(8)
5732 .kr(4)
5733 .sr(1)
5734 .m(4)
5735 .n(8)
5736 .k(8)
5737 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005738 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005739 }
5740
5741 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile) {
5742 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005743 for (uint32_t n = 1; n <= 8; n++) {
5744 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005745 GemmMicrokernelTester()
5746 .mr(4)
5747 .nr(8)
5748 .kr(4)
5749 .sr(1)
5750 .m(m)
5751 .n(n)
5752 .k(8)
5753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005754 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005755 }
5756 }
5757 }
5758
5759 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_m) {
5760 TEST_REQUIRES_ARM_NEON_DOT;
5761 for (uint32_t m = 1; m <= 4; m++) {
5762 GemmMicrokernelTester()
5763 .mr(4)
5764 .nr(8)
5765 .kr(4)
5766 .sr(1)
5767 .m(m)
5768 .n(8)
5769 .k(8)
5770 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005771 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005772 }
5773 }
5774
5775 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_eq_8_subtile_n) {
5776 TEST_REQUIRES_ARM_NEON_DOT;
5777 for (uint32_t n = 1; n <= 8; n++) {
5778 GemmMicrokernelTester()
5779 .mr(4)
5780 .nr(8)
5781 .kr(4)
5782 .sr(1)
5783 .m(4)
5784 .n(n)
5785 .k(8)
5786 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005787 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005788 }
5789 }
5790
5791 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8) {
5792 TEST_REQUIRES_ARM_NEON_DOT;
5793 for (size_t k = 1; k < 8; k++) {
5794 GemmMicrokernelTester()
5795 .mr(4)
5796 .nr(8)
5797 .kr(4)
5798 .sr(1)
5799 .m(4)
5800 .n(8)
5801 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005802 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005803 }
5804 }
5805
5806 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_lt_8_subtile) {
5807 TEST_REQUIRES_ARM_NEON_DOT;
5808 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005809 for (uint32_t n = 1; n <= 8; n++) {
5810 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005811 GemmMicrokernelTester()
5812 .mr(4)
5813 .nr(8)
5814 .kr(4)
5815 .sr(1)
5816 .m(m)
5817 .n(n)
5818 .k(k)
5819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005820 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005821 }
5822 }
5823 }
5824 }
5825
5826 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8) {
5827 TEST_REQUIRES_ARM_NEON_DOT;
5828 for (size_t k = 9; k < 16; k++) {
5829 GemmMicrokernelTester()
5830 .mr(4)
5831 .nr(8)
5832 .kr(4)
5833 .sr(1)
5834 .m(4)
5835 .n(8)
5836 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005837 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005838 }
5839 }
5840
5841 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_gt_8_subtile) {
5842 TEST_REQUIRES_ARM_NEON_DOT;
5843 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005844 for (uint32_t n = 1; n <= 8; n++) {
5845 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005846 GemmMicrokernelTester()
5847 .mr(4)
5848 .nr(8)
5849 .kr(4)
5850 .sr(1)
5851 .m(m)
5852 .n(n)
5853 .k(k)
5854 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005855 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005856 }
5857 }
5858 }
5859 }
5860
5861 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8) {
5862 TEST_REQUIRES_ARM_NEON_DOT;
5863 for (size_t k = 16; k <= 80; k += 8) {
5864 GemmMicrokernelTester()
5865 .mr(4)
5866 .nr(8)
5867 .kr(4)
5868 .sr(1)
5869 .m(4)
5870 .n(8)
5871 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005872 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005873 }
5874 }
5875
5876 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, k_div_8_subtile) {
5877 TEST_REQUIRES_ARM_NEON_DOT;
5878 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005879 for (uint32_t n = 1; n <= 8; n++) {
5880 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005881 GemmMicrokernelTester()
5882 .mr(4)
5883 .nr(8)
5884 .kr(4)
5885 .sr(1)
5886 .m(m)
5887 .n(n)
5888 .k(k)
5889 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005890 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005891 }
5892 }
5893 }
5894 }
5895
5896 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8) {
5897 TEST_REQUIRES_ARM_NEON_DOT;
5898 for (uint32_t n = 9; n < 16; n++) {
5899 for (size_t k = 1; k <= 40; k += 9) {
5900 GemmMicrokernelTester()
5901 .mr(4)
5902 .nr(8)
5903 .kr(4)
5904 .sr(1)
5905 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005906 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005907 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005908 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005909 }
5910 }
5911 }
5912
5913 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_strided_cn) {
5914 TEST_REQUIRES_ARM_NEON_DOT;
5915 for (uint32_t n = 9; n < 16; n++) {
5916 for (size_t k = 1; k <= 40; k += 9) {
5917 GemmMicrokernelTester()
5918 .mr(4)
5919 .nr(8)
5920 .kr(4)
5921 .sr(1)
5922 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005923 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005924 .k(k)
5925 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005926 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005927 }
5928 }
5929 }
5930
5931 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_subtile) {
5932 TEST_REQUIRES_ARM_NEON_DOT;
5933 for (uint32_t n = 9; n < 16; n++) {
5934 for (size_t k = 1; k <= 40; k += 9) {
5935 for (uint32_t m = 1; m <= 4; m++) {
5936 GemmMicrokernelTester()
5937 .mr(4)
5938 .nr(8)
5939 .kr(4)
5940 .sr(1)
5941 .m(m)
5942 .n(n)
5943 .k(k)
5944 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005945 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005946 }
5947 }
5948 }
5949 }
5950
5951 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8) {
5952 TEST_REQUIRES_ARM_NEON_DOT;
5953 for (uint32_t n = 16; n <= 24; n += 8) {
5954 for (size_t k = 1; k <= 40; k += 9) {
5955 GemmMicrokernelTester()
5956 .mr(4)
5957 .nr(8)
5958 .kr(4)
5959 .sr(1)
5960 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005961 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005963 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005964 }
5965 }
5966 }
5967
5968 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_strided_cn) {
5969 TEST_REQUIRES_ARM_NEON_DOT;
5970 for (uint32_t n = 16; n <= 24; n += 8) {
5971 for (size_t k = 1; k <= 40; k += 9) {
5972 GemmMicrokernelTester()
5973 .mr(4)
5974 .nr(8)
5975 .kr(4)
5976 .sr(1)
5977 .m(4)
5978 .n(n)
5979 .k(k)
5980 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005981 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005982 }
5983 }
5984 }
5985
5986 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_subtile) {
5987 TEST_REQUIRES_ARM_NEON_DOT;
5988 for (uint32_t n = 16; n <= 24; n += 8) {
5989 for (size_t k = 1; k <= 40; k += 9) {
5990 for (uint32_t m = 1; m <= 4; m++) {
5991 GemmMicrokernelTester()
5992 .mr(4)
5993 .nr(8)
5994 .kr(4)
5995 .sr(1)
5996 .m(m)
5997 .n(n)
5998 .k(k)
5999 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006000 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006001 }
6002 }
6003 }
6004 }
6005
6006 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel) {
6007 TEST_REQUIRES_ARM_NEON_DOT;
6008 for (size_t k = 1; k <= 40; k += 9) {
6009 GemmMicrokernelTester()
6010 .mr(4)
6011 .nr(8)
6012 .kr(4)
6013 .sr(1)
6014 .m(4)
6015 .n(8)
6016 .k(k)
6017 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006018 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006019 }
6020 }
6021
6022 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, small_kernel_subtile) {
6023 TEST_REQUIRES_ARM_NEON_DOT;
6024 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006025 for (uint32_t n = 1; n <= 8; n++) {
6026 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006027 GemmMicrokernelTester()
6028 .mr(4)
6029 .nr(8)
6030 .kr(4)
6031 .sr(1)
6032 .m(m)
6033 .n(n)
6034 .k(k)
6035 .ks(3)
6036 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006037 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006038 }
6039 }
6040 }
6041 }
6042
6043 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_gt_8_small_kernel) {
6044 TEST_REQUIRES_ARM_NEON_DOT;
6045 for (uint32_t n = 9; n < 16; n++) {
6046 for (size_t k = 1; k <= 40; k += 9) {
6047 GemmMicrokernelTester()
6048 .mr(4)
6049 .nr(8)
6050 .kr(4)
6051 .sr(1)
6052 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006053 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006054 .k(k)
6055 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006056 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006057 }
6058 }
6059 }
6060
6061 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, n_div_8_small_kernel) {
6062 TEST_REQUIRES_ARM_NEON_DOT;
6063 for (uint32_t n = 16; n <= 24; n += 8) {
6064 for (size_t k = 1; k <= 40; k += 9) {
6065 GemmMicrokernelTester()
6066 .mr(4)
6067 .nr(8)
6068 .kr(4)
6069 .sr(1)
6070 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006071 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006072 .k(k)
6073 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006074 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006075 }
6076 }
6077 }
6078
6079 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm_subtile) {
6080 TEST_REQUIRES_ARM_NEON_DOT;
6081 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006082 for (uint32_t n = 1; n <= 8; n++) {
6083 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006084 GemmMicrokernelTester()
6085 .mr(4)
6086 .nr(8)
6087 .kr(4)
6088 .sr(1)
6089 .m(m)
6090 .n(n)
6091 .k(k)
6092 .cm_stride(11)
6093 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006094 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006095 }
6096 }
6097 }
6098 }
6099
6100 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, a_offset) {
6101 TEST_REQUIRES_ARM_NEON_DOT;
6102 for (size_t k = 1; k <= 40; k += 9) {
6103 GemmMicrokernelTester()
6104 .mr(4)
6105 .nr(8)
6106 .kr(4)
6107 .sr(1)
6108 .m(4)
6109 .n(8)
6110 .k(k)
6111 .ks(3)
6112 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006113 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006114 }
6115 }
6116
6117 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, zero) {
6118 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006119 for (size_t k = 1; k <= 40; k += 9) {
6120 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006121 GemmMicrokernelTester()
6122 .mr(4)
6123 .nr(8)
6124 .kr(4)
6125 .sr(1)
6126 .m(4)
6127 .n(8)
6128 .k(k)
6129 .ks(3)
6130 .a_offset(163)
6131 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006132 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006133 }
6134 }
6135 }
6136
6137 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmin) {
6138 TEST_REQUIRES_ARM_NEON_DOT;
6139 GemmMicrokernelTester()
6140 .mr(4)
6141 .nr(8)
6142 .kr(4)
6143 .sr(1)
6144 .m(4)
6145 .n(8)
6146 .k(8)
6147 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006148 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006149 }
6150
6151 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, qmax) {
6152 TEST_REQUIRES_ARM_NEON_DOT;
6153 GemmMicrokernelTester()
6154 .mr(4)
6155 .nr(8)
6156 .kr(4)
6157 .sr(1)
6158 .m(4)
6159 .n(8)
6160 .k(8)
6161 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006162 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006163 }
6164
6165 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, strided_cm) {
6166 TEST_REQUIRES_ARM_NEON_DOT;
6167 GemmMicrokernelTester()
6168 .mr(4)
6169 .nr(8)
6170 .kr(4)
6171 .sr(1)
6172 .m(4)
6173 .n(8)
6174 .k(8)
6175 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006176 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006177 }
6178
6179 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_a_zero_point) {
6180 TEST_REQUIRES_ARM_NEON_DOT;
6181 for (size_t k = 1; k <= 40; k += 9) {
6182 GemmMicrokernelTester()
6183 .mr(4)
6184 .nr(8)
6185 .kr(4)
6186 .sr(1)
6187 .m(4)
6188 .n(8)
6189 .k(k)
6190 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006191 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006192 }
6193 }
6194
6195 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_b_zero_point) {
6196 TEST_REQUIRES_ARM_NEON_DOT;
6197 for (size_t k = 1; k <= 40; k += 9) {
6198 GemmMicrokernelTester()
6199 .mr(4)
6200 .nr(8)
6201 .kr(4)
6202 .sr(1)
6203 .m(4)
6204 .n(8)
6205 .k(k)
6206 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006207 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006208 }
6209 }
6210
6211 TEST(QU8_IGEMM_MINMAX_RNDNU_4X8C4__NEONDOT, no_zero_point) {
6212 TEST_REQUIRES_ARM_NEON_DOT;
6213 for (size_t k = 1; k <= 40; k += 9) {
6214 GemmMicrokernelTester()
6215 .mr(4)
6216 .nr(8)
6217 .kr(4)
6218 .sr(1)
6219 .m(4)
6220 .n(8)
6221 .k(k)
6222 .a_zero_point(0)
6223 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006224 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006225 }
6226 }
6227#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6228
6229
6230#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6231 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8) {
6232 TEST_REQUIRES_ARM_NEON_DOT;
6233 GemmMicrokernelTester()
6234 .mr(5)
6235 .nr(8)
6236 .kr(4)
6237 .sr(1)
6238 .m(5)
6239 .n(8)
6240 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006241 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006242 }
6243
6244 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cn) {
6245 TEST_REQUIRES_ARM_NEON_DOT;
6246 GemmMicrokernelTester()
6247 .mr(5)
6248 .nr(8)
6249 .kr(4)
6250 .sr(1)
6251 .m(5)
6252 .n(8)
6253 .k(8)
6254 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006255 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006256 }
6257
6258 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile) {
6259 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006260 for (uint32_t n = 1; n <= 8; n++) {
6261 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006262 GemmMicrokernelTester()
6263 .mr(5)
6264 .nr(8)
6265 .kr(4)
6266 .sr(1)
6267 .m(m)
6268 .n(n)
6269 .k(8)
6270 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006271 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006272 }
6273 }
6274 }
6275
6276 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile_m) {
6277 TEST_REQUIRES_ARM_NEON_DOT;
6278 for (uint32_t m = 1; m <= 5; m++) {
6279 GemmMicrokernelTester()
6280 .mr(5)
6281 .nr(8)
6282 .kr(4)
6283 .sr(1)
6284 .m(m)
6285 .n(8)
6286 .k(8)
6287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006288 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006289 }
6290 }
6291
6292 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_eq_8_subtile_n) {
6293 TEST_REQUIRES_ARM_NEON_DOT;
6294 for (uint32_t n = 1; n <= 8; n++) {
6295 GemmMicrokernelTester()
6296 .mr(5)
6297 .nr(8)
6298 .kr(4)
6299 .sr(1)
6300 .m(5)
6301 .n(n)
6302 .k(8)
6303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006304 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006305 }
6306 }
6307
6308 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_lt_8) {
6309 TEST_REQUIRES_ARM_NEON_DOT;
6310 for (size_t k = 1; k < 8; k++) {
6311 GemmMicrokernelTester()
6312 .mr(5)
6313 .nr(8)
6314 .kr(4)
6315 .sr(1)
6316 .m(5)
6317 .n(8)
6318 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006319 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006320 }
6321 }
6322
6323 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_lt_8_subtile) {
6324 TEST_REQUIRES_ARM_NEON_DOT;
6325 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006326 for (uint32_t n = 1; n <= 8; n++) {
6327 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006328 GemmMicrokernelTester()
6329 .mr(5)
6330 .nr(8)
6331 .kr(4)
6332 .sr(1)
6333 .m(m)
6334 .n(n)
6335 .k(k)
6336 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006337 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006338 }
6339 }
6340 }
6341 }
6342
6343 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_gt_8) {
6344 TEST_REQUIRES_ARM_NEON_DOT;
6345 for (size_t k = 9; k < 16; k++) {
6346 GemmMicrokernelTester()
6347 .mr(5)
6348 .nr(8)
6349 .kr(4)
6350 .sr(1)
6351 .m(5)
6352 .n(8)
6353 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006354 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006355 }
6356 }
6357
6358 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_gt_8_subtile) {
6359 TEST_REQUIRES_ARM_NEON_DOT;
6360 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006361 for (uint32_t n = 1; n <= 8; n++) {
6362 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006363 GemmMicrokernelTester()
6364 .mr(5)
6365 .nr(8)
6366 .kr(4)
6367 .sr(1)
6368 .m(m)
6369 .n(n)
6370 .k(k)
6371 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006372 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006373 }
6374 }
6375 }
6376 }
6377
6378 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_div_8) {
6379 TEST_REQUIRES_ARM_NEON_DOT;
6380 for (size_t k = 16; k <= 80; k += 8) {
6381 GemmMicrokernelTester()
6382 .mr(5)
6383 .nr(8)
6384 .kr(4)
6385 .sr(1)
6386 .m(5)
6387 .n(8)
6388 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006389 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006390 }
6391 }
6392
6393 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, k_div_8_subtile) {
6394 TEST_REQUIRES_ARM_NEON_DOT;
6395 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006396 for (uint32_t n = 1; n <= 8; n++) {
6397 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006398 GemmMicrokernelTester()
6399 .mr(5)
6400 .nr(8)
6401 .kr(4)
6402 .sr(1)
6403 .m(m)
6404 .n(n)
6405 .k(k)
6406 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006407 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006408 }
6409 }
6410 }
6411 }
6412
6413 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8) {
6414 TEST_REQUIRES_ARM_NEON_DOT;
6415 for (uint32_t n = 9; n < 16; n++) {
6416 for (size_t k = 1; k <= 40; k += 9) {
6417 GemmMicrokernelTester()
6418 .mr(5)
6419 .nr(8)
6420 .kr(4)
6421 .sr(1)
6422 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006423 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006424 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006425 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006426 }
6427 }
6428 }
6429
6430 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_strided_cn) {
6431 TEST_REQUIRES_ARM_NEON_DOT;
6432 for (uint32_t n = 9; n < 16; n++) {
6433 for (size_t k = 1; k <= 40; k += 9) {
6434 GemmMicrokernelTester()
6435 .mr(5)
6436 .nr(8)
6437 .kr(4)
6438 .sr(1)
6439 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006440 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006441 .k(k)
6442 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006443 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006444 }
6445 }
6446 }
6447
6448 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_subtile) {
6449 TEST_REQUIRES_ARM_NEON_DOT;
6450 for (uint32_t n = 9; n < 16; n++) {
6451 for (size_t k = 1; k <= 40; k += 9) {
6452 for (uint32_t m = 1; m <= 5; m++) {
6453 GemmMicrokernelTester()
6454 .mr(5)
6455 .nr(8)
6456 .kr(4)
6457 .sr(1)
6458 .m(m)
6459 .n(n)
6460 .k(k)
6461 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006462 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006463 }
6464 }
6465 }
6466 }
6467
6468 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8) {
6469 TEST_REQUIRES_ARM_NEON_DOT;
6470 for (uint32_t n = 16; n <= 24; n += 8) {
6471 for (size_t k = 1; k <= 40; k += 9) {
6472 GemmMicrokernelTester()
6473 .mr(5)
6474 .nr(8)
6475 .kr(4)
6476 .sr(1)
6477 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006478 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006479 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006480 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006481 }
6482 }
6483 }
6484
6485 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_strided_cn) {
6486 TEST_REQUIRES_ARM_NEON_DOT;
6487 for (uint32_t n = 16; n <= 24; n += 8) {
6488 for (size_t k = 1; k <= 40; k += 9) {
6489 GemmMicrokernelTester()
6490 .mr(5)
6491 .nr(8)
6492 .kr(4)
6493 .sr(1)
6494 .m(5)
6495 .n(n)
6496 .k(k)
6497 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006498 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006499 }
6500 }
6501 }
6502
6503 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_subtile) {
6504 TEST_REQUIRES_ARM_NEON_DOT;
6505 for (uint32_t n = 16; n <= 24; n += 8) {
6506 for (size_t k = 1; k <= 40; k += 9) {
6507 for (uint32_t m = 1; m <= 5; m++) {
6508 GemmMicrokernelTester()
6509 .mr(5)
6510 .nr(8)
6511 .kr(4)
6512 .sr(1)
6513 .m(m)
6514 .n(n)
6515 .k(k)
6516 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006517 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006518 }
6519 }
6520 }
6521 }
6522
6523 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, small_kernel) {
6524 TEST_REQUIRES_ARM_NEON_DOT;
6525 for (size_t k = 1; k <= 40; k += 9) {
6526 GemmMicrokernelTester()
6527 .mr(5)
6528 .nr(8)
6529 .kr(4)
6530 .sr(1)
6531 .m(5)
6532 .n(8)
6533 .k(k)
6534 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006535 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006536 }
6537 }
6538
6539 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, small_kernel_subtile) {
6540 TEST_REQUIRES_ARM_NEON_DOT;
6541 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006542 for (uint32_t n = 1; n <= 8; n++) {
6543 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006544 GemmMicrokernelTester()
6545 .mr(5)
6546 .nr(8)
6547 .kr(4)
6548 .sr(1)
6549 .m(m)
6550 .n(n)
6551 .k(k)
6552 .ks(3)
6553 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006554 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006555 }
6556 }
6557 }
6558 }
6559
6560 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_gt_8_small_kernel) {
6561 TEST_REQUIRES_ARM_NEON_DOT;
6562 for (uint32_t n = 9; n < 16; n++) {
6563 for (size_t k = 1; k <= 40; k += 9) {
6564 GemmMicrokernelTester()
6565 .mr(5)
6566 .nr(8)
6567 .kr(4)
6568 .sr(1)
6569 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006570 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006571 .k(k)
6572 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006573 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006574 }
6575 }
6576 }
6577
6578 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, n_div_8_small_kernel) {
6579 TEST_REQUIRES_ARM_NEON_DOT;
6580 for (uint32_t n = 16; n <= 24; n += 8) {
6581 for (size_t k = 1; k <= 40; k += 9) {
6582 GemmMicrokernelTester()
6583 .mr(5)
6584 .nr(8)
6585 .kr(4)
6586 .sr(1)
6587 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006588 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006589 .k(k)
6590 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08006591 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006592 }
6593 }
6594 }
6595
6596 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cm_subtile) {
6597 TEST_REQUIRES_ARM_NEON_DOT;
6598 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006599 for (uint32_t n = 1; n <= 8; n++) {
6600 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006601 GemmMicrokernelTester()
6602 .mr(5)
6603 .nr(8)
6604 .kr(4)
6605 .sr(1)
6606 .m(m)
6607 .n(n)
6608 .k(k)
6609 .cm_stride(11)
6610 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006611 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006612 }
6613 }
6614 }
6615 }
6616
6617 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, a_offset) {
6618 TEST_REQUIRES_ARM_NEON_DOT;
6619 for (size_t k = 1; k <= 40; k += 9) {
6620 GemmMicrokernelTester()
6621 .mr(5)
6622 .nr(8)
6623 .kr(4)
6624 .sr(1)
6625 .m(5)
6626 .n(8)
6627 .k(k)
6628 .ks(3)
6629 .a_offset(211)
Marat Dukhan50323b82022-01-11 00:12:01 -08006630 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006631 }
6632 }
6633
6634 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, zero) {
6635 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006636 for (size_t k = 1; k <= 40; k += 9) {
6637 for (uint32_t mz = 0; mz < 5; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006638 GemmMicrokernelTester()
6639 .mr(5)
6640 .nr(8)
6641 .kr(4)
6642 .sr(1)
6643 .m(5)
6644 .n(8)
6645 .k(k)
6646 .ks(3)
6647 .a_offset(211)
6648 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08006649 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006650 }
6651 }
6652 }
6653
6654 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, qmin) {
6655 TEST_REQUIRES_ARM_NEON_DOT;
6656 GemmMicrokernelTester()
6657 .mr(5)
6658 .nr(8)
6659 .kr(4)
6660 .sr(1)
6661 .m(5)
6662 .n(8)
6663 .k(8)
6664 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006665 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006666 }
6667
6668 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, qmax) {
6669 TEST_REQUIRES_ARM_NEON_DOT;
6670 GemmMicrokernelTester()
6671 .mr(5)
6672 .nr(8)
6673 .kr(4)
6674 .sr(1)
6675 .m(5)
6676 .n(8)
6677 .k(8)
6678 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006679 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006680 }
6681
6682 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, strided_cm) {
6683 TEST_REQUIRES_ARM_NEON_DOT;
6684 GemmMicrokernelTester()
6685 .mr(5)
6686 .nr(8)
6687 .kr(4)
6688 .sr(1)
6689 .m(5)
6690 .n(8)
6691 .k(8)
6692 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006693 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006694 }
6695
6696 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_a_zero_point) {
6697 TEST_REQUIRES_ARM_NEON_DOT;
6698 for (size_t k = 1; k <= 40; k += 9) {
6699 GemmMicrokernelTester()
6700 .mr(5)
6701 .nr(8)
6702 .kr(4)
6703 .sr(1)
6704 .m(5)
6705 .n(8)
6706 .k(k)
6707 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006708 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006709 }
6710 }
6711
6712 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_b_zero_point) {
6713 TEST_REQUIRES_ARM_NEON_DOT;
6714 for (size_t k = 1; k <= 40; k += 9) {
6715 GemmMicrokernelTester()
6716 .mr(5)
6717 .nr(8)
6718 .kr(4)
6719 .sr(1)
6720 .m(5)
6721 .n(8)
6722 .k(k)
6723 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006724 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006725 }
6726 }
6727
6728 TEST(QU8_IGEMM_MINMAX_RNDNU_5X8C4__NEONDOT, no_zero_point) {
6729 TEST_REQUIRES_ARM_NEON_DOT;
6730 for (size_t k = 1; k <= 40; k += 9) {
6731 GemmMicrokernelTester()
6732 .mr(5)
6733 .nr(8)
6734 .kr(4)
6735 .sr(1)
6736 .m(5)
6737 .n(8)
6738 .k(k)
6739 .a_zero_point(0)
6740 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006741 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_5x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006742 }
6743 }
6744#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6745
6746
6747#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6748 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8) {
6749 TEST_REQUIRES_ARM_NEON_DOT;
6750 GemmMicrokernelTester()
6751 .mr(1)
6752 .nr(16)
6753 .kr(4)
6754 .sr(1)
6755 .m(1)
6756 .n(16)
6757 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006758 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006759 }
6760
6761 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cn) {
6762 TEST_REQUIRES_ARM_NEON_DOT;
6763 GemmMicrokernelTester()
6764 .mr(1)
6765 .nr(16)
6766 .kr(4)
6767 .sr(1)
6768 .m(1)
6769 .n(16)
6770 .k(8)
6771 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006772 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006773 }
6774
6775 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile) {
6776 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006777 for (uint32_t n = 1; n <= 16; n++) {
6778 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006779 GemmMicrokernelTester()
6780 .mr(1)
6781 .nr(16)
6782 .kr(4)
6783 .sr(1)
6784 .m(m)
6785 .n(n)
6786 .k(8)
6787 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006788 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006789 }
6790 }
6791 }
6792
6793 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_m) {
6794 TEST_REQUIRES_ARM_NEON_DOT;
6795 for (uint32_t m = 1; m <= 1; m++) {
6796 GemmMicrokernelTester()
6797 .mr(1)
6798 .nr(16)
6799 .kr(4)
6800 .sr(1)
6801 .m(m)
6802 .n(16)
6803 .k(8)
6804 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006805 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006806 }
6807 }
6808
6809 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_eq_8_subtile_n) {
6810 TEST_REQUIRES_ARM_NEON_DOT;
6811 for (uint32_t n = 1; n <= 16; n++) {
6812 GemmMicrokernelTester()
6813 .mr(1)
6814 .nr(16)
6815 .kr(4)
6816 .sr(1)
6817 .m(1)
6818 .n(n)
6819 .k(8)
6820 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006821 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006822 }
6823 }
6824
6825 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8) {
6826 TEST_REQUIRES_ARM_NEON_DOT;
6827 for (size_t k = 1; k < 8; k++) {
6828 GemmMicrokernelTester()
6829 .mr(1)
6830 .nr(16)
6831 .kr(4)
6832 .sr(1)
6833 .m(1)
6834 .n(16)
6835 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006836 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006837 }
6838 }
6839
6840 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_lt_8_subtile) {
6841 TEST_REQUIRES_ARM_NEON_DOT;
6842 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006843 for (uint32_t n = 1; n <= 16; n++) {
6844 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006845 GemmMicrokernelTester()
6846 .mr(1)
6847 .nr(16)
6848 .kr(4)
6849 .sr(1)
6850 .m(m)
6851 .n(n)
6852 .k(k)
6853 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006854 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006855 }
6856 }
6857 }
6858 }
6859
6860 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8) {
6861 TEST_REQUIRES_ARM_NEON_DOT;
6862 for (size_t k = 9; k < 16; k++) {
6863 GemmMicrokernelTester()
6864 .mr(1)
6865 .nr(16)
6866 .kr(4)
6867 .sr(1)
6868 .m(1)
6869 .n(16)
6870 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006871 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006872 }
6873 }
6874
6875 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_gt_8_subtile) {
6876 TEST_REQUIRES_ARM_NEON_DOT;
6877 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006878 for (uint32_t n = 1; n <= 16; n++) {
6879 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006880 GemmMicrokernelTester()
6881 .mr(1)
6882 .nr(16)
6883 .kr(4)
6884 .sr(1)
6885 .m(m)
6886 .n(n)
6887 .k(k)
6888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006889 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006890 }
6891 }
6892 }
6893 }
6894
6895 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8) {
6896 TEST_REQUIRES_ARM_NEON_DOT;
6897 for (size_t k = 16; k <= 80; k += 8) {
6898 GemmMicrokernelTester()
6899 .mr(1)
6900 .nr(16)
6901 .kr(4)
6902 .sr(1)
6903 .m(1)
6904 .n(16)
6905 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006906 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006907 }
6908 }
6909
6910 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, k_div_8_subtile) {
6911 TEST_REQUIRES_ARM_NEON_DOT;
6912 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006913 for (uint32_t n = 1; n <= 16; n++) {
6914 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006915 GemmMicrokernelTester()
6916 .mr(1)
6917 .nr(16)
6918 .kr(4)
6919 .sr(1)
6920 .m(m)
6921 .n(n)
6922 .k(k)
6923 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006924 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006925 }
6926 }
6927 }
6928 }
6929
6930 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16) {
6931 TEST_REQUIRES_ARM_NEON_DOT;
6932 for (uint32_t n = 17; n < 32; n++) {
6933 for (size_t k = 1; k <= 40; k += 9) {
6934 GemmMicrokernelTester()
6935 .mr(1)
6936 .nr(16)
6937 .kr(4)
6938 .sr(1)
6939 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006940 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006941 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006942 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006943 }
6944 }
6945 }
6946
6947 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_strided_cn) {
6948 TEST_REQUIRES_ARM_NEON_DOT;
6949 for (uint32_t n = 17; n < 32; n++) {
6950 for (size_t k = 1; k <= 40; k += 9) {
6951 GemmMicrokernelTester()
6952 .mr(1)
6953 .nr(16)
6954 .kr(4)
6955 .sr(1)
6956 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006957 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006958 .k(k)
6959 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006960 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006961 }
6962 }
6963 }
6964
6965 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_subtile) {
6966 TEST_REQUIRES_ARM_NEON_DOT;
6967 for (uint32_t n = 17; n < 32; n++) {
6968 for (size_t k = 1; k <= 40; k += 9) {
6969 for (uint32_t m = 1; m <= 1; m++) {
6970 GemmMicrokernelTester()
6971 .mr(1)
6972 .nr(16)
6973 .kr(4)
6974 .sr(1)
6975 .m(m)
6976 .n(n)
6977 .k(k)
6978 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006979 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006980 }
6981 }
6982 }
6983 }
6984
6985 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16) {
6986 TEST_REQUIRES_ARM_NEON_DOT;
6987 for (uint32_t n = 32; n <= 48; n += 16) {
6988 for (size_t k = 1; k <= 40; k += 9) {
6989 GemmMicrokernelTester()
6990 .mr(1)
6991 .nr(16)
6992 .kr(4)
6993 .sr(1)
6994 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006995 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006996 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006997 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006998 }
6999 }
7000 }
7001
7002 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_strided_cn) {
7003 TEST_REQUIRES_ARM_NEON_DOT;
7004 for (uint32_t n = 32; n <= 48; n += 16) {
7005 for (size_t k = 1; k <= 40; k += 9) {
7006 GemmMicrokernelTester()
7007 .mr(1)
7008 .nr(16)
7009 .kr(4)
7010 .sr(1)
7011 .m(1)
7012 .n(n)
7013 .k(k)
7014 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007015 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007016 }
7017 }
7018 }
7019
7020 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_subtile) {
7021 TEST_REQUIRES_ARM_NEON_DOT;
7022 for (uint32_t n = 32; n <= 48; n += 16) {
7023 for (size_t k = 1; k <= 40; k += 9) {
7024 for (uint32_t m = 1; m <= 1; m++) {
7025 GemmMicrokernelTester()
7026 .mr(1)
7027 .nr(16)
7028 .kr(4)
7029 .sr(1)
7030 .m(m)
7031 .n(n)
7032 .k(k)
7033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007034 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007035 }
7036 }
7037 }
7038 }
7039
7040 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel) {
7041 TEST_REQUIRES_ARM_NEON_DOT;
7042 for (size_t k = 1; k <= 40; k += 9) {
7043 GemmMicrokernelTester()
7044 .mr(1)
7045 .nr(16)
7046 .kr(4)
7047 .sr(1)
7048 .m(1)
7049 .n(16)
7050 .k(k)
7051 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007052 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007053 }
7054 }
7055
7056 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, small_kernel_subtile) {
7057 TEST_REQUIRES_ARM_NEON_DOT;
7058 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007059 for (uint32_t n = 1; n <= 16; n++) {
7060 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007061 GemmMicrokernelTester()
7062 .mr(1)
7063 .nr(16)
7064 .kr(4)
7065 .sr(1)
7066 .m(m)
7067 .n(n)
7068 .k(k)
7069 .ks(3)
7070 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007071 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007072 }
7073 }
7074 }
7075 }
7076
7077 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_gt_16_small_kernel) {
7078 TEST_REQUIRES_ARM_NEON_DOT;
7079 for (uint32_t n = 17; n < 32; n++) {
7080 for (size_t k = 1; k <= 40; k += 9) {
7081 GemmMicrokernelTester()
7082 .mr(1)
7083 .nr(16)
7084 .kr(4)
7085 .sr(1)
7086 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007087 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007088 .k(k)
7089 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007090 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007091 }
7092 }
7093 }
7094
7095 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, n_div_16_small_kernel) {
7096 TEST_REQUIRES_ARM_NEON_DOT;
7097 for (uint32_t n = 32; n <= 48; n += 16) {
7098 for (size_t k = 1; k <= 40; k += 9) {
7099 GemmMicrokernelTester()
7100 .mr(1)
7101 .nr(16)
7102 .kr(4)
7103 .sr(1)
7104 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007105 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007106 .k(k)
7107 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007108 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007109 }
7110 }
7111 }
7112
7113 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm_subtile) {
7114 TEST_REQUIRES_ARM_NEON_DOT;
7115 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007116 for (uint32_t n = 1; n <= 16; n++) {
7117 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007118 GemmMicrokernelTester()
7119 .mr(1)
7120 .nr(16)
7121 .kr(4)
7122 .sr(1)
7123 .m(m)
7124 .n(n)
7125 .k(k)
7126 .cm_stride(19)
7127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007128 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007129 }
7130 }
7131 }
7132 }
7133
7134 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, a_offset) {
7135 TEST_REQUIRES_ARM_NEON_DOT;
7136 for (size_t k = 1; k <= 40; k += 9) {
7137 GemmMicrokernelTester()
7138 .mr(1)
7139 .nr(16)
7140 .kr(4)
7141 .sr(1)
7142 .m(1)
7143 .n(16)
7144 .k(k)
7145 .ks(3)
7146 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007147 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007148 }
7149 }
7150
7151 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, zero) {
7152 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007153 for (size_t k = 1; k <= 40; k += 9) {
7154 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007155 GemmMicrokernelTester()
7156 .mr(1)
7157 .nr(16)
7158 .kr(4)
7159 .sr(1)
7160 .m(1)
7161 .n(16)
7162 .k(k)
7163 .ks(3)
7164 .a_offset(43)
7165 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007166 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007167 }
7168 }
7169 }
7170
7171 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmin) {
7172 TEST_REQUIRES_ARM_NEON_DOT;
7173 GemmMicrokernelTester()
7174 .mr(1)
7175 .nr(16)
7176 .kr(4)
7177 .sr(1)
7178 .m(1)
7179 .n(16)
7180 .k(8)
7181 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007182 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007183 }
7184
7185 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, qmax) {
7186 TEST_REQUIRES_ARM_NEON_DOT;
7187 GemmMicrokernelTester()
7188 .mr(1)
7189 .nr(16)
7190 .kr(4)
7191 .sr(1)
7192 .m(1)
7193 .n(16)
7194 .k(8)
7195 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007196 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007197 }
7198
7199 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, strided_cm) {
7200 TEST_REQUIRES_ARM_NEON_DOT;
7201 GemmMicrokernelTester()
7202 .mr(1)
7203 .nr(16)
7204 .kr(4)
7205 .sr(1)
7206 .m(1)
7207 .n(16)
7208 .k(8)
7209 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007210 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007211 }
7212
7213 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_a_zero_point) {
7214 TEST_REQUIRES_ARM_NEON_DOT;
7215 for (size_t k = 1; k <= 40; k += 9) {
7216 GemmMicrokernelTester()
7217 .mr(1)
7218 .nr(16)
7219 .kr(4)
7220 .sr(1)
7221 .m(1)
7222 .n(16)
7223 .k(k)
7224 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007225 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007226 }
7227 }
7228
7229 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_b_zero_point) {
7230 TEST_REQUIRES_ARM_NEON_DOT;
7231 for (size_t k = 1; k <= 40; k += 9) {
7232 GemmMicrokernelTester()
7233 .mr(1)
7234 .nr(16)
7235 .kr(4)
7236 .sr(1)
7237 .m(1)
7238 .n(16)
7239 .k(k)
7240 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007241 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007242 }
7243 }
7244
7245 TEST(QU8_IGEMM_MINMAX_RNDNU_1X16C4__NEONDOT, no_zero_point) {
7246 TEST_REQUIRES_ARM_NEON_DOT;
7247 for (size_t k = 1; k <= 40; k += 9) {
7248 GemmMicrokernelTester()
7249 .mr(1)
7250 .nr(16)
7251 .kr(4)
7252 .sr(1)
7253 .m(1)
7254 .n(16)
7255 .k(k)
7256 .a_zero_point(0)
7257 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007258 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007259 }
7260 }
7261#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7262
7263
7264#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7265 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8) {
7266 TEST_REQUIRES_ARM_NEON_DOT;
7267 GemmMicrokernelTester()
7268 .mr(6)
7269 .nr(16)
7270 .kr(4)
7271 .sr(1)
7272 .m(6)
7273 .n(16)
7274 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007275 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007276 }
7277
7278 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cn) {
7279 TEST_REQUIRES_ARM_NEON_DOT;
7280 GemmMicrokernelTester()
7281 .mr(6)
7282 .nr(16)
7283 .kr(4)
7284 .sr(1)
7285 .m(6)
7286 .n(16)
7287 .k(8)
7288 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007289 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007290 }
7291
7292 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile) {
7293 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007294 for (uint32_t n = 1; n <= 16; n++) {
7295 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007296 GemmMicrokernelTester()
7297 .mr(6)
7298 .nr(16)
7299 .kr(4)
7300 .sr(1)
7301 .m(m)
7302 .n(n)
7303 .k(8)
7304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007305 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007306 }
7307 }
7308 }
7309
7310 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_m) {
7311 TEST_REQUIRES_ARM_NEON_DOT;
7312 for (uint32_t m = 1; m <= 6; m++) {
7313 GemmMicrokernelTester()
7314 .mr(6)
7315 .nr(16)
7316 .kr(4)
7317 .sr(1)
7318 .m(m)
7319 .n(16)
7320 .k(8)
7321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007322 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007323 }
7324 }
7325
7326 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_n) {
7327 TEST_REQUIRES_ARM_NEON_DOT;
7328 for (uint32_t n = 1; n <= 16; n++) {
7329 GemmMicrokernelTester()
7330 .mr(6)
7331 .nr(16)
7332 .kr(4)
7333 .sr(1)
7334 .m(6)
7335 .n(n)
7336 .k(8)
7337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007338 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007339 }
7340 }
7341
7342 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8) {
7343 TEST_REQUIRES_ARM_NEON_DOT;
7344 for (size_t k = 1; k < 8; k++) {
7345 GemmMicrokernelTester()
7346 .mr(6)
7347 .nr(16)
7348 .kr(4)
7349 .sr(1)
7350 .m(6)
7351 .n(16)
7352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007353 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007354 }
7355 }
7356
7357 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_subtile) {
7358 TEST_REQUIRES_ARM_NEON_DOT;
7359 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007360 for (uint32_t n = 1; n <= 16; n++) {
7361 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007362 GemmMicrokernelTester()
7363 .mr(6)
7364 .nr(16)
7365 .kr(4)
7366 .sr(1)
7367 .m(m)
7368 .n(n)
7369 .k(k)
7370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007371 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007372 }
7373 }
7374 }
7375 }
7376
7377 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8) {
7378 TEST_REQUIRES_ARM_NEON_DOT;
7379 for (size_t k = 9; k < 16; k++) {
7380 GemmMicrokernelTester()
7381 .mr(6)
7382 .nr(16)
7383 .kr(4)
7384 .sr(1)
7385 .m(6)
7386 .n(16)
7387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007388 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007389 }
7390 }
7391
7392 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_subtile) {
7393 TEST_REQUIRES_ARM_NEON_DOT;
7394 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007395 for (uint32_t n = 1; n <= 16; n++) {
7396 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007397 GemmMicrokernelTester()
7398 .mr(6)
7399 .nr(16)
7400 .kr(4)
7401 .sr(1)
7402 .m(m)
7403 .n(n)
7404 .k(k)
7405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007407 }
7408 }
7409 }
7410 }
7411
7412 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8) {
7413 TEST_REQUIRES_ARM_NEON_DOT;
7414 for (size_t k = 16; k <= 80; k += 8) {
7415 GemmMicrokernelTester()
7416 .mr(6)
7417 .nr(16)
7418 .kr(4)
7419 .sr(1)
7420 .m(6)
7421 .n(16)
7422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007423 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007424 }
7425 }
7426
7427 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_subtile) {
7428 TEST_REQUIRES_ARM_NEON_DOT;
7429 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007430 for (uint32_t n = 1; n <= 16; n++) {
7431 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007432 GemmMicrokernelTester()
7433 .mr(6)
7434 .nr(16)
7435 .kr(4)
7436 .sr(1)
7437 .m(m)
7438 .n(n)
7439 .k(k)
7440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007441 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007442 }
7443 }
7444 }
7445 }
7446
7447 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16) {
7448 TEST_REQUIRES_ARM_NEON_DOT;
7449 for (uint32_t n = 17; n < 32; n++) {
7450 for (size_t k = 1; k <= 40; k += 9) {
7451 GemmMicrokernelTester()
7452 .mr(6)
7453 .nr(16)
7454 .kr(4)
7455 .sr(1)
7456 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007457 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007458 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007459 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007460 }
7461 }
7462 }
7463
7464 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_cn) {
7465 TEST_REQUIRES_ARM_NEON_DOT;
7466 for (uint32_t n = 17; n < 32; n++) {
7467 for (size_t k = 1; k <= 40; k += 9) {
7468 GemmMicrokernelTester()
7469 .mr(6)
7470 .nr(16)
7471 .kr(4)
7472 .sr(1)
7473 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007474 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007475 .k(k)
7476 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007477 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007478 }
7479 }
7480 }
7481
7482 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_subtile) {
7483 TEST_REQUIRES_ARM_NEON_DOT;
7484 for (uint32_t n = 17; n < 32; n++) {
7485 for (size_t k = 1; k <= 40; k += 9) {
7486 for (uint32_t m = 1; m <= 6; m++) {
7487 GemmMicrokernelTester()
7488 .mr(6)
7489 .nr(16)
7490 .kr(4)
7491 .sr(1)
7492 .m(m)
7493 .n(n)
7494 .k(k)
7495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007496 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007497 }
7498 }
7499 }
7500 }
7501
7502 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16) {
7503 TEST_REQUIRES_ARM_NEON_DOT;
7504 for (uint32_t n = 32; n <= 48; n += 16) {
7505 for (size_t k = 1; k <= 40; k += 9) {
7506 GemmMicrokernelTester()
7507 .mr(6)
7508 .nr(16)
7509 .kr(4)
7510 .sr(1)
7511 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007512 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007513 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007514 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007515 }
7516 }
7517 }
7518
7519 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_cn) {
7520 TEST_REQUIRES_ARM_NEON_DOT;
7521 for (uint32_t n = 32; n <= 48; n += 16) {
7522 for (size_t k = 1; k <= 40; k += 9) {
7523 GemmMicrokernelTester()
7524 .mr(6)
7525 .nr(16)
7526 .kr(4)
7527 .sr(1)
7528 .m(6)
7529 .n(n)
7530 .k(k)
7531 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007532 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007533 }
7534 }
7535 }
7536
7537 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_subtile) {
7538 TEST_REQUIRES_ARM_NEON_DOT;
7539 for (uint32_t n = 32; n <= 48; n += 16) {
7540 for (size_t k = 1; k <= 40; k += 9) {
7541 for (uint32_t m = 1; m <= 6; m++) {
7542 GemmMicrokernelTester()
7543 .mr(6)
7544 .nr(16)
7545 .kr(4)
7546 .sr(1)
7547 .m(m)
7548 .n(n)
7549 .k(k)
7550 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007551 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007552 }
7553 }
7554 }
7555 }
7556
7557 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel) {
7558 TEST_REQUIRES_ARM_NEON_DOT;
7559 for (size_t k = 1; k <= 40; k += 9) {
7560 GemmMicrokernelTester()
7561 .mr(6)
7562 .nr(16)
7563 .kr(4)
7564 .sr(1)
7565 .m(6)
7566 .n(16)
7567 .k(k)
7568 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007569 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007570 }
7571 }
7572
7573 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, small_kernel_subtile) {
7574 TEST_REQUIRES_ARM_NEON_DOT;
7575 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007576 for (uint32_t n = 1; n <= 16; n++) {
7577 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007578 GemmMicrokernelTester()
7579 .mr(6)
7580 .nr(16)
7581 .kr(4)
7582 .sr(1)
7583 .m(m)
7584 .n(n)
7585 .k(k)
7586 .ks(3)
7587 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007588 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007589 }
7590 }
7591 }
7592 }
7593
7594 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_small_kernel) {
7595 TEST_REQUIRES_ARM_NEON_DOT;
7596 for (uint32_t n = 17; n < 32; n++) {
7597 for (size_t k = 1; k <= 40; k += 9) {
7598 GemmMicrokernelTester()
7599 .mr(6)
7600 .nr(16)
7601 .kr(4)
7602 .sr(1)
7603 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007604 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007605 .k(k)
7606 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007607 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007608 }
7609 }
7610 }
7611
7612 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_small_kernel) {
7613 TEST_REQUIRES_ARM_NEON_DOT;
7614 for (uint32_t n = 32; n <= 48; n += 16) {
7615 for (size_t k = 1; k <= 40; k += 9) {
7616 GemmMicrokernelTester()
7617 .mr(6)
7618 .nr(16)
7619 .kr(4)
7620 .sr(1)
7621 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007622 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007623 .k(k)
7624 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08007625 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007626 }
7627 }
7628 }
7629
7630 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm_subtile) {
7631 TEST_REQUIRES_ARM_NEON_DOT;
7632 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007633 for (uint32_t n = 1; n <= 16; n++) {
7634 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007635 GemmMicrokernelTester()
7636 .mr(6)
7637 .nr(16)
7638 .kr(4)
7639 .sr(1)
7640 .m(m)
7641 .n(n)
7642 .k(k)
7643 .cm_stride(19)
7644 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007645 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007646 }
7647 }
7648 }
7649 }
7650
7651 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, a_offset) {
7652 TEST_REQUIRES_ARM_NEON_DOT;
7653 for (size_t k = 1; k <= 40; k += 9) {
7654 GemmMicrokernelTester()
7655 .mr(6)
7656 .nr(16)
7657 .kr(4)
7658 .sr(1)
7659 .m(6)
7660 .n(16)
7661 .k(k)
7662 .ks(3)
7663 .a_offset(251)
Marat Dukhan50323b82022-01-11 00:12:01 -08007664 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007665 }
7666 }
7667
7668 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, zero) {
7669 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007670 for (size_t k = 1; k <= 40; k += 9) {
7671 for (uint32_t mz = 0; mz < 6; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007672 GemmMicrokernelTester()
7673 .mr(6)
7674 .nr(16)
7675 .kr(4)
7676 .sr(1)
7677 .m(6)
7678 .n(16)
7679 .k(k)
7680 .ks(3)
7681 .a_offset(251)
7682 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08007683 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007684 }
7685 }
7686 }
7687
7688 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmin) {
7689 TEST_REQUIRES_ARM_NEON_DOT;
7690 GemmMicrokernelTester()
7691 .mr(6)
7692 .nr(16)
7693 .kr(4)
7694 .sr(1)
7695 .m(6)
7696 .n(16)
7697 .k(8)
7698 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007699 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007700 }
7701
7702 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmax) {
7703 TEST_REQUIRES_ARM_NEON_DOT;
7704 GemmMicrokernelTester()
7705 .mr(6)
7706 .nr(16)
7707 .kr(4)
7708 .sr(1)
7709 .m(6)
7710 .n(16)
7711 .k(8)
7712 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007713 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007714 }
7715
7716 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm) {
7717 TEST_REQUIRES_ARM_NEON_DOT;
7718 GemmMicrokernelTester()
7719 .mr(6)
7720 .nr(16)
7721 .kr(4)
7722 .sr(1)
7723 .m(6)
7724 .n(16)
7725 .k(8)
7726 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007727 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007728 }
7729
7730 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_a_zero_point) {
7731 TEST_REQUIRES_ARM_NEON_DOT;
7732 for (size_t k = 1; k <= 40; k += 9) {
7733 GemmMicrokernelTester()
7734 .mr(6)
7735 .nr(16)
7736 .kr(4)
7737 .sr(1)
7738 .m(6)
7739 .n(16)
7740 .k(k)
7741 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007742 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007743 }
7744 }
7745
7746 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_b_zero_point) {
7747 TEST_REQUIRES_ARM_NEON_DOT;
7748 for (size_t k = 1; k <= 40; k += 9) {
7749 GemmMicrokernelTester()
7750 .mr(6)
7751 .nr(16)
7752 .kr(4)
7753 .sr(1)
7754 .m(6)
7755 .n(16)
7756 .k(k)
7757 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007758 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007759 }
7760 }
7761
7762 TEST(QU8_IGEMM_MINMAX_RNDNU_6X16C4__NEONDOT, no_zero_point) {
7763 TEST_REQUIRES_ARM_NEON_DOT;
7764 for (size_t k = 1; k <= 40; k += 9) {
7765 GemmMicrokernelTester()
7766 .mr(6)
7767 .nr(16)
7768 .kr(4)
7769 .sr(1)
7770 .m(6)
7771 .n(16)
7772 .k(k)
7773 .a_zero_point(0)
7774 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007775 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007776 }
7777 }
7778#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7779
7780
7781#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7782 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8) {
7783 TEST_REQUIRES_ARM_NEON_DOT;
7784 GemmMicrokernelTester()
7785 .mr(1)
7786 .nr(32)
7787 .kr(4)
7788 .sr(1)
7789 .m(1)
7790 .n(32)
7791 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007792 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007793 }
7794
7795 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cn) {
7796 TEST_REQUIRES_ARM_NEON_DOT;
7797 GemmMicrokernelTester()
7798 .mr(1)
7799 .nr(32)
7800 .kr(4)
7801 .sr(1)
7802 .m(1)
7803 .n(32)
7804 .k(8)
7805 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08007806 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007807 }
7808
7809 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile) {
7810 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007811 for (uint32_t n = 1; n <= 32; n++) {
7812 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007813 GemmMicrokernelTester()
7814 .mr(1)
7815 .nr(32)
7816 .kr(4)
7817 .sr(1)
7818 .m(m)
7819 .n(n)
7820 .k(8)
7821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007822 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007823 }
7824 }
7825 }
7826
7827 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile_m) {
7828 TEST_REQUIRES_ARM_NEON_DOT;
7829 for (uint32_t m = 1; m <= 1; m++) {
7830 GemmMicrokernelTester()
7831 .mr(1)
7832 .nr(32)
7833 .kr(4)
7834 .sr(1)
7835 .m(m)
7836 .n(32)
7837 .k(8)
7838 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007839 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007840 }
7841 }
7842
7843 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_eq_8_subtile_n) {
7844 TEST_REQUIRES_ARM_NEON_DOT;
7845 for (uint32_t n = 1; n <= 32; n++) {
7846 GemmMicrokernelTester()
7847 .mr(1)
7848 .nr(32)
7849 .kr(4)
7850 .sr(1)
7851 .m(1)
7852 .n(n)
7853 .k(8)
7854 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007855 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007856 }
7857 }
7858
7859 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_lt_8) {
7860 TEST_REQUIRES_ARM_NEON_DOT;
7861 for (size_t k = 1; k < 8; k++) {
7862 GemmMicrokernelTester()
7863 .mr(1)
7864 .nr(32)
7865 .kr(4)
7866 .sr(1)
7867 .m(1)
7868 .n(32)
7869 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007870 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007871 }
7872 }
7873
7874 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_lt_8_subtile) {
7875 TEST_REQUIRES_ARM_NEON_DOT;
7876 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007877 for (uint32_t n = 1; n <= 32; n++) {
7878 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007879 GemmMicrokernelTester()
7880 .mr(1)
7881 .nr(32)
7882 .kr(4)
7883 .sr(1)
7884 .m(m)
7885 .n(n)
7886 .k(k)
7887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007888 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007889 }
7890 }
7891 }
7892 }
7893
7894 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_gt_8) {
7895 TEST_REQUIRES_ARM_NEON_DOT;
7896 for (size_t k = 9; k < 16; k++) {
7897 GemmMicrokernelTester()
7898 .mr(1)
7899 .nr(32)
7900 .kr(4)
7901 .sr(1)
7902 .m(1)
7903 .n(32)
7904 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007905 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007906 }
7907 }
7908
7909 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_gt_8_subtile) {
7910 TEST_REQUIRES_ARM_NEON_DOT;
7911 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007912 for (uint32_t n = 1; n <= 32; n++) {
7913 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007914 GemmMicrokernelTester()
7915 .mr(1)
7916 .nr(32)
7917 .kr(4)
7918 .sr(1)
7919 .m(m)
7920 .n(n)
7921 .k(k)
7922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007923 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007924 }
7925 }
7926 }
7927 }
7928
7929 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_div_8) {
7930 TEST_REQUIRES_ARM_NEON_DOT;
7931 for (size_t k = 16; k <= 80; k += 8) {
7932 GemmMicrokernelTester()
7933 .mr(1)
7934 .nr(32)
7935 .kr(4)
7936 .sr(1)
7937 .m(1)
7938 .n(32)
7939 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007940 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007941 }
7942 }
7943
7944 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, k_div_8_subtile) {
7945 TEST_REQUIRES_ARM_NEON_DOT;
7946 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007947 for (uint32_t n = 1; n <= 32; n++) {
7948 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007949 GemmMicrokernelTester()
7950 .mr(1)
7951 .nr(32)
7952 .kr(4)
7953 .sr(1)
7954 .m(m)
7955 .n(n)
7956 .k(k)
7957 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007958 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007959 }
7960 }
7961 }
7962 }
7963
7964 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32) {
7965 TEST_REQUIRES_ARM_NEON_DOT;
7966 for (uint32_t n = 33; n < 64; n++) {
7967 for (size_t k = 1; k <= 40; k += 9) {
7968 GemmMicrokernelTester()
7969 .mr(1)
7970 .nr(32)
7971 .kr(4)
7972 .sr(1)
7973 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007974 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007975 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007976 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007977 }
7978 }
7979 }
7980
7981 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_strided_cn) {
7982 TEST_REQUIRES_ARM_NEON_DOT;
7983 for (uint32_t n = 33; n < 64; n++) {
7984 for (size_t k = 1; k <= 40; k += 9) {
7985 GemmMicrokernelTester()
7986 .mr(1)
7987 .nr(32)
7988 .kr(4)
7989 .sr(1)
7990 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007991 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007992 .k(k)
7993 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08007994 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007995 }
7996 }
7997 }
7998
7999 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_subtile) {
8000 TEST_REQUIRES_ARM_NEON_DOT;
8001 for (uint32_t n = 33; n < 64; n++) {
8002 for (size_t k = 1; k <= 40; k += 9) {
8003 for (uint32_t m = 1; m <= 1; m++) {
8004 GemmMicrokernelTester()
8005 .mr(1)
8006 .nr(32)
8007 .kr(4)
8008 .sr(1)
8009 .m(m)
8010 .n(n)
8011 .k(k)
8012 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008013 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008014 }
8015 }
8016 }
8017 }
8018
8019 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32) {
8020 TEST_REQUIRES_ARM_NEON_DOT;
8021 for (uint32_t n = 64; n <= 96; n += 32) {
8022 for (size_t k = 1; k <= 40; k += 9) {
8023 GemmMicrokernelTester()
8024 .mr(1)
8025 .nr(32)
8026 .kr(4)
8027 .sr(1)
8028 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008029 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008030 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008031 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008032 }
8033 }
8034 }
8035
8036 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_strided_cn) {
8037 TEST_REQUIRES_ARM_NEON_DOT;
8038 for (uint32_t n = 64; n <= 96; n += 32) {
8039 for (size_t k = 1; k <= 40; k += 9) {
8040 GemmMicrokernelTester()
8041 .mr(1)
8042 .nr(32)
8043 .kr(4)
8044 .sr(1)
8045 .m(1)
8046 .n(n)
8047 .k(k)
8048 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008049 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008050 }
8051 }
8052 }
8053
8054 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_subtile) {
8055 TEST_REQUIRES_ARM_NEON_DOT;
8056 for (uint32_t n = 64; n <= 96; n += 32) {
8057 for (size_t k = 1; k <= 40; k += 9) {
8058 for (uint32_t m = 1; m <= 1; m++) {
8059 GemmMicrokernelTester()
8060 .mr(1)
8061 .nr(32)
8062 .kr(4)
8063 .sr(1)
8064 .m(m)
8065 .n(n)
8066 .k(k)
8067 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008068 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008069 }
8070 }
8071 }
8072 }
8073
8074 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, small_kernel) {
8075 TEST_REQUIRES_ARM_NEON_DOT;
8076 for (size_t k = 1; k <= 40; k += 9) {
8077 GemmMicrokernelTester()
8078 .mr(1)
8079 .nr(32)
8080 .kr(4)
8081 .sr(1)
8082 .m(1)
8083 .n(32)
8084 .k(k)
8085 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008086 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008087 }
8088 }
8089
8090 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, small_kernel_subtile) {
8091 TEST_REQUIRES_ARM_NEON_DOT;
8092 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008093 for (uint32_t n = 1; n <= 32; n++) {
8094 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008095 GemmMicrokernelTester()
8096 .mr(1)
8097 .nr(32)
8098 .kr(4)
8099 .sr(1)
8100 .m(m)
8101 .n(n)
8102 .k(k)
8103 .ks(3)
8104 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008105 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008106 }
8107 }
8108 }
8109 }
8110
8111 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_gt_32_small_kernel) {
8112 TEST_REQUIRES_ARM_NEON_DOT;
8113 for (uint32_t n = 33; n < 64; n++) {
8114 for (size_t k = 1; k <= 40; k += 9) {
8115 GemmMicrokernelTester()
8116 .mr(1)
8117 .nr(32)
8118 .kr(4)
8119 .sr(1)
8120 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008121 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008122 .k(k)
8123 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008124 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008125 }
8126 }
8127 }
8128
8129 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, n_div_32_small_kernel) {
8130 TEST_REQUIRES_ARM_NEON_DOT;
8131 for (uint32_t n = 64; n <= 96; n += 32) {
8132 for (size_t k = 1; k <= 40; k += 9) {
8133 GemmMicrokernelTester()
8134 .mr(1)
8135 .nr(32)
8136 .kr(4)
8137 .sr(1)
8138 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008139 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008140 .k(k)
8141 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008142 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008143 }
8144 }
8145 }
8146
8147 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cm_subtile) {
8148 TEST_REQUIRES_ARM_NEON_DOT;
8149 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008150 for (uint32_t n = 1; n <= 32; n++) {
8151 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008152 GemmMicrokernelTester()
8153 .mr(1)
8154 .nr(32)
8155 .kr(4)
8156 .sr(1)
8157 .m(m)
8158 .n(n)
8159 .k(k)
8160 .cm_stride(37)
8161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008162 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008163 }
8164 }
8165 }
8166 }
8167
8168 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, a_offset) {
8169 TEST_REQUIRES_ARM_NEON_DOT;
8170 for (size_t k = 1; k <= 40; k += 9) {
8171 GemmMicrokernelTester()
8172 .mr(1)
8173 .nr(32)
8174 .kr(4)
8175 .sr(1)
8176 .m(1)
8177 .n(32)
8178 .k(k)
8179 .ks(3)
8180 .a_offset(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008181 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008182 }
8183 }
8184
8185 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, zero) {
8186 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008187 for (size_t k = 1; k <= 40; k += 9) {
8188 for (uint32_t mz = 0; mz < 1; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008189 GemmMicrokernelTester()
8190 .mr(1)
8191 .nr(32)
8192 .kr(4)
8193 .sr(1)
8194 .m(1)
8195 .n(32)
8196 .k(k)
8197 .ks(3)
8198 .a_offset(43)
8199 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008200 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008201 }
8202 }
8203 }
8204
8205 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, qmin) {
8206 TEST_REQUIRES_ARM_NEON_DOT;
8207 GemmMicrokernelTester()
8208 .mr(1)
8209 .nr(32)
8210 .kr(4)
8211 .sr(1)
8212 .m(1)
8213 .n(32)
8214 .k(8)
8215 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008216 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008217 }
8218
8219 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, qmax) {
8220 TEST_REQUIRES_ARM_NEON_DOT;
8221 GemmMicrokernelTester()
8222 .mr(1)
8223 .nr(32)
8224 .kr(4)
8225 .sr(1)
8226 .m(1)
8227 .n(32)
8228 .k(8)
8229 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008230 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008231 }
8232
8233 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, strided_cm) {
8234 TEST_REQUIRES_ARM_NEON_DOT;
8235 GemmMicrokernelTester()
8236 .mr(1)
8237 .nr(32)
8238 .kr(4)
8239 .sr(1)
8240 .m(1)
8241 .n(32)
8242 .k(8)
8243 .cm_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008244 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008245 }
8246
8247 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_a_zero_point) {
8248 TEST_REQUIRES_ARM_NEON_DOT;
8249 for (size_t k = 1; k <= 40; k += 9) {
8250 GemmMicrokernelTester()
8251 .mr(1)
8252 .nr(32)
8253 .kr(4)
8254 .sr(1)
8255 .m(1)
8256 .n(32)
8257 .k(k)
8258 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008260 }
8261 }
8262
8263 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_b_zero_point) {
8264 TEST_REQUIRES_ARM_NEON_DOT;
8265 for (size_t k = 1; k <= 40; k += 9) {
8266 GemmMicrokernelTester()
8267 .mr(1)
8268 .nr(32)
8269 .kr(4)
8270 .sr(1)
8271 .m(1)
8272 .n(32)
8273 .k(k)
8274 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008275 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008276 }
8277 }
8278
8279 TEST(QU8_IGEMM_MINMAX_RNDNU_1X32C4__NEONDOT, no_zero_point) {
8280 TEST_REQUIRES_ARM_NEON_DOT;
8281 for (size_t k = 1; k <= 40; k += 9) {
8282 GemmMicrokernelTester()
8283 .mr(1)
8284 .nr(32)
8285 .kr(4)
8286 .sr(1)
8287 .m(1)
8288 .n(32)
8289 .k(k)
8290 .a_zero_point(0)
8291 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008292 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_1x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008293 }
8294 }
8295#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
8296
8297
8298#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8299 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8) {
8300 TEST_REQUIRES_ARM_NEON;
8301 GemmMicrokernelTester()
8302 .mr(4)
8303 .nr(16)
8304 .kr(1)
8305 .sr(1)
8306 .m(4)
8307 .n(16)
8308 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008309 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008310 }
8311
8312 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cn) {
8313 TEST_REQUIRES_ARM_NEON;
8314 GemmMicrokernelTester()
8315 .mr(4)
8316 .nr(16)
8317 .kr(1)
8318 .sr(1)
8319 .m(4)
8320 .n(16)
8321 .k(8)
8322 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008323 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008324 }
8325
8326 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile) {
8327 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008328 for (uint32_t n = 1; n <= 16; n++) {
8329 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008330 GemmMicrokernelTester()
8331 .mr(4)
8332 .nr(16)
8333 .kr(1)
8334 .sr(1)
8335 .m(m)
8336 .n(n)
8337 .k(8)
8338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008339 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008340 }
8341 }
8342 }
8343
8344 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_m) {
8345 TEST_REQUIRES_ARM_NEON;
8346 for (uint32_t m = 1; m <= 4; m++) {
8347 GemmMicrokernelTester()
8348 .mr(4)
8349 .nr(16)
8350 .kr(1)
8351 .sr(1)
8352 .m(m)
8353 .n(16)
8354 .k(8)
8355 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008356 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008357 }
8358 }
8359
8360 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_eq_8_subtile_n) {
8361 TEST_REQUIRES_ARM_NEON;
8362 for (uint32_t n = 1; n <= 16; n++) {
8363 GemmMicrokernelTester()
8364 .mr(4)
8365 .nr(16)
8366 .kr(1)
8367 .sr(1)
8368 .m(4)
8369 .n(n)
8370 .k(8)
8371 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008372 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008373 }
8374 }
8375
8376 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8) {
8377 TEST_REQUIRES_ARM_NEON;
8378 for (size_t k = 1; k < 8; k++) {
8379 GemmMicrokernelTester()
8380 .mr(4)
8381 .nr(16)
8382 .kr(1)
8383 .sr(1)
8384 .m(4)
8385 .n(16)
8386 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008387 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008388 }
8389 }
8390
8391 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_lt_8_subtile) {
8392 TEST_REQUIRES_ARM_NEON;
8393 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008394 for (uint32_t n = 1; n <= 16; n++) {
8395 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008396 GemmMicrokernelTester()
8397 .mr(4)
8398 .nr(16)
8399 .kr(1)
8400 .sr(1)
8401 .m(m)
8402 .n(n)
8403 .k(k)
8404 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008405 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008406 }
8407 }
8408 }
8409 }
8410
8411 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8) {
8412 TEST_REQUIRES_ARM_NEON;
8413 for (size_t k = 9; k < 16; k++) {
8414 GemmMicrokernelTester()
8415 .mr(4)
8416 .nr(16)
8417 .kr(1)
8418 .sr(1)
8419 .m(4)
8420 .n(16)
8421 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008423 }
8424 }
8425
8426 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_gt_8_subtile) {
8427 TEST_REQUIRES_ARM_NEON;
8428 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008429 for (uint32_t n = 1; n <= 16; n++) {
8430 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008431 GemmMicrokernelTester()
8432 .mr(4)
8433 .nr(16)
8434 .kr(1)
8435 .sr(1)
8436 .m(m)
8437 .n(n)
8438 .k(k)
8439 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008440 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008441 }
8442 }
8443 }
8444 }
8445
8446 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8) {
8447 TEST_REQUIRES_ARM_NEON;
8448 for (size_t k = 16; k <= 80; k += 8) {
8449 GemmMicrokernelTester()
8450 .mr(4)
8451 .nr(16)
8452 .kr(1)
8453 .sr(1)
8454 .m(4)
8455 .n(16)
8456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008457 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008458 }
8459 }
8460
8461 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, k_div_8_subtile) {
8462 TEST_REQUIRES_ARM_NEON;
8463 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008464 for (uint32_t n = 1; n <= 16; n++) {
8465 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008466 GemmMicrokernelTester()
8467 .mr(4)
8468 .nr(16)
8469 .kr(1)
8470 .sr(1)
8471 .m(m)
8472 .n(n)
8473 .k(k)
8474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008475 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008476 }
8477 }
8478 }
8479 }
8480
8481 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16) {
8482 TEST_REQUIRES_ARM_NEON;
8483 for (uint32_t n = 17; n < 32; n++) {
8484 for (size_t k = 1; k <= 40; k += 9) {
8485 GemmMicrokernelTester()
8486 .mr(4)
8487 .nr(16)
8488 .kr(1)
8489 .sr(1)
8490 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008491 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008492 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008493 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008494 }
8495 }
8496 }
8497
8498 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_strided_cn) {
8499 TEST_REQUIRES_ARM_NEON;
8500 for (uint32_t n = 17; n < 32; n++) {
8501 for (size_t k = 1; k <= 40; k += 9) {
8502 GemmMicrokernelTester()
8503 .mr(4)
8504 .nr(16)
8505 .kr(1)
8506 .sr(1)
8507 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008508 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008509 .k(k)
8510 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008511 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008512 }
8513 }
8514 }
8515
8516 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_subtile) {
8517 TEST_REQUIRES_ARM_NEON;
8518 for (uint32_t n = 17; n < 32; n++) {
8519 for (size_t k = 1; k <= 40; k += 9) {
8520 for (uint32_t m = 1; m <= 4; m++) {
8521 GemmMicrokernelTester()
8522 .mr(4)
8523 .nr(16)
8524 .kr(1)
8525 .sr(1)
8526 .m(m)
8527 .n(n)
8528 .k(k)
8529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008530 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008531 }
8532 }
8533 }
8534 }
8535
8536 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16) {
8537 TEST_REQUIRES_ARM_NEON;
8538 for (uint32_t n = 32; n <= 48; n += 16) {
8539 for (size_t k = 1; k <= 40; k += 9) {
8540 GemmMicrokernelTester()
8541 .mr(4)
8542 .nr(16)
8543 .kr(1)
8544 .sr(1)
8545 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008546 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008548 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008549 }
8550 }
8551 }
8552
8553 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_strided_cn) {
8554 TEST_REQUIRES_ARM_NEON;
8555 for (uint32_t n = 32; n <= 48; n += 16) {
8556 for (size_t k = 1; k <= 40; k += 9) {
8557 GemmMicrokernelTester()
8558 .mr(4)
8559 .nr(16)
8560 .kr(1)
8561 .sr(1)
8562 .m(4)
8563 .n(n)
8564 .k(k)
8565 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008566 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008567 }
8568 }
8569 }
8570
8571 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_subtile) {
8572 TEST_REQUIRES_ARM_NEON;
8573 for (uint32_t n = 32; n <= 48; n += 16) {
8574 for (size_t k = 1; k <= 40; k += 9) {
8575 for (uint32_t m = 1; m <= 4; m++) {
8576 GemmMicrokernelTester()
8577 .mr(4)
8578 .nr(16)
8579 .kr(1)
8580 .sr(1)
8581 .m(m)
8582 .n(n)
8583 .k(k)
8584 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008585 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008586 }
8587 }
8588 }
8589 }
8590
8591 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel) {
8592 TEST_REQUIRES_ARM_NEON;
8593 for (size_t k = 1; k <= 40; k += 9) {
8594 GemmMicrokernelTester()
8595 .mr(4)
8596 .nr(16)
8597 .kr(1)
8598 .sr(1)
8599 .m(4)
8600 .n(16)
8601 .k(k)
8602 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008603 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008604 }
8605 }
8606
8607 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, small_kernel_subtile) {
8608 TEST_REQUIRES_ARM_NEON;
8609 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008610 for (uint32_t n = 1; n <= 16; n++) {
8611 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008612 GemmMicrokernelTester()
8613 .mr(4)
8614 .nr(16)
8615 .kr(1)
8616 .sr(1)
8617 .m(m)
8618 .n(n)
8619 .k(k)
8620 .ks(3)
8621 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008622 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008623 }
8624 }
8625 }
8626 }
8627
8628 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_gt_16_small_kernel) {
8629 TEST_REQUIRES_ARM_NEON;
8630 for (uint32_t n = 17; n < 32; n++) {
8631 for (size_t k = 1; k <= 40; k += 9) {
8632 GemmMicrokernelTester()
8633 .mr(4)
8634 .nr(16)
8635 .kr(1)
8636 .sr(1)
8637 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008638 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008639 .k(k)
8640 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008641 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008642 }
8643 }
8644 }
8645
8646 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, n_div_16_small_kernel) {
8647 TEST_REQUIRES_ARM_NEON;
8648 for (uint32_t n = 32; n <= 48; n += 16) {
8649 for (size_t k = 1; k <= 40; k += 9) {
8650 GemmMicrokernelTester()
8651 .mr(4)
8652 .nr(16)
8653 .kr(1)
8654 .sr(1)
8655 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008656 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008657 .k(k)
8658 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08008659 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008660 }
8661 }
8662 }
8663
8664 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm_subtile) {
8665 TEST_REQUIRES_ARM_NEON;
8666 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008667 for (uint32_t n = 1; n <= 16; n++) {
8668 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008669 GemmMicrokernelTester()
8670 .mr(4)
8671 .nr(16)
8672 .kr(1)
8673 .sr(1)
8674 .m(m)
8675 .n(n)
8676 .k(k)
8677 .cm_stride(19)
8678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008679 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008680 }
8681 }
8682 }
8683 }
8684
8685 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, a_offset) {
8686 TEST_REQUIRES_ARM_NEON;
8687 for (size_t k = 1; k <= 40; k += 9) {
8688 GemmMicrokernelTester()
8689 .mr(4)
8690 .nr(16)
8691 .kr(1)
8692 .sr(1)
8693 .m(4)
8694 .n(16)
8695 .k(k)
8696 .ks(3)
8697 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008698 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008699 }
8700 }
8701
8702 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, zero) {
8703 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008704 for (size_t k = 1; k <= 40; k += 9) {
8705 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008706 GemmMicrokernelTester()
8707 .mr(4)
8708 .nr(16)
8709 .kr(1)
8710 .sr(1)
8711 .m(4)
8712 .n(16)
8713 .k(k)
8714 .ks(3)
8715 .a_offset(163)
8716 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08008717 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008718 }
8719 }
8720 }
8721
8722 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmin) {
8723 TEST_REQUIRES_ARM_NEON;
8724 GemmMicrokernelTester()
8725 .mr(4)
8726 .nr(16)
8727 .kr(1)
8728 .sr(1)
8729 .m(4)
8730 .n(16)
8731 .k(8)
8732 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008733 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008734 }
8735
8736 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, qmax) {
8737 TEST_REQUIRES_ARM_NEON;
8738 GemmMicrokernelTester()
8739 .mr(4)
8740 .nr(16)
8741 .kr(1)
8742 .sr(1)
8743 .m(4)
8744 .n(16)
8745 .k(8)
8746 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008747 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008748 }
8749
8750 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, strided_cm) {
8751 TEST_REQUIRES_ARM_NEON;
8752 GemmMicrokernelTester()
8753 .mr(4)
8754 .nr(16)
8755 .kr(1)
8756 .sr(1)
8757 .m(4)
8758 .n(16)
8759 .k(8)
8760 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008761 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008762 }
8763
8764 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_a_zero_point) {
8765 TEST_REQUIRES_ARM_NEON;
8766 for (size_t k = 1; k <= 40; k += 9) {
8767 GemmMicrokernelTester()
8768 .mr(4)
8769 .nr(16)
8770 .kr(1)
8771 .sr(1)
8772 .m(4)
8773 .n(16)
8774 .k(k)
8775 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008776 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008777 }
8778 }
8779
8780 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_b_zero_point) {
8781 TEST_REQUIRES_ARM_NEON;
8782 for (size_t k = 1; k <= 40; k += 9) {
8783 GemmMicrokernelTester()
8784 .mr(4)
8785 .nr(16)
8786 .kr(1)
8787 .sr(1)
8788 .m(4)
8789 .n(16)
8790 .k(k)
8791 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008792 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008793 }
8794 }
8795
8796 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A75, no_zero_point) {
8797 TEST_REQUIRES_ARM_NEON;
8798 for (size_t k = 1; k <= 40; k += 9) {
8799 GemmMicrokernelTester()
8800 .mr(4)
8801 .nr(16)
8802 .kr(1)
8803 .sr(1)
8804 .m(4)
8805 .n(16)
8806 .k(k)
8807 .a_zero_point(0)
8808 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008809 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008810 }
8811 }
8812#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8813
8814
8815#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8816 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
8817 TEST_REQUIRES_ARM_NEON;
8818 GemmMicrokernelTester()
8819 .mr(4)
8820 .nr(16)
8821 .kr(1)
8822 .sr(1)
8823 .m(4)
8824 .n(16)
8825 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008826 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008827 }
8828
8829 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
8830 TEST_REQUIRES_ARM_NEON;
8831 GemmMicrokernelTester()
8832 .mr(4)
8833 .nr(16)
8834 .kr(1)
8835 .sr(1)
8836 .m(4)
8837 .n(16)
8838 .k(8)
8839 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008840 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008841 }
8842
8843 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
8844 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008845 for (uint32_t n = 1; n <= 16; n++) {
8846 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008847 GemmMicrokernelTester()
8848 .mr(4)
8849 .nr(16)
8850 .kr(1)
8851 .sr(1)
8852 .m(m)
8853 .n(n)
8854 .k(8)
8855 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008856 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008857 }
8858 }
8859 }
8860
8861 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
8862 TEST_REQUIRES_ARM_NEON;
8863 for (uint32_t m = 1; m <= 4; m++) {
8864 GemmMicrokernelTester()
8865 .mr(4)
8866 .nr(16)
8867 .kr(1)
8868 .sr(1)
8869 .m(m)
8870 .n(16)
8871 .k(8)
8872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008873 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008874 }
8875 }
8876
8877 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
8878 TEST_REQUIRES_ARM_NEON;
8879 for (uint32_t n = 1; n <= 16; n++) {
8880 GemmMicrokernelTester()
8881 .mr(4)
8882 .nr(16)
8883 .kr(1)
8884 .sr(1)
8885 .m(4)
8886 .n(n)
8887 .k(8)
8888 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008889 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008890 }
8891 }
8892
8893 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
8894 TEST_REQUIRES_ARM_NEON;
8895 for (size_t k = 1; k < 8; k++) {
8896 GemmMicrokernelTester()
8897 .mr(4)
8898 .nr(16)
8899 .kr(1)
8900 .sr(1)
8901 .m(4)
8902 .n(16)
8903 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008904 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008905 }
8906 }
8907
8908 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
8909 TEST_REQUIRES_ARM_NEON;
8910 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008911 for (uint32_t n = 1; n <= 16; n++) {
8912 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008913 GemmMicrokernelTester()
8914 .mr(4)
8915 .nr(16)
8916 .kr(1)
8917 .sr(1)
8918 .m(m)
8919 .n(n)
8920 .k(k)
8921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008922 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008923 }
8924 }
8925 }
8926 }
8927
8928 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
8929 TEST_REQUIRES_ARM_NEON;
8930 for (size_t k = 9; k < 16; k++) {
8931 GemmMicrokernelTester()
8932 .mr(4)
8933 .nr(16)
8934 .kr(1)
8935 .sr(1)
8936 .m(4)
8937 .n(16)
8938 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008939 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008940 }
8941 }
8942
8943 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
8944 TEST_REQUIRES_ARM_NEON;
8945 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008946 for (uint32_t n = 1; n <= 16; n++) {
8947 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008948 GemmMicrokernelTester()
8949 .mr(4)
8950 .nr(16)
8951 .kr(1)
8952 .sr(1)
8953 .m(m)
8954 .n(n)
8955 .k(k)
8956 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008957 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008958 }
8959 }
8960 }
8961 }
8962
8963 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
8964 TEST_REQUIRES_ARM_NEON;
8965 for (size_t k = 16; k <= 80; k += 8) {
8966 GemmMicrokernelTester()
8967 .mr(4)
8968 .nr(16)
8969 .kr(1)
8970 .sr(1)
8971 .m(4)
8972 .n(16)
8973 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008974 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008975 }
8976 }
8977
8978 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
8979 TEST_REQUIRES_ARM_NEON;
8980 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008981 for (uint32_t n = 1; n <= 16; n++) {
8982 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008983 GemmMicrokernelTester()
8984 .mr(4)
8985 .nr(16)
8986 .kr(1)
8987 .sr(1)
8988 .m(m)
8989 .n(n)
8990 .k(k)
8991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008992 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008993 }
8994 }
8995 }
8996 }
8997
8998 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16) {
8999 TEST_REQUIRES_ARM_NEON;
9000 for (uint32_t n = 17; n < 32; n++) {
9001 for (size_t k = 1; k <= 40; k += 9) {
9002 GemmMicrokernelTester()
9003 .mr(4)
9004 .nr(16)
9005 .kr(1)
9006 .sr(1)
9007 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009008 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009009 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009010 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009011 }
9012 }
9013 }
9014
9015 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_strided_cn) {
9016 TEST_REQUIRES_ARM_NEON;
9017 for (uint32_t n = 17; n < 32; n++) {
9018 for (size_t k = 1; k <= 40; k += 9) {
9019 GemmMicrokernelTester()
9020 .mr(4)
9021 .nr(16)
9022 .kr(1)
9023 .sr(1)
9024 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009025 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009026 .k(k)
9027 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009028 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009029 }
9030 }
9031 }
9032
9033 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_subtile) {
9034 TEST_REQUIRES_ARM_NEON;
9035 for (uint32_t n = 17; n < 32; n++) {
9036 for (size_t k = 1; k <= 40; k += 9) {
9037 for (uint32_t m = 1; m <= 4; m++) {
9038 GemmMicrokernelTester()
9039 .mr(4)
9040 .nr(16)
9041 .kr(1)
9042 .sr(1)
9043 .m(m)
9044 .n(n)
9045 .k(k)
9046 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009047 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009048 }
9049 }
9050 }
9051 }
9052
9053 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16) {
9054 TEST_REQUIRES_ARM_NEON;
9055 for (uint32_t n = 32; n <= 48; n += 16) {
9056 for (size_t k = 1; k <= 40; k += 9) {
9057 GemmMicrokernelTester()
9058 .mr(4)
9059 .nr(16)
9060 .kr(1)
9061 .sr(1)
9062 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009063 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009064 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009065 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009066 }
9067 }
9068 }
9069
9070 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_strided_cn) {
9071 TEST_REQUIRES_ARM_NEON;
9072 for (uint32_t n = 32; n <= 48; n += 16) {
9073 for (size_t k = 1; k <= 40; k += 9) {
9074 GemmMicrokernelTester()
9075 .mr(4)
9076 .nr(16)
9077 .kr(1)
9078 .sr(1)
9079 .m(4)
9080 .n(n)
9081 .k(k)
9082 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009083 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009084 }
9085 }
9086 }
9087
9088 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_subtile) {
9089 TEST_REQUIRES_ARM_NEON;
9090 for (uint32_t n = 32; n <= 48; n += 16) {
9091 for (size_t k = 1; k <= 40; k += 9) {
9092 for (uint32_t m = 1; m <= 4; m++) {
9093 GemmMicrokernelTester()
9094 .mr(4)
9095 .nr(16)
9096 .kr(1)
9097 .sr(1)
9098 .m(m)
9099 .n(n)
9100 .k(k)
9101 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009102 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009103 }
9104 }
9105 }
9106 }
9107
9108 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel) {
9109 TEST_REQUIRES_ARM_NEON;
9110 for (size_t k = 1; k <= 40; k += 9) {
9111 GemmMicrokernelTester()
9112 .mr(4)
9113 .nr(16)
9114 .kr(1)
9115 .sr(1)
9116 .m(4)
9117 .n(16)
9118 .k(k)
9119 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009120 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009121 }
9122 }
9123
9124 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, small_kernel_subtile) {
9125 TEST_REQUIRES_ARM_NEON;
9126 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009127 for (uint32_t n = 1; n <= 16; n++) {
9128 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009129 GemmMicrokernelTester()
9130 .mr(4)
9131 .nr(16)
9132 .kr(1)
9133 .sr(1)
9134 .m(m)
9135 .n(n)
9136 .k(k)
9137 .ks(3)
9138 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009139 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009140 }
9141 }
9142 }
9143 }
9144
9145 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_16_small_kernel) {
9146 TEST_REQUIRES_ARM_NEON;
9147 for (uint32_t n = 17; n < 32; n++) {
9148 for (size_t k = 1; k <= 40; k += 9) {
9149 GemmMicrokernelTester()
9150 .mr(4)
9151 .nr(16)
9152 .kr(1)
9153 .sr(1)
9154 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009155 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009156 .k(k)
9157 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009158 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009159 }
9160 }
9161 }
9162
9163 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_16_small_kernel) {
9164 TEST_REQUIRES_ARM_NEON;
9165 for (uint32_t n = 32; n <= 48; n += 16) {
9166 for (size_t k = 1; k <= 40; k += 9) {
9167 GemmMicrokernelTester()
9168 .mr(4)
9169 .nr(16)
9170 .kr(1)
9171 .sr(1)
9172 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009173 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009174 .k(k)
9175 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009176 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009177 }
9178 }
9179 }
9180
9181 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
9182 TEST_REQUIRES_ARM_NEON;
9183 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009184 for (uint32_t n = 1; n <= 16; n++) {
9185 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009186 GemmMicrokernelTester()
9187 .mr(4)
9188 .nr(16)
9189 .kr(1)
9190 .sr(1)
9191 .m(m)
9192 .n(n)
9193 .k(k)
9194 .cm_stride(19)
9195 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009196 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009197 }
9198 }
9199 }
9200 }
9201
9202 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, a_offset) {
9203 TEST_REQUIRES_ARM_NEON;
9204 for (size_t k = 1; k <= 40; k += 9) {
9205 GemmMicrokernelTester()
9206 .mr(4)
9207 .nr(16)
9208 .kr(1)
9209 .sr(1)
9210 .m(4)
9211 .n(16)
9212 .k(k)
9213 .ks(3)
9214 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08009215 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009216 }
9217 }
9218
9219 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, zero) {
9220 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009221 for (size_t k = 1; k <= 40; k += 9) {
9222 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009223 GemmMicrokernelTester()
9224 .mr(4)
9225 .nr(16)
9226 .kr(1)
9227 .sr(1)
9228 .m(4)
9229 .n(16)
9230 .k(k)
9231 .ks(3)
9232 .a_offset(163)
9233 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009234 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009235 }
9236 }
9237 }
9238
9239 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
9240 TEST_REQUIRES_ARM_NEON;
9241 GemmMicrokernelTester()
9242 .mr(4)
9243 .nr(16)
9244 .kr(1)
9245 .sr(1)
9246 .m(4)
9247 .n(16)
9248 .k(8)
9249 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009250 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009251 }
9252
9253 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
9254 TEST_REQUIRES_ARM_NEON;
9255 GemmMicrokernelTester()
9256 .mr(4)
9257 .nr(16)
9258 .kr(1)
9259 .sr(1)
9260 .m(4)
9261 .n(16)
9262 .k(8)
9263 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009264 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009265 }
9266
9267 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
9268 TEST_REQUIRES_ARM_NEON;
9269 GemmMicrokernelTester()
9270 .mr(4)
9271 .nr(16)
9272 .kr(1)
9273 .sr(1)
9274 .m(4)
9275 .n(16)
9276 .k(8)
9277 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009278 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009279 }
9280
9281 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_a_zero_point) {
9282 TEST_REQUIRES_ARM_NEON;
9283 for (size_t k = 1; k <= 40; k += 9) {
9284 GemmMicrokernelTester()
9285 .mr(4)
9286 .nr(16)
9287 .kr(1)
9288 .sr(1)
9289 .m(4)
9290 .n(16)
9291 .k(k)
9292 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009293 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009294 }
9295 }
9296
9297 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_b_zero_point) {
9298 TEST_REQUIRES_ARM_NEON;
9299 for (size_t k = 1; k <= 40; k += 9) {
9300 GemmMicrokernelTester()
9301 .mr(4)
9302 .nr(16)
9303 .kr(1)
9304 .sr(1)
9305 .m(4)
9306 .n(16)
9307 .k(k)
9308 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009309 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009310 }
9311 }
9312
9313 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A53, no_zero_point) {
9314 TEST_REQUIRES_ARM_NEON;
9315 for (size_t k = 1; k <= 40; k += 9) {
9316 GemmMicrokernelTester()
9317 .mr(4)
9318 .nr(16)
9319 .kr(1)
9320 .sr(1)
9321 .m(4)
9322 .n(16)
9323 .k(k)
9324 .a_zero_point(0)
9325 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009326 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009327 }
9328 }
9329#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9330
9331
9332#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9333 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
9334 TEST_REQUIRES_ARM_NEON;
9335 GemmMicrokernelTester()
9336 .mr(4)
9337 .nr(16)
9338 .kr(1)
9339 .sr(1)
9340 .m(4)
9341 .n(16)
9342 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009343 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009344 }
9345
9346 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
9347 TEST_REQUIRES_ARM_NEON;
9348 GemmMicrokernelTester()
9349 .mr(4)
9350 .nr(16)
9351 .kr(1)
9352 .sr(1)
9353 .m(4)
9354 .n(16)
9355 .k(8)
9356 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009357 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009358 }
9359
9360 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
9361 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009362 for (uint32_t n = 1; n <= 16; n++) {
9363 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009364 GemmMicrokernelTester()
9365 .mr(4)
9366 .nr(16)
9367 .kr(1)
9368 .sr(1)
9369 .m(m)
9370 .n(n)
9371 .k(8)
9372 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009373 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009374 }
9375 }
9376 }
9377
9378 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
9379 TEST_REQUIRES_ARM_NEON;
9380 for (uint32_t m = 1; m <= 4; m++) {
9381 GemmMicrokernelTester()
9382 .mr(4)
9383 .nr(16)
9384 .kr(1)
9385 .sr(1)
9386 .m(m)
9387 .n(16)
9388 .k(8)
9389 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009390 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009391 }
9392 }
9393
9394 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
9395 TEST_REQUIRES_ARM_NEON;
9396 for (uint32_t n = 1; n <= 16; n++) {
9397 GemmMicrokernelTester()
9398 .mr(4)
9399 .nr(16)
9400 .kr(1)
9401 .sr(1)
9402 .m(4)
9403 .n(n)
9404 .k(8)
9405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009406 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009407 }
9408 }
9409
9410 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
9411 TEST_REQUIRES_ARM_NEON;
9412 for (size_t k = 1; k < 8; k++) {
9413 GemmMicrokernelTester()
9414 .mr(4)
9415 .nr(16)
9416 .kr(1)
9417 .sr(1)
9418 .m(4)
9419 .n(16)
9420 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009421 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009422 }
9423 }
9424
9425 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
9426 TEST_REQUIRES_ARM_NEON;
9427 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009428 for (uint32_t n = 1; n <= 16; n++) {
9429 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009430 GemmMicrokernelTester()
9431 .mr(4)
9432 .nr(16)
9433 .kr(1)
9434 .sr(1)
9435 .m(m)
9436 .n(n)
9437 .k(k)
9438 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009439 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009440 }
9441 }
9442 }
9443 }
9444
9445 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
9446 TEST_REQUIRES_ARM_NEON;
9447 for (size_t k = 9; k < 16; k++) {
9448 GemmMicrokernelTester()
9449 .mr(4)
9450 .nr(16)
9451 .kr(1)
9452 .sr(1)
9453 .m(4)
9454 .n(16)
9455 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009456 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009457 }
9458 }
9459
9460 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
9461 TEST_REQUIRES_ARM_NEON;
9462 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009463 for (uint32_t n = 1; n <= 16; n++) {
9464 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009465 GemmMicrokernelTester()
9466 .mr(4)
9467 .nr(16)
9468 .kr(1)
9469 .sr(1)
9470 .m(m)
9471 .n(n)
9472 .k(k)
9473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009474 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009475 }
9476 }
9477 }
9478 }
9479
9480 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
9481 TEST_REQUIRES_ARM_NEON;
9482 for (size_t k = 16; k <= 80; k += 8) {
9483 GemmMicrokernelTester()
9484 .mr(4)
9485 .nr(16)
9486 .kr(1)
9487 .sr(1)
9488 .m(4)
9489 .n(16)
9490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009491 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009492 }
9493 }
9494
9495 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
9496 TEST_REQUIRES_ARM_NEON;
9497 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009498 for (uint32_t n = 1; n <= 16; n++) {
9499 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009500 GemmMicrokernelTester()
9501 .mr(4)
9502 .nr(16)
9503 .kr(1)
9504 .sr(1)
9505 .m(m)
9506 .n(n)
9507 .k(k)
9508 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009509 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009510 }
9511 }
9512 }
9513 }
9514
9515 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
9516 TEST_REQUIRES_ARM_NEON;
9517 for (uint32_t n = 17; n < 32; n++) {
9518 for (size_t k = 1; k <= 40; k += 9) {
9519 GemmMicrokernelTester()
9520 .mr(4)
9521 .nr(16)
9522 .kr(1)
9523 .sr(1)
9524 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009525 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009526 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009527 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009528 }
9529 }
9530 }
9531
9532 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
9533 TEST_REQUIRES_ARM_NEON;
9534 for (uint32_t n = 17; n < 32; n++) {
9535 for (size_t k = 1; k <= 40; k += 9) {
9536 GemmMicrokernelTester()
9537 .mr(4)
9538 .nr(16)
9539 .kr(1)
9540 .sr(1)
9541 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009542 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009543 .k(k)
9544 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009545 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009546 }
9547 }
9548 }
9549
9550 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
9551 TEST_REQUIRES_ARM_NEON;
9552 for (uint32_t n = 17; n < 32; n++) {
9553 for (size_t k = 1; k <= 40; k += 9) {
9554 for (uint32_t m = 1; m <= 4; m++) {
9555 GemmMicrokernelTester()
9556 .mr(4)
9557 .nr(16)
9558 .kr(1)
9559 .sr(1)
9560 .m(m)
9561 .n(n)
9562 .k(k)
9563 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009564 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009565 }
9566 }
9567 }
9568 }
9569
9570 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
9571 TEST_REQUIRES_ARM_NEON;
9572 for (uint32_t n = 32; n <= 48; n += 16) {
9573 for (size_t k = 1; k <= 40; k += 9) {
9574 GemmMicrokernelTester()
9575 .mr(4)
9576 .nr(16)
9577 .kr(1)
9578 .sr(1)
9579 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009580 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009581 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009582 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009583 }
9584 }
9585 }
9586
9587 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
9588 TEST_REQUIRES_ARM_NEON;
9589 for (uint32_t n = 32; n <= 48; n += 16) {
9590 for (size_t k = 1; k <= 40; k += 9) {
9591 GemmMicrokernelTester()
9592 .mr(4)
9593 .nr(16)
9594 .kr(1)
9595 .sr(1)
9596 .m(4)
9597 .n(n)
9598 .k(k)
9599 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009600 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009601 }
9602 }
9603 }
9604
9605 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
9606 TEST_REQUIRES_ARM_NEON;
9607 for (uint32_t n = 32; n <= 48; n += 16) {
9608 for (size_t k = 1; k <= 40; k += 9) {
9609 for (uint32_t m = 1; m <= 4; m++) {
9610 GemmMicrokernelTester()
9611 .mr(4)
9612 .nr(16)
9613 .kr(1)
9614 .sr(1)
9615 .m(m)
9616 .n(n)
9617 .k(k)
9618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009619 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009620 }
9621 }
9622 }
9623 }
9624
9625 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel) {
9626 TEST_REQUIRES_ARM_NEON;
9627 for (size_t k = 1; k <= 40; k += 9) {
9628 GemmMicrokernelTester()
9629 .mr(4)
9630 .nr(16)
9631 .kr(1)
9632 .sr(1)
9633 .m(4)
9634 .n(16)
9635 .k(k)
9636 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009637 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009638 }
9639 }
9640
9641 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, small_kernel_subtile) {
9642 TEST_REQUIRES_ARM_NEON;
9643 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009644 for (uint32_t n = 1; n <= 16; n++) {
9645 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009646 GemmMicrokernelTester()
9647 .mr(4)
9648 .nr(16)
9649 .kr(1)
9650 .sr(1)
9651 .m(m)
9652 .n(n)
9653 .k(k)
9654 .ks(3)
9655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009656 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009657 }
9658 }
9659 }
9660 }
9661
9662 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_small_kernel) {
9663 TEST_REQUIRES_ARM_NEON;
9664 for (uint32_t n = 17; n < 32; n++) {
9665 for (size_t k = 1; k <= 40; k += 9) {
9666 GemmMicrokernelTester()
9667 .mr(4)
9668 .nr(16)
9669 .kr(1)
9670 .sr(1)
9671 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009672 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009673 .k(k)
9674 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009675 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009676 }
9677 }
9678 }
9679
9680 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_small_kernel) {
9681 TEST_REQUIRES_ARM_NEON;
9682 for (uint32_t n = 32; n <= 48; n += 16) {
9683 for (size_t k = 1; k <= 40; k += 9) {
9684 GemmMicrokernelTester()
9685 .mr(4)
9686 .nr(16)
9687 .kr(1)
9688 .sr(1)
9689 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009690 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009691 .k(k)
9692 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -08009693 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009694 }
9695 }
9696 }
9697
9698 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
9699 TEST_REQUIRES_ARM_NEON;
9700 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009701 for (uint32_t n = 1; n <= 16; n++) {
9702 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009703 GemmMicrokernelTester()
9704 .mr(4)
9705 .nr(16)
9706 .kr(1)
9707 .sr(1)
9708 .m(m)
9709 .n(n)
9710 .k(k)
9711 .cm_stride(19)
9712 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009713 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009714 }
9715 }
9716 }
9717 }
9718
9719 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, a_offset) {
9720 TEST_REQUIRES_ARM_NEON;
9721 for (size_t k = 1; k <= 40; k += 9) {
9722 GemmMicrokernelTester()
9723 .mr(4)
9724 .nr(16)
9725 .kr(1)
9726 .sr(1)
9727 .m(4)
9728 .n(16)
9729 .k(k)
9730 .ks(3)
9731 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08009732 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009733 }
9734 }
9735
9736 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, zero) {
9737 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009738 for (size_t k = 1; k <= 40; k += 9) {
9739 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009740 GemmMicrokernelTester()
9741 .mr(4)
9742 .nr(16)
9743 .kr(1)
9744 .sr(1)
9745 .m(4)
9746 .n(16)
9747 .k(k)
9748 .ks(3)
9749 .a_offset(163)
9750 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -08009751 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009752 }
9753 }
9754 }
9755
9756 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
9757 TEST_REQUIRES_ARM_NEON;
9758 GemmMicrokernelTester()
9759 .mr(4)
9760 .nr(16)
9761 .kr(1)
9762 .sr(1)
9763 .m(4)
9764 .n(16)
9765 .k(8)
9766 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009767 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009768 }
9769
9770 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
9771 TEST_REQUIRES_ARM_NEON;
9772 GemmMicrokernelTester()
9773 .mr(4)
9774 .nr(16)
9775 .kr(1)
9776 .sr(1)
9777 .m(4)
9778 .n(16)
9779 .k(8)
9780 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009781 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009782 }
9783
9784 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
9785 TEST_REQUIRES_ARM_NEON;
9786 GemmMicrokernelTester()
9787 .mr(4)
9788 .nr(16)
9789 .kr(1)
9790 .sr(1)
9791 .m(4)
9792 .n(16)
9793 .k(8)
9794 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009795 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009796 }
9797
9798 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) {
9799 TEST_REQUIRES_ARM_NEON;
9800 for (size_t k = 1; k <= 40; k += 9) {
9801 GemmMicrokernelTester()
9802 .mr(4)
9803 .nr(16)
9804 .kr(1)
9805 .sr(1)
9806 .m(4)
9807 .n(16)
9808 .k(k)
9809 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009810 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009811 }
9812 }
9813
9814 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) {
9815 TEST_REQUIRES_ARM_NEON;
9816 for (size_t k = 1; k <= 40; k += 9) {
9817 GemmMicrokernelTester()
9818 .mr(4)
9819 .nr(16)
9820 .kr(1)
9821 .sr(1)
9822 .m(4)
9823 .n(16)
9824 .k(k)
9825 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009826 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009827 }
9828 }
9829
9830 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) {
9831 TEST_REQUIRES_ARM_NEON;
9832 for (size_t k = 1; k <= 40; k += 9) {
9833 GemmMicrokernelTester()
9834 .mr(4)
9835 .nr(16)
9836 .kr(1)
9837 .sr(1)
9838 .m(4)
9839 .n(16)
9840 .k(k)
9841 .a_zero_point(0)
9842 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009843 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009844 }
9845 }
9846#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9847
9848
9849#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9850 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
9851 TEST_REQUIRES_ARM_NEON;
9852 GemmMicrokernelTester()
9853 .mr(4)
9854 .nr(16)
9855 .kr(1)
9856 .sr(1)
9857 .m(4)
9858 .n(16)
9859 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009860 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009861 }
9862
9863 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
9864 TEST_REQUIRES_ARM_NEON;
9865 GemmMicrokernelTester()
9866 .mr(4)
9867 .nr(16)
9868 .kr(1)
9869 .sr(1)
9870 .m(4)
9871 .n(16)
9872 .k(8)
9873 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009874 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009875 }
9876
9877 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
9878 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009879 for (uint32_t n = 1; n <= 16; n++) {
9880 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009881 GemmMicrokernelTester()
9882 .mr(4)
9883 .nr(16)
9884 .kr(1)
9885 .sr(1)
9886 .m(m)
9887 .n(n)
9888 .k(8)
9889 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009890 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009891 }
9892 }
9893 }
9894
9895 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
9896 TEST_REQUIRES_ARM_NEON;
9897 for (uint32_t m = 1; m <= 4; m++) {
9898 GemmMicrokernelTester()
9899 .mr(4)
9900 .nr(16)
9901 .kr(1)
9902 .sr(1)
9903 .m(m)
9904 .n(16)
9905 .k(8)
9906 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009907 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009908 }
9909 }
9910
9911 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
9912 TEST_REQUIRES_ARM_NEON;
9913 for (uint32_t n = 1; n <= 16; n++) {
9914 GemmMicrokernelTester()
9915 .mr(4)
9916 .nr(16)
9917 .kr(1)
9918 .sr(1)
9919 .m(4)
9920 .n(n)
9921 .k(8)
9922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009923 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009924 }
9925 }
9926
9927 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
9928 TEST_REQUIRES_ARM_NEON;
9929 for (size_t k = 1; k < 8; k++) {
9930 GemmMicrokernelTester()
9931 .mr(4)
9932 .nr(16)
9933 .kr(1)
9934 .sr(1)
9935 .m(4)
9936 .n(16)
9937 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009938 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009939 }
9940 }
9941
9942 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
9943 TEST_REQUIRES_ARM_NEON;
9944 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009945 for (uint32_t n = 1; n <= 16; n++) {
9946 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009947 GemmMicrokernelTester()
9948 .mr(4)
9949 .nr(16)
9950 .kr(1)
9951 .sr(1)
9952 .m(m)
9953 .n(n)
9954 .k(k)
9955 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009956 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009957 }
9958 }
9959 }
9960 }
9961
9962 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
9963 TEST_REQUIRES_ARM_NEON;
9964 for (size_t k = 9; k < 16; k++) {
9965 GemmMicrokernelTester()
9966 .mr(4)
9967 .nr(16)
9968 .kr(1)
9969 .sr(1)
9970 .m(4)
9971 .n(16)
9972 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009973 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009974 }
9975 }
9976
9977 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
9978 TEST_REQUIRES_ARM_NEON;
9979 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009980 for (uint32_t n = 1; n <= 16; n++) {
9981 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009982 GemmMicrokernelTester()
9983 .mr(4)
9984 .nr(16)
9985 .kr(1)
9986 .sr(1)
9987 .m(m)
9988 .n(n)
9989 .k(k)
9990 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009991 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009992 }
9993 }
9994 }
9995 }
9996
9997 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
9998 TEST_REQUIRES_ARM_NEON;
9999 for (size_t k = 16; k <= 80; k += 8) {
10000 GemmMicrokernelTester()
10001 .mr(4)
10002 .nr(16)
10003 .kr(1)
10004 .sr(1)
10005 .m(4)
10006 .n(16)
10007 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010009 }
10010 }
10011
10012 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
10013 TEST_REQUIRES_ARM_NEON;
10014 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010015 for (uint32_t n = 1; n <= 16; n++) {
10016 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010017 GemmMicrokernelTester()
10018 .mr(4)
10019 .nr(16)
10020 .kr(1)
10021 .sr(1)
10022 .m(m)
10023 .n(n)
10024 .k(k)
10025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010026 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010027 }
10028 }
10029 }
10030 }
10031
10032 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
10033 TEST_REQUIRES_ARM_NEON;
10034 for (uint32_t n = 17; n < 32; n++) {
10035 for (size_t k = 1; k <= 40; k += 9) {
10036 GemmMicrokernelTester()
10037 .mr(4)
10038 .nr(16)
10039 .kr(1)
10040 .sr(1)
10041 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010042 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010043 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010044 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010045 }
10046 }
10047 }
10048
10049 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
10050 TEST_REQUIRES_ARM_NEON;
10051 for (uint32_t n = 17; n < 32; n++) {
10052 for (size_t k = 1; k <= 40; k += 9) {
10053 GemmMicrokernelTester()
10054 .mr(4)
10055 .nr(16)
10056 .kr(1)
10057 .sr(1)
10058 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010059 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010060 .k(k)
10061 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010062 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010063 }
10064 }
10065 }
10066
10067 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
10068 TEST_REQUIRES_ARM_NEON;
10069 for (uint32_t n = 17; n < 32; n++) {
10070 for (size_t k = 1; k <= 40; k += 9) {
10071 for (uint32_t m = 1; m <= 4; m++) {
10072 GemmMicrokernelTester()
10073 .mr(4)
10074 .nr(16)
10075 .kr(1)
10076 .sr(1)
10077 .m(m)
10078 .n(n)
10079 .k(k)
10080 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010081 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010082 }
10083 }
10084 }
10085 }
10086
10087 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
10088 TEST_REQUIRES_ARM_NEON;
10089 for (uint32_t n = 32; n <= 48; n += 16) {
10090 for (size_t k = 1; k <= 40; k += 9) {
10091 GemmMicrokernelTester()
10092 .mr(4)
10093 .nr(16)
10094 .kr(1)
10095 .sr(1)
10096 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010097 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010098 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010099 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010100 }
10101 }
10102 }
10103
10104 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
10105 TEST_REQUIRES_ARM_NEON;
10106 for (uint32_t n = 32; n <= 48; n += 16) {
10107 for (size_t k = 1; k <= 40; k += 9) {
10108 GemmMicrokernelTester()
10109 .mr(4)
10110 .nr(16)
10111 .kr(1)
10112 .sr(1)
10113 .m(4)
10114 .n(n)
10115 .k(k)
10116 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010117 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010118 }
10119 }
10120 }
10121
10122 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
10123 TEST_REQUIRES_ARM_NEON;
10124 for (uint32_t n = 32; n <= 48; n += 16) {
10125 for (size_t k = 1; k <= 40; k += 9) {
10126 for (uint32_t m = 1; m <= 4; m++) {
10127 GemmMicrokernelTester()
10128 .mr(4)
10129 .nr(16)
10130 .kr(1)
10131 .sr(1)
10132 .m(m)
10133 .n(n)
10134 .k(k)
10135 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010136 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010137 }
10138 }
10139 }
10140 }
10141
10142 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel) {
10143 TEST_REQUIRES_ARM_NEON;
10144 for (size_t k = 1; k <= 40; k += 9) {
10145 GemmMicrokernelTester()
10146 .mr(4)
10147 .nr(16)
10148 .kr(1)
10149 .sr(1)
10150 .m(4)
10151 .n(16)
10152 .k(k)
10153 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010154 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010155 }
10156 }
10157
10158 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, small_kernel_subtile) {
10159 TEST_REQUIRES_ARM_NEON;
10160 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010161 for (uint32_t n = 1; n <= 16; n++) {
10162 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010163 GemmMicrokernelTester()
10164 .mr(4)
10165 .nr(16)
10166 .kr(1)
10167 .sr(1)
10168 .m(m)
10169 .n(n)
10170 .k(k)
10171 .ks(3)
10172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010173 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010174 }
10175 }
10176 }
10177 }
10178
10179 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_small_kernel) {
10180 TEST_REQUIRES_ARM_NEON;
10181 for (uint32_t n = 17; n < 32; n++) {
10182 for (size_t k = 1; k <= 40; k += 9) {
10183 GemmMicrokernelTester()
10184 .mr(4)
10185 .nr(16)
10186 .kr(1)
10187 .sr(1)
10188 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010189 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010190 .k(k)
10191 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010192 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010193 }
10194 }
10195 }
10196
10197 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_small_kernel) {
10198 TEST_REQUIRES_ARM_NEON;
10199 for (uint32_t n = 32; n <= 48; n += 16) {
10200 for (size_t k = 1; k <= 40; k += 9) {
10201 GemmMicrokernelTester()
10202 .mr(4)
10203 .nr(16)
10204 .kr(1)
10205 .sr(1)
10206 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010207 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010208 .k(k)
10209 .ks(3)
Marat Dukhan50323b82022-01-11 00:12:01 -080010210 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010211 }
10212 }
10213 }
10214
10215 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
10216 TEST_REQUIRES_ARM_NEON;
10217 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010218 for (uint32_t n = 1; n <= 16; n++) {
10219 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010220 GemmMicrokernelTester()
10221 .mr(4)
10222 .nr(16)
10223 .kr(1)
10224 .sr(1)
10225 .m(m)
10226 .n(n)
10227 .k(k)
10228 .cm_stride(19)
10229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010230 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010231 }
10232 }
10233 }
10234 }
10235
10236 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, a_offset) {
10237 TEST_REQUIRES_ARM_NEON;
10238 for (size_t k = 1; k <= 40; k += 9) {
10239 GemmMicrokernelTester()
10240 .mr(4)
10241 .nr(16)
10242 .kr(1)
10243 .sr(1)
10244 .m(4)
10245 .n(16)
10246 .k(k)
10247 .ks(3)
10248 .a_offset(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010249 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010250 }
10251 }
10252
10253 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, zero) {
10254 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010255 for (size_t k = 1; k <= 40; k += 9) {
10256 for (uint32_t mz = 0; mz < 4; mz++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010257 GemmMicrokernelTester()
10258 .mr(4)
10259 .nr(16)
10260 .kr(1)
10261 .sr(1)
10262 .m(4)
10263 .n(16)
10264 .k(k)
10265 .ks(3)
10266 .a_offset(163)
10267 .zero_index(mz)
Marat Dukhan50323b82022-01-11 00:12:01 -080010268 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010269 }
10270 }
10271 }
10272
10273 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
10274 TEST_REQUIRES_ARM_NEON;
10275 GemmMicrokernelTester()
10276 .mr(4)
10277 .nr(16)
10278 .kr(1)
10279 .sr(1)
10280 .m(4)
10281 .n(16)
10282 .k(8)
10283 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010284 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010285 }
10286
10287 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
10288 TEST_REQUIRES_ARM_NEON;
10289 GemmMicrokernelTester()
10290 .mr(4)
10291 .nr(16)
10292 .kr(1)
10293 .sr(1)
10294 .m(4)
10295 .n(16)
10296 .k(8)
10297 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010298 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010299 }
10300
10301 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
10302 TEST_REQUIRES_ARM_NEON;
10303 GemmMicrokernelTester()
10304 .mr(4)
10305 .nr(16)
10306 .kr(1)
10307 .sr(1)
10308 .m(4)
10309 .n(16)
10310 .k(8)
10311 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010312 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010313 }
10314
10315 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_a_zero_point) {
10316 TEST_REQUIRES_ARM_NEON;
10317 for (size_t k = 1; k <= 40; k += 9) {
10318 GemmMicrokernelTester()
10319 .mr(4)
10320 .nr(16)
10321 .kr(1)
10322 .sr(1)
10323 .m(4)
10324 .n(16)
10325 .k(k)
10326 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010327 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010328 }
10329 }
10330
10331 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_b_zero_point) {
10332 TEST_REQUIRES_ARM_NEON;
10333 for (size_t k = 1; k <= 40; k += 9) {
10334 GemmMicrokernelTester()
10335 .mr(4)
10336 .nr(16)
10337 .kr(1)
10338 .sr(1)
10339 .m(4)
10340 .n(16)
10341 .k(k)
10342 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010343 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010344 }
10345 }
10346
10347 TEST(QU8_IGEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_zero_point) {
10348 TEST_REQUIRES_ARM_NEON;
10349 for (size_t k = 1; k <= 40; k += 9) {
10350 GemmMicrokernelTester()
10351 .mr(4)
10352 .nr(16)
10353 .kr(1)
10354 .sr(1)
10355 .m(4)
10356 .n(16)
10357 .k(k)
10358 .a_zero_point(0)
10359 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010360 .Test(xnn_qu8_igemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010361 }
10362 }
10363#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY