blob: 284fe43b30428b9734a239d8c35f4baa543273a4 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/qu8-gemm-minmax-rndnu.yaml
//   Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barchard901845c2022-01-19 01:45:22 -080026#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
27 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
28 TEST_REQUIRES_ARM_NEON;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(4)
35 .n(8)
36 .k(8)
37 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
38 }
39
40 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
41 TEST_REQUIRES_ARM_NEON;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(4)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
51 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
52 }
53
54 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
55 TEST_REQUIRES_ARM_NEON;
56 GemmMicrokernelTester()
57 .mr(4)
58 .nr(8)
59 .kr(1)
60 .sr(1)
61 .m(4)
62 .n(8)
63 .k(8)
64 .a_stride(11)
65 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
66 }
67
68 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
69 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 4; m++) {
72 GemmMicrokernelTester()
73 .mr(4)
74 .nr(8)
75 .kr(1)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(8)
80 .iterations(1)
81 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
82 }
83 }
84 }
85
86 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
87 TEST_REQUIRES_ARM_NEON;
88 for (uint32_t m = 1; m <= 4; m++) {
89 GemmMicrokernelTester()
90 .mr(4)
91 .nr(8)
92 .kr(1)
93 .sr(1)
94 .m(m)
95 .n(8)
96 .k(8)
97 .iterations(1)
98 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
99 }
100 }
101
102 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(4)
107 .nr(8)
108 .kr(1)
109 .sr(1)
110 .m(4)
111 .n(n)
112 .k(8)
113 .iterations(1)
114 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
115 }
116 }
117
118 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
119 TEST_REQUIRES_ARM_NEON;
120 for (size_t k = 1; k < 8; k++) {
121 GemmMicrokernelTester()
122 .mr(4)
123 .nr(8)
124 .kr(1)
125 .sr(1)
126 .m(4)
127 .n(8)
128 .k(k)
129 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
130 }
131 }
132
133 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
134 TEST_REQUIRES_ARM_NEON;
135 for (size_t k = 1; k < 8; k++) {
136 GemmMicrokernelTester()
137 .mr(4)
138 .nr(8)
139 .kr(1)
140 .sr(1)
141 .m(4)
142 .n(8)
143 .k(k)
144 .a_stride(11)
145 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
146 }
147 }
148
149 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
150 TEST_REQUIRES_ARM_NEON;
151 for (size_t k = 1; k < 8; k++) {
152 for (uint32_t n = 1; n <= 8; n++) {
153 for (uint32_t m = 1; m <= 4; m++) {
154 GemmMicrokernelTester()
155 .mr(4)
156 .nr(8)
157 .kr(1)
158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
163 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
164 }
165 }
166 }
167 }
168
169 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
170 TEST_REQUIRES_ARM_NEON;
171 for (size_t k = 9; k < 16; k++) {
172 GemmMicrokernelTester()
173 .mr(4)
174 .nr(8)
175 .kr(1)
176 .sr(1)
177 .m(4)
178 .n(8)
179 .k(k)
180 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
181 }
182 }
183
184 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
185 TEST_REQUIRES_ARM_NEON;
186 for (size_t k = 9; k < 16; k++) {
187 GemmMicrokernelTester()
188 .mr(4)
189 .nr(8)
190 .kr(1)
191 .sr(1)
192 .m(4)
193 .n(8)
194 .k(k)
195 .a_stride(19)
196 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
197 }
198 }
199
200 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
201 TEST_REQUIRES_ARM_NEON;
202 for (size_t k = 9; k < 16; k++) {
203 for (uint32_t n = 1; n <= 8; n++) {
204 for (uint32_t m = 1; m <= 4; m++) {
205 GemmMicrokernelTester()
206 .mr(4)
207 .nr(8)
208 .kr(1)
209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
214 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
215 }
216 }
217 }
218 }
219
220 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
221 TEST_REQUIRES_ARM_NEON;
222 for (size_t k = 16; k <= 80; k += 8) {
223 GemmMicrokernelTester()
224 .mr(4)
225 .nr(8)
226 .kr(1)
227 .sr(1)
228 .m(4)
229 .n(8)
230 .k(k)
231 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
232 }
233 }
234
235 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
236 TEST_REQUIRES_ARM_NEON;
237 for (size_t k = 16; k <= 80; k += 8) {
238 GemmMicrokernelTester()
239 .mr(4)
240 .nr(8)
241 .kr(1)
242 .sr(1)
243 .m(4)
244 .n(8)
245 .k(k)
246 .a_stride(83)
247 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
248 }
249 }
250
251 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
252 TEST_REQUIRES_ARM_NEON;
253 for (size_t k = 16; k <= 80; k += 8) {
254 for (uint32_t n = 1; n <= 8; n++) {
255 for (uint32_t m = 1; m <= 4; m++) {
256 GemmMicrokernelTester()
257 .mr(4)
258 .nr(8)
259 .kr(1)
260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
265 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
266 }
267 }
268 }
269 }
270
271 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
272 TEST_REQUIRES_ARM_NEON;
273 for (uint32_t n = 9; n < 16; n++) {
274 for (size_t k = 1; k <= 40; k += 9) {
275 GemmMicrokernelTester()
276 .mr(4)
277 .nr(8)
278 .kr(1)
279 .sr(1)
280 .m(4)
281 .n(n)
282 .k(k)
283 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
284 }
285 }
286 }
287
288 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t n = 9; n < 16; n++) {
291 for (size_t k = 1; k <= 40; k += 9) {
292 GemmMicrokernelTester()
293 .mr(4)
294 .nr(8)
295 .kr(1)
296 .sr(1)
297 .m(4)
298 .n(n)
299 .k(k)
300 .cn_stride(11)
301 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
302 }
303 }
304 }
305
306 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) {
307 TEST_REQUIRES_ARM_NEON;
308 for (uint32_t n = 9; n < 16; n++) {
309 for (size_t k = 1; k <= 40; k += 9) {
310 GemmMicrokernelTester()
311 .mr(4)
312 .nr(8)
313 .kr(1)
314 .sr(1)
315 .m(4)
316 .n(n)
317 .k(k)
318 .a_stride(43)
319 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
320 }
321 }
322 }
323
324 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
325 TEST_REQUIRES_ARM_NEON;
326 for (uint32_t n = 9; n < 16; n++) {
327 for (size_t k = 1; k <= 40; k += 9) {
328 for (uint32_t m = 1; m <= 4; m++) {
329 GemmMicrokernelTester()
330 .mr(4)
331 .nr(8)
332 .kr(1)
333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
338 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
339 }
340 }
341 }
342 }
343
344 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
345 TEST_REQUIRES_ARM_NEON;
346 for (uint32_t n = 16; n <= 24; n += 8) {
347 for (size_t k = 1; k <= 40; k += 9) {
348 GemmMicrokernelTester()
349 .mr(4)
350 .nr(8)
351 .kr(1)
352 .sr(1)
353 .m(4)
354 .n(n)
355 .k(k)
356 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
357 }
358 }
359 }
360
361 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
362 TEST_REQUIRES_ARM_NEON;
363 for (uint32_t n = 16; n <= 24; n += 8) {
364 for (size_t k = 1; k <= 40; k += 9) {
365 GemmMicrokernelTester()
366 .mr(4)
367 .nr(8)
368 .kr(1)
369 .sr(1)
370 .m(4)
371 .n(n)
372 .k(k)
373 .cn_stride(11)
374 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
375 }
376 }
377 }
378
379 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) {
380 TEST_REQUIRES_ARM_NEON;
381 for (uint32_t n = 16; n <= 24; n += 8) {
382 for (size_t k = 1; k <= 40; k += 9) {
383 GemmMicrokernelTester()
384 .mr(4)
385 .nr(8)
386 .kr(1)
387 .sr(1)
388 .m(4)
389 .n(n)
390 .k(k)
391 .a_stride(43)
392 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
393 }
394 }
395 }
396
397 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
398 TEST_REQUIRES_ARM_NEON;
399 for (uint32_t n = 16; n <= 24; n += 8) {
400 for (size_t k = 1; k <= 40; k += 9) {
401 for (uint32_t m = 1; m <= 4; m++) {
402 GemmMicrokernelTester()
403 .mr(4)
404 .nr(8)
405 .kr(1)
406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
411 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
412 }
413 }
414 }
415 }
416
417 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON;
419 for (size_t k = 1; k <= 40; k += 9) {
420 for (uint32_t n = 1; n <= 8; n++) {
421 for (uint32_t m = 1; m <= 4; m++) {
422 GemmMicrokernelTester()
423 .mr(4)
424 .nr(8)
425 .kr(1)
426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(11)
431 .iterations(1)
432 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
433 }
434 }
435 }
436 }
437
438 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
439 TEST_REQUIRES_ARM_NEON;
440 GemmMicrokernelTester()
441 .mr(4)
442 .nr(8)
443 .kr(1)
444 .sr(1)
445 .m(4)
446 .n(8)
447 .k(8)
448 .qmin(128)
449 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
450 }
451
452 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
453 TEST_REQUIRES_ARM_NEON;
454 GemmMicrokernelTester()
455 .mr(4)
456 .nr(8)
457 .kr(1)
458 .sr(1)
459 .m(4)
460 .n(8)
461 .k(8)
462 .qmax(128)
463 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
464 }
465
466 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
467 TEST_REQUIRES_ARM_NEON;
468 GemmMicrokernelTester()
469 .mr(4)
470 .nr(8)
471 .kr(1)
472 .sr(1)
473 .m(4)
474 .n(8)
475 .k(8)
476 .cm_stride(11)
477 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
478 }
479
480 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) {
481 TEST_REQUIRES_ARM_NEON;
482 for (size_t k = 1; k <= 40; k += 9) {
483 GemmMicrokernelTester()
484 .mr(4)
485 .nr(8)
486 .kr(1)
487 .sr(1)
488 .m(4)
489 .n(8)
490 .k(k)
491 .a_zero_point(0)
492 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
493 }
494 }
495
496 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) {
497 TEST_REQUIRES_ARM_NEON;
498 for (size_t k = 1; k <= 40; k += 9) {
499 GemmMicrokernelTester()
500 .mr(4)
501 .nr(8)
502 .kr(1)
503 .sr(1)
504 .m(4)
505 .n(8)
506 .k(k)
507 .b_zero_point(0)
508 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
509 }
510 }
511
512 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) {
513 TEST_REQUIRES_ARM_NEON;
514 for (size_t k = 1; k <= 40; k += 9) {
515 GemmMicrokernelTester()
516 .mr(4)
517 .nr(8)
518 .kr(1)
519 .sr(1)
520 .m(4)
521 .n(8)
522 .k(k)
523 .a_zero_point(0)
524 .b_zero_point(0)
525 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
526 }
527 }
528#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
529
530
531#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
532 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8) {
533 TEST_REQUIRES_ARM_NEON;
534 GemmMicrokernelTester()
535 .mr(4)
536 .nr(8)
537 .kr(1)
538 .sr(1)
539 .m(4)
540 .n(8)
541 .k(8)
542 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
543 }
544
545 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cn) {
546 TEST_REQUIRES_ARM_NEON;
547 GemmMicrokernelTester()
548 .mr(4)
549 .nr(8)
550 .kr(1)
551 .sr(1)
552 .m(4)
553 .n(8)
554 .k(8)
555 .cn_stride(11)
556 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
557 }
558
559 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
560 TEST_REQUIRES_ARM_NEON;
561 GemmMicrokernelTester()
562 .mr(4)
563 .nr(8)
564 .kr(1)
565 .sr(1)
566 .m(4)
567 .n(8)
568 .k(8)
569 .a_stride(11)
570 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
571 }
572
573 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
574 TEST_REQUIRES_ARM_NEON;
575 for (uint32_t n = 1; n <= 8; n++) {
576 for (uint32_t m = 1; m <= 4; m++) {
577 GemmMicrokernelTester()
578 .mr(4)
579 .nr(8)
580 .kr(1)
581 .sr(1)
582 .m(m)
583 .n(n)
584 .k(8)
585 .iterations(1)
586 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
587 }
588 }
589 }
590
591 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
592 TEST_REQUIRES_ARM_NEON;
593 for (uint32_t m = 1; m <= 4; m++) {
594 GemmMicrokernelTester()
595 .mr(4)
596 .nr(8)
597 .kr(1)
598 .sr(1)
599 .m(m)
600 .n(8)
601 .k(8)
602 .iterations(1)
603 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
604 }
605 }
606
607 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
608 TEST_REQUIRES_ARM_NEON;
609 for (uint32_t n = 1; n <= 8; n++) {
610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(8)
613 .kr(1)
614 .sr(1)
615 .m(4)
616 .n(n)
617 .k(8)
618 .iterations(1)
619 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
620 }
621 }
622
623 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8) {
624 TEST_REQUIRES_ARM_NEON;
625 for (size_t k = 1; k < 8; k++) {
626 GemmMicrokernelTester()
627 .mr(4)
628 .nr(8)
629 .kr(1)
630 .sr(1)
631 .m(4)
632 .n(8)
633 .k(k)
634 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
635 }
636 }
637
638 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
639 TEST_REQUIRES_ARM_NEON;
640 for (size_t k = 1; k < 8; k++) {
641 GemmMicrokernelTester()
642 .mr(4)
643 .nr(8)
644 .kr(1)
645 .sr(1)
646 .m(4)
647 .n(8)
648 .k(k)
649 .a_stride(11)
650 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
651 }
652 }
653
654 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
655 TEST_REQUIRES_ARM_NEON;
656 for (size_t k = 1; k < 8; k++) {
657 for (uint32_t n = 1; n <= 8; n++) {
658 for (uint32_t m = 1; m <= 4; m++) {
659 GemmMicrokernelTester()
660 .mr(4)
661 .nr(8)
662 .kr(1)
663 .sr(1)
664 .m(m)
665 .n(n)
666 .k(k)
667 .iterations(1)
668 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
669 }
670 }
671 }
672 }
673
674 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8) {
675 TEST_REQUIRES_ARM_NEON;
676 for (size_t k = 9; k < 16; k++) {
677 GemmMicrokernelTester()
678 .mr(4)
679 .nr(8)
680 .kr(1)
681 .sr(1)
682 .m(4)
683 .n(8)
684 .k(k)
685 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
686 }
687 }
688
689 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
690 TEST_REQUIRES_ARM_NEON;
691 for (size_t k = 9; k < 16; k++) {
692 GemmMicrokernelTester()
693 .mr(4)
694 .nr(8)
695 .kr(1)
696 .sr(1)
697 .m(4)
698 .n(8)
699 .k(k)
700 .a_stride(19)
701 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
702 }
703 }
704
705 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
706 TEST_REQUIRES_ARM_NEON;
707 for (size_t k = 9; k < 16; k++) {
708 for (uint32_t n = 1; n <= 8; n++) {
709 for (uint32_t m = 1; m <= 4; m++) {
710 GemmMicrokernelTester()
711 .mr(4)
712 .nr(8)
713 .kr(1)
714 .sr(1)
715 .m(m)
716 .n(n)
717 .k(k)
718 .iterations(1)
719 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
720 }
721 }
722 }
723 }
724
725 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8) {
726 TEST_REQUIRES_ARM_NEON;
727 for (size_t k = 16; k <= 80; k += 8) {
728 GemmMicrokernelTester()
729 .mr(4)
730 .nr(8)
731 .kr(1)
732 .sr(1)
733 .m(4)
734 .n(8)
735 .k(k)
736 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
737 }
738 }
739
740 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
741 TEST_REQUIRES_ARM_NEON;
742 for (size_t k = 16; k <= 80; k += 8) {
743 GemmMicrokernelTester()
744 .mr(4)
745 .nr(8)
746 .kr(1)
747 .sr(1)
748 .m(4)
749 .n(8)
750 .k(k)
751 .a_stride(83)
752 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
753 }
754 }
755
756 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
757 TEST_REQUIRES_ARM_NEON;
758 for (size_t k = 16; k <= 80; k += 8) {
759 for (uint32_t n = 1; n <= 8; n++) {
760 for (uint32_t m = 1; m <= 4; m++) {
761 GemmMicrokernelTester()
762 .mr(4)
763 .nr(8)
764 .kr(1)
765 .sr(1)
766 .m(m)
767 .n(n)
768 .k(k)
769 .iterations(1)
770 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
771 }
772 }
773 }
774 }
775
776 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8) {
777 TEST_REQUIRES_ARM_NEON;
778 for (uint32_t n = 9; n < 16; n++) {
779 for (size_t k = 1; k <= 40; k += 9) {
780 GemmMicrokernelTester()
781 .mr(4)
782 .nr(8)
783 .kr(1)
784 .sr(1)
785 .m(4)
786 .n(n)
787 .k(k)
788 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
789 }
790 }
791 }
792
793 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
794 TEST_REQUIRES_ARM_NEON;
795 for (uint32_t n = 9; n < 16; n++) {
796 for (size_t k = 1; k <= 40; k += 9) {
797 GemmMicrokernelTester()
798 .mr(4)
799 .nr(8)
800 .kr(1)
801 .sr(1)
802 .m(4)
803 .n(n)
804 .k(k)
805 .cn_stride(11)
806 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
807 }
808 }
809 }
810
811 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_strided_a) {
812 TEST_REQUIRES_ARM_NEON;
813 for (uint32_t n = 9; n < 16; n++) {
814 for (size_t k = 1; k <= 40; k += 9) {
815 GemmMicrokernelTester()
816 .mr(4)
817 .nr(8)
818 .kr(1)
819 .sr(1)
820 .m(4)
821 .n(n)
822 .k(k)
823 .a_stride(43)
824 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
825 }
826 }
827 }
828
829 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
830 TEST_REQUIRES_ARM_NEON;
831 for (uint32_t n = 9; n < 16; n++) {
832 for (size_t k = 1; k <= 40; k += 9) {
833 for (uint32_t m = 1; m <= 4; m++) {
834 GemmMicrokernelTester()
835 .mr(4)
836 .nr(8)
837 .kr(1)
838 .sr(1)
839 .m(m)
840 .n(n)
841 .k(k)
842 .iterations(1)
843 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
844 }
845 }
846 }
847 }
848
849 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8) {
850 TEST_REQUIRES_ARM_NEON;
851 for (uint32_t n = 16; n <= 24; n += 8) {
852 for (size_t k = 1; k <= 40; k += 9) {
853 GemmMicrokernelTester()
854 .mr(4)
855 .nr(8)
856 .kr(1)
857 .sr(1)
858 .m(4)
859 .n(n)
860 .k(k)
861 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
862 }
863 }
864 }
865
866 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
867 TEST_REQUIRES_ARM_NEON;
868 for (uint32_t n = 16; n <= 24; n += 8) {
869 for (size_t k = 1; k <= 40; k += 9) {
870 GemmMicrokernelTester()
871 .mr(4)
872 .nr(8)
873 .kr(1)
874 .sr(1)
875 .m(4)
876 .n(n)
877 .k(k)
878 .cn_stride(11)
879 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
880 }
881 }
882 }
883
884 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_strided_a) {
885 TEST_REQUIRES_ARM_NEON;
886 for (uint32_t n = 16; n <= 24; n += 8) {
887 for (size_t k = 1; k <= 40; k += 9) {
888 GemmMicrokernelTester()
889 .mr(4)
890 .nr(8)
891 .kr(1)
892 .sr(1)
893 .m(4)
894 .n(n)
895 .k(k)
896 .a_stride(43)
897 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
898 }
899 }
900 }
901
902 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
903 TEST_REQUIRES_ARM_NEON;
904 for (uint32_t n = 16; n <= 24; n += 8) {
905 for (size_t k = 1; k <= 40; k += 9) {
906 for (uint32_t m = 1; m <= 4; m++) {
907 GemmMicrokernelTester()
908 .mr(4)
909 .nr(8)
910 .kr(1)
911 .sr(1)
912 .m(m)
913 .n(n)
914 .k(k)
915 .iterations(1)
916 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
917 }
918 }
919 }
920 }
921
922 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
923 TEST_REQUIRES_ARM_NEON;
924 for (size_t k = 1; k <= 40; k += 9) {
925 for (uint32_t n = 1; n <= 8; n++) {
926 for (uint32_t m = 1; m <= 4; m++) {
927 GemmMicrokernelTester()
928 .mr(4)
929 .nr(8)
930 .kr(1)
931 .sr(1)
932 .m(m)
933 .n(n)
934 .k(k)
935 .cm_stride(11)
936 .iterations(1)
937 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
938 }
939 }
940 }
941 }
942
943 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmin) {
944 TEST_REQUIRES_ARM_NEON;
945 GemmMicrokernelTester()
946 .mr(4)
947 .nr(8)
948 .kr(1)
949 .sr(1)
950 .m(4)
951 .n(8)
952 .k(8)
953 .qmin(128)
954 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
955 }
956
957 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, qmax) {
958 TEST_REQUIRES_ARM_NEON;
959 GemmMicrokernelTester()
960 .mr(4)
961 .nr(8)
962 .kr(1)
963 .sr(1)
964 .m(4)
965 .n(8)
966 .k(8)
967 .qmax(128)
968 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
969 }
970
971 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, strided_cm) {
972 TEST_REQUIRES_ARM_NEON;
973 GemmMicrokernelTester()
974 .mr(4)
975 .nr(8)
976 .kr(1)
977 .sr(1)
978 .m(4)
979 .n(8)
980 .k(8)
981 .cm_stride(11)
982 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
983 }
984
985 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_a_zero_point) {
986 TEST_REQUIRES_ARM_NEON;
987 for (size_t k = 1; k <= 40; k += 9) {
988 GemmMicrokernelTester()
989 .mr(4)
990 .nr(8)
991 .kr(1)
992 .sr(1)
993 .m(4)
994 .n(8)
995 .k(k)
996 .a_zero_point(0)
997 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
998 }
999 }
1000
1001 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_b_zero_point) {
1002 TEST_REQUIRES_ARM_NEON;
1003 for (size_t k = 1; k <= 40; k += 9) {
1004 GemmMicrokernelTester()
1005 .mr(4)
1006 .nr(8)
1007 .kr(1)
1008 .sr(1)
1009 .m(4)
1010 .n(8)
1011 .k(k)
1012 .b_zero_point(0)
1013 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
1014 }
1015 }
1016
1017 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_LD64, no_zero_point) {
1018 TEST_REQUIRES_ARM_NEON;
1019 for (size_t k = 1; k <= 40; k += 9) {
1020 GemmMicrokernelTester()
1021 .mr(4)
1022 .nr(8)
1023 .kr(1)
1024 .sr(1)
1025 .m(4)
1026 .n(8)
1027 .k(k)
1028 .a_zero_point(0)
1029 .b_zero_point(0)
1030 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
1031 }
1032 }
1033#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
1034
1035
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001036#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1037 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8) {
1038 TEST_REQUIRES_ARM_NEON;
1039 GemmMicrokernelTester()
1040 .mr(1)
1041 .nr(8)
1042 .kr(1)
1043 .sr(1)
1044 .m(1)
1045 .n(8)
1046 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001047 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001048 }
1049
1050 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cn) {
1051 TEST_REQUIRES_ARM_NEON;
1052 GemmMicrokernelTester()
1053 .mr(1)
1054 .nr(8)
1055 .kr(1)
1056 .sr(1)
1057 .m(1)
1058 .n(8)
1059 .k(8)
1060 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001061 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001062 }
1063
1064 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
1065 TEST_REQUIRES_ARM_NEON;
1066 GemmMicrokernelTester()
1067 .mr(1)
1068 .nr(8)
1069 .kr(1)
1070 .sr(1)
1071 .m(1)
1072 .n(8)
1073 .k(8)
1074 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001075 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001076 }
1077
1078 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile) {
1079 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001080 for (uint32_t n = 1; n <= 8; n++) {
1081 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001082 GemmMicrokernelTester()
1083 .mr(1)
1084 .nr(8)
1085 .kr(1)
1086 .sr(1)
1087 .m(m)
1088 .n(n)
1089 .k(8)
1090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001091 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001092 }
1093 }
1094 }
1095
1096 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
1097 TEST_REQUIRES_ARM_NEON;
1098 for (uint32_t m = 1; m <= 1; m++) {
1099 GemmMicrokernelTester()
1100 .mr(1)
1101 .nr(8)
1102 .kr(1)
1103 .sr(1)
1104 .m(m)
1105 .n(8)
1106 .k(8)
1107 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001108 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001109 }
1110 }
1111
1112 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
1113 TEST_REQUIRES_ARM_NEON;
1114 for (uint32_t n = 1; n <= 8; n++) {
1115 GemmMicrokernelTester()
1116 .mr(1)
1117 .nr(8)
1118 .kr(1)
1119 .sr(1)
1120 .m(1)
1121 .n(n)
1122 .k(8)
1123 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001124 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001125 }
1126 }
1127
1128 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8) {
1129 TEST_REQUIRES_ARM_NEON;
1130 for (size_t k = 1; k < 8; k++) {
1131 GemmMicrokernelTester()
1132 .mr(1)
1133 .nr(8)
1134 .kr(1)
1135 .sr(1)
1136 .m(1)
1137 .n(8)
1138 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001139 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001140 }
1141 }
1142
1143 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
1144 TEST_REQUIRES_ARM_NEON;
1145 for (size_t k = 1; k < 8; k++) {
1146 GemmMicrokernelTester()
1147 .mr(1)
1148 .nr(8)
1149 .kr(1)
1150 .sr(1)
1151 .m(1)
1152 .n(8)
1153 .k(k)
1154 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001155 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001156 }
1157 }
1158
1159 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_lt_8_subtile) {
1160 TEST_REQUIRES_ARM_NEON;
1161 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001162 for (uint32_t n = 1; n <= 8; n++) {
1163 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001164 GemmMicrokernelTester()
1165 .mr(1)
1166 .nr(8)
1167 .kr(1)
1168 .sr(1)
1169 .m(m)
1170 .n(n)
1171 .k(k)
1172 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001173 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001174 }
1175 }
1176 }
1177 }
1178
1179 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8) {
1180 TEST_REQUIRES_ARM_NEON;
1181 for (size_t k = 9; k < 16; k++) {
1182 GemmMicrokernelTester()
1183 .mr(1)
1184 .nr(8)
1185 .kr(1)
1186 .sr(1)
1187 .m(1)
1188 .n(8)
1189 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001190 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001191 }
1192 }
1193
1194 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
1195 TEST_REQUIRES_ARM_NEON;
1196 for (size_t k = 9; k < 16; k++) {
1197 GemmMicrokernelTester()
1198 .mr(1)
1199 .nr(8)
1200 .kr(1)
1201 .sr(1)
1202 .m(1)
1203 .n(8)
1204 .k(k)
1205 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001206 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001207 }
1208 }
1209
1210 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_gt_8_subtile) {
1211 TEST_REQUIRES_ARM_NEON;
1212 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001213 for (uint32_t n = 1; n <= 8; n++) {
1214 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001215 GemmMicrokernelTester()
1216 .mr(1)
1217 .nr(8)
1218 .kr(1)
1219 .sr(1)
1220 .m(m)
1221 .n(n)
1222 .k(k)
1223 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001224 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001225 }
1226 }
1227 }
1228 }
1229
1230 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8) {
1231 TEST_REQUIRES_ARM_NEON;
1232 for (size_t k = 16; k <= 80; k += 8) {
1233 GemmMicrokernelTester()
1234 .mr(1)
1235 .nr(8)
1236 .kr(1)
1237 .sr(1)
1238 .m(1)
1239 .n(8)
1240 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001241 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001242 }
1243 }
1244
1245 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_strided_a) {
1246 TEST_REQUIRES_ARM_NEON;
1247 for (size_t k = 16; k <= 80; k += 8) {
1248 GemmMicrokernelTester()
1249 .mr(1)
1250 .nr(8)
1251 .kr(1)
1252 .sr(1)
1253 .m(1)
1254 .n(8)
1255 .k(k)
1256 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001257 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001258 }
1259 }
1260
1261 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, k_div_8_subtile) {
1262 TEST_REQUIRES_ARM_NEON;
1263 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001264 for (uint32_t n = 1; n <= 8; n++) {
1265 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001266 GemmMicrokernelTester()
1267 .mr(1)
1268 .nr(8)
1269 .kr(1)
1270 .sr(1)
1271 .m(m)
1272 .n(n)
1273 .k(k)
1274 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001275 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001276 }
1277 }
1278 }
1279 }
1280
1281 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8) {
1282 TEST_REQUIRES_ARM_NEON;
1283 for (uint32_t n = 9; n < 16; n++) {
1284 for (size_t k = 1; k <= 40; k += 9) {
1285 GemmMicrokernelTester()
1286 .mr(1)
1287 .nr(8)
1288 .kr(1)
1289 .sr(1)
1290 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001291 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001292 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001293 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001294 }
1295 }
1296 }
1297
1298 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
1299 TEST_REQUIRES_ARM_NEON;
1300 for (uint32_t n = 9; n < 16; n++) {
1301 for (size_t k = 1; k <= 40; k += 9) {
1302 GemmMicrokernelTester()
1303 .mr(1)
1304 .nr(8)
1305 .kr(1)
1306 .sr(1)
1307 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001308 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001309 .k(k)
1310 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001311 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001312 }
1313 }
1314 }
1315
1316 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
1317 TEST_REQUIRES_ARM_NEON;
1318 for (uint32_t n = 9; n < 16; n++) {
1319 for (size_t k = 1; k <= 40; k += 9) {
1320 GemmMicrokernelTester()
1321 .mr(1)
1322 .nr(8)
1323 .kr(1)
1324 .sr(1)
1325 .m(1)
1326 .n(n)
1327 .k(k)
1328 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001329 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001330 }
1331 }
1332 }
1333
1334 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_gt_8_subtile) {
1335 TEST_REQUIRES_ARM_NEON;
1336 for (uint32_t n = 9; n < 16; n++) {
1337 for (size_t k = 1; k <= 40; k += 9) {
1338 for (uint32_t m = 1; m <= 1; m++) {
1339 GemmMicrokernelTester()
1340 .mr(1)
1341 .nr(8)
1342 .kr(1)
1343 .sr(1)
1344 .m(m)
1345 .n(n)
1346 .k(k)
1347 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001348 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001349 }
1350 }
1351 }
1352 }
1353
1354 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8) {
1355 TEST_REQUIRES_ARM_NEON;
1356 for (uint32_t n = 16; n <= 24; n += 8) {
1357 for (size_t k = 1; k <= 40; k += 9) {
1358 GemmMicrokernelTester()
1359 .mr(1)
1360 .nr(8)
1361 .kr(1)
1362 .sr(1)
1363 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001364 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001365 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001366 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001367 }
1368 }
1369 }
1370
1371 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
1372 TEST_REQUIRES_ARM_NEON;
1373 for (uint32_t n = 16; n <= 24; n += 8) {
1374 for (size_t k = 1; k <= 40; k += 9) {
1375 GemmMicrokernelTester()
1376 .mr(1)
1377 .nr(8)
1378 .kr(1)
1379 .sr(1)
1380 .m(1)
1381 .n(n)
1382 .k(k)
1383 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001384 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001385 }
1386 }
1387 }
1388
1389 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_strided_a) {
1390 TEST_REQUIRES_ARM_NEON;
1391 for (uint32_t n = 16; n <= 24; n += 8) {
1392 for (size_t k = 1; k <= 40; k += 9) {
1393 GemmMicrokernelTester()
1394 .mr(1)
1395 .nr(8)
1396 .kr(1)
1397 .sr(1)
1398 .m(1)
1399 .n(n)
1400 .k(k)
1401 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001402 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001403 }
1404 }
1405 }
1406
1407 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, n_div_8_subtile) {
1408 TEST_REQUIRES_ARM_NEON;
1409 for (uint32_t n = 16; n <= 24; n += 8) {
1410 for (size_t k = 1; k <= 40; k += 9) {
1411 for (uint32_t m = 1; m <= 1; m++) {
1412 GemmMicrokernelTester()
1413 .mr(1)
1414 .nr(8)
1415 .kr(1)
1416 .sr(1)
1417 .m(m)
1418 .n(n)
1419 .k(k)
1420 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001421 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001422 }
1423 }
1424 }
1425 }
1426
1427 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm_subtile) {
1428 TEST_REQUIRES_ARM_NEON;
1429 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001430 for (uint32_t n = 1; n <= 8; n++) {
1431 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001432 GemmMicrokernelTester()
1433 .mr(1)
1434 .nr(8)
1435 .kr(1)
1436 .sr(1)
1437 .m(m)
1438 .n(n)
1439 .k(k)
1440 .cm_stride(11)
1441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001442 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001443 }
1444 }
1445 }
1446 }
1447
1448 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmin) {
1449 TEST_REQUIRES_ARM_NEON;
1450 GemmMicrokernelTester()
1451 .mr(1)
1452 .nr(8)
1453 .kr(1)
1454 .sr(1)
1455 .m(1)
1456 .n(8)
1457 .k(8)
1458 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001459 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001460 }
1461
1462 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, qmax) {
1463 TEST_REQUIRES_ARM_NEON;
1464 GemmMicrokernelTester()
1465 .mr(1)
1466 .nr(8)
1467 .kr(1)
1468 .sr(1)
1469 .m(1)
1470 .n(8)
1471 .k(8)
1472 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001473 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001474 }
1475
1476 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, strided_cm) {
1477 TEST_REQUIRES_ARM_NEON;
1478 GemmMicrokernelTester()
1479 .mr(1)
1480 .nr(8)
1481 .kr(1)
1482 .sr(1)
1483 .m(1)
1484 .n(8)
1485 .k(8)
1486 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001487 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001488 }
1489
1490 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_a_zero_point) {
1491 TEST_REQUIRES_ARM_NEON;
1492 for (size_t k = 1; k <= 40; k += 9) {
1493 GemmMicrokernelTester()
1494 .mr(1)
1495 .nr(8)
1496 .kr(1)
1497 .sr(1)
1498 .m(1)
1499 .n(8)
1500 .k(k)
1501 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001502 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001503 }
1504 }
1505
1506 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_b_zero_point) {
1507 TEST_REQUIRES_ARM_NEON;
1508 for (size_t k = 1; k <= 40; k += 9) {
1509 GemmMicrokernelTester()
1510 .mr(1)
1511 .nr(8)
1512 .kr(1)
1513 .sr(1)
1514 .m(1)
1515 .n(8)
1516 .k(k)
1517 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001518 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001519 }
1520 }
1521
1522 TEST(QU8_GEMM_MINMAX_RNDNU_1X8__NEON_MLAL_LANE, no_zero_point) {
1523 TEST_REQUIRES_ARM_NEON;
1524 for (size_t k = 1; k <= 40; k += 9) {
1525 GemmMicrokernelTester()
1526 .mr(1)
1527 .nr(8)
1528 .kr(1)
1529 .sr(1)
1530 .m(1)
1531 .n(8)
1532 .k(k)
1533 .a_zero_point(0)
1534 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08001535 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001536 }
1537 }
1538#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1539
1540
1541#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1542 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8) {
1543 TEST_REQUIRES_ARM_NEON;
1544 GemmMicrokernelTester()
1545 .mr(4)
1546 .nr(8)
1547 .kr(1)
1548 .sr(1)
1549 .m(4)
1550 .n(8)
1551 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001552 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001553 }
1554
1555 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cn) {
1556 TEST_REQUIRES_ARM_NEON;
1557 GemmMicrokernelTester()
1558 .mr(4)
1559 .nr(8)
1560 .kr(1)
1561 .sr(1)
1562 .m(4)
1563 .n(8)
1564 .k(8)
1565 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001566 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001567 }
1568
1569 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_strided_a) {
1570 TEST_REQUIRES_ARM_NEON;
1571 GemmMicrokernelTester()
1572 .mr(4)
1573 .nr(8)
1574 .kr(1)
1575 .sr(1)
1576 .m(4)
1577 .n(8)
1578 .k(8)
1579 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001580 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001581 }
1582
1583 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile) {
1584 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001585 for (uint32_t n = 1; n <= 8; n++) {
1586 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001587 GemmMicrokernelTester()
1588 .mr(4)
1589 .nr(8)
1590 .kr(1)
1591 .sr(1)
1592 .m(m)
1593 .n(n)
1594 .k(8)
1595 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001596 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001597 }
1598 }
1599 }
1600
1601 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_m) {
1602 TEST_REQUIRES_ARM_NEON;
1603 for (uint32_t m = 1; m <= 4; m++) {
1604 GemmMicrokernelTester()
1605 .mr(4)
1606 .nr(8)
1607 .kr(1)
1608 .sr(1)
1609 .m(m)
1610 .n(8)
1611 .k(8)
1612 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001613 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001614 }
1615 }
1616
1617 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_eq_8_subtile_n) {
1618 TEST_REQUIRES_ARM_NEON;
1619 for (uint32_t n = 1; n <= 8; n++) {
1620 GemmMicrokernelTester()
1621 .mr(4)
1622 .nr(8)
1623 .kr(1)
1624 .sr(1)
1625 .m(4)
1626 .n(n)
1627 .k(8)
1628 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001629 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001630 }
1631 }
1632
1633 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8) {
1634 TEST_REQUIRES_ARM_NEON;
1635 for (size_t k = 1; k < 8; k++) {
1636 GemmMicrokernelTester()
1637 .mr(4)
1638 .nr(8)
1639 .kr(1)
1640 .sr(1)
1641 .m(4)
1642 .n(8)
1643 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001644 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001645 }
1646 }
1647
1648 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_strided_a) {
1649 TEST_REQUIRES_ARM_NEON;
1650 for (size_t k = 1; k < 8; k++) {
1651 GemmMicrokernelTester()
1652 .mr(4)
1653 .nr(8)
1654 .kr(1)
1655 .sr(1)
1656 .m(4)
1657 .n(8)
1658 .k(k)
1659 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001660 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001661 }
1662 }
1663
1664 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_lt_8_subtile) {
1665 TEST_REQUIRES_ARM_NEON;
1666 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001667 for (uint32_t n = 1; n <= 8; n++) {
1668 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001669 GemmMicrokernelTester()
1670 .mr(4)
1671 .nr(8)
1672 .kr(1)
1673 .sr(1)
1674 .m(m)
1675 .n(n)
1676 .k(k)
1677 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001678 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001679 }
1680 }
1681 }
1682 }
1683
1684 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8) {
1685 TEST_REQUIRES_ARM_NEON;
1686 for (size_t k = 9; k < 16; k++) {
1687 GemmMicrokernelTester()
1688 .mr(4)
1689 .nr(8)
1690 .kr(1)
1691 .sr(1)
1692 .m(4)
1693 .n(8)
1694 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001695 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001696 }
1697 }
1698
1699 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_strided_a) {
1700 TEST_REQUIRES_ARM_NEON;
1701 for (size_t k = 9; k < 16; k++) {
1702 GemmMicrokernelTester()
1703 .mr(4)
1704 .nr(8)
1705 .kr(1)
1706 .sr(1)
1707 .m(4)
1708 .n(8)
1709 .k(k)
1710 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001711 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001712 }
1713 }
1714
1715 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_gt_8_subtile) {
1716 TEST_REQUIRES_ARM_NEON;
1717 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001718 for (uint32_t n = 1; n <= 8; n++) {
1719 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001720 GemmMicrokernelTester()
1721 .mr(4)
1722 .nr(8)
1723 .kr(1)
1724 .sr(1)
1725 .m(m)
1726 .n(n)
1727 .k(k)
1728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001729 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001730 }
1731 }
1732 }
1733 }
1734
1735 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8) {
1736 TEST_REQUIRES_ARM_NEON;
1737 for (size_t k = 16; k <= 80; k += 8) {
1738 GemmMicrokernelTester()
1739 .mr(4)
1740 .nr(8)
1741 .kr(1)
1742 .sr(1)
1743 .m(4)
1744 .n(8)
1745 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001746 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001747 }
1748 }
1749
1750 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_strided_a) {
1751 TEST_REQUIRES_ARM_NEON;
1752 for (size_t k = 16; k <= 80; k += 8) {
1753 GemmMicrokernelTester()
1754 .mr(4)
1755 .nr(8)
1756 .kr(1)
1757 .sr(1)
1758 .m(4)
1759 .n(8)
1760 .k(k)
1761 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001762 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001763 }
1764 }
1765
1766 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, k_div_8_subtile) {
1767 TEST_REQUIRES_ARM_NEON;
1768 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001769 for (uint32_t n = 1; n <= 8; n++) {
1770 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001771 GemmMicrokernelTester()
1772 .mr(4)
1773 .nr(8)
1774 .kr(1)
1775 .sr(1)
1776 .m(m)
1777 .n(n)
1778 .k(k)
1779 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001780 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001781 }
1782 }
1783 }
1784 }
1785
1786 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8) {
1787 TEST_REQUIRES_ARM_NEON;
1788 for (uint32_t n = 9; n < 16; n++) {
1789 for (size_t k = 1; k <= 40; k += 9) {
1790 GemmMicrokernelTester()
1791 .mr(4)
1792 .nr(8)
1793 .kr(1)
1794 .sr(1)
1795 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001796 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001797 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001798 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001799 }
1800 }
1801 }
1802
1803 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_cn) {
1804 TEST_REQUIRES_ARM_NEON;
1805 for (uint32_t n = 9; n < 16; n++) {
1806 for (size_t k = 1; k <= 40; k += 9) {
1807 GemmMicrokernelTester()
1808 .mr(4)
1809 .nr(8)
1810 .kr(1)
1811 .sr(1)
1812 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001813 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001814 .k(k)
1815 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001816 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001817 }
1818 }
1819 }
1820
1821 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_strided_a) {
1822 TEST_REQUIRES_ARM_NEON;
1823 for (uint32_t n = 9; n < 16; n++) {
1824 for (size_t k = 1; k <= 40; k += 9) {
1825 GemmMicrokernelTester()
1826 .mr(4)
1827 .nr(8)
1828 .kr(1)
1829 .sr(1)
1830 .m(4)
1831 .n(n)
1832 .k(k)
1833 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001834 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001835 }
1836 }
1837 }
1838
1839 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_gt_8_subtile) {
1840 TEST_REQUIRES_ARM_NEON;
1841 for (uint32_t n = 9; n < 16; n++) {
1842 for (size_t k = 1; k <= 40; k += 9) {
1843 for (uint32_t m = 1; m <= 4; m++) {
1844 GemmMicrokernelTester()
1845 .mr(4)
1846 .nr(8)
1847 .kr(1)
1848 .sr(1)
1849 .m(m)
1850 .n(n)
1851 .k(k)
1852 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001853 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001854 }
1855 }
1856 }
1857 }
1858
1859 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8) {
1860 TEST_REQUIRES_ARM_NEON;
1861 for (uint32_t n = 16; n <= 24; n += 8) {
1862 for (size_t k = 1; k <= 40; k += 9) {
1863 GemmMicrokernelTester()
1864 .mr(4)
1865 .nr(8)
1866 .kr(1)
1867 .sr(1)
1868 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001869 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001870 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001871 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001872 }
1873 }
1874 }
1875
1876 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_cn) {
1877 TEST_REQUIRES_ARM_NEON;
1878 for (uint32_t n = 16; n <= 24; n += 8) {
1879 for (size_t k = 1; k <= 40; k += 9) {
1880 GemmMicrokernelTester()
1881 .mr(4)
1882 .nr(8)
1883 .kr(1)
1884 .sr(1)
1885 .m(4)
1886 .n(n)
1887 .k(k)
1888 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001889 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001890 }
1891 }
1892 }
1893
1894 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_strided_a) {
1895 TEST_REQUIRES_ARM_NEON;
1896 for (uint32_t n = 16; n <= 24; n += 8) {
1897 for (size_t k = 1; k <= 40; k += 9) {
1898 GemmMicrokernelTester()
1899 .mr(4)
1900 .nr(8)
1901 .kr(1)
1902 .sr(1)
1903 .m(4)
1904 .n(n)
1905 .k(k)
1906 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001907 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001908 }
1909 }
1910 }
1911
1912 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, n_div_8_subtile) {
1913 TEST_REQUIRES_ARM_NEON;
1914 for (uint32_t n = 16; n <= 24; n += 8) {
1915 for (size_t k = 1; k <= 40; k += 9) {
1916 for (uint32_t m = 1; m <= 4; m++) {
1917 GemmMicrokernelTester()
1918 .mr(4)
1919 .nr(8)
1920 .kr(1)
1921 .sr(1)
1922 .m(m)
1923 .n(n)
1924 .k(k)
1925 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001926 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001927 }
1928 }
1929 }
1930 }
1931
1932 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm_subtile) {
1933 TEST_REQUIRES_ARM_NEON;
1934 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001935 for (uint32_t n = 1; n <= 8; n++) {
1936 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001937 GemmMicrokernelTester()
1938 .mr(4)
1939 .nr(8)
1940 .kr(1)
1941 .sr(1)
1942 .m(m)
1943 .n(n)
1944 .k(k)
1945 .cm_stride(11)
1946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001947 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001948 }
1949 }
1950 }
1951 }
1952
1953 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmin) {
1954 TEST_REQUIRES_ARM_NEON;
1955 GemmMicrokernelTester()
1956 .mr(4)
1957 .nr(8)
1958 .kr(1)
1959 .sr(1)
1960 .m(4)
1961 .n(8)
1962 .k(8)
1963 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001964 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001965 }
1966
1967 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, qmax) {
1968 TEST_REQUIRES_ARM_NEON;
1969 GemmMicrokernelTester()
1970 .mr(4)
1971 .nr(8)
1972 .kr(1)
1973 .sr(1)
1974 .m(4)
1975 .n(8)
1976 .k(8)
1977 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001978 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001979 }
1980
1981 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, strided_cm) {
1982 TEST_REQUIRES_ARM_NEON;
1983 GemmMicrokernelTester()
1984 .mr(4)
1985 .nr(8)
1986 .kr(1)
1987 .sr(1)
1988 .m(4)
1989 .n(8)
1990 .k(8)
1991 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001992 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001993 }
1994
1995 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_a_zero_point) {
1996 TEST_REQUIRES_ARM_NEON;
1997 for (size_t k = 1; k <= 40; k += 9) {
1998 GemmMicrokernelTester()
1999 .mr(4)
2000 .nr(8)
2001 .kr(1)
2002 .sr(1)
2003 .m(4)
2004 .n(8)
2005 .k(k)
2006 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002007 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002008 }
2009 }
2010
2011 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_b_zero_point) {
2012 TEST_REQUIRES_ARM_NEON;
2013 for (size_t k = 1; k <= 40; k += 9) {
2014 GemmMicrokernelTester()
2015 .mr(4)
2016 .nr(8)
2017 .kr(1)
2018 .sr(1)
2019 .m(4)
2020 .n(8)
2021 .k(k)
2022 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002023 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002024 }
2025 }
2026
2027 TEST(QU8_GEMM_MINMAX_RNDNU_4X8__NEON_MLAL_LANE, no_zero_point) {
2028 TEST_REQUIRES_ARM_NEON;
2029 for (size_t k = 1; k <= 40; k += 9) {
2030 GemmMicrokernelTester()
2031 .mr(4)
2032 .nr(8)
2033 .kr(1)
2034 .sr(1)
2035 .m(4)
2036 .n(8)
2037 .k(k)
2038 .a_zero_point(0)
2039 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002040 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002041 }
2042 }
2043#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2044
2045
2046#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2047 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8) {
2048 TEST_REQUIRES_ARM_NEON;
2049 GemmMicrokernelTester()
2050 .mr(4)
2051 .nr(16)
2052 .kr(1)
2053 .sr(1)
2054 .m(4)
2055 .n(16)
2056 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002057 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002058 }
2059
2060 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cn) {
2061 TEST_REQUIRES_ARM_NEON;
2062 GemmMicrokernelTester()
2063 .mr(4)
2064 .nr(16)
2065 .kr(1)
2066 .sr(1)
2067 .m(4)
2068 .n(16)
2069 .k(8)
2070 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002072 }
2073
2074 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
2075 TEST_REQUIRES_ARM_NEON;
2076 GemmMicrokernelTester()
2077 .mr(4)
2078 .nr(16)
2079 .kr(1)
2080 .sr(1)
2081 .m(4)
2082 .n(16)
2083 .k(8)
2084 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002085 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002086 }
2087
2088 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile) {
2089 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002090 for (uint32_t n = 1; n <= 16; n++) {
2091 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002092 GemmMicrokernelTester()
2093 .mr(4)
2094 .nr(16)
2095 .kr(1)
2096 .sr(1)
2097 .m(m)
2098 .n(n)
2099 .k(8)
2100 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002101 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002102 }
2103 }
2104 }
2105
2106 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
2107 TEST_REQUIRES_ARM_NEON;
2108 for (uint32_t m = 1; m <= 4; m++) {
2109 GemmMicrokernelTester()
2110 .mr(4)
2111 .nr(16)
2112 .kr(1)
2113 .sr(1)
2114 .m(m)
2115 .n(16)
2116 .k(8)
2117 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002118 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002119 }
2120 }
2121
2122 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
2123 TEST_REQUIRES_ARM_NEON;
2124 for (uint32_t n = 1; n <= 16; n++) {
2125 GemmMicrokernelTester()
2126 .mr(4)
2127 .nr(16)
2128 .kr(1)
2129 .sr(1)
2130 .m(4)
2131 .n(n)
2132 .k(8)
2133 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002134 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002135 }
2136 }
2137
2138 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8) {
2139 TEST_REQUIRES_ARM_NEON;
2140 for (size_t k = 1; k < 8; k++) {
2141 GemmMicrokernelTester()
2142 .mr(4)
2143 .nr(16)
2144 .kr(1)
2145 .sr(1)
2146 .m(4)
2147 .n(16)
2148 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002149 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002150 }
2151 }
2152
2153 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
2154 TEST_REQUIRES_ARM_NEON;
2155 for (size_t k = 1; k < 8; k++) {
2156 GemmMicrokernelTester()
2157 .mr(4)
2158 .nr(16)
2159 .kr(1)
2160 .sr(1)
2161 .m(4)
2162 .n(16)
2163 .k(k)
2164 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002165 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002166 }
2167 }
2168
2169 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_lt_8_subtile) {
2170 TEST_REQUIRES_ARM_NEON;
2171 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002172 for (uint32_t n = 1; n <= 16; n++) {
2173 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002174 GemmMicrokernelTester()
2175 .mr(4)
2176 .nr(16)
2177 .kr(1)
2178 .sr(1)
2179 .m(m)
2180 .n(n)
2181 .k(k)
2182 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002183 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002184 }
2185 }
2186 }
2187 }
2188
2189 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8) {
2190 TEST_REQUIRES_ARM_NEON;
2191 for (size_t k = 9; k < 16; k++) {
2192 GemmMicrokernelTester()
2193 .mr(4)
2194 .nr(16)
2195 .kr(1)
2196 .sr(1)
2197 .m(4)
2198 .n(16)
2199 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002200 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002201 }
2202 }
2203
2204 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
2205 TEST_REQUIRES_ARM_NEON;
2206 for (size_t k = 9; k < 16; k++) {
2207 GemmMicrokernelTester()
2208 .mr(4)
2209 .nr(16)
2210 .kr(1)
2211 .sr(1)
2212 .m(4)
2213 .n(16)
2214 .k(k)
2215 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002216 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002217 }
2218 }
2219
2220 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_gt_8_subtile) {
2221 TEST_REQUIRES_ARM_NEON;
2222 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002223 for (uint32_t n = 1; n <= 16; n++) {
2224 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002225 GemmMicrokernelTester()
2226 .mr(4)
2227 .nr(16)
2228 .kr(1)
2229 .sr(1)
2230 .m(m)
2231 .n(n)
2232 .k(k)
2233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002234 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002235 }
2236 }
2237 }
2238 }
2239
2240 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8) {
2241 TEST_REQUIRES_ARM_NEON;
2242 for (size_t k = 16; k <= 80; k += 8) {
2243 GemmMicrokernelTester()
2244 .mr(4)
2245 .nr(16)
2246 .kr(1)
2247 .sr(1)
2248 .m(4)
2249 .n(16)
2250 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002251 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002252 }
2253 }
2254
2255 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_strided_a) {
2256 TEST_REQUIRES_ARM_NEON;
2257 for (size_t k = 16; k <= 80; k += 8) {
2258 GemmMicrokernelTester()
2259 .mr(4)
2260 .nr(16)
2261 .kr(1)
2262 .sr(1)
2263 .m(4)
2264 .n(16)
2265 .k(k)
2266 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002267 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002268 }
2269 }
2270
2271 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, k_div_8_subtile) {
2272 TEST_REQUIRES_ARM_NEON;
2273 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002274 for (uint32_t n = 1; n <= 16; n++) {
2275 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002276 GemmMicrokernelTester()
2277 .mr(4)
2278 .nr(16)
2279 .kr(1)
2280 .sr(1)
2281 .m(m)
2282 .n(n)
2283 .k(k)
2284 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002285 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002286 }
2287 }
2288 }
2289 }
2290
2291 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16) {
2292 TEST_REQUIRES_ARM_NEON;
2293 for (uint32_t n = 17; n < 32; n++) {
2294 for (size_t k = 1; k <= 40; k += 9) {
2295 GemmMicrokernelTester()
2296 .mr(4)
2297 .nr(16)
2298 .kr(1)
2299 .sr(1)
2300 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002301 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002302 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002303 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002304 }
2305 }
2306 }
2307
2308 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
2309 TEST_REQUIRES_ARM_NEON;
2310 for (uint32_t n = 17; n < 32; n++) {
2311 for (size_t k = 1; k <= 40; k += 9) {
2312 GemmMicrokernelTester()
2313 .mr(4)
2314 .nr(16)
2315 .kr(1)
2316 .sr(1)
2317 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002318 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002319 .k(k)
2320 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002321 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002322 }
2323 }
2324 }
2325
2326 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
2327 TEST_REQUIRES_ARM_NEON;
2328 for (uint32_t n = 17; n < 32; n++) {
2329 for (size_t k = 1; k <= 40; k += 9) {
2330 GemmMicrokernelTester()
2331 .mr(4)
2332 .nr(16)
2333 .kr(1)
2334 .sr(1)
2335 .m(4)
2336 .n(n)
2337 .k(k)
2338 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002339 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002340 }
2341 }
2342 }
2343
2344 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_gt_16_subtile) {
2345 TEST_REQUIRES_ARM_NEON;
2346 for (uint32_t n = 17; n < 32; n++) {
2347 for (size_t k = 1; k <= 40; k += 9) {
2348 for (uint32_t m = 1; m <= 4; m++) {
2349 GemmMicrokernelTester()
2350 .mr(4)
2351 .nr(16)
2352 .kr(1)
2353 .sr(1)
2354 .m(m)
2355 .n(n)
2356 .k(k)
2357 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002358 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002359 }
2360 }
2361 }
2362 }
2363
2364 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16) {
2365 TEST_REQUIRES_ARM_NEON;
2366 for (uint32_t n = 32; n <= 48; n += 16) {
2367 for (size_t k = 1; k <= 40; k += 9) {
2368 GemmMicrokernelTester()
2369 .mr(4)
2370 .nr(16)
2371 .kr(1)
2372 .sr(1)
2373 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002374 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002375 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002376 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002377 }
2378 }
2379 }
2380
2381 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
2382 TEST_REQUIRES_ARM_NEON;
2383 for (uint32_t n = 32; n <= 48; n += 16) {
2384 for (size_t k = 1; k <= 40; k += 9) {
2385 GemmMicrokernelTester()
2386 .mr(4)
2387 .nr(16)
2388 .kr(1)
2389 .sr(1)
2390 .m(4)
2391 .n(n)
2392 .k(k)
2393 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002394 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002395 }
2396 }
2397 }
2398
2399 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_strided_a) {
2400 TEST_REQUIRES_ARM_NEON;
2401 for (uint32_t n = 32; n <= 48; n += 16) {
2402 for (size_t k = 1; k <= 40; k += 9) {
2403 GemmMicrokernelTester()
2404 .mr(4)
2405 .nr(16)
2406 .kr(1)
2407 .sr(1)
2408 .m(4)
2409 .n(n)
2410 .k(k)
2411 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002412 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002413 }
2414 }
2415 }
2416
2417 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, n_div_16_subtile) {
2418 TEST_REQUIRES_ARM_NEON;
2419 for (uint32_t n = 32; n <= 48; n += 16) {
2420 for (size_t k = 1; k <= 40; k += 9) {
2421 for (uint32_t m = 1; m <= 4; m++) {
2422 GemmMicrokernelTester()
2423 .mr(4)
2424 .nr(16)
2425 .kr(1)
2426 .sr(1)
2427 .m(m)
2428 .n(n)
2429 .k(k)
2430 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002431 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002432 }
2433 }
2434 }
2435 }
2436
2437 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm_subtile) {
2438 TEST_REQUIRES_ARM_NEON;
2439 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002440 for (uint32_t n = 1; n <= 16; n++) {
2441 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002442 GemmMicrokernelTester()
2443 .mr(4)
2444 .nr(16)
2445 .kr(1)
2446 .sr(1)
2447 .m(m)
2448 .n(n)
2449 .k(k)
2450 .cm_stride(19)
2451 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002452 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002453 }
2454 }
2455 }
2456 }
2457
2458 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmin) {
2459 TEST_REQUIRES_ARM_NEON;
2460 GemmMicrokernelTester()
2461 .mr(4)
2462 .nr(16)
2463 .kr(1)
2464 .sr(1)
2465 .m(4)
2466 .n(16)
2467 .k(8)
2468 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002469 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002470 }
2471
2472 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, qmax) {
2473 TEST_REQUIRES_ARM_NEON;
2474 GemmMicrokernelTester()
2475 .mr(4)
2476 .nr(16)
2477 .kr(1)
2478 .sr(1)
2479 .m(4)
2480 .n(16)
2481 .k(8)
2482 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002483 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002484 }
2485
2486 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, strided_cm) {
2487 TEST_REQUIRES_ARM_NEON;
2488 GemmMicrokernelTester()
2489 .mr(4)
2490 .nr(16)
2491 .kr(1)
2492 .sr(1)
2493 .m(4)
2494 .n(16)
2495 .k(8)
2496 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002497 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002498 }
2499
2500 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_a_zero_point) {
2501 TEST_REQUIRES_ARM_NEON;
2502 for (size_t k = 1; k <= 40; k += 9) {
2503 GemmMicrokernelTester()
2504 .mr(4)
2505 .nr(16)
2506 .kr(1)
2507 .sr(1)
2508 .m(4)
2509 .n(16)
2510 .k(k)
2511 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002512 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002513 }
2514 }
2515
2516 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_b_zero_point) {
2517 TEST_REQUIRES_ARM_NEON;
2518 for (size_t k = 1; k <= 40; k += 9) {
2519 GemmMicrokernelTester()
2520 .mr(4)
2521 .nr(16)
2522 .kr(1)
2523 .sr(1)
2524 .m(4)
2525 .n(16)
2526 .k(k)
2527 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002528 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002529 }
2530 }
2531
2532 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__NEON_MLAL_LANE, no_zero_point) {
2533 TEST_REQUIRES_ARM_NEON;
2534 for (size_t k = 1; k <= 40; k += 9) {
2535 GemmMicrokernelTester()
2536 .mr(4)
2537 .nr(16)
2538 .kr(1)
2539 .sr(1)
2540 .m(4)
2541 .n(16)
2542 .k(k)
2543 .a_zero_point(0)
2544 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08002545 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002546 }
2547 }
2548#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2549
2550
2551#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2552 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8) {
2553 TEST_REQUIRES_ARM_NEON;
2554 GemmMicrokernelTester()
2555 .mr(6)
2556 .nr(16)
2557 .kr(1)
2558 .sr(1)
2559 .m(6)
2560 .n(16)
2561 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002562 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002563 }
2564
2565 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cn) {
2566 TEST_REQUIRES_ARM_NEON;
2567 GemmMicrokernelTester()
2568 .mr(6)
2569 .nr(16)
2570 .kr(1)
2571 .sr(1)
2572 .m(6)
2573 .n(16)
2574 .k(8)
2575 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002576 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002577 }
2578
2579 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
2580 TEST_REQUIRES_ARM_NEON;
2581 GemmMicrokernelTester()
2582 .mr(6)
2583 .nr(16)
2584 .kr(1)
2585 .sr(1)
2586 .m(6)
2587 .n(16)
2588 .k(8)
2589 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002590 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002591 }
2592
2593 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile) {
2594 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002595 for (uint32_t n = 1; n <= 16; n++) {
2596 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002597 GemmMicrokernelTester()
2598 .mr(6)
2599 .nr(16)
2600 .kr(1)
2601 .sr(1)
2602 .m(m)
2603 .n(n)
2604 .k(8)
2605 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002606 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002607 }
2608 }
2609 }
2610
2611 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
2612 TEST_REQUIRES_ARM_NEON;
2613 for (uint32_t m = 1; m <= 6; m++) {
2614 GemmMicrokernelTester()
2615 .mr(6)
2616 .nr(16)
2617 .kr(1)
2618 .sr(1)
2619 .m(m)
2620 .n(16)
2621 .k(8)
2622 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002623 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002624 }
2625 }
2626
2627 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
2628 TEST_REQUIRES_ARM_NEON;
2629 for (uint32_t n = 1; n <= 16; n++) {
2630 GemmMicrokernelTester()
2631 .mr(6)
2632 .nr(16)
2633 .kr(1)
2634 .sr(1)
2635 .m(6)
2636 .n(n)
2637 .k(8)
2638 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002639 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002640 }
2641 }
2642
2643 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8) {
2644 TEST_REQUIRES_ARM_NEON;
2645 for (size_t k = 1; k < 8; k++) {
2646 GemmMicrokernelTester()
2647 .mr(6)
2648 .nr(16)
2649 .kr(1)
2650 .sr(1)
2651 .m(6)
2652 .n(16)
2653 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002654 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002655 }
2656 }
2657
2658 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
2659 TEST_REQUIRES_ARM_NEON;
2660 for (size_t k = 1; k < 8; k++) {
2661 GemmMicrokernelTester()
2662 .mr(6)
2663 .nr(16)
2664 .kr(1)
2665 .sr(1)
2666 .m(6)
2667 .n(16)
2668 .k(k)
2669 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002670 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002671 }
2672 }
2673
2674 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_lt_8_subtile) {
2675 TEST_REQUIRES_ARM_NEON;
2676 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002677 for (uint32_t n = 1; n <= 16; n++) {
2678 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002679 GemmMicrokernelTester()
2680 .mr(6)
2681 .nr(16)
2682 .kr(1)
2683 .sr(1)
2684 .m(m)
2685 .n(n)
2686 .k(k)
2687 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002688 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002689 }
2690 }
2691 }
2692 }
2693
2694 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8) {
2695 TEST_REQUIRES_ARM_NEON;
2696 for (size_t k = 9; k < 16; k++) {
2697 GemmMicrokernelTester()
2698 .mr(6)
2699 .nr(16)
2700 .kr(1)
2701 .sr(1)
2702 .m(6)
2703 .n(16)
2704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002705 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002706 }
2707 }
2708
2709 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
2710 TEST_REQUIRES_ARM_NEON;
2711 for (size_t k = 9; k < 16; k++) {
2712 GemmMicrokernelTester()
2713 .mr(6)
2714 .nr(16)
2715 .kr(1)
2716 .sr(1)
2717 .m(6)
2718 .n(16)
2719 .k(k)
2720 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002721 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002722 }
2723 }
2724
2725 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_gt_8_subtile) {
2726 TEST_REQUIRES_ARM_NEON;
2727 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002728 for (uint32_t n = 1; n <= 16; n++) {
2729 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002730 GemmMicrokernelTester()
2731 .mr(6)
2732 .nr(16)
2733 .kr(1)
2734 .sr(1)
2735 .m(m)
2736 .n(n)
2737 .k(k)
2738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002739 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002740 }
2741 }
2742 }
2743 }
2744
2745 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8) {
2746 TEST_REQUIRES_ARM_NEON;
2747 for (size_t k = 16; k <= 80; k += 8) {
2748 GemmMicrokernelTester()
2749 .mr(6)
2750 .nr(16)
2751 .kr(1)
2752 .sr(1)
2753 .m(6)
2754 .n(16)
2755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002756 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002757 }
2758 }
2759
2760 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_strided_a) {
2761 TEST_REQUIRES_ARM_NEON;
2762 for (size_t k = 16; k <= 80; k += 8) {
2763 GemmMicrokernelTester()
2764 .mr(6)
2765 .nr(16)
2766 .kr(1)
2767 .sr(1)
2768 .m(6)
2769 .n(16)
2770 .k(k)
2771 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002772 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002773 }
2774 }
2775
2776 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, k_div_8_subtile) {
2777 TEST_REQUIRES_ARM_NEON;
2778 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002779 for (uint32_t n = 1; n <= 16; n++) {
2780 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002781 GemmMicrokernelTester()
2782 .mr(6)
2783 .nr(16)
2784 .kr(1)
2785 .sr(1)
2786 .m(m)
2787 .n(n)
2788 .k(k)
2789 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002790 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002791 }
2792 }
2793 }
2794 }
2795
2796 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16) {
2797 TEST_REQUIRES_ARM_NEON;
2798 for (uint32_t n = 17; n < 32; n++) {
2799 for (size_t k = 1; k <= 40; k += 9) {
2800 GemmMicrokernelTester()
2801 .mr(6)
2802 .nr(16)
2803 .kr(1)
2804 .sr(1)
2805 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002806 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002807 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002808 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002809 }
2810 }
2811 }
2812
2813 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
2814 TEST_REQUIRES_ARM_NEON;
2815 for (uint32_t n = 17; n < 32; n++) {
2816 for (size_t k = 1; k <= 40; k += 9) {
2817 GemmMicrokernelTester()
2818 .mr(6)
2819 .nr(16)
2820 .kr(1)
2821 .sr(1)
2822 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002823 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002824 .k(k)
2825 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002826 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002827 }
2828 }
2829 }
2830
2831 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
2832 TEST_REQUIRES_ARM_NEON;
2833 for (uint32_t n = 17; n < 32; n++) {
2834 for (size_t k = 1; k <= 40; k += 9) {
2835 GemmMicrokernelTester()
2836 .mr(6)
2837 .nr(16)
2838 .kr(1)
2839 .sr(1)
2840 .m(6)
2841 .n(n)
2842 .k(k)
2843 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002844 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002845 }
2846 }
2847 }
2848
2849 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_gt_16_subtile) {
2850 TEST_REQUIRES_ARM_NEON;
2851 for (uint32_t n = 17; n < 32; n++) {
2852 for (size_t k = 1; k <= 40; k += 9) {
2853 for (uint32_t m = 1; m <= 6; m++) {
2854 GemmMicrokernelTester()
2855 .mr(6)
2856 .nr(16)
2857 .kr(1)
2858 .sr(1)
2859 .m(m)
2860 .n(n)
2861 .k(k)
2862 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002863 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002864 }
2865 }
2866 }
2867 }
2868
2869 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16) {
2870 TEST_REQUIRES_ARM_NEON;
2871 for (uint32_t n = 32; n <= 48; n += 16) {
2872 for (size_t k = 1; k <= 40; k += 9) {
2873 GemmMicrokernelTester()
2874 .mr(6)
2875 .nr(16)
2876 .kr(1)
2877 .sr(1)
2878 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002879 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002880 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002881 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002882 }
2883 }
2884 }
2885
2886 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
2887 TEST_REQUIRES_ARM_NEON;
2888 for (uint32_t n = 32; n <= 48; n += 16) {
2889 for (size_t k = 1; k <= 40; k += 9) {
2890 GemmMicrokernelTester()
2891 .mr(6)
2892 .nr(16)
2893 .kr(1)
2894 .sr(1)
2895 .m(6)
2896 .n(n)
2897 .k(k)
2898 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002899 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002900 }
2901 }
2902 }
2903
2904 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_strided_a) {
2905 TEST_REQUIRES_ARM_NEON;
2906 for (uint32_t n = 32; n <= 48; n += 16) {
2907 for (size_t k = 1; k <= 40; k += 9) {
2908 GemmMicrokernelTester()
2909 .mr(6)
2910 .nr(16)
2911 .kr(1)
2912 .sr(1)
2913 .m(6)
2914 .n(n)
2915 .k(k)
2916 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002917 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002918 }
2919 }
2920 }
2921
2922 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, n_div_16_subtile) {
2923 TEST_REQUIRES_ARM_NEON;
2924 for (uint32_t n = 32; n <= 48; n += 16) {
2925 for (size_t k = 1; k <= 40; k += 9) {
2926 for (uint32_t m = 1; m <= 6; m++) {
2927 GemmMicrokernelTester()
2928 .mr(6)
2929 .nr(16)
2930 .kr(1)
2931 .sr(1)
2932 .m(m)
2933 .n(n)
2934 .k(k)
2935 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002936 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002937 }
2938 }
2939 }
2940 }
2941
2942 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm_subtile) {
2943 TEST_REQUIRES_ARM_NEON;
2944 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002945 for (uint32_t n = 1; n <= 16; n++) {
2946 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002947 GemmMicrokernelTester()
2948 .mr(6)
2949 .nr(16)
2950 .kr(1)
2951 .sr(1)
2952 .m(m)
2953 .n(n)
2954 .k(k)
2955 .cm_stride(19)
2956 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002957 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002958 }
2959 }
2960 }
2961 }
2962
2963 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmin) {
2964 TEST_REQUIRES_ARM_NEON;
2965 GemmMicrokernelTester()
2966 .mr(6)
2967 .nr(16)
2968 .kr(1)
2969 .sr(1)
2970 .m(6)
2971 .n(16)
2972 .k(8)
2973 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002974 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002975 }
2976
2977 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, qmax) {
2978 TEST_REQUIRES_ARM_NEON;
2979 GemmMicrokernelTester()
2980 .mr(6)
2981 .nr(16)
2982 .kr(1)
2983 .sr(1)
2984 .m(6)
2985 .n(16)
2986 .k(8)
2987 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002988 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002989 }
2990
2991 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, strided_cm) {
2992 TEST_REQUIRES_ARM_NEON;
2993 GemmMicrokernelTester()
2994 .mr(6)
2995 .nr(16)
2996 .kr(1)
2997 .sr(1)
2998 .m(6)
2999 .n(16)
3000 .k(8)
3001 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003002 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003003 }
3004
3005 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_a_zero_point) {
3006 TEST_REQUIRES_ARM_NEON;
3007 for (size_t k = 1; k <= 40; k += 9) {
3008 GemmMicrokernelTester()
3009 .mr(6)
3010 .nr(16)
3011 .kr(1)
3012 .sr(1)
3013 .m(6)
3014 .n(16)
3015 .k(k)
3016 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003017 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003018 }
3019 }
3020
3021 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_b_zero_point) {
3022 TEST_REQUIRES_ARM_NEON;
3023 for (size_t k = 1; k <= 40; k += 9) {
3024 GemmMicrokernelTester()
3025 .mr(6)
3026 .nr(16)
3027 .kr(1)
3028 .sr(1)
3029 .m(6)
3030 .n(16)
3031 .k(k)
3032 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003033 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003034 }
3035 }
3036
3037 TEST(QU8_GEMM_MINMAX_RNDNU_6X16__NEON_MLAL_LANE, no_zero_point) {
3038 TEST_REQUIRES_ARM_NEON;
3039 for (size_t k = 1; k <= 40; k += 9) {
3040 GemmMicrokernelTester()
3041 .mr(6)
3042 .nr(16)
3043 .kr(1)
3044 .sr(1)
3045 .m(6)
3046 .n(16)
3047 .k(k)
3048 .a_zero_point(0)
3049 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003050 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x16__neon_mlal_lane, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003051 }
3052 }
3053#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3054
3055
3056#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3057 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8) {
3058 TEST_REQUIRES_ARM_NEON_DOT;
3059 GemmMicrokernelTester()
3060 .mr(4)
3061 .nr(8)
3062 .kr(4)
3063 .sr(1)
3064 .m(4)
3065 .n(8)
3066 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003067 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003068 }
3069
3070 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
3071 TEST_REQUIRES_ARM_NEON_DOT;
3072 GemmMicrokernelTester()
3073 .mr(4)
3074 .nr(8)
3075 .kr(4)
3076 .sr(1)
3077 .m(4)
3078 .n(8)
3079 .k(8)
3080 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003081 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003082 }
3083
3084 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_strided_a) {
3085 TEST_REQUIRES_ARM_NEON_DOT;
3086 GemmMicrokernelTester()
3087 .mr(4)
3088 .nr(8)
3089 .kr(4)
3090 .sr(1)
3091 .m(4)
3092 .n(8)
3093 .k(8)
3094 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003095 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003096 }
3097
3098 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile) {
3099 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003100 for (uint32_t n = 1; n <= 8; n++) {
3101 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003102 GemmMicrokernelTester()
3103 .mr(4)
3104 .nr(8)
3105 .kr(4)
3106 .sr(1)
3107 .m(m)
3108 .n(n)
3109 .k(8)
3110 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003111 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003112 }
3113 }
3114 }
3115
3116 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_m) {
3117 TEST_REQUIRES_ARM_NEON_DOT;
3118 for (uint32_t m = 1; m <= 4; m++) {
3119 GemmMicrokernelTester()
3120 .mr(4)
3121 .nr(8)
3122 .kr(4)
3123 .sr(1)
3124 .m(m)
3125 .n(8)
3126 .k(8)
3127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003128 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003129 }
3130 }
3131
3132 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_8_subtile_n) {
3133 TEST_REQUIRES_ARM_NEON_DOT;
3134 for (uint32_t n = 1; n <= 8; n++) {
3135 GemmMicrokernelTester()
3136 .mr(4)
3137 .nr(8)
3138 .kr(4)
3139 .sr(1)
3140 .m(4)
3141 .n(n)
3142 .k(8)
3143 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003144 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003145 }
3146 }
3147
3148 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8) {
3149 TEST_REQUIRES_ARM_NEON_DOT;
3150 for (size_t k = 1; k < 8; k++) {
3151 GemmMicrokernelTester()
3152 .mr(4)
3153 .nr(8)
3154 .kr(4)
3155 .sr(1)
3156 .m(4)
3157 .n(8)
3158 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003159 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003160 }
3161 }
3162
3163 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8_strided_a) {
3164 TEST_REQUIRES_ARM_NEON_DOT;
3165 for (size_t k = 1; k < 8; k++) {
3166 GemmMicrokernelTester()
3167 .mr(4)
3168 .nr(8)
3169 .kr(4)
3170 .sr(1)
3171 .m(4)
3172 .n(8)
3173 .k(k)
3174 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003175 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003176 }
3177 }
3178
3179 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_8_subtile) {
3180 TEST_REQUIRES_ARM_NEON_DOT;
3181 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003182 for (uint32_t n = 1; n <= 8; n++) {
3183 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003184 GemmMicrokernelTester()
3185 .mr(4)
3186 .nr(8)
3187 .kr(4)
3188 .sr(1)
3189 .m(m)
3190 .n(n)
3191 .k(k)
3192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003193 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003194 }
3195 }
3196 }
3197 }
3198
3199 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8) {
3200 TEST_REQUIRES_ARM_NEON_DOT;
3201 for (size_t k = 9; k < 16; k++) {
3202 GemmMicrokernelTester()
3203 .mr(4)
3204 .nr(8)
3205 .kr(4)
3206 .sr(1)
3207 .m(4)
3208 .n(8)
3209 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003210 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003211 }
3212 }
3213
3214 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8_strided_a) {
3215 TEST_REQUIRES_ARM_NEON_DOT;
3216 for (size_t k = 9; k < 16; k++) {
3217 GemmMicrokernelTester()
3218 .mr(4)
3219 .nr(8)
3220 .kr(4)
3221 .sr(1)
3222 .m(4)
3223 .n(8)
3224 .k(k)
3225 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003226 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003227 }
3228 }
3229
3230 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_8_subtile) {
3231 TEST_REQUIRES_ARM_NEON_DOT;
3232 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003233 for (uint32_t n = 1; n <= 8; n++) {
3234 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003235 GemmMicrokernelTester()
3236 .mr(4)
3237 .nr(8)
3238 .kr(4)
3239 .sr(1)
3240 .m(m)
3241 .n(n)
3242 .k(k)
3243 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003244 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003245 }
3246 }
3247 }
3248 }
3249
3250 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8) {
3251 TEST_REQUIRES_ARM_NEON_DOT;
3252 for (size_t k = 16; k <= 80; k += 8) {
3253 GemmMicrokernelTester()
3254 .mr(4)
3255 .nr(8)
3256 .kr(4)
3257 .sr(1)
3258 .m(4)
3259 .n(8)
3260 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003261 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003262 }
3263 }
3264
3265 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8_strided_a) {
3266 TEST_REQUIRES_ARM_NEON_DOT;
3267 for (size_t k = 16; k <= 80; k += 8) {
3268 GemmMicrokernelTester()
3269 .mr(4)
3270 .nr(8)
3271 .kr(4)
3272 .sr(1)
3273 .m(4)
3274 .n(8)
3275 .k(k)
3276 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003277 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003278 }
3279 }
3280
3281 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, k_div_8_subtile) {
3282 TEST_REQUIRES_ARM_NEON_DOT;
3283 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003284 for (uint32_t n = 1; n <= 8; n++) {
3285 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003286 GemmMicrokernelTester()
3287 .mr(4)
3288 .nr(8)
3289 .kr(4)
3290 .sr(1)
3291 .m(m)
3292 .n(n)
3293 .k(k)
3294 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003295 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003296 }
3297 }
3298 }
3299 }
3300
3301 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8) {
3302 TEST_REQUIRES_ARM_NEON_DOT;
3303 for (uint32_t n = 9; n < 16; n++) {
3304 for (size_t k = 1; k <= 40; k += 9) {
3305 GemmMicrokernelTester()
3306 .mr(4)
3307 .nr(8)
3308 .kr(4)
3309 .sr(1)
3310 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003311 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003312 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003313 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003314 }
3315 }
3316 }
3317
3318 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_strided_cn) {
3319 TEST_REQUIRES_ARM_NEON_DOT;
3320 for (uint32_t n = 9; n < 16; n++) {
3321 for (size_t k = 1; k <= 40; k += 9) {
3322 GemmMicrokernelTester()
3323 .mr(4)
3324 .nr(8)
3325 .kr(4)
3326 .sr(1)
3327 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003328 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003329 .k(k)
3330 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003331 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003332 }
3333 }
3334 }
3335
3336 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_strided_a) {
3337 TEST_REQUIRES_ARM_NEON_DOT;
3338 for (uint32_t n = 9; n < 16; n++) {
3339 for (size_t k = 1; k <= 40; k += 9) {
3340 GemmMicrokernelTester()
3341 .mr(4)
3342 .nr(8)
3343 .kr(4)
3344 .sr(1)
3345 .m(4)
3346 .n(n)
3347 .k(k)
3348 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003349 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003350 }
3351 }
3352 }
3353
3354 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_8_subtile) {
3355 TEST_REQUIRES_ARM_NEON_DOT;
3356 for (uint32_t n = 9; n < 16; n++) {
3357 for (size_t k = 1; k <= 40; k += 9) {
3358 for (uint32_t m = 1; m <= 4; m++) {
3359 GemmMicrokernelTester()
3360 .mr(4)
3361 .nr(8)
3362 .kr(4)
3363 .sr(1)
3364 .m(m)
3365 .n(n)
3366 .k(k)
3367 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003368 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003369 }
3370 }
3371 }
3372 }
3373
3374 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8) {
3375 TEST_REQUIRES_ARM_NEON_DOT;
3376 for (uint32_t n = 16; n <= 24; n += 8) {
3377 for (size_t k = 1; k <= 40; k += 9) {
3378 GemmMicrokernelTester()
3379 .mr(4)
3380 .nr(8)
3381 .kr(4)
3382 .sr(1)
3383 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003384 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003385 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003386 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003387 }
3388 }
3389 }
3390
3391 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_strided_cn) {
3392 TEST_REQUIRES_ARM_NEON_DOT;
3393 for (uint32_t n = 16; n <= 24; n += 8) {
3394 for (size_t k = 1; k <= 40; k += 9) {
3395 GemmMicrokernelTester()
3396 .mr(4)
3397 .nr(8)
3398 .kr(4)
3399 .sr(1)
3400 .m(4)
3401 .n(n)
3402 .k(k)
3403 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003404 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003405 }
3406 }
3407 }
3408
3409 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_strided_a) {
3410 TEST_REQUIRES_ARM_NEON_DOT;
3411 for (uint32_t n = 16; n <= 24; n += 8) {
3412 for (size_t k = 1; k <= 40; k += 9) {
3413 GemmMicrokernelTester()
3414 .mr(4)
3415 .nr(8)
3416 .kr(4)
3417 .sr(1)
3418 .m(4)
3419 .n(n)
3420 .k(k)
3421 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003422 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003423 }
3424 }
3425 }
3426
3427 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, n_div_8_subtile) {
3428 TEST_REQUIRES_ARM_NEON_DOT;
3429 for (uint32_t n = 16; n <= 24; n += 8) {
3430 for (size_t k = 1; k <= 40; k += 9) {
3431 for (uint32_t m = 1; m <= 4; m++) {
3432 GemmMicrokernelTester()
3433 .mr(4)
3434 .nr(8)
3435 .kr(4)
3436 .sr(1)
3437 .m(m)
3438 .n(n)
3439 .k(k)
3440 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003441 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003442 }
3443 }
3444 }
3445 }
3446
3447 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
3448 TEST_REQUIRES_ARM_NEON_DOT;
3449 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003450 for (uint32_t n = 1; n <= 8; n++) {
3451 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003452 GemmMicrokernelTester()
3453 .mr(4)
3454 .nr(8)
3455 .kr(4)
3456 .sr(1)
3457 .m(m)
3458 .n(n)
3459 .k(k)
3460 .cm_stride(11)
3461 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003462 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003463 }
3464 }
3465 }
3466 }
3467
3468 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
3469 TEST_REQUIRES_ARM_NEON_DOT;
3470 GemmMicrokernelTester()
3471 .mr(4)
3472 .nr(8)
3473 .kr(4)
3474 .sr(1)
3475 .m(4)
3476 .n(8)
3477 .k(8)
3478 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003479 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003480 }
3481
3482 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
3483 TEST_REQUIRES_ARM_NEON_DOT;
3484 GemmMicrokernelTester()
3485 .mr(4)
3486 .nr(8)
3487 .kr(4)
3488 .sr(1)
3489 .m(4)
3490 .n(8)
3491 .k(8)
3492 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003493 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003494 }
3495
3496 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
3497 TEST_REQUIRES_ARM_NEON_DOT;
3498 GemmMicrokernelTester()
3499 .mr(4)
3500 .nr(8)
3501 .kr(4)
3502 .sr(1)
3503 .m(4)
3504 .n(8)
3505 .k(8)
3506 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003507 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003508 }
3509
3510 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_a_zero_point) {
3511 TEST_REQUIRES_ARM_NEON_DOT;
3512 for (size_t k = 1; k <= 40; k += 9) {
3513 GemmMicrokernelTester()
3514 .mr(4)
3515 .nr(8)
3516 .kr(4)
3517 .sr(1)
3518 .m(4)
3519 .n(8)
3520 .k(k)
3521 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003522 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003523 }
3524 }
3525
3526 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_b_zero_point) {
3527 TEST_REQUIRES_ARM_NEON_DOT;
3528 for (size_t k = 1; k <= 40; k += 9) {
3529 GemmMicrokernelTester()
3530 .mr(4)
3531 .nr(8)
3532 .kr(4)
3533 .sr(1)
3534 .m(4)
3535 .n(8)
3536 .k(k)
3537 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003538 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003539 }
3540 }
3541
3542 TEST(QU8_GEMM_MINMAX_RNDNU_4X8C4__AARCH64_NEONDOT_CORTEX_A55, no_zero_point) {
3543 TEST_REQUIRES_ARM_NEON_DOT;
3544 for (size_t k = 1; k <= 40; k += 9) {
3545 GemmMicrokernelTester()
3546 .mr(4)
3547 .nr(8)
3548 .kr(4)
3549 .sr(1)
3550 .m(4)
3551 .n(8)
3552 .k(k)
3553 .a_zero_point(0)
3554 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08003555 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x8c4__aarch64_neondot_cortex_a55, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003556 }
3557 }
3558#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
3559
3560
3561#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
3562 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8) {
3563 TEST_REQUIRES_ARM_NEON_DOT;
3564 GemmMicrokernelTester()
3565 .mr(1)
3566 .nr(8)
3567 .kr(4)
3568 .sr(1)
3569 .m(1)
3570 .n(8)
3571 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003572 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003573 }
3574
3575 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cn) {
3576 TEST_REQUIRES_ARM_NEON_DOT;
3577 GemmMicrokernelTester()
3578 .mr(1)
3579 .nr(8)
3580 .kr(4)
3581 .sr(1)
3582 .m(1)
3583 .n(8)
3584 .k(8)
3585 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003586 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003587 }
3588
3589 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_strided_a) {
3590 TEST_REQUIRES_ARM_NEON_DOT;
3591 GemmMicrokernelTester()
3592 .mr(1)
3593 .nr(8)
3594 .kr(4)
3595 .sr(1)
3596 .m(1)
3597 .n(8)
3598 .k(8)
3599 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003600 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003601 }
3602
3603 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile) {
3604 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003605 for (uint32_t n = 1; n <= 8; n++) {
3606 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003607 GemmMicrokernelTester()
3608 .mr(1)
3609 .nr(8)
3610 .kr(4)
3611 .sr(1)
3612 .m(m)
3613 .n(n)
3614 .k(8)
3615 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003616 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003617 }
3618 }
3619 }
3620
3621 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_m) {
3622 TEST_REQUIRES_ARM_NEON_DOT;
3623 for (uint32_t m = 1; m <= 1; m++) {
3624 GemmMicrokernelTester()
3625 .mr(1)
3626 .nr(8)
3627 .kr(4)
3628 .sr(1)
3629 .m(m)
3630 .n(8)
3631 .k(8)
3632 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003633 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003634 }
3635 }
3636
3637 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_eq_8_subtile_n) {
3638 TEST_REQUIRES_ARM_NEON_DOT;
3639 for (uint32_t n = 1; n <= 8; n++) {
3640 GemmMicrokernelTester()
3641 .mr(1)
3642 .nr(8)
3643 .kr(4)
3644 .sr(1)
3645 .m(1)
3646 .n(n)
3647 .k(8)
3648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003649 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003650 }
3651 }
3652
3653 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8) {
3654 TEST_REQUIRES_ARM_NEON_DOT;
3655 for (size_t k = 1; k < 8; k++) {
3656 GemmMicrokernelTester()
3657 .mr(1)
3658 .nr(8)
3659 .kr(4)
3660 .sr(1)
3661 .m(1)
3662 .n(8)
3663 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003664 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003665 }
3666 }
3667
3668 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_strided_a) {
3669 TEST_REQUIRES_ARM_NEON_DOT;
3670 for (size_t k = 1; k < 8; k++) {
3671 GemmMicrokernelTester()
3672 .mr(1)
3673 .nr(8)
3674 .kr(4)
3675 .sr(1)
3676 .m(1)
3677 .n(8)
3678 .k(k)
3679 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003680 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003681 }
3682 }
3683
3684 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_lt_8_subtile) {
3685 TEST_REQUIRES_ARM_NEON_DOT;
3686 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003687 for (uint32_t n = 1; n <= 8; n++) {
3688 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003689 GemmMicrokernelTester()
3690 .mr(1)
3691 .nr(8)
3692 .kr(4)
3693 .sr(1)
3694 .m(m)
3695 .n(n)
3696 .k(k)
3697 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003698 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003699 }
3700 }
3701 }
3702 }
3703
3704 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8) {
3705 TEST_REQUIRES_ARM_NEON_DOT;
3706 for (size_t k = 9; k < 16; k++) {
3707 GemmMicrokernelTester()
3708 .mr(1)
3709 .nr(8)
3710 .kr(4)
3711 .sr(1)
3712 .m(1)
3713 .n(8)
3714 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003715 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003716 }
3717 }
3718
3719 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_strided_a) {
3720 TEST_REQUIRES_ARM_NEON_DOT;
3721 for (size_t k = 9; k < 16; k++) {
3722 GemmMicrokernelTester()
3723 .mr(1)
3724 .nr(8)
3725 .kr(4)
3726 .sr(1)
3727 .m(1)
3728 .n(8)
3729 .k(k)
3730 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003731 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003732 }
3733 }
3734
3735 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_gt_8_subtile) {
3736 TEST_REQUIRES_ARM_NEON_DOT;
3737 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003738 for (uint32_t n = 1; n <= 8; n++) {
3739 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003740 GemmMicrokernelTester()
3741 .mr(1)
3742 .nr(8)
3743 .kr(4)
3744 .sr(1)
3745 .m(m)
3746 .n(n)
3747 .k(k)
3748 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003749 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003750 }
3751 }
3752 }
3753 }
3754
3755 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8) {
3756 TEST_REQUIRES_ARM_NEON_DOT;
3757 for (size_t k = 16; k <= 80; k += 8) {
3758 GemmMicrokernelTester()
3759 .mr(1)
3760 .nr(8)
3761 .kr(4)
3762 .sr(1)
3763 .m(1)
3764 .n(8)
3765 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003766 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003767 }
3768 }
3769
3770 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_strided_a) {
3771 TEST_REQUIRES_ARM_NEON_DOT;
3772 for (size_t k = 16; k <= 80; k += 8) {
3773 GemmMicrokernelTester()
3774 .mr(1)
3775 .nr(8)
3776 .kr(4)
3777 .sr(1)
3778 .m(1)
3779 .n(8)
3780 .k(k)
3781 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003782 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003783 }
3784 }
3785
3786 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, k_div_8_subtile) {
3787 TEST_REQUIRES_ARM_NEON_DOT;
3788 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003789 for (uint32_t n = 1; n <= 8; n++) {
3790 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003791 GemmMicrokernelTester()
3792 .mr(1)
3793 .nr(8)
3794 .kr(4)
3795 .sr(1)
3796 .m(m)
3797 .n(n)
3798 .k(k)
3799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003800 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003801 }
3802 }
3803 }
3804 }
3805
3806 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8) {
3807 TEST_REQUIRES_ARM_NEON_DOT;
3808 for (uint32_t n = 9; n < 16; n++) {
3809 for (size_t k = 1; k <= 40; k += 9) {
3810 GemmMicrokernelTester()
3811 .mr(1)
3812 .nr(8)
3813 .kr(4)
3814 .sr(1)
3815 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003816 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003817 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003818 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003819 }
3820 }
3821 }
3822
3823 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_cn) {
3824 TEST_REQUIRES_ARM_NEON_DOT;
3825 for (uint32_t n = 9; n < 16; n++) {
3826 for (size_t k = 1; k <= 40; k += 9) {
3827 GemmMicrokernelTester()
3828 .mr(1)
3829 .nr(8)
3830 .kr(4)
3831 .sr(1)
3832 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003833 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003834 .k(k)
3835 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003836 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003837 }
3838 }
3839 }
3840
3841 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_strided_a) {
3842 TEST_REQUIRES_ARM_NEON_DOT;
3843 for (uint32_t n = 9; n < 16; n++) {
3844 for (size_t k = 1; k <= 40; k += 9) {
3845 GemmMicrokernelTester()
3846 .mr(1)
3847 .nr(8)
3848 .kr(4)
3849 .sr(1)
3850 .m(1)
3851 .n(n)
3852 .k(k)
3853 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003854 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003855 }
3856 }
3857 }
3858
3859 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_gt_8_subtile) {
3860 TEST_REQUIRES_ARM_NEON_DOT;
3861 for (uint32_t n = 9; n < 16; n++) {
3862 for (size_t k = 1; k <= 40; k += 9) {
3863 for (uint32_t m = 1; m <= 1; m++) {
3864 GemmMicrokernelTester()
3865 .mr(1)
3866 .nr(8)
3867 .kr(4)
3868 .sr(1)
3869 .m(m)
3870 .n(n)
3871 .k(k)
3872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003873 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003874 }
3875 }
3876 }
3877 }
3878
3879 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8) {
3880 TEST_REQUIRES_ARM_NEON_DOT;
3881 for (uint32_t n = 16; n <= 24; n += 8) {
3882 for (size_t k = 1; k <= 40; k += 9) {
3883 GemmMicrokernelTester()
3884 .mr(1)
3885 .nr(8)
3886 .kr(4)
3887 .sr(1)
3888 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003889 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003890 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003891 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003892 }
3893 }
3894 }
3895
3896 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_cn) {
3897 TEST_REQUIRES_ARM_NEON_DOT;
3898 for (uint32_t n = 16; n <= 24; n += 8) {
3899 for (size_t k = 1; k <= 40; k += 9) {
3900 GemmMicrokernelTester()
3901 .mr(1)
3902 .nr(8)
3903 .kr(4)
3904 .sr(1)
3905 .m(1)
3906 .n(n)
3907 .k(k)
3908 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003909 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003910 }
3911 }
3912 }
3913
3914 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_strided_a) {
3915 TEST_REQUIRES_ARM_NEON_DOT;
3916 for (uint32_t n = 16; n <= 24; n += 8) {
3917 for (size_t k = 1; k <= 40; k += 9) {
3918 GemmMicrokernelTester()
3919 .mr(1)
3920 .nr(8)
3921 .kr(4)
3922 .sr(1)
3923 .m(1)
3924 .n(n)
3925 .k(k)
3926 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003927 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003928 }
3929 }
3930 }
3931
3932 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, n_div_8_subtile) {
3933 TEST_REQUIRES_ARM_NEON_DOT;
3934 for (uint32_t n = 16; n <= 24; n += 8) {
3935 for (size_t k = 1; k <= 40; k += 9) {
3936 for (uint32_t m = 1; m <= 1; m++) {
3937 GemmMicrokernelTester()
3938 .mr(1)
3939 .nr(8)
3940 .kr(4)
3941 .sr(1)
3942 .m(m)
3943 .n(n)
3944 .k(k)
3945 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003946 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003947 }
3948 }
3949 }
3950 }
3951
3952 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm_subtile) {
3953 TEST_REQUIRES_ARM_NEON_DOT;
3954 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003955 for (uint32_t n = 1; n <= 8; n++) {
3956 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003957 GemmMicrokernelTester()
3958 .mr(1)
3959 .nr(8)
3960 .kr(4)
3961 .sr(1)
3962 .m(m)
3963 .n(n)
3964 .k(k)
3965 .cm_stride(11)
3966 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003967 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003968 }
3969 }
3970 }
3971 }
3972
3973 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmin) {
3974 TEST_REQUIRES_ARM_NEON_DOT;
3975 GemmMicrokernelTester()
3976 .mr(1)
3977 .nr(8)
3978 .kr(4)
3979 .sr(1)
3980 .m(1)
3981 .n(8)
3982 .k(8)
3983 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003984 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003985 }
3986
3987 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, qmax) {
3988 TEST_REQUIRES_ARM_NEON_DOT;
3989 GemmMicrokernelTester()
3990 .mr(1)
3991 .nr(8)
3992 .kr(4)
3993 .sr(1)
3994 .m(1)
3995 .n(8)
3996 .k(8)
3997 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003998 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003999 }
4000
4001 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, strided_cm) {
4002 TEST_REQUIRES_ARM_NEON_DOT;
4003 GemmMicrokernelTester()
4004 .mr(1)
4005 .nr(8)
4006 .kr(4)
4007 .sr(1)
4008 .m(1)
4009 .n(8)
4010 .k(8)
4011 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004012 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004013 }
4014
4015 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_a_zero_point) {
4016 TEST_REQUIRES_ARM_NEON_DOT;
4017 for (size_t k = 1; k <= 40; k += 9) {
4018 GemmMicrokernelTester()
4019 .mr(1)
4020 .nr(8)
4021 .kr(4)
4022 .sr(1)
4023 .m(1)
4024 .n(8)
4025 .k(k)
4026 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004027 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004028 }
4029 }
4030
4031 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_b_zero_point) {
4032 TEST_REQUIRES_ARM_NEON_DOT;
4033 for (size_t k = 1; k <= 40; k += 9) {
4034 GemmMicrokernelTester()
4035 .mr(1)
4036 .nr(8)
4037 .kr(4)
4038 .sr(1)
4039 .m(1)
4040 .n(8)
4041 .k(k)
4042 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004043 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004044 }
4045 }
4046
4047 TEST(QU8_GEMM_MINMAX_RNDNU_1X8C4__NEONDOT, no_zero_point) {
4048 TEST_REQUIRES_ARM_NEON_DOT;
4049 for (size_t k = 1; k <= 40; k += 9) {
4050 GemmMicrokernelTester()
4051 .mr(1)
4052 .nr(8)
4053 .kr(4)
4054 .sr(1)
4055 .m(1)
4056 .n(8)
4057 .k(k)
4058 .a_zero_point(0)
4059 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004060 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_1x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004061 }
4062 }
4063#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
4064
4065
4066#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
4067 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8) {
4068 TEST_REQUIRES_ARM_NEON_DOT;
4069 GemmMicrokernelTester()
4070 .mr(6)
4071 .nr(8)
4072 .kr(4)
4073 .sr(1)
4074 .m(6)
4075 .n(8)
4076 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004077 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004078 }
4079
4080 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cn) {
4081 TEST_REQUIRES_ARM_NEON_DOT;
4082 GemmMicrokernelTester()
4083 .mr(6)
4084 .nr(8)
4085 .kr(4)
4086 .sr(1)
4087 .m(6)
4088 .n(8)
4089 .k(8)
4090 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004091 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004092 }
4093
4094 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_strided_a) {
4095 TEST_REQUIRES_ARM_NEON_DOT;
4096 GemmMicrokernelTester()
4097 .mr(6)
4098 .nr(8)
4099 .kr(4)
4100 .sr(1)
4101 .m(6)
4102 .n(8)
4103 .k(8)
4104 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004105 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004106 }
4107
4108 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile) {
4109 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004110 for (uint32_t n = 1; n <= 8; n++) {
4111 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004112 GemmMicrokernelTester()
4113 .mr(6)
4114 .nr(8)
4115 .kr(4)
4116 .sr(1)
4117 .m(m)
4118 .n(n)
4119 .k(8)
4120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004121 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004122 }
4123 }
4124 }
4125
4126 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_m) {
4127 TEST_REQUIRES_ARM_NEON_DOT;
4128 for (uint32_t m = 1; m <= 6; m++) {
4129 GemmMicrokernelTester()
4130 .mr(6)
4131 .nr(8)
4132 .kr(4)
4133 .sr(1)
4134 .m(m)
4135 .n(8)
4136 .k(8)
4137 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004138 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004139 }
4140 }
4141
4142 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_eq_8_subtile_n) {
4143 TEST_REQUIRES_ARM_NEON_DOT;
4144 for (uint32_t n = 1; n <= 8; n++) {
4145 GemmMicrokernelTester()
4146 .mr(6)
4147 .nr(8)
4148 .kr(4)
4149 .sr(1)
4150 .m(6)
4151 .n(n)
4152 .k(8)
4153 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004154 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004155 }
4156 }
4157
4158 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8) {
4159 TEST_REQUIRES_ARM_NEON_DOT;
4160 for (size_t k = 1; k < 8; k++) {
4161 GemmMicrokernelTester()
4162 .mr(6)
4163 .nr(8)
4164 .kr(4)
4165 .sr(1)
4166 .m(6)
4167 .n(8)
4168 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004169 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004170 }
4171 }
4172
4173 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_strided_a) {
4174 TEST_REQUIRES_ARM_NEON_DOT;
4175 for (size_t k = 1; k < 8; k++) {
4176 GemmMicrokernelTester()
4177 .mr(6)
4178 .nr(8)
4179 .kr(4)
4180 .sr(1)
4181 .m(6)
4182 .n(8)
4183 .k(k)
4184 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004185 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004186 }
4187 }
4188
4189 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_lt_8_subtile) {
4190 TEST_REQUIRES_ARM_NEON_DOT;
4191 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004192 for (uint32_t n = 1; n <= 8; n++) {
4193 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004194 GemmMicrokernelTester()
4195 .mr(6)
4196 .nr(8)
4197 .kr(4)
4198 .sr(1)
4199 .m(m)
4200 .n(n)
4201 .k(k)
4202 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004203 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004204 }
4205 }
4206 }
4207 }
4208
4209 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8) {
4210 TEST_REQUIRES_ARM_NEON_DOT;
4211 for (size_t k = 9; k < 16; k++) {
4212 GemmMicrokernelTester()
4213 .mr(6)
4214 .nr(8)
4215 .kr(4)
4216 .sr(1)
4217 .m(6)
4218 .n(8)
4219 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004220 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004221 }
4222 }
4223
4224 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_strided_a) {
4225 TEST_REQUIRES_ARM_NEON_DOT;
4226 for (size_t k = 9; k < 16; k++) {
4227 GemmMicrokernelTester()
4228 .mr(6)
4229 .nr(8)
4230 .kr(4)
4231 .sr(1)
4232 .m(6)
4233 .n(8)
4234 .k(k)
4235 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004236 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004237 }
4238 }
4239
4240 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_gt_8_subtile) {
4241 TEST_REQUIRES_ARM_NEON_DOT;
4242 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004243 for (uint32_t n = 1; n <= 8; n++) {
4244 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004245 GemmMicrokernelTester()
4246 .mr(6)
4247 .nr(8)
4248 .kr(4)
4249 .sr(1)
4250 .m(m)
4251 .n(n)
4252 .k(k)
4253 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004254 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004255 }
4256 }
4257 }
4258 }
4259
4260 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8) {
4261 TEST_REQUIRES_ARM_NEON_DOT;
4262 for (size_t k = 16; k <= 80; k += 8) {
4263 GemmMicrokernelTester()
4264 .mr(6)
4265 .nr(8)
4266 .kr(4)
4267 .sr(1)
4268 .m(6)
4269 .n(8)
4270 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004271 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004272 }
4273 }
4274
4275 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_strided_a) {
4276 TEST_REQUIRES_ARM_NEON_DOT;
4277 for (size_t k = 16; k <= 80; k += 8) {
4278 GemmMicrokernelTester()
4279 .mr(6)
4280 .nr(8)
4281 .kr(4)
4282 .sr(1)
4283 .m(6)
4284 .n(8)
4285 .k(k)
4286 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004287 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004288 }
4289 }
4290
4291 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, k_div_8_subtile) {
4292 TEST_REQUIRES_ARM_NEON_DOT;
4293 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004294 for (uint32_t n = 1; n <= 8; n++) {
4295 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004296 GemmMicrokernelTester()
4297 .mr(6)
4298 .nr(8)
4299 .kr(4)
4300 .sr(1)
4301 .m(m)
4302 .n(n)
4303 .k(k)
4304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004305 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004306 }
4307 }
4308 }
4309 }
4310
4311 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8) {
4312 TEST_REQUIRES_ARM_NEON_DOT;
4313 for (uint32_t n = 9; n < 16; n++) {
4314 for (size_t k = 1; k <= 40; k += 9) {
4315 GemmMicrokernelTester()
4316 .mr(6)
4317 .nr(8)
4318 .kr(4)
4319 .sr(1)
4320 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004321 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004322 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004323 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004324 }
4325 }
4326 }
4327
4328 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_cn) {
4329 TEST_REQUIRES_ARM_NEON_DOT;
4330 for (uint32_t n = 9; n < 16; n++) {
4331 for (size_t k = 1; k <= 40; k += 9) {
4332 GemmMicrokernelTester()
4333 .mr(6)
4334 .nr(8)
4335 .kr(4)
4336 .sr(1)
4337 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004338 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004339 .k(k)
4340 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004341 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004342 }
4343 }
4344 }
4345
4346 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_strided_a) {
4347 TEST_REQUIRES_ARM_NEON_DOT;
4348 for (uint32_t n = 9; n < 16; n++) {
4349 for (size_t k = 1; k <= 40; k += 9) {
4350 GemmMicrokernelTester()
4351 .mr(6)
4352 .nr(8)
4353 .kr(4)
4354 .sr(1)
4355 .m(6)
4356 .n(n)
4357 .k(k)
4358 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004359 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004360 }
4361 }
4362 }
4363
4364 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_gt_8_subtile) {
4365 TEST_REQUIRES_ARM_NEON_DOT;
4366 for (uint32_t n = 9; n < 16; n++) {
4367 for (size_t k = 1; k <= 40; k += 9) {
4368 for (uint32_t m = 1; m <= 6; m++) {
4369 GemmMicrokernelTester()
4370 .mr(6)
4371 .nr(8)
4372 .kr(4)
4373 .sr(1)
4374 .m(m)
4375 .n(n)
4376 .k(k)
4377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004378 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004379 }
4380 }
4381 }
4382 }
4383
4384 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8) {
4385 TEST_REQUIRES_ARM_NEON_DOT;
4386 for (uint32_t n = 16; n <= 24; n += 8) {
4387 for (size_t k = 1; k <= 40; k += 9) {
4388 GemmMicrokernelTester()
4389 .mr(6)
4390 .nr(8)
4391 .kr(4)
4392 .sr(1)
4393 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004394 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004395 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004396 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004397 }
4398 }
4399 }
4400
4401 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_cn) {
4402 TEST_REQUIRES_ARM_NEON_DOT;
4403 for (uint32_t n = 16; n <= 24; n += 8) {
4404 for (size_t k = 1; k <= 40; k += 9) {
4405 GemmMicrokernelTester()
4406 .mr(6)
4407 .nr(8)
4408 .kr(4)
4409 .sr(1)
4410 .m(6)
4411 .n(n)
4412 .k(k)
4413 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004414 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004415 }
4416 }
4417 }
4418
4419 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_strided_a) {
4420 TEST_REQUIRES_ARM_NEON_DOT;
4421 for (uint32_t n = 16; n <= 24; n += 8) {
4422 for (size_t k = 1; k <= 40; k += 9) {
4423 GemmMicrokernelTester()
4424 .mr(6)
4425 .nr(8)
4426 .kr(4)
4427 .sr(1)
4428 .m(6)
4429 .n(n)
4430 .k(k)
4431 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004432 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004433 }
4434 }
4435 }
4436
4437 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, n_div_8_subtile) {
4438 TEST_REQUIRES_ARM_NEON_DOT;
4439 for (uint32_t n = 16; n <= 24; n += 8) {
4440 for (size_t k = 1; k <= 40; k += 9) {
4441 for (uint32_t m = 1; m <= 6; m++) {
4442 GemmMicrokernelTester()
4443 .mr(6)
4444 .nr(8)
4445 .kr(4)
4446 .sr(1)
4447 .m(m)
4448 .n(n)
4449 .k(k)
4450 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004451 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004452 }
4453 }
4454 }
4455 }
4456
4457 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm_subtile) {
4458 TEST_REQUIRES_ARM_NEON_DOT;
4459 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004460 for (uint32_t n = 1; n <= 8; n++) {
4461 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004462 GemmMicrokernelTester()
4463 .mr(6)
4464 .nr(8)
4465 .kr(4)
4466 .sr(1)
4467 .m(m)
4468 .n(n)
4469 .k(k)
4470 .cm_stride(11)
4471 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004472 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004473 }
4474 }
4475 }
4476 }
4477
4478 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmin) {
4479 TEST_REQUIRES_ARM_NEON_DOT;
4480 GemmMicrokernelTester()
4481 .mr(6)
4482 .nr(8)
4483 .kr(4)
4484 .sr(1)
4485 .m(6)
4486 .n(8)
4487 .k(8)
4488 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004489 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004490 }
4491
4492 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, qmax) {
4493 TEST_REQUIRES_ARM_NEON_DOT;
4494 GemmMicrokernelTester()
4495 .mr(6)
4496 .nr(8)
4497 .kr(4)
4498 .sr(1)
4499 .m(6)
4500 .n(8)
4501 .k(8)
4502 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004503 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004504 }
4505
4506 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, strided_cm) {
4507 TEST_REQUIRES_ARM_NEON_DOT;
4508 GemmMicrokernelTester()
4509 .mr(6)
4510 .nr(8)
4511 .kr(4)
4512 .sr(1)
4513 .m(6)
4514 .n(8)
4515 .k(8)
4516 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004517 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004518 }
4519
4520 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_a_zero_point) {
4521 TEST_REQUIRES_ARM_NEON_DOT;
4522 for (size_t k = 1; k <= 40; k += 9) {
4523 GemmMicrokernelTester()
4524 .mr(6)
4525 .nr(8)
4526 .kr(4)
4527 .sr(1)
4528 .m(6)
4529 .n(8)
4530 .k(k)
4531 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004532 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004533 }
4534 }
4535
4536 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_b_zero_point) {
4537 TEST_REQUIRES_ARM_NEON_DOT;
4538 for (size_t k = 1; k <= 40; k += 9) {
4539 GemmMicrokernelTester()
4540 .mr(6)
4541 .nr(8)
4542 .kr(4)
4543 .sr(1)
4544 .m(6)
4545 .n(8)
4546 .k(k)
4547 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004548 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004549 }
4550 }
4551
4552 TEST(QU8_GEMM_MINMAX_RNDNU_6X8C4__NEONDOT, no_zero_point) {
4553 TEST_REQUIRES_ARM_NEON_DOT;
4554 for (size_t k = 1; k <= 40; k += 9) {
4555 GemmMicrokernelTester()
4556 .mr(6)
4557 .nr(8)
4558 .kr(4)
4559 .sr(1)
4560 .m(6)
4561 .n(8)
4562 .k(k)
4563 .a_zero_point(0)
4564 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08004565 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_6x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004566 }
4567 }
4568#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
4569
4570
4571#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
4572 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8) {
4573 TEST_REQUIRES_ARM_NEON_DOT;
4574 GemmMicrokernelTester()
4575 .mr(8)
4576 .nr(8)
4577 .kr(4)
4578 .sr(1)
4579 .m(8)
4580 .n(8)
4581 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004582 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004583 }
4584
4585 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cn) {
4586 TEST_REQUIRES_ARM_NEON_DOT;
4587 GemmMicrokernelTester()
4588 .mr(8)
4589 .nr(8)
4590 .kr(4)
4591 .sr(1)
4592 .m(8)
4593 .n(8)
4594 .k(8)
4595 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004596 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004597 }
4598
4599 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_strided_a) {
4600 TEST_REQUIRES_ARM_NEON_DOT;
4601 GemmMicrokernelTester()
4602 .mr(8)
4603 .nr(8)
4604 .kr(4)
4605 .sr(1)
4606 .m(8)
4607 .n(8)
4608 .k(8)
4609 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004610 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004611 }
4612
4613 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile) {
4614 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004615 for (uint32_t n = 1; n <= 8; n++) {
4616 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004617 GemmMicrokernelTester()
4618 .mr(8)
4619 .nr(8)
4620 .kr(4)
4621 .sr(1)
4622 .m(m)
4623 .n(n)
4624 .k(8)
4625 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004626 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004627 }
4628 }
4629 }
4630
4631 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_m) {
4632 TEST_REQUIRES_ARM_NEON_DOT;
4633 for (uint32_t m = 1; m <= 8; m++) {
4634 GemmMicrokernelTester()
4635 .mr(8)
4636 .nr(8)
4637 .kr(4)
4638 .sr(1)
4639 .m(m)
4640 .n(8)
4641 .k(8)
4642 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004643 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004644 }
4645 }
4646
4647 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_eq_8_subtile_n) {
4648 TEST_REQUIRES_ARM_NEON_DOT;
4649 for (uint32_t n = 1; n <= 8; n++) {
4650 GemmMicrokernelTester()
4651 .mr(8)
4652 .nr(8)
4653 .kr(4)
4654 .sr(1)
4655 .m(8)
4656 .n(n)
4657 .k(8)
4658 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004659 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004660 }
4661 }
4662
4663 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8) {
4664 TEST_REQUIRES_ARM_NEON_DOT;
4665 for (size_t k = 1; k < 8; k++) {
4666 GemmMicrokernelTester()
4667 .mr(8)
4668 .nr(8)
4669 .kr(4)
4670 .sr(1)
4671 .m(8)
4672 .n(8)
4673 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004674 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004675 }
4676 }
4677
4678 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_strided_a) {
4679 TEST_REQUIRES_ARM_NEON_DOT;
4680 for (size_t k = 1; k < 8; k++) {
4681 GemmMicrokernelTester()
4682 .mr(8)
4683 .nr(8)
4684 .kr(4)
4685 .sr(1)
4686 .m(8)
4687 .n(8)
4688 .k(k)
4689 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004690 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004691 }
4692 }
4693
4694 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_lt_8_subtile) {
4695 TEST_REQUIRES_ARM_NEON_DOT;
4696 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004697 for (uint32_t n = 1; n <= 8; n++) {
4698 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004699 GemmMicrokernelTester()
4700 .mr(8)
4701 .nr(8)
4702 .kr(4)
4703 .sr(1)
4704 .m(m)
4705 .n(n)
4706 .k(k)
4707 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004708 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004709 }
4710 }
4711 }
4712 }
4713
4714 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8) {
4715 TEST_REQUIRES_ARM_NEON_DOT;
4716 for (size_t k = 9; k < 16; k++) {
4717 GemmMicrokernelTester()
4718 .mr(8)
4719 .nr(8)
4720 .kr(4)
4721 .sr(1)
4722 .m(8)
4723 .n(8)
4724 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004725 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004726 }
4727 }
4728
4729 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_strided_a) {
4730 TEST_REQUIRES_ARM_NEON_DOT;
4731 for (size_t k = 9; k < 16; k++) {
4732 GemmMicrokernelTester()
4733 .mr(8)
4734 .nr(8)
4735 .kr(4)
4736 .sr(1)
4737 .m(8)
4738 .n(8)
4739 .k(k)
4740 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004741 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004742 }
4743 }
4744
4745 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_gt_8_subtile) {
4746 TEST_REQUIRES_ARM_NEON_DOT;
4747 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004748 for (uint32_t n = 1; n <= 8; n++) {
4749 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004750 GemmMicrokernelTester()
4751 .mr(8)
4752 .nr(8)
4753 .kr(4)
4754 .sr(1)
4755 .m(m)
4756 .n(n)
4757 .k(k)
4758 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004759 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004760 }
4761 }
4762 }
4763 }
4764
4765 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8) {
4766 TEST_REQUIRES_ARM_NEON_DOT;
4767 for (size_t k = 16; k <= 80; k += 8) {
4768 GemmMicrokernelTester()
4769 .mr(8)
4770 .nr(8)
4771 .kr(4)
4772 .sr(1)
4773 .m(8)
4774 .n(8)
4775 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004776 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004777 }
4778 }
4779
4780 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_strided_a) {
4781 TEST_REQUIRES_ARM_NEON_DOT;
4782 for (size_t k = 16; k <= 80; k += 8) {
4783 GemmMicrokernelTester()
4784 .mr(8)
4785 .nr(8)
4786 .kr(4)
4787 .sr(1)
4788 .m(8)
4789 .n(8)
4790 .k(k)
4791 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004792 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004793 }
4794 }
4795
4796 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, k_div_8_subtile) {
4797 TEST_REQUIRES_ARM_NEON_DOT;
4798 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004799 for (uint32_t n = 1; n <= 8; n++) {
4800 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004801 GemmMicrokernelTester()
4802 .mr(8)
4803 .nr(8)
4804 .kr(4)
4805 .sr(1)
4806 .m(m)
4807 .n(n)
4808 .k(k)
4809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004810 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004811 }
4812 }
4813 }
4814 }
4815
4816 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8) {
4817 TEST_REQUIRES_ARM_NEON_DOT;
4818 for (uint32_t n = 9; n < 16; n++) {
4819 for (size_t k = 1; k <= 40; k += 9) {
4820 GemmMicrokernelTester()
4821 .mr(8)
4822 .nr(8)
4823 .kr(4)
4824 .sr(1)
4825 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004826 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004828 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004829 }
4830 }
4831 }
4832
4833 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_cn) {
4834 TEST_REQUIRES_ARM_NEON_DOT;
4835 for (uint32_t n = 9; n < 16; n++) {
4836 for (size_t k = 1; k <= 40; k += 9) {
4837 GemmMicrokernelTester()
4838 .mr(8)
4839 .nr(8)
4840 .kr(4)
4841 .sr(1)
4842 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004843 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004844 .k(k)
4845 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004846 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004847 }
4848 }
4849 }
4850
4851 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_strided_a) {
4852 TEST_REQUIRES_ARM_NEON_DOT;
4853 for (uint32_t n = 9; n < 16; n++) {
4854 for (size_t k = 1; k <= 40; k += 9) {
4855 GemmMicrokernelTester()
4856 .mr(8)
4857 .nr(8)
4858 .kr(4)
4859 .sr(1)
4860 .m(8)
4861 .n(n)
4862 .k(k)
4863 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004864 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004865 }
4866 }
4867 }
4868
4869 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_gt_8_subtile) {
4870 TEST_REQUIRES_ARM_NEON_DOT;
4871 for (uint32_t n = 9; n < 16; n++) {
4872 for (size_t k = 1; k <= 40; k += 9) {
4873 for (uint32_t m = 1; m <= 8; m++) {
4874 GemmMicrokernelTester()
4875 .mr(8)
4876 .nr(8)
4877 .kr(4)
4878 .sr(1)
4879 .m(m)
4880 .n(n)
4881 .k(k)
4882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004883 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004884 }
4885 }
4886 }
4887 }
4888
4889 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8) {
4890 TEST_REQUIRES_ARM_NEON_DOT;
4891 for (uint32_t n = 16; n <= 24; n += 8) {
4892 for (size_t k = 1; k <= 40; k += 9) {
4893 GemmMicrokernelTester()
4894 .mr(8)
4895 .nr(8)
4896 .kr(4)
4897 .sr(1)
4898 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004899 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004900 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004901 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004902 }
4903 }
4904 }
4905
4906 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_cn) {
4907 TEST_REQUIRES_ARM_NEON_DOT;
4908 for (uint32_t n = 16; n <= 24; n += 8) {
4909 for (size_t k = 1; k <= 40; k += 9) {
4910 GemmMicrokernelTester()
4911 .mr(8)
4912 .nr(8)
4913 .kr(4)
4914 .sr(1)
4915 .m(8)
4916 .n(n)
4917 .k(k)
4918 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004919 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004920 }
4921 }
4922 }
4923
4924 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_strided_a) {
4925 TEST_REQUIRES_ARM_NEON_DOT;
4926 for (uint32_t n = 16; n <= 24; n += 8) {
4927 for (size_t k = 1; k <= 40; k += 9) {
4928 GemmMicrokernelTester()
4929 .mr(8)
4930 .nr(8)
4931 .kr(4)
4932 .sr(1)
4933 .m(8)
4934 .n(n)
4935 .k(k)
4936 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004937 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004938 }
4939 }
4940 }
4941
4942 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, n_div_8_subtile) {
4943 TEST_REQUIRES_ARM_NEON_DOT;
4944 for (uint32_t n = 16; n <= 24; n += 8) {
4945 for (size_t k = 1; k <= 40; k += 9) {
4946 for (uint32_t m = 1; m <= 8; m++) {
4947 GemmMicrokernelTester()
4948 .mr(8)
4949 .nr(8)
4950 .kr(4)
4951 .sr(1)
4952 .m(m)
4953 .n(n)
4954 .k(k)
4955 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004956 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004957 }
4958 }
4959 }
4960 }
4961
4962 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm_subtile) {
4963 TEST_REQUIRES_ARM_NEON_DOT;
4964 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004965 for (uint32_t n = 1; n <= 8; n++) {
4966 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004967 GemmMicrokernelTester()
4968 .mr(8)
4969 .nr(8)
4970 .kr(4)
4971 .sr(1)
4972 .m(m)
4973 .n(n)
4974 .k(k)
4975 .cm_stride(11)
4976 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004977 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004978 }
4979 }
4980 }
4981 }
4982
4983 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmin) {
4984 TEST_REQUIRES_ARM_NEON_DOT;
4985 GemmMicrokernelTester()
4986 .mr(8)
4987 .nr(8)
4988 .kr(4)
4989 .sr(1)
4990 .m(8)
4991 .n(8)
4992 .k(8)
4993 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004994 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004995 }
4996
4997 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, qmax) {
4998 TEST_REQUIRES_ARM_NEON_DOT;
4999 GemmMicrokernelTester()
5000 .mr(8)
5001 .nr(8)
5002 .kr(4)
5003 .sr(1)
5004 .m(8)
5005 .n(8)
5006 .k(8)
5007 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005008 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005009 }
5010
5011 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, strided_cm) {
5012 TEST_REQUIRES_ARM_NEON_DOT;
5013 GemmMicrokernelTester()
5014 .mr(8)
5015 .nr(8)
5016 .kr(4)
5017 .sr(1)
5018 .m(8)
5019 .n(8)
5020 .k(8)
5021 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005022 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005023 }
5024
5025 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_a_zero_point) {
5026 TEST_REQUIRES_ARM_NEON_DOT;
5027 for (size_t k = 1; k <= 40; k += 9) {
5028 GemmMicrokernelTester()
5029 .mr(8)
5030 .nr(8)
5031 .kr(4)
5032 .sr(1)
5033 .m(8)
5034 .n(8)
5035 .k(k)
5036 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005037 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005038 }
5039 }
5040
5041 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_b_zero_point) {
5042 TEST_REQUIRES_ARM_NEON_DOT;
5043 for (size_t k = 1; k <= 40; k += 9) {
5044 GemmMicrokernelTester()
5045 .mr(8)
5046 .nr(8)
5047 .kr(4)
5048 .sr(1)
5049 .m(8)
5050 .n(8)
5051 .k(k)
5052 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005053 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005054 }
5055 }
5056
5057 TEST(QU8_GEMM_MINMAX_RNDNU_8X8C4__NEONDOT, no_zero_point) {
5058 TEST_REQUIRES_ARM_NEON_DOT;
5059 for (size_t k = 1; k <= 40; k += 9) {
5060 GemmMicrokernelTester()
5061 .mr(8)
5062 .nr(8)
5063 .kr(4)
5064 .sr(1)
5065 .m(8)
5066 .n(8)
5067 .k(k)
5068 .a_zero_point(0)
5069 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005070 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x8c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005071 }
5072 }
5073#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5074
5075
5076#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5077 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8) {
5078 TEST_REQUIRES_ARM_NEON_DOT;
5079 GemmMicrokernelTester()
5080 .mr(2)
5081 .nr(16)
5082 .kr(4)
5083 .sr(1)
5084 .m(2)
5085 .n(16)
5086 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005087 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005088 }
5089
5090 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cn) {
5091 TEST_REQUIRES_ARM_NEON_DOT;
5092 GemmMicrokernelTester()
5093 .mr(2)
5094 .nr(16)
5095 .kr(4)
5096 .sr(1)
5097 .m(2)
5098 .n(16)
5099 .k(8)
5100 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005101 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005102 }
5103
5104 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_strided_a) {
5105 TEST_REQUIRES_ARM_NEON_DOT;
5106 GemmMicrokernelTester()
5107 .mr(2)
5108 .nr(16)
5109 .kr(4)
5110 .sr(1)
5111 .m(2)
5112 .n(16)
5113 .k(8)
5114 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005115 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005116 }
5117
5118 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile) {
5119 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005120 for (uint32_t n = 1; n <= 16; n++) {
5121 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005122 GemmMicrokernelTester()
5123 .mr(2)
5124 .nr(16)
5125 .kr(4)
5126 .sr(1)
5127 .m(m)
5128 .n(n)
5129 .k(8)
5130 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005131 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005132 }
5133 }
5134 }
5135
5136 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile_m) {
5137 TEST_REQUIRES_ARM_NEON_DOT;
5138 for (uint32_t m = 1; m <= 2; m++) {
5139 GemmMicrokernelTester()
5140 .mr(2)
5141 .nr(16)
5142 .kr(4)
5143 .sr(1)
5144 .m(m)
5145 .n(16)
5146 .k(8)
5147 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005148 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005149 }
5150 }
5151
5152 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_eq_8_subtile_n) {
5153 TEST_REQUIRES_ARM_NEON_DOT;
5154 for (uint32_t n = 1; n <= 16; n++) {
5155 GemmMicrokernelTester()
5156 .mr(2)
5157 .nr(16)
5158 .kr(4)
5159 .sr(1)
5160 .m(2)
5161 .n(n)
5162 .k(8)
5163 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005164 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005165 }
5166 }
5167
5168 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8) {
5169 TEST_REQUIRES_ARM_NEON_DOT;
5170 for (size_t k = 1; k < 8; k++) {
5171 GemmMicrokernelTester()
5172 .mr(2)
5173 .nr(16)
5174 .kr(4)
5175 .sr(1)
5176 .m(2)
5177 .n(16)
5178 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005179 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005180 }
5181 }
5182
5183 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8_strided_a) {
5184 TEST_REQUIRES_ARM_NEON_DOT;
5185 for (size_t k = 1; k < 8; k++) {
5186 GemmMicrokernelTester()
5187 .mr(2)
5188 .nr(16)
5189 .kr(4)
5190 .sr(1)
5191 .m(2)
5192 .n(16)
5193 .k(k)
5194 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005195 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005196 }
5197 }
5198
5199 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_lt_8_subtile) {
5200 TEST_REQUIRES_ARM_NEON_DOT;
5201 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005202 for (uint32_t n = 1; n <= 16; n++) {
5203 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005204 GemmMicrokernelTester()
5205 .mr(2)
5206 .nr(16)
5207 .kr(4)
5208 .sr(1)
5209 .m(m)
5210 .n(n)
5211 .k(k)
5212 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005213 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005214 }
5215 }
5216 }
5217 }
5218
5219 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8) {
5220 TEST_REQUIRES_ARM_NEON_DOT;
5221 for (size_t k = 9; k < 16; k++) {
5222 GemmMicrokernelTester()
5223 .mr(2)
5224 .nr(16)
5225 .kr(4)
5226 .sr(1)
5227 .m(2)
5228 .n(16)
5229 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005230 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005231 }
5232 }
5233
5234 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8_strided_a) {
5235 TEST_REQUIRES_ARM_NEON_DOT;
5236 for (size_t k = 9; k < 16; k++) {
5237 GemmMicrokernelTester()
5238 .mr(2)
5239 .nr(16)
5240 .kr(4)
5241 .sr(1)
5242 .m(2)
5243 .n(16)
5244 .k(k)
5245 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005246 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005247 }
5248 }
5249
5250 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_gt_8_subtile) {
5251 TEST_REQUIRES_ARM_NEON_DOT;
5252 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005253 for (uint32_t n = 1; n <= 16; n++) {
5254 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005255 GemmMicrokernelTester()
5256 .mr(2)
5257 .nr(16)
5258 .kr(4)
5259 .sr(1)
5260 .m(m)
5261 .n(n)
5262 .k(k)
5263 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005264 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005265 }
5266 }
5267 }
5268 }
5269
5270 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8) {
5271 TEST_REQUIRES_ARM_NEON_DOT;
5272 for (size_t k = 16; k <= 80; k += 8) {
5273 GemmMicrokernelTester()
5274 .mr(2)
5275 .nr(16)
5276 .kr(4)
5277 .sr(1)
5278 .m(2)
5279 .n(16)
5280 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005281 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005282 }
5283 }
5284
5285 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8_strided_a) {
5286 TEST_REQUIRES_ARM_NEON_DOT;
5287 for (size_t k = 16; k <= 80; k += 8) {
5288 GemmMicrokernelTester()
5289 .mr(2)
5290 .nr(16)
5291 .kr(4)
5292 .sr(1)
5293 .m(2)
5294 .n(16)
5295 .k(k)
5296 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005297 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005298 }
5299 }
5300
5301 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, k_div_8_subtile) {
5302 TEST_REQUIRES_ARM_NEON_DOT;
5303 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005304 for (uint32_t n = 1; n <= 16; n++) {
5305 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005306 GemmMicrokernelTester()
5307 .mr(2)
5308 .nr(16)
5309 .kr(4)
5310 .sr(1)
5311 .m(m)
5312 .n(n)
5313 .k(k)
5314 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005315 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005316 }
5317 }
5318 }
5319 }
5320
5321 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16) {
5322 TEST_REQUIRES_ARM_NEON_DOT;
5323 for (uint32_t n = 17; n < 32; n++) {
5324 for (size_t k = 1; k <= 40; k += 9) {
5325 GemmMicrokernelTester()
5326 .mr(2)
5327 .nr(16)
5328 .kr(4)
5329 .sr(1)
5330 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005331 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005332 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005333 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005334 }
5335 }
5336 }
5337
5338 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_strided_cn) {
5339 TEST_REQUIRES_ARM_NEON_DOT;
5340 for (uint32_t n = 17; n < 32; n++) {
5341 for (size_t k = 1; k <= 40; k += 9) {
5342 GemmMicrokernelTester()
5343 .mr(2)
5344 .nr(16)
5345 .kr(4)
5346 .sr(1)
5347 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005348 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005349 .k(k)
5350 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005351 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005352 }
5353 }
5354 }
5355
5356 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_strided_a) {
5357 TEST_REQUIRES_ARM_NEON_DOT;
5358 for (uint32_t n = 17; n < 32; n++) {
5359 for (size_t k = 1; k <= 40; k += 9) {
5360 GemmMicrokernelTester()
5361 .mr(2)
5362 .nr(16)
5363 .kr(4)
5364 .sr(1)
5365 .m(2)
5366 .n(n)
5367 .k(k)
5368 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005369 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005370 }
5371 }
5372 }
5373
5374 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_gt_16_subtile) {
5375 TEST_REQUIRES_ARM_NEON_DOT;
5376 for (uint32_t n = 17; n < 32; n++) {
5377 for (size_t k = 1; k <= 40; k += 9) {
5378 for (uint32_t m = 1; m <= 2; m++) {
5379 GemmMicrokernelTester()
5380 .mr(2)
5381 .nr(16)
5382 .kr(4)
5383 .sr(1)
5384 .m(m)
5385 .n(n)
5386 .k(k)
5387 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005388 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005389 }
5390 }
5391 }
5392 }
5393
5394 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16) {
5395 TEST_REQUIRES_ARM_NEON_DOT;
5396 for (uint32_t n = 32; n <= 48; n += 16) {
5397 for (size_t k = 1; k <= 40; k += 9) {
5398 GemmMicrokernelTester()
5399 .mr(2)
5400 .nr(16)
5401 .kr(4)
5402 .sr(1)
5403 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005404 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005405 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005406 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005407 }
5408 }
5409 }
5410
5411 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_strided_cn) {
5412 TEST_REQUIRES_ARM_NEON_DOT;
5413 for (uint32_t n = 32; n <= 48; n += 16) {
5414 for (size_t k = 1; k <= 40; k += 9) {
5415 GemmMicrokernelTester()
5416 .mr(2)
5417 .nr(16)
5418 .kr(4)
5419 .sr(1)
5420 .m(2)
5421 .n(n)
5422 .k(k)
5423 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005424 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005425 }
5426 }
5427 }
5428
5429 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_strided_a) {
5430 TEST_REQUIRES_ARM_NEON_DOT;
5431 for (uint32_t n = 32; n <= 48; n += 16) {
5432 for (size_t k = 1; k <= 40; k += 9) {
5433 GemmMicrokernelTester()
5434 .mr(2)
5435 .nr(16)
5436 .kr(4)
5437 .sr(1)
5438 .m(2)
5439 .n(n)
5440 .k(k)
5441 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005442 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005443 }
5444 }
5445 }
5446
5447 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, n_div_16_subtile) {
5448 TEST_REQUIRES_ARM_NEON_DOT;
5449 for (uint32_t n = 32; n <= 48; n += 16) {
5450 for (size_t k = 1; k <= 40; k += 9) {
5451 for (uint32_t m = 1; m <= 2; m++) {
5452 GemmMicrokernelTester()
5453 .mr(2)
5454 .nr(16)
5455 .kr(4)
5456 .sr(1)
5457 .m(m)
5458 .n(n)
5459 .k(k)
5460 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005461 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005462 }
5463 }
5464 }
5465 }
5466
5467 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cm_subtile) {
5468 TEST_REQUIRES_ARM_NEON_DOT;
5469 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005470 for (uint32_t n = 1; n <= 16; n++) {
5471 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005472 GemmMicrokernelTester()
5473 .mr(2)
5474 .nr(16)
5475 .kr(4)
5476 .sr(1)
5477 .m(m)
5478 .n(n)
5479 .k(k)
5480 .cm_stride(19)
5481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005482 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005483 }
5484 }
5485 }
5486 }
5487
5488 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, qmin) {
5489 TEST_REQUIRES_ARM_NEON_DOT;
5490 GemmMicrokernelTester()
5491 .mr(2)
5492 .nr(16)
5493 .kr(4)
5494 .sr(1)
5495 .m(2)
5496 .n(16)
5497 .k(8)
5498 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005499 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005500 }
5501
5502 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, qmax) {
5503 TEST_REQUIRES_ARM_NEON_DOT;
5504 GemmMicrokernelTester()
5505 .mr(2)
5506 .nr(16)
5507 .kr(4)
5508 .sr(1)
5509 .m(2)
5510 .n(16)
5511 .k(8)
5512 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005513 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005514 }
5515
5516 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, strided_cm) {
5517 TEST_REQUIRES_ARM_NEON_DOT;
5518 GemmMicrokernelTester()
5519 .mr(2)
5520 .nr(16)
5521 .kr(4)
5522 .sr(1)
5523 .m(2)
5524 .n(16)
5525 .k(8)
5526 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005527 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005528 }
5529
5530 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_a_zero_point) {
5531 TEST_REQUIRES_ARM_NEON_DOT;
5532 for (size_t k = 1; k <= 40; k += 9) {
5533 GemmMicrokernelTester()
5534 .mr(2)
5535 .nr(16)
5536 .kr(4)
5537 .sr(1)
5538 .m(2)
5539 .n(16)
5540 .k(k)
5541 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005542 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005543 }
5544 }
5545
5546 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_b_zero_point) {
5547 TEST_REQUIRES_ARM_NEON_DOT;
5548 for (size_t k = 1; k <= 40; k += 9) {
5549 GemmMicrokernelTester()
5550 .mr(2)
5551 .nr(16)
5552 .kr(4)
5553 .sr(1)
5554 .m(2)
5555 .n(16)
5556 .k(k)
5557 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005558 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005559 }
5560 }
5561
5562 TEST(QU8_GEMM_MINMAX_RNDNU_2X16C4__NEONDOT, no_zero_point) {
5563 TEST_REQUIRES_ARM_NEON_DOT;
5564 for (size_t k = 1; k <= 40; k += 9) {
5565 GemmMicrokernelTester()
5566 .mr(2)
5567 .nr(16)
5568 .kr(4)
5569 .sr(1)
5570 .m(2)
5571 .n(16)
5572 .k(k)
5573 .a_zero_point(0)
5574 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08005575 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005576 }
5577 }
5578#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5579
5580
5581#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
5582 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8) {
5583 TEST_REQUIRES_ARM_NEON_DOT;
5584 GemmMicrokernelTester()
5585 .mr(3)
5586 .nr(16)
5587 .kr(4)
5588 .sr(1)
5589 .m(3)
5590 .n(16)
5591 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08005592 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005593 }
5594
5595 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cn) {
5596 TEST_REQUIRES_ARM_NEON_DOT;
5597 GemmMicrokernelTester()
5598 .mr(3)
5599 .nr(16)
5600 .kr(4)
5601 .sr(1)
5602 .m(3)
5603 .n(16)
5604 .k(8)
5605 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005606 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005607 }
5608
5609 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_strided_a) {
5610 TEST_REQUIRES_ARM_NEON_DOT;
5611 GemmMicrokernelTester()
5612 .mr(3)
5613 .nr(16)
5614 .kr(4)
5615 .sr(1)
5616 .m(3)
5617 .n(16)
5618 .k(8)
5619 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005620 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005621 }
5622
5623 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile) {
5624 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005625 for (uint32_t n = 1; n <= 16; n++) {
5626 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005627 GemmMicrokernelTester()
5628 .mr(3)
5629 .nr(16)
5630 .kr(4)
5631 .sr(1)
5632 .m(m)
5633 .n(n)
5634 .k(8)
5635 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005636 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005637 }
5638 }
5639 }
5640
5641 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile_m) {
5642 TEST_REQUIRES_ARM_NEON_DOT;
5643 for (uint32_t m = 1; m <= 3; m++) {
5644 GemmMicrokernelTester()
5645 .mr(3)
5646 .nr(16)
5647 .kr(4)
5648 .sr(1)
5649 .m(m)
5650 .n(16)
5651 .k(8)
5652 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005653 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005654 }
5655 }
5656
5657 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_eq_8_subtile_n) {
5658 TEST_REQUIRES_ARM_NEON_DOT;
5659 for (uint32_t n = 1; n <= 16; n++) {
5660 GemmMicrokernelTester()
5661 .mr(3)
5662 .nr(16)
5663 .kr(4)
5664 .sr(1)
5665 .m(3)
5666 .n(n)
5667 .k(8)
5668 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005669 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005670 }
5671 }
5672
5673 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8) {
5674 TEST_REQUIRES_ARM_NEON_DOT;
5675 for (size_t k = 1; k < 8; k++) {
5676 GemmMicrokernelTester()
5677 .mr(3)
5678 .nr(16)
5679 .kr(4)
5680 .sr(1)
5681 .m(3)
5682 .n(16)
5683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005684 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005685 }
5686 }
5687
5688 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8_strided_a) {
5689 TEST_REQUIRES_ARM_NEON_DOT;
5690 for (size_t k = 1; k < 8; k++) {
5691 GemmMicrokernelTester()
5692 .mr(3)
5693 .nr(16)
5694 .kr(4)
5695 .sr(1)
5696 .m(3)
5697 .n(16)
5698 .k(k)
5699 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005700 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005701 }
5702 }
5703
5704 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_lt_8_subtile) {
5705 TEST_REQUIRES_ARM_NEON_DOT;
5706 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005707 for (uint32_t n = 1; n <= 16; n++) {
5708 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005709 GemmMicrokernelTester()
5710 .mr(3)
5711 .nr(16)
5712 .kr(4)
5713 .sr(1)
5714 .m(m)
5715 .n(n)
5716 .k(k)
5717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005718 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005719 }
5720 }
5721 }
5722 }
5723
5724 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8) {
5725 TEST_REQUIRES_ARM_NEON_DOT;
5726 for (size_t k = 9; k < 16; k++) {
5727 GemmMicrokernelTester()
5728 .mr(3)
5729 .nr(16)
5730 .kr(4)
5731 .sr(1)
5732 .m(3)
5733 .n(16)
5734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005735 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005736 }
5737 }
5738
5739 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8_strided_a) {
5740 TEST_REQUIRES_ARM_NEON_DOT;
5741 for (size_t k = 9; k < 16; k++) {
5742 GemmMicrokernelTester()
5743 .mr(3)
5744 .nr(16)
5745 .kr(4)
5746 .sr(1)
5747 .m(3)
5748 .n(16)
5749 .k(k)
5750 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005751 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005752 }
5753 }
5754
5755 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_gt_8_subtile) {
5756 TEST_REQUIRES_ARM_NEON_DOT;
5757 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005758 for (uint32_t n = 1; n <= 16; n++) {
5759 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005760 GemmMicrokernelTester()
5761 .mr(3)
5762 .nr(16)
5763 .kr(4)
5764 .sr(1)
5765 .m(m)
5766 .n(n)
5767 .k(k)
5768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005769 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005770 }
5771 }
5772 }
5773 }
5774
5775 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8) {
5776 TEST_REQUIRES_ARM_NEON_DOT;
5777 for (size_t k = 16; k <= 80; k += 8) {
5778 GemmMicrokernelTester()
5779 .mr(3)
5780 .nr(16)
5781 .kr(4)
5782 .sr(1)
5783 .m(3)
5784 .n(16)
5785 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005786 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005787 }
5788 }
5789
5790 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8_strided_a) {
5791 TEST_REQUIRES_ARM_NEON_DOT;
5792 for (size_t k = 16; k <= 80; k += 8) {
5793 GemmMicrokernelTester()
5794 .mr(3)
5795 .nr(16)
5796 .kr(4)
5797 .sr(1)
5798 .m(3)
5799 .n(16)
5800 .k(k)
5801 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005802 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005803 }
5804 }
5805
5806 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, k_div_8_subtile) {
5807 TEST_REQUIRES_ARM_NEON_DOT;
5808 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005809 for (uint32_t n = 1; n <= 16; n++) {
5810 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005811 GemmMicrokernelTester()
5812 .mr(3)
5813 .nr(16)
5814 .kr(4)
5815 .sr(1)
5816 .m(m)
5817 .n(n)
5818 .k(k)
5819 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005820 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005821 }
5822 }
5823 }
5824 }
5825
5826 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16) {
5827 TEST_REQUIRES_ARM_NEON_DOT;
5828 for (uint32_t n = 17; n < 32; n++) {
5829 for (size_t k = 1; k <= 40; k += 9) {
5830 GemmMicrokernelTester()
5831 .mr(3)
5832 .nr(16)
5833 .kr(4)
5834 .sr(1)
5835 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005836 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005837 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005838 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005839 }
5840 }
5841 }
5842
5843 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_strided_cn) {
5844 TEST_REQUIRES_ARM_NEON_DOT;
5845 for (uint32_t n = 17; n < 32; n++) {
5846 for (size_t k = 1; k <= 40; k += 9) {
5847 GemmMicrokernelTester()
5848 .mr(3)
5849 .nr(16)
5850 .kr(4)
5851 .sr(1)
5852 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005853 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005854 .k(k)
5855 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005856 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005857 }
5858 }
5859 }
5860
5861 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_strided_a) {
5862 TEST_REQUIRES_ARM_NEON_DOT;
5863 for (uint32_t n = 17; n < 32; n++) {
5864 for (size_t k = 1; k <= 40; k += 9) {
5865 GemmMicrokernelTester()
5866 .mr(3)
5867 .nr(16)
5868 .kr(4)
5869 .sr(1)
5870 .m(3)
5871 .n(n)
5872 .k(k)
5873 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005874 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005875 }
5876 }
5877 }
5878
5879 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_gt_16_subtile) {
5880 TEST_REQUIRES_ARM_NEON_DOT;
5881 for (uint32_t n = 17; n < 32; n++) {
5882 for (size_t k = 1; k <= 40; k += 9) {
5883 for (uint32_t m = 1; m <= 3; m++) {
5884 GemmMicrokernelTester()
5885 .mr(3)
5886 .nr(16)
5887 .kr(4)
5888 .sr(1)
5889 .m(m)
5890 .n(n)
5891 .k(k)
5892 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005893 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005894 }
5895 }
5896 }
5897 }
5898
5899 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16) {
5900 TEST_REQUIRES_ARM_NEON_DOT;
5901 for (uint32_t n = 32; n <= 48; n += 16) {
5902 for (size_t k = 1; k <= 40; k += 9) {
5903 GemmMicrokernelTester()
5904 .mr(3)
5905 .nr(16)
5906 .kr(4)
5907 .sr(1)
5908 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005909 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005910 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005911 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005912 }
5913 }
5914 }
5915
5916 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_strided_cn) {
5917 TEST_REQUIRES_ARM_NEON_DOT;
5918 for (uint32_t n = 32; n <= 48; n += 16) {
5919 for (size_t k = 1; k <= 40; k += 9) {
5920 GemmMicrokernelTester()
5921 .mr(3)
5922 .nr(16)
5923 .kr(4)
5924 .sr(1)
5925 .m(3)
5926 .n(n)
5927 .k(k)
5928 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005929 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005930 }
5931 }
5932 }
5933
5934 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_strided_a) {
5935 TEST_REQUIRES_ARM_NEON_DOT;
5936 for (uint32_t n = 32; n <= 48; n += 16) {
5937 for (size_t k = 1; k <= 40; k += 9) {
5938 GemmMicrokernelTester()
5939 .mr(3)
5940 .nr(16)
5941 .kr(4)
5942 .sr(1)
5943 .m(3)
5944 .n(n)
5945 .k(k)
5946 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08005947 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005948 }
5949 }
5950 }
5951
5952 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, n_div_16_subtile) {
5953 TEST_REQUIRES_ARM_NEON_DOT;
5954 for (uint32_t n = 32; n <= 48; n += 16) {
5955 for (size_t k = 1; k <= 40; k += 9) {
5956 for (uint32_t m = 1; m <= 3; m++) {
5957 GemmMicrokernelTester()
5958 .mr(3)
5959 .nr(16)
5960 .kr(4)
5961 .sr(1)
5962 .m(m)
5963 .n(n)
5964 .k(k)
5965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005966 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005967 }
5968 }
5969 }
5970 }
5971
5972 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cm_subtile) {
5973 TEST_REQUIRES_ARM_NEON_DOT;
5974 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005975 for (uint32_t n = 1; n <= 16; n++) {
5976 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005977 GemmMicrokernelTester()
5978 .mr(3)
5979 .nr(16)
5980 .kr(4)
5981 .sr(1)
5982 .m(m)
5983 .n(n)
5984 .k(k)
5985 .cm_stride(19)
5986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005987 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005988 }
5989 }
5990 }
5991 }
5992
5993 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, qmin) {
5994 TEST_REQUIRES_ARM_NEON_DOT;
5995 GemmMicrokernelTester()
5996 .mr(3)
5997 .nr(16)
5998 .kr(4)
5999 .sr(1)
6000 .m(3)
6001 .n(16)
6002 .k(8)
6003 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006004 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006005 }
6006
6007 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, qmax) {
6008 TEST_REQUIRES_ARM_NEON_DOT;
6009 GemmMicrokernelTester()
6010 .mr(3)
6011 .nr(16)
6012 .kr(4)
6013 .sr(1)
6014 .m(3)
6015 .n(16)
6016 .k(8)
6017 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006018 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006019 }
6020
6021 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, strided_cm) {
6022 TEST_REQUIRES_ARM_NEON_DOT;
6023 GemmMicrokernelTester()
6024 .mr(3)
6025 .nr(16)
6026 .kr(4)
6027 .sr(1)
6028 .m(3)
6029 .n(16)
6030 .k(8)
6031 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006032 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006033 }
6034
6035 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_a_zero_point) {
6036 TEST_REQUIRES_ARM_NEON_DOT;
6037 for (size_t k = 1; k <= 40; k += 9) {
6038 GemmMicrokernelTester()
6039 .mr(3)
6040 .nr(16)
6041 .kr(4)
6042 .sr(1)
6043 .m(3)
6044 .n(16)
6045 .k(k)
6046 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006047 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006048 }
6049 }
6050
6051 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_b_zero_point) {
6052 TEST_REQUIRES_ARM_NEON_DOT;
6053 for (size_t k = 1; k <= 40; k += 9) {
6054 GemmMicrokernelTester()
6055 .mr(3)
6056 .nr(16)
6057 .kr(4)
6058 .sr(1)
6059 .m(3)
6060 .n(16)
6061 .k(k)
6062 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006063 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006064 }
6065 }
6066
6067 TEST(QU8_GEMM_MINMAX_RNDNU_3X16C4__NEONDOT, no_zero_point) {
6068 TEST_REQUIRES_ARM_NEON_DOT;
6069 for (size_t k = 1; k <= 40; k += 9) {
6070 GemmMicrokernelTester()
6071 .mr(3)
6072 .nr(16)
6073 .kr(4)
6074 .sr(1)
6075 .m(3)
6076 .n(16)
6077 .k(k)
6078 .a_zero_point(0)
6079 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006080 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006081 }
6082 }
6083#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6084
6085
6086#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6087 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) {
6088 TEST_REQUIRES_ARM_NEON_DOT;
6089 GemmMicrokernelTester()
6090 .mr(4)
6091 .nr(16)
6092 .kr(4)
6093 .sr(1)
6094 .m(4)
6095 .n(16)
6096 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006097 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006098 }
6099
6100 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) {
6101 TEST_REQUIRES_ARM_NEON_DOT;
6102 GemmMicrokernelTester()
6103 .mr(4)
6104 .nr(16)
6105 .kr(4)
6106 .sr(1)
6107 .m(4)
6108 .n(16)
6109 .k(8)
6110 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006111 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006112 }
6113
6114 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_strided_a) {
6115 TEST_REQUIRES_ARM_NEON_DOT;
6116 GemmMicrokernelTester()
6117 .mr(4)
6118 .nr(16)
6119 .kr(4)
6120 .sr(1)
6121 .m(4)
6122 .n(16)
6123 .k(8)
6124 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006125 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006126 }
6127
6128 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) {
6129 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006130 for (uint32_t n = 1; n <= 16; n++) {
6131 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006132 GemmMicrokernelTester()
6133 .mr(4)
6134 .nr(16)
6135 .kr(4)
6136 .sr(1)
6137 .m(m)
6138 .n(n)
6139 .k(8)
6140 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006141 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006142 }
6143 }
6144 }
6145
6146 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) {
6147 TEST_REQUIRES_ARM_NEON_DOT;
6148 for (uint32_t m = 1; m <= 4; m++) {
6149 GemmMicrokernelTester()
6150 .mr(4)
6151 .nr(16)
6152 .kr(4)
6153 .sr(1)
6154 .m(m)
6155 .n(16)
6156 .k(8)
6157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006158 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006159 }
6160 }
6161
6162 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) {
6163 TEST_REQUIRES_ARM_NEON_DOT;
6164 for (uint32_t n = 1; n <= 16; n++) {
6165 GemmMicrokernelTester()
6166 .mr(4)
6167 .nr(16)
6168 .kr(4)
6169 .sr(1)
6170 .m(4)
6171 .n(n)
6172 .k(8)
6173 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006174 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006175 }
6176 }
6177
6178 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) {
6179 TEST_REQUIRES_ARM_NEON_DOT;
6180 for (size_t k = 1; k < 8; k++) {
6181 GemmMicrokernelTester()
6182 .mr(4)
6183 .nr(16)
6184 .kr(4)
6185 .sr(1)
6186 .m(4)
6187 .n(16)
6188 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006189 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006190 }
6191 }
6192
6193 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_strided_a) {
6194 TEST_REQUIRES_ARM_NEON_DOT;
6195 for (size_t k = 1; k < 8; k++) {
6196 GemmMicrokernelTester()
6197 .mr(4)
6198 .nr(16)
6199 .kr(4)
6200 .sr(1)
6201 .m(4)
6202 .n(16)
6203 .k(k)
6204 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006205 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006206 }
6207 }
6208
6209 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) {
6210 TEST_REQUIRES_ARM_NEON_DOT;
6211 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006212 for (uint32_t n = 1; n <= 16; n++) {
6213 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006214 GemmMicrokernelTester()
6215 .mr(4)
6216 .nr(16)
6217 .kr(4)
6218 .sr(1)
6219 .m(m)
6220 .n(n)
6221 .k(k)
6222 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006223 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006224 }
6225 }
6226 }
6227 }
6228
6229 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) {
6230 TEST_REQUIRES_ARM_NEON_DOT;
6231 for (size_t k = 9; k < 16; k++) {
6232 GemmMicrokernelTester()
6233 .mr(4)
6234 .nr(16)
6235 .kr(4)
6236 .sr(1)
6237 .m(4)
6238 .n(16)
6239 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006240 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006241 }
6242 }
6243
6244 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_strided_a) {
6245 TEST_REQUIRES_ARM_NEON_DOT;
6246 for (size_t k = 9; k < 16; k++) {
6247 GemmMicrokernelTester()
6248 .mr(4)
6249 .nr(16)
6250 .kr(4)
6251 .sr(1)
6252 .m(4)
6253 .n(16)
6254 .k(k)
6255 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006256 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006257 }
6258 }
6259
6260 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) {
6261 TEST_REQUIRES_ARM_NEON_DOT;
6262 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006263 for (uint32_t n = 1; n <= 16; n++) {
6264 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006265 GemmMicrokernelTester()
6266 .mr(4)
6267 .nr(16)
6268 .kr(4)
6269 .sr(1)
6270 .m(m)
6271 .n(n)
6272 .k(k)
6273 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006274 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006275 }
6276 }
6277 }
6278 }
6279
6280 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) {
6281 TEST_REQUIRES_ARM_NEON_DOT;
6282 for (size_t k = 16; k <= 80; k += 8) {
6283 GemmMicrokernelTester()
6284 .mr(4)
6285 .nr(16)
6286 .kr(4)
6287 .sr(1)
6288 .m(4)
6289 .n(16)
6290 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006291 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006292 }
6293 }
6294
6295 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_strided_a) {
6296 TEST_REQUIRES_ARM_NEON_DOT;
6297 for (size_t k = 16; k <= 80; k += 8) {
6298 GemmMicrokernelTester()
6299 .mr(4)
6300 .nr(16)
6301 .kr(4)
6302 .sr(1)
6303 .m(4)
6304 .n(16)
6305 .k(k)
6306 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006307 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006308 }
6309 }
6310
6311 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) {
6312 TEST_REQUIRES_ARM_NEON_DOT;
6313 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006314 for (uint32_t n = 1; n <= 16; n++) {
6315 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006316 GemmMicrokernelTester()
6317 .mr(4)
6318 .nr(16)
6319 .kr(4)
6320 .sr(1)
6321 .m(m)
6322 .n(n)
6323 .k(k)
6324 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006325 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006326 }
6327 }
6328 }
6329 }
6330
6331 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) {
6332 TEST_REQUIRES_ARM_NEON_DOT;
6333 for (uint32_t n = 17; n < 32; n++) {
6334 for (size_t k = 1; k <= 40; k += 9) {
6335 GemmMicrokernelTester()
6336 .mr(4)
6337 .nr(16)
6338 .kr(4)
6339 .sr(1)
6340 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006341 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006342 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006343 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006344 }
6345 }
6346 }
6347
6348 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) {
6349 TEST_REQUIRES_ARM_NEON_DOT;
6350 for (uint32_t n = 17; n < 32; n++) {
6351 for (size_t k = 1; k <= 40; k += 9) {
6352 GemmMicrokernelTester()
6353 .mr(4)
6354 .nr(16)
6355 .kr(4)
6356 .sr(1)
6357 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006358 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006359 .k(k)
6360 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006361 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006362 }
6363 }
6364 }
6365
6366 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_a) {
6367 TEST_REQUIRES_ARM_NEON_DOT;
6368 for (uint32_t n = 17; n < 32; n++) {
6369 for (size_t k = 1; k <= 40; k += 9) {
6370 GemmMicrokernelTester()
6371 .mr(4)
6372 .nr(16)
6373 .kr(4)
6374 .sr(1)
6375 .m(4)
6376 .n(n)
6377 .k(k)
6378 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006379 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006380 }
6381 }
6382 }
6383
6384 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) {
6385 TEST_REQUIRES_ARM_NEON_DOT;
6386 for (uint32_t n = 17; n < 32; n++) {
6387 for (size_t k = 1; k <= 40; k += 9) {
6388 for (uint32_t m = 1; m <= 4; m++) {
6389 GemmMicrokernelTester()
6390 .mr(4)
6391 .nr(16)
6392 .kr(4)
6393 .sr(1)
6394 .m(m)
6395 .n(n)
6396 .k(k)
6397 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006398 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006399 }
6400 }
6401 }
6402 }
6403
6404 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) {
6405 TEST_REQUIRES_ARM_NEON_DOT;
6406 for (uint32_t n = 32; n <= 48; n += 16) {
6407 for (size_t k = 1; k <= 40; k += 9) {
6408 GemmMicrokernelTester()
6409 .mr(4)
6410 .nr(16)
6411 .kr(4)
6412 .sr(1)
6413 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006414 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006415 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006416 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006417 }
6418 }
6419 }
6420
6421 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) {
6422 TEST_REQUIRES_ARM_NEON_DOT;
6423 for (uint32_t n = 32; n <= 48; n += 16) {
6424 for (size_t k = 1; k <= 40; k += 9) {
6425 GemmMicrokernelTester()
6426 .mr(4)
6427 .nr(16)
6428 .kr(4)
6429 .sr(1)
6430 .m(4)
6431 .n(n)
6432 .k(k)
6433 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006434 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006435 }
6436 }
6437 }
6438
6439 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_a) {
6440 TEST_REQUIRES_ARM_NEON_DOT;
6441 for (uint32_t n = 32; n <= 48; n += 16) {
6442 for (size_t k = 1; k <= 40; k += 9) {
6443 GemmMicrokernelTester()
6444 .mr(4)
6445 .nr(16)
6446 .kr(4)
6447 .sr(1)
6448 .m(4)
6449 .n(n)
6450 .k(k)
6451 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006452 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006453 }
6454 }
6455 }
6456
6457 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) {
6458 TEST_REQUIRES_ARM_NEON_DOT;
6459 for (uint32_t n = 32; n <= 48; n += 16) {
6460 for (size_t k = 1; k <= 40; k += 9) {
6461 for (uint32_t m = 1; m <= 4; m++) {
6462 GemmMicrokernelTester()
6463 .mr(4)
6464 .nr(16)
6465 .kr(4)
6466 .sr(1)
6467 .m(m)
6468 .n(n)
6469 .k(k)
6470 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006471 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006472 }
6473 }
6474 }
6475 }
6476
6477 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) {
6478 TEST_REQUIRES_ARM_NEON_DOT;
6479 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006480 for (uint32_t n = 1; n <= 16; n++) {
6481 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006482 GemmMicrokernelTester()
6483 .mr(4)
6484 .nr(16)
6485 .kr(4)
6486 .sr(1)
6487 .m(m)
6488 .n(n)
6489 .k(k)
6490 .cm_stride(19)
6491 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006492 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006493 }
6494 }
6495 }
6496 }
6497
6498 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) {
6499 TEST_REQUIRES_ARM_NEON_DOT;
6500 GemmMicrokernelTester()
6501 .mr(4)
6502 .nr(16)
6503 .kr(4)
6504 .sr(1)
6505 .m(4)
6506 .n(16)
6507 .k(8)
6508 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006509 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006510 }
6511
6512 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) {
6513 TEST_REQUIRES_ARM_NEON_DOT;
6514 GemmMicrokernelTester()
6515 .mr(4)
6516 .nr(16)
6517 .kr(4)
6518 .sr(1)
6519 .m(4)
6520 .n(16)
6521 .k(8)
6522 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006523 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006524 }
6525
6526 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) {
6527 TEST_REQUIRES_ARM_NEON_DOT;
6528 GemmMicrokernelTester()
6529 .mr(4)
6530 .nr(16)
6531 .kr(4)
6532 .sr(1)
6533 .m(4)
6534 .n(16)
6535 .k(8)
6536 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006537 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006538 }
6539
6540 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_a_zero_point) {
6541 TEST_REQUIRES_ARM_NEON_DOT;
6542 for (size_t k = 1; k <= 40; k += 9) {
6543 GemmMicrokernelTester()
6544 .mr(4)
6545 .nr(16)
6546 .kr(4)
6547 .sr(1)
6548 .m(4)
6549 .n(16)
6550 .k(k)
6551 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006552 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006553 }
6554 }
6555
6556 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_b_zero_point) {
6557 TEST_REQUIRES_ARM_NEON_DOT;
6558 for (size_t k = 1; k <= 40; k += 9) {
6559 GemmMicrokernelTester()
6560 .mr(4)
6561 .nr(16)
6562 .kr(4)
6563 .sr(1)
6564 .m(4)
6565 .n(16)
6566 .k(k)
6567 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006568 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006569 }
6570 }
6571
6572 TEST(QU8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, no_zero_point) {
6573 TEST_REQUIRES_ARM_NEON_DOT;
6574 for (size_t k = 1; k <= 40; k += 9) {
6575 GemmMicrokernelTester()
6576 .mr(4)
6577 .nr(16)
6578 .kr(4)
6579 .sr(1)
6580 .m(4)
6581 .n(16)
6582 .k(k)
6583 .a_zero_point(0)
6584 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08006585 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006586 }
6587 }
6588#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6589
6590
6591#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
6592 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8) {
6593 TEST_REQUIRES_ARM_NEON_DOT;
6594 GemmMicrokernelTester()
6595 .mr(5)
6596 .nr(16)
6597 .kr(4)
6598 .sr(1)
6599 .m(5)
6600 .n(16)
6601 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006602 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006603 }
6604
6605 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cn) {
6606 TEST_REQUIRES_ARM_NEON_DOT;
6607 GemmMicrokernelTester()
6608 .mr(5)
6609 .nr(16)
6610 .kr(4)
6611 .sr(1)
6612 .m(5)
6613 .n(16)
6614 .k(8)
6615 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006616 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006617 }
6618
6619 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_strided_a) {
6620 TEST_REQUIRES_ARM_NEON_DOT;
6621 GemmMicrokernelTester()
6622 .mr(5)
6623 .nr(16)
6624 .kr(4)
6625 .sr(1)
6626 .m(5)
6627 .n(16)
6628 .k(8)
6629 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006630 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006631 }
6632
6633 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile) {
6634 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006635 for (uint32_t n = 1; n <= 16; n++) {
6636 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006637 GemmMicrokernelTester()
6638 .mr(5)
6639 .nr(16)
6640 .kr(4)
6641 .sr(1)
6642 .m(m)
6643 .n(n)
6644 .k(8)
6645 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006646 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006647 }
6648 }
6649 }
6650
6651 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile_m) {
6652 TEST_REQUIRES_ARM_NEON_DOT;
6653 for (uint32_t m = 1; m <= 5; m++) {
6654 GemmMicrokernelTester()
6655 .mr(5)
6656 .nr(16)
6657 .kr(4)
6658 .sr(1)
6659 .m(m)
6660 .n(16)
6661 .k(8)
6662 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006663 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006664 }
6665 }
6666
6667 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_eq_8_subtile_n) {
6668 TEST_REQUIRES_ARM_NEON_DOT;
6669 for (uint32_t n = 1; n <= 16; n++) {
6670 GemmMicrokernelTester()
6671 .mr(5)
6672 .nr(16)
6673 .kr(4)
6674 .sr(1)
6675 .m(5)
6676 .n(n)
6677 .k(8)
6678 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006679 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006680 }
6681 }
6682
6683 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8) {
6684 TEST_REQUIRES_ARM_NEON_DOT;
6685 for (size_t k = 1; k < 8; k++) {
6686 GemmMicrokernelTester()
6687 .mr(5)
6688 .nr(16)
6689 .kr(4)
6690 .sr(1)
6691 .m(5)
6692 .n(16)
6693 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006694 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006695 }
6696 }
6697
6698 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8_strided_a) {
6699 TEST_REQUIRES_ARM_NEON_DOT;
6700 for (size_t k = 1; k < 8; k++) {
6701 GemmMicrokernelTester()
6702 .mr(5)
6703 .nr(16)
6704 .kr(4)
6705 .sr(1)
6706 .m(5)
6707 .n(16)
6708 .k(k)
6709 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006710 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006711 }
6712 }
6713
6714 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_lt_8_subtile) {
6715 TEST_REQUIRES_ARM_NEON_DOT;
6716 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006717 for (uint32_t n = 1; n <= 16; n++) {
6718 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006719 GemmMicrokernelTester()
6720 .mr(5)
6721 .nr(16)
6722 .kr(4)
6723 .sr(1)
6724 .m(m)
6725 .n(n)
6726 .k(k)
6727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006728 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006729 }
6730 }
6731 }
6732 }
6733
6734 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8) {
6735 TEST_REQUIRES_ARM_NEON_DOT;
6736 for (size_t k = 9; k < 16; k++) {
6737 GemmMicrokernelTester()
6738 .mr(5)
6739 .nr(16)
6740 .kr(4)
6741 .sr(1)
6742 .m(5)
6743 .n(16)
6744 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006745 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006746 }
6747 }
6748
6749 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8_strided_a) {
6750 TEST_REQUIRES_ARM_NEON_DOT;
6751 for (size_t k = 9; k < 16; k++) {
6752 GemmMicrokernelTester()
6753 .mr(5)
6754 .nr(16)
6755 .kr(4)
6756 .sr(1)
6757 .m(5)
6758 .n(16)
6759 .k(k)
6760 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006761 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006762 }
6763 }
6764
6765 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_gt_8_subtile) {
6766 TEST_REQUIRES_ARM_NEON_DOT;
6767 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006768 for (uint32_t n = 1; n <= 16; n++) {
6769 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006770 GemmMicrokernelTester()
6771 .mr(5)
6772 .nr(16)
6773 .kr(4)
6774 .sr(1)
6775 .m(m)
6776 .n(n)
6777 .k(k)
6778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006779 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006780 }
6781 }
6782 }
6783 }
6784
6785 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8) {
6786 TEST_REQUIRES_ARM_NEON_DOT;
6787 for (size_t k = 16; k <= 80; k += 8) {
6788 GemmMicrokernelTester()
6789 .mr(5)
6790 .nr(16)
6791 .kr(4)
6792 .sr(1)
6793 .m(5)
6794 .n(16)
6795 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006796 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006797 }
6798 }
6799
6800 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8_strided_a) {
6801 TEST_REQUIRES_ARM_NEON_DOT;
6802 for (size_t k = 16; k <= 80; k += 8) {
6803 GemmMicrokernelTester()
6804 .mr(5)
6805 .nr(16)
6806 .kr(4)
6807 .sr(1)
6808 .m(5)
6809 .n(16)
6810 .k(k)
6811 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006812 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006813 }
6814 }
6815
6816 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, k_div_8_subtile) {
6817 TEST_REQUIRES_ARM_NEON_DOT;
6818 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006819 for (uint32_t n = 1; n <= 16; n++) {
6820 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006821 GemmMicrokernelTester()
6822 .mr(5)
6823 .nr(16)
6824 .kr(4)
6825 .sr(1)
6826 .m(m)
6827 .n(n)
6828 .k(k)
6829 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006830 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006831 }
6832 }
6833 }
6834 }
6835
6836 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16) {
6837 TEST_REQUIRES_ARM_NEON_DOT;
6838 for (uint32_t n = 17; n < 32; n++) {
6839 for (size_t k = 1; k <= 40; k += 9) {
6840 GemmMicrokernelTester()
6841 .mr(5)
6842 .nr(16)
6843 .kr(4)
6844 .sr(1)
6845 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006846 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006847 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006848 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006849 }
6850 }
6851 }
6852
6853 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_strided_cn) {
6854 TEST_REQUIRES_ARM_NEON_DOT;
6855 for (uint32_t n = 17; n < 32; n++) {
6856 for (size_t k = 1; k <= 40; k += 9) {
6857 GemmMicrokernelTester()
6858 .mr(5)
6859 .nr(16)
6860 .kr(4)
6861 .sr(1)
6862 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006863 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006864 .k(k)
6865 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006866 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006867 }
6868 }
6869 }
6870
6871 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_strided_a) {
6872 TEST_REQUIRES_ARM_NEON_DOT;
6873 for (uint32_t n = 17; n < 32; n++) {
6874 for (size_t k = 1; k <= 40; k += 9) {
6875 GemmMicrokernelTester()
6876 .mr(5)
6877 .nr(16)
6878 .kr(4)
6879 .sr(1)
6880 .m(5)
6881 .n(n)
6882 .k(k)
6883 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006884 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006885 }
6886 }
6887 }
6888
6889 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_gt_16_subtile) {
6890 TEST_REQUIRES_ARM_NEON_DOT;
6891 for (uint32_t n = 17; n < 32; n++) {
6892 for (size_t k = 1; k <= 40; k += 9) {
6893 for (uint32_t m = 1; m <= 5; m++) {
6894 GemmMicrokernelTester()
6895 .mr(5)
6896 .nr(16)
6897 .kr(4)
6898 .sr(1)
6899 .m(m)
6900 .n(n)
6901 .k(k)
6902 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006903 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006904 }
6905 }
6906 }
6907 }
6908
6909 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16) {
6910 TEST_REQUIRES_ARM_NEON_DOT;
6911 for (uint32_t n = 32; n <= 48; n += 16) {
6912 for (size_t k = 1; k <= 40; k += 9) {
6913 GemmMicrokernelTester()
6914 .mr(5)
6915 .nr(16)
6916 .kr(4)
6917 .sr(1)
6918 .m(5)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006919 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006920 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006921 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006922 }
6923 }
6924 }
6925
6926 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_strided_cn) {
6927 TEST_REQUIRES_ARM_NEON_DOT;
6928 for (uint32_t n = 32; n <= 48; n += 16) {
6929 for (size_t k = 1; k <= 40; k += 9) {
6930 GemmMicrokernelTester()
6931 .mr(5)
6932 .nr(16)
6933 .kr(4)
6934 .sr(1)
6935 .m(5)
6936 .n(n)
6937 .k(k)
6938 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006939 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006940 }
6941 }
6942 }
6943
6944 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_strided_a) {
6945 TEST_REQUIRES_ARM_NEON_DOT;
6946 for (uint32_t n = 32; n <= 48; n += 16) {
6947 for (size_t k = 1; k <= 40; k += 9) {
6948 GemmMicrokernelTester()
6949 .mr(5)
6950 .nr(16)
6951 .kr(4)
6952 .sr(1)
6953 .m(5)
6954 .n(n)
6955 .k(k)
6956 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006957 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006958 }
6959 }
6960 }
6961
6962 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, n_div_16_subtile) {
6963 TEST_REQUIRES_ARM_NEON_DOT;
6964 for (uint32_t n = 32; n <= 48; n += 16) {
6965 for (size_t k = 1; k <= 40; k += 9) {
6966 for (uint32_t m = 1; m <= 5; m++) {
6967 GemmMicrokernelTester()
6968 .mr(5)
6969 .nr(16)
6970 .kr(4)
6971 .sr(1)
6972 .m(m)
6973 .n(n)
6974 .k(k)
6975 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006976 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006977 }
6978 }
6979 }
6980 }
6981
6982 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cm_subtile) {
6983 TEST_REQUIRES_ARM_NEON_DOT;
6984 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006985 for (uint32_t n = 1; n <= 16; n++) {
6986 for (uint32_t m = 1; m <= 5; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006987 GemmMicrokernelTester()
6988 .mr(5)
6989 .nr(16)
6990 .kr(4)
6991 .sr(1)
6992 .m(m)
6993 .n(n)
6994 .k(k)
6995 .cm_stride(19)
6996 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006997 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006998 }
6999 }
7000 }
7001 }
7002
7003 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, qmin) {
7004 TEST_REQUIRES_ARM_NEON_DOT;
7005 GemmMicrokernelTester()
7006 .mr(5)
7007 .nr(16)
7008 .kr(4)
7009 .sr(1)
7010 .m(5)
7011 .n(16)
7012 .k(8)
7013 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007014 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007015 }
7016
7017 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, qmax) {
7018 TEST_REQUIRES_ARM_NEON_DOT;
7019 GemmMicrokernelTester()
7020 .mr(5)
7021 .nr(16)
7022 .kr(4)
7023 .sr(1)
7024 .m(5)
7025 .n(16)
7026 .k(8)
7027 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007028 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007029 }
7030
7031 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, strided_cm) {
7032 TEST_REQUIRES_ARM_NEON_DOT;
7033 GemmMicrokernelTester()
7034 .mr(5)
7035 .nr(16)
7036 .kr(4)
7037 .sr(1)
7038 .m(5)
7039 .n(16)
7040 .k(8)
7041 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007042 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007043 }
7044
7045 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_a_zero_point) {
7046 TEST_REQUIRES_ARM_NEON_DOT;
7047 for (size_t k = 1; k <= 40; k += 9) {
7048 GemmMicrokernelTester()
7049 .mr(5)
7050 .nr(16)
7051 .kr(4)
7052 .sr(1)
7053 .m(5)
7054 .n(16)
7055 .k(k)
7056 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007057 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007058 }
7059 }
7060
7061 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_b_zero_point) {
7062 TEST_REQUIRES_ARM_NEON_DOT;
7063 for (size_t k = 1; k <= 40; k += 9) {
7064 GemmMicrokernelTester()
7065 .mr(5)
7066 .nr(16)
7067 .kr(4)
7068 .sr(1)
7069 .m(5)
7070 .n(16)
7071 .k(k)
7072 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007073 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007074 }
7075 }
7076
7077 TEST(QU8_GEMM_MINMAX_RNDNU_5X16C4__NEONDOT, no_zero_point) {
7078 TEST_REQUIRES_ARM_NEON_DOT;
7079 for (size_t k = 1; k <= 40; k += 9) {
7080 GemmMicrokernelTester()
7081 .mr(5)
7082 .nr(16)
7083 .kr(4)
7084 .sr(1)
7085 .m(5)
7086 .n(16)
7087 .k(k)
7088 .a_zero_point(0)
7089 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007090 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_5x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007091 }
7092 }
7093#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7094
7095
7096#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7097 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) {
7098 TEST_REQUIRES_ARM_NEON_DOT;
7099 GemmMicrokernelTester()
7100 .mr(8)
7101 .nr(16)
7102 .kr(4)
7103 .sr(1)
7104 .m(8)
7105 .n(16)
7106 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007107 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007108 }
7109
7110 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) {
7111 TEST_REQUIRES_ARM_NEON_DOT;
7112 GemmMicrokernelTester()
7113 .mr(8)
7114 .nr(16)
7115 .kr(4)
7116 .sr(1)
7117 .m(8)
7118 .n(16)
7119 .k(8)
7120 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007121 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007122 }
7123
7124 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_strided_a) {
7125 TEST_REQUIRES_ARM_NEON_DOT;
7126 GemmMicrokernelTester()
7127 .mr(8)
7128 .nr(16)
7129 .kr(4)
7130 .sr(1)
7131 .m(8)
7132 .n(16)
7133 .k(8)
7134 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007135 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007136 }
7137
7138 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) {
7139 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007140 for (uint32_t n = 1; n <= 16; n++) {
7141 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007142 GemmMicrokernelTester()
7143 .mr(8)
7144 .nr(16)
7145 .kr(4)
7146 .sr(1)
7147 .m(m)
7148 .n(n)
7149 .k(8)
7150 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007151 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007152 }
7153 }
7154 }
7155
7156 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) {
7157 TEST_REQUIRES_ARM_NEON_DOT;
7158 for (uint32_t m = 1; m <= 8; m++) {
7159 GemmMicrokernelTester()
7160 .mr(8)
7161 .nr(16)
7162 .kr(4)
7163 .sr(1)
7164 .m(m)
7165 .n(16)
7166 .k(8)
7167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007168 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007169 }
7170 }
7171
7172 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) {
7173 TEST_REQUIRES_ARM_NEON_DOT;
7174 for (uint32_t n = 1; n <= 16; n++) {
7175 GemmMicrokernelTester()
7176 .mr(8)
7177 .nr(16)
7178 .kr(4)
7179 .sr(1)
7180 .m(8)
7181 .n(n)
7182 .k(8)
7183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007184 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007185 }
7186 }
7187
7188 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) {
7189 TEST_REQUIRES_ARM_NEON_DOT;
7190 for (size_t k = 1; k < 8; k++) {
7191 GemmMicrokernelTester()
7192 .mr(8)
7193 .nr(16)
7194 .kr(4)
7195 .sr(1)
7196 .m(8)
7197 .n(16)
7198 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007199 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007200 }
7201 }
7202
7203 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_strided_a) {
7204 TEST_REQUIRES_ARM_NEON_DOT;
7205 for (size_t k = 1; k < 8; k++) {
7206 GemmMicrokernelTester()
7207 .mr(8)
7208 .nr(16)
7209 .kr(4)
7210 .sr(1)
7211 .m(8)
7212 .n(16)
7213 .k(k)
7214 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007215 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007216 }
7217 }
7218
7219 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) {
7220 TEST_REQUIRES_ARM_NEON_DOT;
7221 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007222 for (uint32_t n = 1; n <= 16; n++) {
7223 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007224 GemmMicrokernelTester()
7225 .mr(8)
7226 .nr(16)
7227 .kr(4)
7228 .sr(1)
7229 .m(m)
7230 .n(n)
7231 .k(k)
7232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007233 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007234 }
7235 }
7236 }
7237 }
7238
7239 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) {
7240 TEST_REQUIRES_ARM_NEON_DOT;
7241 for (size_t k = 9; k < 16; k++) {
7242 GemmMicrokernelTester()
7243 .mr(8)
7244 .nr(16)
7245 .kr(4)
7246 .sr(1)
7247 .m(8)
7248 .n(16)
7249 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007250 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007251 }
7252 }
7253
7254 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_strided_a) {
7255 TEST_REQUIRES_ARM_NEON_DOT;
7256 for (size_t k = 9; k < 16; k++) {
7257 GemmMicrokernelTester()
7258 .mr(8)
7259 .nr(16)
7260 .kr(4)
7261 .sr(1)
7262 .m(8)
7263 .n(16)
7264 .k(k)
7265 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007266 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007267 }
7268 }
7269
7270 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) {
7271 TEST_REQUIRES_ARM_NEON_DOT;
7272 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007273 for (uint32_t n = 1; n <= 16; n++) {
7274 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007275 GemmMicrokernelTester()
7276 .mr(8)
7277 .nr(16)
7278 .kr(4)
7279 .sr(1)
7280 .m(m)
7281 .n(n)
7282 .k(k)
7283 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007284 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007285 }
7286 }
7287 }
7288 }
7289
7290 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) {
7291 TEST_REQUIRES_ARM_NEON_DOT;
7292 for (size_t k = 16; k <= 80; k += 8) {
7293 GemmMicrokernelTester()
7294 .mr(8)
7295 .nr(16)
7296 .kr(4)
7297 .sr(1)
7298 .m(8)
7299 .n(16)
7300 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007301 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007302 }
7303 }
7304
7305 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_strided_a) {
7306 TEST_REQUIRES_ARM_NEON_DOT;
7307 for (size_t k = 16; k <= 80; k += 8) {
7308 GemmMicrokernelTester()
7309 .mr(8)
7310 .nr(16)
7311 .kr(4)
7312 .sr(1)
7313 .m(8)
7314 .n(16)
7315 .k(k)
7316 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007317 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007318 }
7319 }
7320
7321 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
7322 TEST_REQUIRES_ARM_NEON_DOT;
7323 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007324 for (uint32_t n = 1; n <= 16; n++) {
7325 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007326 GemmMicrokernelTester()
7327 .mr(8)
7328 .nr(16)
7329 .kr(4)
7330 .sr(1)
7331 .m(m)
7332 .n(n)
7333 .k(k)
7334 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007335 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007336 }
7337 }
7338 }
7339 }
7340
7341 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
7342 TEST_REQUIRES_ARM_NEON_DOT;
7343 for (uint32_t n = 17; n < 32; n++) {
7344 for (size_t k = 1; k <= 40; k += 9) {
7345 GemmMicrokernelTester()
7346 .mr(8)
7347 .nr(16)
7348 .kr(4)
7349 .sr(1)
7350 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007351 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007353 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007354 }
7355 }
7356 }
7357
7358 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
7359 TEST_REQUIRES_ARM_NEON_DOT;
7360 for (uint32_t n = 17; n < 32; n++) {
7361 for (size_t k = 1; k <= 40; k += 9) {
7362 GemmMicrokernelTester()
7363 .mr(8)
7364 .nr(16)
7365 .kr(4)
7366 .sr(1)
7367 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007368 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007369 .k(k)
7370 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007371 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007372 }
7373 }
7374 }
7375
7376 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_a) {
7377 TEST_REQUIRES_ARM_NEON_DOT;
7378 for (uint32_t n = 17; n < 32; n++) {
7379 for (size_t k = 1; k <= 40; k += 9) {
7380 GemmMicrokernelTester()
7381 .mr(8)
7382 .nr(16)
7383 .kr(4)
7384 .sr(1)
7385 .m(8)
7386 .n(n)
7387 .k(k)
7388 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007389 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007390 }
7391 }
7392 }
7393
7394 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
7395 TEST_REQUIRES_ARM_NEON_DOT;
7396 for (uint32_t n = 17; n < 32; n++) {
7397 for (size_t k = 1; k <= 40; k += 9) {
7398 for (uint32_t m = 1; m <= 8; m++) {
7399 GemmMicrokernelTester()
7400 .mr(8)
7401 .nr(16)
7402 .kr(4)
7403 .sr(1)
7404 .m(m)
7405 .n(n)
7406 .k(k)
7407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007408 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007409 }
7410 }
7411 }
7412 }
7413
7414 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
7415 TEST_REQUIRES_ARM_NEON_DOT;
7416 for (uint32_t n = 32; n <= 48; n += 16) {
7417 for (size_t k = 1; k <= 40; k += 9) {
7418 GemmMicrokernelTester()
7419 .mr(8)
7420 .nr(16)
7421 .kr(4)
7422 .sr(1)
7423 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007424 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007425 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007426 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007427 }
7428 }
7429 }
7430
7431 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) {
7432 TEST_REQUIRES_ARM_NEON_DOT;
7433 for (uint32_t n = 32; n <= 48; n += 16) {
7434 for (size_t k = 1; k <= 40; k += 9) {
7435 GemmMicrokernelTester()
7436 .mr(8)
7437 .nr(16)
7438 .kr(4)
7439 .sr(1)
7440 .m(8)
7441 .n(n)
7442 .k(k)
7443 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007444 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007445 }
7446 }
7447 }
7448
7449 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_a) {
7450 TEST_REQUIRES_ARM_NEON_DOT;
7451 for (uint32_t n = 32; n <= 48; n += 16) {
7452 for (size_t k = 1; k <= 40; k += 9) {
7453 GemmMicrokernelTester()
7454 .mr(8)
7455 .nr(16)
7456 .kr(4)
7457 .sr(1)
7458 .m(8)
7459 .n(n)
7460 .k(k)
7461 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007462 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007463 }
7464 }
7465 }
7466
7467 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) {
7468 TEST_REQUIRES_ARM_NEON_DOT;
7469 for (uint32_t n = 32; n <= 48; n += 16) {
7470 for (size_t k = 1; k <= 40; k += 9) {
7471 for (uint32_t m = 1; m <= 8; m++) {
7472 GemmMicrokernelTester()
7473 .mr(8)
7474 .nr(16)
7475 .kr(4)
7476 .sr(1)
7477 .m(m)
7478 .n(n)
7479 .k(k)
7480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007481 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007482 }
7483 }
7484 }
7485 }
7486
7487 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) {
7488 TEST_REQUIRES_ARM_NEON_DOT;
7489 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007490 for (uint32_t n = 1; n <= 16; n++) {
7491 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007492 GemmMicrokernelTester()
7493 .mr(8)
7494 .nr(16)
7495 .kr(4)
7496 .sr(1)
7497 .m(m)
7498 .n(n)
7499 .k(k)
7500 .cm_stride(19)
7501 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007502 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007503 }
7504 }
7505 }
7506 }
7507
7508 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) {
7509 TEST_REQUIRES_ARM_NEON_DOT;
7510 GemmMicrokernelTester()
7511 .mr(8)
7512 .nr(16)
7513 .kr(4)
7514 .sr(1)
7515 .m(8)
7516 .n(16)
7517 .k(8)
7518 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007519 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007520 }
7521
7522 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) {
7523 TEST_REQUIRES_ARM_NEON_DOT;
7524 GemmMicrokernelTester()
7525 .mr(8)
7526 .nr(16)
7527 .kr(4)
7528 .sr(1)
7529 .m(8)
7530 .n(16)
7531 .k(8)
7532 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007533 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007534 }
7535
7536 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) {
7537 TEST_REQUIRES_ARM_NEON_DOT;
7538 GemmMicrokernelTester()
7539 .mr(8)
7540 .nr(16)
7541 .kr(4)
7542 .sr(1)
7543 .m(8)
7544 .n(16)
7545 .k(8)
7546 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007547 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007548 }
7549
7550 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_a_zero_point) {
7551 TEST_REQUIRES_ARM_NEON_DOT;
7552 for (size_t k = 1; k <= 40; k += 9) {
7553 GemmMicrokernelTester()
7554 .mr(8)
7555 .nr(16)
7556 .kr(4)
7557 .sr(1)
7558 .m(8)
7559 .n(16)
7560 .k(k)
7561 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007562 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007563 }
7564 }
7565
7566 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_b_zero_point) {
7567 TEST_REQUIRES_ARM_NEON_DOT;
7568 for (size_t k = 1; k <= 40; k += 9) {
7569 GemmMicrokernelTester()
7570 .mr(8)
7571 .nr(16)
7572 .kr(4)
7573 .sr(1)
7574 .m(8)
7575 .n(16)
7576 .k(k)
7577 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007578 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007579 }
7580 }
7581
7582 TEST(QU8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, no_zero_point) {
7583 TEST_REQUIRES_ARM_NEON_DOT;
7584 for (size_t k = 1; k <= 40; k += 9) {
7585 GemmMicrokernelTester()
7586 .mr(8)
7587 .nr(16)
7588 .kr(4)
7589 .sr(1)
7590 .m(8)
7591 .n(16)
7592 .k(k)
7593 .a_zero_point(0)
7594 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08007595 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007596 }
7597 }
7598#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7599
7600
7601#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
7602 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8) {
7603 TEST_REQUIRES_ARM_NEON_DOT;
7604 GemmMicrokernelTester()
7605 .mr(2)
7606 .nr(32)
7607 .kr(4)
7608 .sr(1)
7609 .m(2)
7610 .n(32)
7611 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007612 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007613 }
7614
7615 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cn) {
7616 TEST_REQUIRES_ARM_NEON_DOT;
7617 GemmMicrokernelTester()
7618 .mr(2)
7619 .nr(32)
7620 .kr(4)
7621 .sr(1)
7622 .m(2)
7623 .n(32)
7624 .k(8)
7625 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08007626 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007627 }
7628
7629 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_strided_a) {
7630 TEST_REQUIRES_ARM_NEON_DOT;
7631 GemmMicrokernelTester()
7632 .mr(2)
7633 .nr(32)
7634 .kr(4)
7635 .sr(1)
7636 .m(2)
7637 .n(32)
7638 .k(8)
7639 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007640 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007641 }
7642
7643 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile) {
7644 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007645 for (uint32_t n = 1; n <= 32; n++) {
7646 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007647 GemmMicrokernelTester()
7648 .mr(2)
7649 .nr(32)
7650 .kr(4)
7651 .sr(1)
7652 .m(m)
7653 .n(n)
7654 .k(8)
7655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007656 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007657 }
7658 }
7659 }
7660
7661 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile_m) {
7662 TEST_REQUIRES_ARM_NEON_DOT;
7663 for (uint32_t m = 1; m <= 2; m++) {
7664 GemmMicrokernelTester()
7665 .mr(2)
7666 .nr(32)
7667 .kr(4)
7668 .sr(1)
7669 .m(m)
7670 .n(32)
7671 .k(8)
7672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007673 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007674 }
7675 }
7676
7677 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_eq_8_subtile_n) {
7678 TEST_REQUIRES_ARM_NEON_DOT;
7679 for (uint32_t n = 1; n <= 32; n++) {
7680 GemmMicrokernelTester()
7681 .mr(2)
7682 .nr(32)
7683 .kr(4)
7684 .sr(1)
7685 .m(2)
7686 .n(n)
7687 .k(8)
7688 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007689 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007690 }
7691 }
7692
7693 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8) {
7694 TEST_REQUIRES_ARM_NEON_DOT;
7695 for (size_t k = 1; k < 8; k++) {
7696 GemmMicrokernelTester()
7697 .mr(2)
7698 .nr(32)
7699 .kr(4)
7700 .sr(1)
7701 .m(2)
7702 .n(32)
7703 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007704 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007705 }
7706 }
7707
7708 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8_strided_a) {
7709 TEST_REQUIRES_ARM_NEON_DOT;
7710 for (size_t k = 1; k < 8; k++) {
7711 GemmMicrokernelTester()
7712 .mr(2)
7713 .nr(32)
7714 .kr(4)
7715 .sr(1)
7716 .m(2)
7717 .n(32)
7718 .k(k)
7719 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007720 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007721 }
7722 }
7723
7724 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_lt_8_subtile) {
7725 TEST_REQUIRES_ARM_NEON_DOT;
7726 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007727 for (uint32_t n = 1; n <= 32; n++) {
7728 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007729 GemmMicrokernelTester()
7730 .mr(2)
7731 .nr(32)
7732 .kr(4)
7733 .sr(1)
7734 .m(m)
7735 .n(n)
7736 .k(k)
7737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007738 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007739 }
7740 }
7741 }
7742 }
7743
7744 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8) {
7745 TEST_REQUIRES_ARM_NEON_DOT;
7746 for (size_t k = 9; k < 16; k++) {
7747 GemmMicrokernelTester()
7748 .mr(2)
7749 .nr(32)
7750 .kr(4)
7751 .sr(1)
7752 .m(2)
7753 .n(32)
7754 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007755 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007756 }
7757 }
7758
7759 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8_strided_a) {
7760 TEST_REQUIRES_ARM_NEON_DOT;
7761 for (size_t k = 9; k < 16; k++) {
7762 GemmMicrokernelTester()
7763 .mr(2)
7764 .nr(32)
7765 .kr(4)
7766 .sr(1)
7767 .m(2)
7768 .n(32)
7769 .k(k)
7770 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007771 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007772 }
7773 }
7774
7775 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_gt_8_subtile) {
7776 TEST_REQUIRES_ARM_NEON_DOT;
7777 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007778 for (uint32_t n = 1; n <= 32; n++) {
7779 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007780 GemmMicrokernelTester()
7781 .mr(2)
7782 .nr(32)
7783 .kr(4)
7784 .sr(1)
7785 .m(m)
7786 .n(n)
7787 .k(k)
7788 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007790 }
7791 }
7792 }
7793 }
7794
7795 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8) {
7796 TEST_REQUIRES_ARM_NEON_DOT;
7797 for (size_t k = 16; k <= 80; k += 8) {
7798 GemmMicrokernelTester()
7799 .mr(2)
7800 .nr(32)
7801 .kr(4)
7802 .sr(1)
7803 .m(2)
7804 .n(32)
7805 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007806 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007807 }
7808 }
7809
7810 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8_strided_a) {
7811 TEST_REQUIRES_ARM_NEON_DOT;
7812 for (size_t k = 16; k <= 80; k += 8) {
7813 GemmMicrokernelTester()
7814 .mr(2)
7815 .nr(32)
7816 .kr(4)
7817 .sr(1)
7818 .m(2)
7819 .n(32)
7820 .k(k)
7821 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007822 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007823 }
7824 }
7825
7826 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, k_div_8_subtile) {
7827 TEST_REQUIRES_ARM_NEON_DOT;
7828 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007829 for (uint32_t n = 1; n <= 32; n++) {
7830 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007831 GemmMicrokernelTester()
7832 .mr(2)
7833 .nr(32)
7834 .kr(4)
7835 .sr(1)
7836 .m(m)
7837 .n(n)
7838 .k(k)
7839 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007840 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007841 }
7842 }
7843 }
7844 }
7845
7846 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32) {
7847 TEST_REQUIRES_ARM_NEON_DOT;
7848 for (uint32_t n = 33; n < 64; n++) {
7849 for (size_t k = 1; k <= 40; k += 9) {
7850 GemmMicrokernelTester()
7851 .mr(2)
7852 .nr(32)
7853 .kr(4)
7854 .sr(1)
7855 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007856 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007857 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007858 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007859 }
7860 }
7861 }
7862
7863 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_strided_cn) {
7864 TEST_REQUIRES_ARM_NEON_DOT;
7865 for (uint32_t n = 33; n < 64; n++) {
7866 for (size_t k = 1; k <= 40; k += 9) {
7867 GemmMicrokernelTester()
7868 .mr(2)
7869 .nr(32)
7870 .kr(4)
7871 .sr(1)
7872 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007873 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007874 .k(k)
7875 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08007876 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007877 }
7878 }
7879 }
7880
7881 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_strided_a) {
7882 TEST_REQUIRES_ARM_NEON_DOT;
7883 for (uint32_t n = 33; n < 64; n++) {
7884 for (size_t k = 1; k <= 40; k += 9) {
7885 GemmMicrokernelTester()
7886 .mr(2)
7887 .nr(32)
7888 .kr(4)
7889 .sr(1)
7890 .m(2)
7891 .n(n)
7892 .k(k)
7893 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007894 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007895 }
7896 }
7897 }
7898
7899 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_gt_32_subtile) {
7900 TEST_REQUIRES_ARM_NEON_DOT;
7901 for (uint32_t n = 33; n < 64; n++) {
7902 for (size_t k = 1; k <= 40; k += 9) {
7903 for (uint32_t m = 1; m <= 2; m++) {
7904 GemmMicrokernelTester()
7905 .mr(2)
7906 .nr(32)
7907 .kr(4)
7908 .sr(1)
7909 .m(m)
7910 .n(n)
7911 .k(k)
7912 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007913 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007914 }
7915 }
7916 }
7917 }
7918
7919 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32) {
7920 TEST_REQUIRES_ARM_NEON_DOT;
7921 for (uint32_t n = 64; n <= 96; n += 32) {
7922 for (size_t k = 1; k <= 40; k += 9) {
7923 GemmMicrokernelTester()
7924 .mr(2)
7925 .nr(32)
7926 .kr(4)
7927 .sr(1)
7928 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007929 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007930 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007931 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007932 }
7933 }
7934 }
7935
7936 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_strided_cn) {
7937 TEST_REQUIRES_ARM_NEON_DOT;
7938 for (uint32_t n = 64; n <= 96; n += 32) {
7939 for (size_t k = 1; k <= 40; k += 9) {
7940 GemmMicrokernelTester()
7941 .mr(2)
7942 .nr(32)
7943 .kr(4)
7944 .sr(1)
7945 .m(2)
7946 .n(n)
7947 .k(k)
7948 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08007949 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007950 }
7951 }
7952 }
7953
7954 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_strided_a) {
7955 TEST_REQUIRES_ARM_NEON_DOT;
7956 for (uint32_t n = 64; n <= 96; n += 32) {
7957 for (size_t k = 1; k <= 40; k += 9) {
7958 GemmMicrokernelTester()
7959 .mr(2)
7960 .nr(32)
7961 .kr(4)
7962 .sr(1)
7963 .m(2)
7964 .n(n)
7965 .k(k)
7966 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007967 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007968 }
7969 }
7970 }
7971
7972 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, n_div_32_subtile) {
7973 TEST_REQUIRES_ARM_NEON_DOT;
7974 for (uint32_t n = 64; n <= 96; n += 32) {
7975 for (size_t k = 1; k <= 40; k += 9) {
7976 for (uint32_t m = 1; m <= 2; m++) {
7977 GemmMicrokernelTester()
7978 .mr(2)
7979 .nr(32)
7980 .kr(4)
7981 .sr(1)
7982 .m(m)
7983 .n(n)
7984 .k(k)
7985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007986 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007987 }
7988 }
7989 }
7990 }
7991
7992 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cm_subtile) {
7993 TEST_REQUIRES_ARM_NEON_DOT;
7994 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007995 for (uint32_t n = 1; n <= 32; n++) {
7996 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007997 GemmMicrokernelTester()
7998 .mr(2)
7999 .nr(32)
8000 .kr(4)
8001 .sr(1)
8002 .m(m)
8003 .n(n)
8004 .k(k)
8005 .cm_stride(37)
8006 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008007 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008008 }
8009 }
8010 }
8011 }
8012
8013 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, qmin) {
8014 TEST_REQUIRES_ARM_NEON_DOT;
8015 GemmMicrokernelTester()
8016 .mr(2)
8017 .nr(32)
8018 .kr(4)
8019 .sr(1)
8020 .m(2)
8021 .n(32)
8022 .k(8)
8023 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008024 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008025 }
8026
8027 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, qmax) {
8028 TEST_REQUIRES_ARM_NEON_DOT;
8029 GemmMicrokernelTester()
8030 .mr(2)
8031 .nr(32)
8032 .kr(4)
8033 .sr(1)
8034 .m(2)
8035 .n(32)
8036 .k(8)
8037 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008038 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008039 }
8040
8041 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, strided_cm) {
8042 TEST_REQUIRES_ARM_NEON_DOT;
8043 GemmMicrokernelTester()
8044 .mr(2)
8045 .nr(32)
8046 .kr(4)
8047 .sr(1)
8048 .m(2)
8049 .n(32)
8050 .k(8)
8051 .cm_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008052 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008053 }
8054
8055 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_a_zero_point) {
8056 TEST_REQUIRES_ARM_NEON_DOT;
8057 for (size_t k = 1; k <= 40; k += 9) {
8058 GemmMicrokernelTester()
8059 .mr(2)
8060 .nr(32)
8061 .kr(4)
8062 .sr(1)
8063 .m(2)
8064 .n(32)
8065 .k(k)
8066 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008067 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008068 }
8069 }
8070
8071 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_b_zero_point) {
8072 TEST_REQUIRES_ARM_NEON_DOT;
8073 for (size_t k = 1; k <= 40; k += 9) {
8074 GemmMicrokernelTester()
8075 .mr(2)
8076 .nr(32)
8077 .kr(4)
8078 .sr(1)
8079 .m(2)
8080 .n(32)
8081 .k(k)
8082 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008083 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008084 }
8085 }
8086
8087 TEST(QU8_GEMM_MINMAX_RNDNU_2X32C4__NEONDOT, no_zero_point) {
8088 TEST_REQUIRES_ARM_NEON_DOT;
8089 for (size_t k = 1; k <= 40; k += 9) {
8090 GemmMicrokernelTester()
8091 .mr(2)
8092 .nr(32)
8093 .kr(4)
8094 .sr(1)
8095 .m(2)
8096 .n(32)
8097 .k(k)
8098 .a_zero_point(0)
8099 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008100 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_2x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008101 }
8102 }
8103#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
8104
8105
8106#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
8107 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8) {
8108 TEST_REQUIRES_ARM_NEON_DOT;
8109 GemmMicrokernelTester()
8110 .mr(3)
8111 .nr(32)
8112 .kr(4)
8113 .sr(1)
8114 .m(3)
8115 .n(32)
8116 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008117 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008118 }
8119
8120 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cn) {
8121 TEST_REQUIRES_ARM_NEON_DOT;
8122 GemmMicrokernelTester()
8123 .mr(3)
8124 .nr(32)
8125 .kr(4)
8126 .sr(1)
8127 .m(3)
8128 .n(32)
8129 .k(8)
8130 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008131 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008132 }
8133
8134 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_strided_a) {
8135 TEST_REQUIRES_ARM_NEON_DOT;
8136 GemmMicrokernelTester()
8137 .mr(3)
8138 .nr(32)
8139 .kr(4)
8140 .sr(1)
8141 .m(3)
8142 .n(32)
8143 .k(8)
8144 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008145 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008146 }
8147
8148 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile) {
8149 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008150 for (uint32_t n = 1; n <= 32; n++) {
8151 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008152 GemmMicrokernelTester()
8153 .mr(3)
8154 .nr(32)
8155 .kr(4)
8156 .sr(1)
8157 .m(m)
8158 .n(n)
8159 .k(8)
8160 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008161 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008162 }
8163 }
8164 }
8165
8166 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile_m) {
8167 TEST_REQUIRES_ARM_NEON_DOT;
8168 for (uint32_t m = 1; m <= 3; m++) {
8169 GemmMicrokernelTester()
8170 .mr(3)
8171 .nr(32)
8172 .kr(4)
8173 .sr(1)
8174 .m(m)
8175 .n(32)
8176 .k(8)
8177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008178 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008179 }
8180 }
8181
8182 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_eq_8_subtile_n) {
8183 TEST_REQUIRES_ARM_NEON_DOT;
8184 for (uint32_t n = 1; n <= 32; n++) {
8185 GemmMicrokernelTester()
8186 .mr(3)
8187 .nr(32)
8188 .kr(4)
8189 .sr(1)
8190 .m(3)
8191 .n(n)
8192 .k(8)
8193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008194 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008195 }
8196 }
8197
8198 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8) {
8199 TEST_REQUIRES_ARM_NEON_DOT;
8200 for (size_t k = 1; k < 8; k++) {
8201 GemmMicrokernelTester()
8202 .mr(3)
8203 .nr(32)
8204 .kr(4)
8205 .sr(1)
8206 .m(3)
8207 .n(32)
8208 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008209 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008210 }
8211 }
8212
8213 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8_strided_a) {
8214 TEST_REQUIRES_ARM_NEON_DOT;
8215 for (size_t k = 1; k < 8; k++) {
8216 GemmMicrokernelTester()
8217 .mr(3)
8218 .nr(32)
8219 .kr(4)
8220 .sr(1)
8221 .m(3)
8222 .n(32)
8223 .k(k)
8224 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008225 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008226 }
8227 }
8228
8229 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_lt_8_subtile) {
8230 TEST_REQUIRES_ARM_NEON_DOT;
8231 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008232 for (uint32_t n = 1; n <= 32; n++) {
8233 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008234 GemmMicrokernelTester()
8235 .mr(3)
8236 .nr(32)
8237 .kr(4)
8238 .sr(1)
8239 .m(m)
8240 .n(n)
8241 .k(k)
8242 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008243 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008244 }
8245 }
8246 }
8247 }
8248
8249 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8) {
8250 TEST_REQUIRES_ARM_NEON_DOT;
8251 for (size_t k = 9; k < 16; k++) {
8252 GemmMicrokernelTester()
8253 .mr(3)
8254 .nr(32)
8255 .kr(4)
8256 .sr(1)
8257 .m(3)
8258 .n(32)
8259 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008260 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008261 }
8262 }
8263
8264 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8_strided_a) {
8265 TEST_REQUIRES_ARM_NEON_DOT;
8266 for (size_t k = 9; k < 16; k++) {
8267 GemmMicrokernelTester()
8268 .mr(3)
8269 .nr(32)
8270 .kr(4)
8271 .sr(1)
8272 .m(3)
8273 .n(32)
8274 .k(k)
8275 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008276 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008277 }
8278 }
8279
8280 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_gt_8_subtile) {
8281 TEST_REQUIRES_ARM_NEON_DOT;
8282 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008283 for (uint32_t n = 1; n <= 32; n++) {
8284 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008285 GemmMicrokernelTester()
8286 .mr(3)
8287 .nr(32)
8288 .kr(4)
8289 .sr(1)
8290 .m(m)
8291 .n(n)
8292 .k(k)
8293 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008294 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008295 }
8296 }
8297 }
8298 }
8299
8300 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8) {
8301 TEST_REQUIRES_ARM_NEON_DOT;
8302 for (size_t k = 16; k <= 80; k += 8) {
8303 GemmMicrokernelTester()
8304 .mr(3)
8305 .nr(32)
8306 .kr(4)
8307 .sr(1)
8308 .m(3)
8309 .n(32)
8310 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008311 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008312 }
8313 }
8314
8315 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8_strided_a) {
8316 TEST_REQUIRES_ARM_NEON_DOT;
8317 for (size_t k = 16; k <= 80; k += 8) {
8318 GemmMicrokernelTester()
8319 .mr(3)
8320 .nr(32)
8321 .kr(4)
8322 .sr(1)
8323 .m(3)
8324 .n(32)
8325 .k(k)
8326 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008327 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008328 }
8329 }
8330
8331 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, k_div_8_subtile) {
8332 TEST_REQUIRES_ARM_NEON_DOT;
8333 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008334 for (uint32_t n = 1; n <= 32; n++) {
8335 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008336 GemmMicrokernelTester()
8337 .mr(3)
8338 .nr(32)
8339 .kr(4)
8340 .sr(1)
8341 .m(m)
8342 .n(n)
8343 .k(k)
8344 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008345 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008346 }
8347 }
8348 }
8349 }
8350
8351 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32) {
8352 TEST_REQUIRES_ARM_NEON_DOT;
8353 for (uint32_t n = 33; n < 64; n++) {
8354 for (size_t k = 1; k <= 40; k += 9) {
8355 GemmMicrokernelTester()
8356 .mr(3)
8357 .nr(32)
8358 .kr(4)
8359 .sr(1)
8360 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008361 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008362 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008363 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008364 }
8365 }
8366 }
8367
8368 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_strided_cn) {
8369 TEST_REQUIRES_ARM_NEON_DOT;
8370 for (uint32_t n = 33; n < 64; n++) {
8371 for (size_t k = 1; k <= 40; k += 9) {
8372 GemmMicrokernelTester()
8373 .mr(3)
8374 .nr(32)
8375 .kr(4)
8376 .sr(1)
8377 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008378 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008379 .k(k)
8380 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008381 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008382 }
8383 }
8384 }
8385
8386 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_strided_a) {
8387 TEST_REQUIRES_ARM_NEON_DOT;
8388 for (uint32_t n = 33; n < 64; n++) {
8389 for (size_t k = 1; k <= 40; k += 9) {
8390 GemmMicrokernelTester()
8391 .mr(3)
8392 .nr(32)
8393 .kr(4)
8394 .sr(1)
8395 .m(3)
8396 .n(n)
8397 .k(k)
8398 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008399 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008400 }
8401 }
8402 }
8403
8404 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_gt_32_subtile) {
8405 TEST_REQUIRES_ARM_NEON_DOT;
8406 for (uint32_t n = 33; n < 64; n++) {
8407 for (size_t k = 1; k <= 40; k += 9) {
8408 for (uint32_t m = 1; m <= 3; m++) {
8409 GemmMicrokernelTester()
8410 .mr(3)
8411 .nr(32)
8412 .kr(4)
8413 .sr(1)
8414 .m(m)
8415 .n(n)
8416 .k(k)
8417 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008418 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008419 }
8420 }
8421 }
8422 }
8423
8424 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32) {
8425 TEST_REQUIRES_ARM_NEON_DOT;
8426 for (uint32_t n = 64; n <= 96; n += 32) {
8427 for (size_t k = 1; k <= 40; k += 9) {
8428 GemmMicrokernelTester()
8429 .mr(3)
8430 .nr(32)
8431 .kr(4)
8432 .sr(1)
8433 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008434 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008435 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008436 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008437 }
8438 }
8439 }
8440
8441 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_strided_cn) {
8442 TEST_REQUIRES_ARM_NEON_DOT;
8443 for (uint32_t n = 64; n <= 96; n += 32) {
8444 for (size_t k = 1; k <= 40; k += 9) {
8445 GemmMicrokernelTester()
8446 .mr(3)
8447 .nr(32)
8448 .kr(4)
8449 .sr(1)
8450 .m(3)
8451 .n(n)
8452 .k(k)
8453 .cn_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008454 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008455 }
8456 }
8457 }
8458
8459 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_strided_a) {
8460 TEST_REQUIRES_ARM_NEON_DOT;
8461 for (uint32_t n = 64; n <= 96; n += 32) {
8462 for (size_t k = 1; k <= 40; k += 9) {
8463 GemmMicrokernelTester()
8464 .mr(3)
8465 .nr(32)
8466 .kr(4)
8467 .sr(1)
8468 .m(3)
8469 .n(n)
8470 .k(k)
8471 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008472 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008473 }
8474 }
8475 }
8476
8477 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, n_div_32_subtile) {
8478 TEST_REQUIRES_ARM_NEON_DOT;
8479 for (uint32_t n = 64; n <= 96; n += 32) {
8480 for (size_t k = 1; k <= 40; k += 9) {
8481 for (uint32_t m = 1; m <= 3; m++) {
8482 GemmMicrokernelTester()
8483 .mr(3)
8484 .nr(32)
8485 .kr(4)
8486 .sr(1)
8487 .m(m)
8488 .n(n)
8489 .k(k)
8490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008491 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008492 }
8493 }
8494 }
8495 }
8496
8497 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cm_subtile) {
8498 TEST_REQUIRES_ARM_NEON_DOT;
8499 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008500 for (uint32_t n = 1; n <= 32; n++) {
8501 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008502 GemmMicrokernelTester()
8503 .mr(3)
8504 .nr(32)
8505 .kr(4)
8506 .sr(1)
8507 .m(m)
8508 .n(n)
8509 .k(k)
8510 .cm_stride(37)
8511 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008512 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008513 }
8514 }
8515 }
8516 }
8517
8518 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, qmin) {
8519 TEST_REQUIRES_ARM_NEON_DOT;
8520 GemmMicrokernelTester()
8521 .mr(3)
8522 .nr(32)
8523 .kr(4)
8524 .sr(1)
8525 .m(3)
8526 .n(32)
8527 .k(8)
8528 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008529 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008530 }
8531
8532 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, qmax) {
8533 TEST_REQUIRES_ARM_NEON_DOT;
8534 GemmMicrokernelTester()
8535 .mr(3)
8536 .nr(32)
8537 .kr(4)
8538 .sr(1)
8539 .m(3)
8540 .n(32)
8541 .k(8)
8542 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008543 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008544 }
8545
8546 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, strided_cm) {
8547 TEST_REQUIRES_ARM_NEON_DOT;
8548 GemmMicrokernelTester()
8549 .mr(3)
8550 .nr(32)
8551 .kr(4)
8552 .sr(1)
8553 .m(3)
8554 .n(32)
8555 .k(8)
8556 .cm_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008557 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008558 }
8559
8560 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_a_zero_point) {
8561 TEST_REQUIRES_ARM_NEON_DOT;
8562 for (size_t k = 1; k <= 40; k += 9) {
8563 GemmMicrokernelTester()
8564 .mr(3)
8565 .nr(32)
8566 .kr(4)
8567 .sr(1)
8568 .m(3)
8569 .n(32)
8570 .k(k)
8571 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008572 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008573 }
8574 }
8575
8576 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_b_zero_point) {
8577 TEST_REQUIRES_ARM_NEON_DOT;
8578 for (size_t k = 1; k <= 40; k += 9) {
8579 GemmMicrokernelTester()
8580 .mr(3)
8581 .nr(32)
8582 .kr(4)
8583 .sr(1)
8584 .m(3)
8585 .n(32)
8586 .k(k)
8587 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008588 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008589 }
8590 }
8591
8592 TEST(QU8_GEMM_MINMAX_RNDNU_3X32C4__NEONDOT, no_zero_point) {
8593 TEST_REQUIRES_ARM_NEON_DOT;
8594 for (size_t k = 1; k <= 40; k += 9) {
8595 GemmMicrokernelTester()
8596 .mr(3)
8597 .nr(32)
8598 .kr(4)
8599 .sr(1)
8600 .m(3)
8601 .n(32)
8602 .k(k)
8603 .a_zero_point(0)
8604 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08008605 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_3x32c4__neondot, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008606 }
8607 }
8608#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
8609
8610
8611#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
8612 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8) {
8613 TEST_REQUIRES_ARM_NEON;
8614 GemmMicrokernelTester()
8615 .mr(4)
8616 .nr(16)
8617 .kr(1)
8618 .sr(1)
8619 .m(4)
8620 .n(16)
8621 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008622 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008623 }
8624
8625 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cn) {
8626 TEST_REQUIRES_ARM_NEON;
8627 GemmMicrokernelTester()
8628 .mr(4)
8629 .nr(16)
8630 .kr(1)
8631 .sr(1)
8632 .m(4)
8633 .n(16)
8634 .k(8)
8635 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008636 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008637 }
8638
8639 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_strided_a) {
8640 TEST_REQUIRES_ARM_NEON;
8641 GemmMicrokernelTester()
8642 .mr(4)
8643 .nr(16)
8644 .kr(1)
8645 .sr(1)
8646 .m(4)
8647 .n(16)
8648 .k(8)
8649 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008650 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008651 }
8652
8653 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile) {
8654 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008655 for (uint32_t n = 1; n <= 16; n++) {
8656 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008657 GemmMicrokernelTester()
8658 .mr(4)
8659 .nr(16)
8660 .kr(1)
8661 .sr(1)
8662 .m(m)
8663 .n(n)
8664 .k(8)
8665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008666 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008667 }
8668 }
8669 }
8670
8671 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_m) {
8672 TEST_REQUIRES_ARM_NEON;
8673 for (uint32_t m = 1; m <= 4; m++) {
8674 GemmMicrokernelTester()
8675 .mr(4)
8676 .nr(16)
8677 .kr(1)
8678 .sr(1)
8679 .m(m)
8680 .n(16)
8681 .k(8)
8682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008683 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008684 }
8685 }
8686
8687 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_eq_8_subtile_n) {
8688 TEST_REQUIRES_ARM_NEON;
8689 for (uint32_t n = 1; n <= 16; n++) {
8690 GemmMicrokernelTester()
8691 .mr(4)
8692 .nr(16)
8693 .kr(1)
8694 .sr(1)
8695 .m(4)
8696 .n(n)
8697 .k(8)
8698 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008699 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008700 }
8701 }
8702
8703 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8) {
8704 TEST_REQUIRES_ARM_NEON;
8705 for (size_t k = 1; k < 8; k++) {
8706 GemmMicrokernelTester()
8707 .mr(4)
8708 .nr(16)
8709 .kr(1)
8710 .sr(1)
8711 .m(4)
8712 .n(16)
8713 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008714 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008715 }
8716 }
8717
8718 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8_strided_a) {
8719 TEST_REQUIRES_ARM_NEON;
8720 for (size_t k = 1; k < 8; k++) {
8721 GemmMicrokernelTester()
8722 .mr(4)
8723 .nr(16)
8724 .kr(1)
8725 .sr(1)
8726 .m(4)
8727 .n(16)
8728 .k(k)
8729 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008730 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008731 }
8732 }
8733
8734 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_lt_8_subtile) {
8735 TEST_REQUIRES_ARM_NEON;
8736 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008737 for (uint32_t n = 1; n <= 16; n++) {
8738 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008739 GemmMicrokernelTester()
8740 .mr(4)
8741 .nr(16)
8742 .kr(1)
8743 .sr(1)
8744 .m(m)
8745 .n(n)
8746 .k(k)
8747 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008748 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008749 }
8750 }
8751 }
8752 }
8753
8754 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8) {
8755 TEST_REQUIRES_ARM_NEON;
8756 for (size_t k = 9; k < 16; k++) {
8757 GemmMicrokernelTester()
8758 .mr(4)
8759 .nr(16)
8760 .kr(1)
8761 .sr(1)
8762 .m(4)
8763 .n(16)
8764 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008765 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008766 }
8767 }
8768
8769 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8_strided_a) {
8770 TEST_REQUIRES_ARM_NEON;
8771 for (size_t k = 9; k < 16; k++) {
8772 GemmMicrokernelTester()
8773 .mr(4)
8774 .nr(16)
8775 .kr(1)
8776 .sr(1)
8777 .m(4)
8778 .n(16)
8779 .k(k)
8780 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008781 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008782 }
8783 }
8784
8785 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_gt_8_subtile) {
8786 TEST_REQUIRES_ARM_NEON;
8787 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008788 for (uint32_t n = 1; n <= 16; n++) {
8789 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008790 GemmMicrokernelTester()
8791 .mr(4)
8792 .nr(16)
8793 .kr(1)
8794 .sr(1)
8795 .m(m)
8796 .n(n)
8797 .k(k)
8798 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008799 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008800 }
8801 }
8802 }
8803 }
8804
8805 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8) {
8806 TEST_REQUIRES_ARM_NEON;
8807 for (size_t k = 16; k <= 80; k += 8) {
8808 GemmMicrokernelTester()
8809 .mr(4)
8810 .nr(16)
8811 .kr(1)
8812 .sr(1)
8813 .m(4)
8814 .n(16)
8815 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008816 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008817 }
8818 }
8819
8820 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8_strided_a) {
8821 TEST_REQUIRES_ARM_NEON;
8822 for (size_t k = 16; k <= 80; k += 8) {
8823 GemmMicrokernelTester()
8824 .mr(4)
8825 .nr(16)
8826 .kr(1)
8827 .sr(1)
8828 .m(4)
8829 .n(16)
8830 .k(k)
8831 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008832 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008833 }
8834 }
8835
8836 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, k_div_8_subtile) {
8837 TEST_REQUIRES_ARM_NEON;
8838 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008839 for (uint32_t n = 1; n <= 16; n++) {
8840 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008841 GemmMicrokernelTester()
8842 .mr(4)
8843 .nr(16)
8844 .kr(1)
8845 .sr(1)
8846 .m(m)
8847 .n(n)
8848 .k(k)
8849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008850 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008851 }
8852 }
8853 }
8854 }
8855
8856 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16) {
8857 TEST_REQUIRES_ARM_NEON;
8858 for (uint32_t n = 17; n < 32; n++) {
8859 for (size_t k = 1; k <= 40; k += 9) {
8860 GemmMicrokernelTester()
8861 .mr(4)
8862 .nr(16)
8863 .kr(1)
8864 .sr(1)
8865 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008866 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008867 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008868 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008869 }
8870 }
8871 }
8872
8873 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_strided_cn) {
8874 TEST_REQUIRES_ARM_NEON;
8875 for (uint32_t n = 17; n < 32; n++) {
8876 for (size_t k = 1; k <= 40; k += 9) {
8877 GemmMicrokernelTester()
8878 .mr(4)
8879 .nr(16)
8880 .kr(1)
8881 .sr(1)
8882 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008883 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008884 .k(k)
8885 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008886 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008887 }
8888 }
8889 }
8890
8891 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_strided_a) {
8892 TEST_REQUIRES_ARM_NEON;
8893 for (uint32_t n = 17; n < 32; n++) {
8894 for (size_t k = 1; k <= 40; k += 9) {
8895 GemmMicrokernelTester()
8896 .mr(4)
8897 .nr(16)
8898 .kr(1)
8899 .sr(1)
8900 .m(4)
8901 .n(n)
8902 .k(k)
8903 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008904 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008905 }
8906 }
8907 }
8908
8909 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_gt_16_subtile) {
8910 TEST_REQUIRES_ARM_NEON;
8911 for (uint32_t n = 17; n < 32; n++) {
8912 for (size_t k = 1; k <= 40; k += 9) {
8913 for (uint32_t m = 1; m <= 4; m++) {
8914 GemmMicrokernelTester()
8915 .mr(4)
8916 .nr(16)
8917 .kr(1)
8918 .sr(1)
8919 .m(m)
8920 .n(n)
8921 .k(k)
8922 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008923 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008924 }
8925 }
8926 }
8927 }
8928
8929 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16) {
8930 TEST_REQUIRES_ARM_NEON;
8931 for (uint32_t n = 32; n <= 48; n += 16) {
8932 for (size_t k = 1; k <= 40; k += 9) {
8933 GemmMicrokernelTester()
8934 .mr(4)
8935 .nr(16)
8936 .kr(1)
8937 .sr(1)
8938 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008939 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008940 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008941 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008942 }
8943 }
8944 }
8945
8946 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_strided_cn) {
8947 TEST_REQUIRES_ARM_NEON;
8948 for (uint32_t n = 32; n <= 48; n += 16) {
8949 for (size_t k = 1; k <= 40; k += 9) {
8950 GemmMicrokernelTester()
8951 .mr(4)
8952 .nr(16)
8953 .kr(1)
8954 .sr(1)
8955 .m(4)
8956 .n(n)
8957 .k(k)
8958 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008959 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008960 }
8961 }
8962 }
8963
8964 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_strided_a) {
8965 TEST_REQUIRES_ARM_NEON;
8966 for (uint32_t n = 32; n <= 48; n += 16) {
8967 for (size_t k = 1; k <= 40; k += 9) {
8968 GemmMicrokernelTester()
8969 .mr(4)
8970 .nr(16)
8971 .kr(1)
8972 .sr(1)
8973 .m(4)
8974 .n(n)
8975 .k(k)
8976 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008977 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008978 }
8979 }
8980 }
8981
8982 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, n_div_16_subtile) {
8983 TEST_REQUIRES_ARM_NEON;
8984 for (uint32_t n = 32; n <= 48; n += 16) {
8985 for (size_t k = 1; k <= 40; k += 9) {
8986 for (uint32_t m = 1; m <= 4; m++) {
8987 GemmMicrokernelTester()
8988 .mr(4)
8989 .nr(16)
8990 .kr(1)
8991 .sr(1)
8992 .m(m)
8993 .n(n)
8994 .k(k)
8995 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008996 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008997 }
8998 }
8999 }
9000 }
9001
9002 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm_subtile) {
9003 TEST_REQUIRES_ARM_NEON;
9004 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009005 for (uint32_t n = 1; n <= 16; n++) {
9006 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009007 GemmMicrokernelTester()
9008 .mr(4)
9009 .nr(16)
9010 .kr(1)
9011 .sr(1)
9012 .m(m)
9013 .n(n)
9014 .k(k)
9015 .cm_stride(19)
9016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009017 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009018 }
9019 }
9020 }
9021 }
9022
9023 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmin) {
9024 TEST_REQUIRES_ARM_NEON;
9025 GemmMicrokernelTester()
9026 .mr(4)
9027 .nr(16)
9028 .kr(1)
9029 .sr(1)
9030 .m(4)
9031 .n(16)
9032 .k(8)
9033 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009034 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009035 }
9036
9037 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, qmax) {
9038 TEST_REQUIRES_ARM_NEON;
9039 GemmMicrokernelTester()
9040 .mr(4)
9041 .nr(16)
9042 .kr(1)
9043 .sr(1)
9044 .m(4)
9045 .n(16)
9046 .k(8)
9047 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009048 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009049 }
9050
9051 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, strided_cm) {
9052 TEST_REQUIRES_ARM_NEON;
9053 GemmMicrokernelTester()
9054 .mr(4)
9055 .nr(16)
9056 .kr(1)
9057 .sr(1)
9058 .m(4)
9059 .n(16)
9060 .k(8)
9061 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009062 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009063 }
9064
9065 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_a_zero_point) {
9066 TEST_REQUIRES_ARM_NEON;
9067 for (size_t k = 1; k <= 40; k += 9) {
9068 GemmMicrokernelTester()
9069 .mr(4)
9070 .nr(16)
9071 .kr(1)
9072 .sr(1)
9073 .m(4)
9074 .n(16)
9075 .k(k)
9076 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009077 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009078 }
9079 }
9080
9081 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_b_zero_point) {
9082 TEST_REQUIRES_ARM_NEON;
9083 for (size_t k = 1; k <= 40; k += 9) {
9084 GemmMicrokernelTester()
9085 .mr(4)
9086 .nr(16)
9087 .kr(1)
9088 .sr(1)
9089 .m(4)
9090 .n(16)
9091 .k(k)
9092 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009093 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009094 }
9095 }
9096
9097 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_CORTEX_A75, no_zero_point) {
9098 TEST_REQUIRES_ARM_NEON;
9099 for (size_t k = 1; k <= 40; k += 9) {
9100 GemmMicrokernelTester()
9101 .mr(4)
9102 .nr(16)
9103 .kr(1)
9104 .sr(1)
9105 .m(4)
9106 .n(16)
9107 .k(k)
9108 .a_zero_point(0)
9109 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009110 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a75, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009111 }
9112 }
9113#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9114
9115
9116#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9117 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8) {
9118 TEST_REQUIRES_ARM_NEON;
9119 GemmMicrokernelTester()
9120 .mr(4)
9121 .nr(16)
9122 .kr(1)
9123 .sr(1)
9124 .m(4)
9125 .n(16)
9126 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009127 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009128 }
9129
9130 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cn) {
9131 TEST_REQUIRES_ARM_NEON;
9132 GemmMicrokernelTester()
9133 .mr(4)
9134 .nr(16)
9135 .kr(1)
9136 .sr(1)
9137 .m(4)
9138 .n(16)
9139 .k(8)
9140 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009141 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009142 }
9143
9144 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_strided_a) {
9145 TEST_REQUIRES_ARM_NEON;
9146 GemmMicrokernelTester()
9147 .mr(4)
9148 .nr(16)
9149 .kr(1)
9150 .sr(1)
9151 .m(4)
9152 .n(16)
9153 .k(8)
9154 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009155 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009156 }
9157
9158 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile) {
9159 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009160 for (uint32_t n = 1; n <= 16; n++) {
9161 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009162 GemmMicrokernelTester()
9163 .mr(4)
9164 .nr(16)
9165 .kr(1)
9166 .sr(1)
9167 .m(m)
9168 .n(n)
9169 .k(8)
9170 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009171 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009172 }
9173 }
9174 }
9175
9176 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_m) {
9177 TEST_REQUIRES_ARM_NEON;
9178 for (uint32_t m = 1; m <= 4; m++) {
9179 GemmMicrokernelTester()
9180 .mr(4)
9181 .nr(16)
9182 .kr(1)
9183 .sr(1)
9184 .m(m)
9185 .n(16)
9186 .k(8)
9187 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009188 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009189 }
9190 }
9191
9192 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_eq_8_subtile_n) {
9193 TEST_REQUIRES_ARM_NEON;
9194 for (uint32_t n = 1; n <= 16; n++) {
9195 GemmMicrokernelTester()
9196 .mr(4)
9197 .nr(16)
9198 .kr(1)
9199 .sr(1)
9200 .m(4)
9201 .n(n)
9202 .k(8)
9203 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009204 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009205 }
9206 }
9207
9208 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8) {
9209 TEST_REQUIRES_ARM_NEON;
9210 for (size_t k = 1; k < 8; k++) {
9211 GemmMicrokernelTester()
9212 .mr(4)
9213 .nr(16)
9214 .kr(1)
9215 .sr(1)
9216 .m(4)
9217 .n(16)
9218 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009219 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009220 }
9221 }
9222
9223 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_strided_a) {
9224 TEST_REQUIRES_ARM_NEON;
9225 for (size_t k = 1; k < 8; k++) {
9226 GemmMicrokernelTester()
9227 .mr(4)
9228 .nr(16)
9229 .kr(1)
9230 .sr(1)
9231 .m(4)
9232 .n(16)
9233 .k(k)
9234 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009235 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009236 }
9237 }
9238
9239 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_lt_8_subtile) {
9240 TEST_REQUIRES_ARM_NEON;
9241 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009242 for (uint32_t n = 1; n <= 16; n++) {
9243 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009244 GemmMicrokernelTester()
9245 .mr(4)
9246 .nr(16)
9247 .kr(1)
9248 .sr(1)
9249 .m(m)
9250 .n(n)
9251 .k(k)
9252 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009253 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009254 }
9255 }
9256 }
9257 }
9258
9259 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8) {
9260 TEST_REQUIRES_ARM_NEON;
9261 for (size_t k = 9; k < 16; k++) {
9262 GemmMicrokernelTester()
9263 .mr(4)
9264 .nr(16)
9265 .kr(1)
9266 .sr(1)
9267 .m(4)
9268 .n(16)
9269 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009270 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009271 }
9272 }
9273
9274 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_strided_a) {
9275 TEST_REQUIRES_ARM_NEON;
9276 for (size_t k = 9; k < 16; k++) {
9277 GemmMicrokernelTester()
9278 .mr(4)
9279 .nr(16)
9280 .kr(1)
9281 .sr(1)
9282 .m(4)
9283 .n(16)
9284 .k(k)
9285 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009286 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009287 }
9288 }
9289
9290 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_gt_8_subtile) {
9291 TEST_REQUIRES_ARM_NEON;
9292 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009293 for (uint32_t n = 1; n <= 16; n++) {
9294 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009295 GemmMicrokernelTester()
9296 .mr(4)
9297 .nr(16)
9298 .kr(1)
9299 .sr(1)
9300 .m(m)
9301 .n(n)
9302 .k(k)
9303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009304 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009305 }
9306 }
9307 }
9308 }
9309
9310 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8) {
9311 TEST_REQUIRES_ARM_NEON;
9312 for (size_t k = 16; k <= 80; k += 8) {
9313 GemmMicrokernelTester()
9314 .mr(4)
9315 .nr(16)
9316 .kr(1)
9317 .sr(1)
9318 .m(4)
9319 .n(16)
9320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009321 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009322 }
9323 }
9324
9325 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_strided_a) {
9326 TEST_REQUIRES_ARM_NEON;
9327 for (size_t k = 16; k <= 80; k += 8) {
9328 GemmMicrokernelTester()
9329 .mr(4)
9330 .nr(16)
9331 .kr(1)
9332 .sr(1)
9333 .m(4)
9334 .n(16)
9335 .k(k)
9336 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009337 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009338 }
9339 }
9340
9341 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, k_div_8_subtile) {
9342 TEST_REQUIRES_ARM_NEON;
9343 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009344 for (uint32_t n = 1; n <= 16; n++) {
9345 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009346 GemmMicrokernelTester()
9347 .mr(4)
9348 .nr(16)
9349 .kr(1)
9350 .sr(1)
9351 .m(m)
9352 .n(n)
9353 .k(k)
9354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009355 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009356 }
9357 }
9358 }
9359 }
9360
9361 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16) {
9362 TEST_REQUIRES_ARM_NEON;
9363 for (uint32_t n = 17; n < 32; n++) {
9364 for (size_t k = 1; k <= 40; k += 9) {
9365 GemmMicrokernelTester()
9366 .mr(4)
9367 .nr(16)
9368 .kr(1)
9369 .sr(1)
9370 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009371 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009372 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009373 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009374 }
9375 }
9376 }
9377
9378 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_cn) {
9379 TEST_REQUIRES_ARM_NEON;
9380 for (uint32_t n = 17; n < 32; n++) {
9381 for (size_t k = 1; k <= 40; k += 9) {
9382 GemmMicrokernelTester()
9383 .mr(4)
9384 .nr(16)
9385 .kr(1)
9386 .sr(1)
9387 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009388 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009389 .k(k)
9390 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009391 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009392 }
9393 }
9394 }
9395
9396 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_strided_a) {
9397 TEST_REQUIRES_ARM_NEON;
9398 for (uint32_t n = 17; n < 32; n++) {
9399 for (size_t k = 1; k <= 40; k += 9) {
9400 GemmMicrokernelTester()
9401 .mr(4)
9402 .nr(16)
9403 .kr(1)
9404 .sr(1)
9405 .m(4)
9406 .n(n)
9407 .k(k)
9408 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009409 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009410 }
9411 }
9412 }
9413
9414 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_gt_16_subtile) {
9415 TEST_REQUIRES_ARM_NEON;
9416 for (uint32_t n = 17; n < 32; n++) {
9417 for (size_t k = 1; k <= 40; k += 9) {
9418 for (uint32_t m = 1; m <= 4; m++) {
9419 GemmMicrokernelTester()
9420 .mr(4)
9421 .nr(16)
9422 .kr(1)
9423 .sr(1)
9424 .m(m)
9425 .n(n)
9426 .k(k)
9427 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009428 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009429 }
9430 }
9431 }
9432 }
9433
9434 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16) {
9435 TEST_REQUIRES_ARM_NEON;
9436 for (uint32_t n = 32; n <= 48; n += 16) {
9437 for (size_t k = 1; k <= 40; k += 9) {
9438 GemmMicrokernelTester()
9439 .mr(4)
9440 .nr(16)
9441 .kr(1)
9442 .sr(1)
9443 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009444 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009445 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009446 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009447 }
9448 }
9449 }
9450
9451 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_cn) {
9452 TEST_REQUIRES_ARM_NEON;
9453 for (uint32_t n = 32; n <= 48; n += 16) {
9454 for (size_t k = 1; k <= 40; k += 9) {
9455 GemmMicrokernelTester()
9456 .mr(4)
9457 .nr(16)
9458 .kr(1)
9459 .sr(1)
9460 .m(4)
9461 .n(n)
9462 .k(k)
9463 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009464 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009465 }
9466 }
9467 }
9468
9469 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_strided_a) {
9470 TEST_REQUIRES_ARM_NEON;
9471 for (uint32_t n = 32; n <= 48; n += 16) {
9472 for (size_t k = 1; k <= 40; k += 9) {
9473 GemmMicrokernelTester()
9474 .mr(4)
9475 .nr(16)
9476 .kr(1)
9477 .sr(1)
9478 .m(4)
9479 .n(n)
9480 .k(k)
9481 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009482 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009483 }
9484 }
9485 }
9486
9487 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, n_div_16_subtile) {
9488 TEST_REQUIRES_ARM_NEON;
9489 for (uint32_t n = 32; n <= 48; n += 16) {
9490 for (size_t k = 1; k <= 40; k += 9) {
9491 for (uint32_t m = 1; m <= 4; m++) {
9492 GemmMicrokernelTester()
9493 .mr(4)
9494 .nr(16)
9495 .kr(1)
9496 .sr(1)
9497 .m(m)
9498 .n(n)
9499 .k(k)
9500 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009501 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009502 }
9503 }
9504 }
9505 }
9506
9507 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm_subtile) {
9508 TEST_REQUIRES_ARM_NEON;
9509 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009510 for (uint32_t n = 1; n <= 16; n++) {
9511 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009512 GemmMicrokernelTester()
9513 .mr(4)
9514 .nr(16)
9515 .kr(1)
9516 .sr(1)
9517 .m(m)
9518 .n(n)
9519 .k(k)
9520 .cm_stride(19)
9521 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009522 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009523 }
9524 }
9525 }
9526 }
9527
9528 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmin) {
9529 TEST_REQUIRES_ARM_NEON;
9530 GemmMicrokernelTester()
9531 .mr(4)
9532 .nr(16)
9533 .kr(1)
9534 .sr(1)
9535 .m(4)
9536 .n(16)
9537 .k(8)
9538 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009539 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009540 }
9541
9542 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, qmax) {
9543 TEST_REQUIRES_ARM_NEON;
9544 GemmMicrokernelTester()
9545 .mr(4)
9546 .nr(16)
9547 .kr(1)
9548 .sr(1)
9549 .m(4)
9550 .n(16)
9551 .k(8)
9552 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009553 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009554 }
9555
9556 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, strided_cm) {
9557 TEST_REQUIRES_ARM_NEON;
9558 GemmMicrokernelTester()
9559 .mr(4)
9560 .nr(16)
9561 .kr(1)
9562 .sr(1)
9563 .m(4)
9564 .n(16)
9565 .k(8)
9566 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009567 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009568 }
9569
9570 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_a_zero_point) {
9571 TEST_REQUIRES_ARM_NEON;
9572 for (size_t k = 1; k <= 40; k += 9) {
9573 GemmMicrokernelTester()
9574 .mr(4)
9575 .nr(16)
9576 .kr(1)
9577 .sr(1)
9578 .m(4)
9579 .n(16)
9580 .k(k)
9581 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009582 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009583 }
9584 }
9585
9586 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_b_zero_point) {
9587 TEST_REQUIRES_ARM_NEON;
9588 for (size_t k = 1; k <= 40; k += 9) {
9589 GemmMicrokernelTester()
9590 .mr(4)
9591 .nr(16)
9592 .kr(1)
9593 .sr(1)
9594 .m(4)
9595 .n(16)
9596 .k(k)
9597 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009598 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009599 }
9600 }
9601
9602 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_CORTEX_A53, no_zero_point) {
9603 TEST_REQUIRES_ARM_NEON;
9604 for (size_t k = 1; k <= 40; k += 9) {
9605 GemmMicrokernelTester()
9606 .mr(4)
9607 .nr(16)
9608 .kr(1)
9609 .sr(1)
9610 .m(4)
9611 .n(16)
9612 .k(k)
9613 .a_zero_point(0)
9614 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -08009615 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009616 }
9617 }
9618#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9619
9620
9621#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
9622 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
9623 TEST_REQUIRES_ARM_NEON;
9624 GemmMicrokernelTester()
9625 .mr(4)
9626 .nr(16)
9627 .kr(1)
9628 .sr(1)
9629 .m(4)
9630 .n(16)
9631 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009632 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009633 }
9634
9635 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
9636 TEST_REQUIRES_ARM_NEON;
9637 GemmMicrokernelTester()
9638 .mr(4)
9639 .nr(16)
9640 .kr(1)
9641 .sr(1)
9642 .m(4)
9643 .n(16)
9644 .k(8)
9645 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009646 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009647 }
9648
9649 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
9650 TEST_REQUIRES_ARM_NEON;
9651 GemmMicrokernelTester()
9652 .mr(4)
9653 .nr(16)
9654 .kr(1)
9655 .sr(1)
9656 .m(4)
9657 .n(16)
9658 .k(8)
9659 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009660 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009661 }
9662
9663 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
9664 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009665 for (uint32_t n = 1; n <= 16; n++) {
9666 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009667 GemmMicrokernelTester()
9668 .mr(4)
9669 .nr(16)
9670 .kr(1)
9671 .sr(1)
9672 .m(m)
9673 .n(n)
9674 .k(8)
9675 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009676 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009677 }
9678 }
9679 }
9680
9681 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
9682 TEST_REQUIRES_ARM_NEON;
9683 for (uint32_t m = 1; m <= 4; m++) {
9684 GemmMicrokernelTester()
9685 .mr(4)
9686 .nr(16)
9687 .kr(1)
9688 .sr(1)
9689 .m(m)
9690 .n(16)
9691 .k(8)
9692 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009693 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009694 }
9695 }
9696
9697 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
9698 TEST_REQUIRES_ARM_NEON;
9699 for (uint32_t n = 1; n <= 16; n++) {
9700 GemmMicrokernelTester()
9701 .mr(4)
9702 .nr(16)
9703 .kr(1)
9704 .sr(1)
9705 .m(4)
9706 .n(n)
9707 .k(8)
9708 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009709 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009710 }
9711 }
9712
9713 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
9714 TEST_REQUIRES_ARM_NEON;
9715 for (size_t k = 1; k < 8; k++) {
9716 GemmMicrokernelTester()
9717 .mr(4)
9718 .nr(16)
9719 .kr(1)
9720 .sr(1)
9721 .m(4)
9722 .n(16)
9723 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009724 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009725 }
9726 }
9727
9728 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
9729 TEST_REQUIRES_ARM_NEON;
9730 for (size_t k = 1; k < 8; k++) {
9731 GemmMicrokernelTester()
9732 .mr(4)
9733 .nr(16)
9734 .kr(1)
9735 .sr(1)
9736 .m(4)
9737 .n(16)
9738 .k(k)
9739 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009740 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009741 }
9742 }
9743
9744 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
9745 TEST_REQUIRES_ARM_NEON;
9746 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009747 for (uint32_t n = 1; n <= 16; n++) {
9748 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009749 GemmMicrokernelTester()
9750 .mr(4)
9751 .nr(16)
9752 .kr(1)
9753 .sr(1)
9754 .m(m)
9755 .n(n)
9756 .k(k)
9757 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009758 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009759 }
9760 }
9761 }
9762 }
9763
9764 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
9765 TEST_REQUIRES_ARM_NEON;
9766 for (size_t k = 9; k < 16; k++) {
9767 GemmMicrokernelTester()
9768 .mr(4)
9769 .nr(16)
9770 .kr(1)
9771 .sr(1)
9772 .m(4)
9773 .n(16)
9774 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009775 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009776 }
9777 }
9778
9779 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
9780 TEST_REQUIRES_ARM_NEON;
9781 for (size_t k = 9; k < 16; k++) {
9782 GemmMicrokernelTester()
9783 .mr(4)
9784 .nr(16)
9785 .kr(1)
9786 .sr(1)
9787 .m(4)
9788 .n(16)
9789 .k(k)
9790 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009791 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009792 }
9793 }
9794
9795 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
9796 TEST_REQUIRES_ARM_NEON;
9797 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009798 for (uint32_t n = 1; n <= 16; n++) {
9799 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009800 GemmMicrokernelTester()
9801 .mr(4)
9802 .nr(16)
9803 .kr(1)
9804 .sr(1)
9805 .m(m)
9806 .n(n)
9807 .k(k)
9808 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009809 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009810 }
9811 }
9812 }
9813 }
9814
9815 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
9816 TEST_REQUIRES_ARM_NEON;
9817 for (size_t k = 16; k <= 80; k += 8) {
9818 GemmMicrokernelTester()
9819 .mr(4)
9820 .nr(16)
9821 .kr(1)
9822 .sr(1)
9823 .m(4)
9824 .n(16)
9825 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009826 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009827 }
9828 }
9829
9830 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
9831 TEST_REQUIRES_ARM_NEON;
9832 for (size_t k = 16; k <= 80; k += 8) {
9833 GemmMicrokernelTester()
9834 .mr(4)
9835 .nr(16)
9836 .kr(1)
9837 .sr(1)
9838 .m(4)
9839 .n(16)
9840 .k(k)
9841 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009842 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009843 }
9844 }
9845
9846 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
9847 TEST_REQUIRES_ARM_NEON;
9848 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009849 for (uint32_t n = 1; n <= 16; n++) {
9850 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009851 GemmMicrokernelTester()
9852 .mr(4)
9853 .nr(16)
9854 .kr(1)
9855 .sr(1)
9856 .m(m)
9857 .n(n)
9858 .k(k)
9859 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009860 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009861 }
9862 }
9863 }
9864 }
9865
9866 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16) {
9867 TEST_REQUIRES_ARM_NEON;
9868 for (uint32_t n = 17; n < 32; n++) {
9869 for (size_t k = 1; k <= 40; k += 9) {
9870 GemmMicrokernelTester()
9871 .mr(4)
9872 .nr(16)
9873 .kr(1)
9874 .sr(1)
9875 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009876 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009877 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009878 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009879 }
9880 }
9881 }
9882
9883 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_cn) {
9884 TEST_REQUIRES_ARM_NEON;
9885 for (uint32_t n = 17; n < 32; n++) {
9886 for (size_t k = 1; k <= 40; k += 9) {
9887 GemmMicrokernelTester()
9888 .mr(4)
9889 .nr(16)
9890 .kr(1)
9891 .sr(1)
9892 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009893 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009894 .k(k)
9895 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009896 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009897 }
9898 }
9899 }
9900
9901 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_strided_a) {
9902 TEST_REQUIRES_ARM_NEON;
9903 for (uint32_t n = 17; n < 32; n++) {
9904 for (size_t k = 1; k <= 40; k += 9) {
9905 GemmMicrokernelTester()
9906 .mr(4)
9907 .nr(16)
9908 .kr(1)
9909 .sr(1)
9910 .m(4)
9911 .n(n)
9912 .k(k)
9913 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009914 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009915 }
9916 }
9917 }
9918
9919 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_gt_16_subtile) {
9920 TEST_REQUIRES_ARM_NEON;
9921 for (uint32_t n = 17; n < 32; n++) {
9922 for (size_t k = 1; k <= 40; k += 9) {
9923 for (uint32_t m = 1; m <= 4; m++) {
9924 GemmMicrokernelTester()
9925 .mr(4)
9926 .nr(16)
9927 .kr(1)
9928 .sr(1)
9929 .m(m)
9930 .n(n)
9931 .k(k)
9932 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009933 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009934 }
9935 }
9936 }
9937 }
9938
9939 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16) {
9940 TEST_REQUIRES_ARM_NEON;
9941 for (uint32_t n = 32; n <= 48; n += 16) {
9942 for (size_t k = 1; k <= 40; k += 9) {
9943 GemmMicrokernelTester()
9944 .mr(4)
9945 .nr(16)
9946 .kr(1)
9947 .sr(1)
9948 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009949 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009950 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009951 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009952 }
9953 }
9954 }
9955
9956 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_cn) {
9957 TEST_REQUIRES_ARM_NEON;
9958 for (uint32_t n = 32; n <= 48; n += 16) {
9959 for (size_t k = 1; k <= 40; k += 9) {
9960 GemmMicrokernelTester()
9961 .mr(4)
9962 .nr(16)
9963 .kr(1)
9964 .sr(1)
9965 .m(4)
9966 .n(n)
9967 .k(k)
9968 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009969 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009970 }
9971 }
9972 }
9973
9974 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_strided_a) {
9975 TEST_REQUIRES_ARM_NEON;
9976 for (uint32_t n = 32; n <= 48; n += 16) {
9977 for (size_t k = 1; k <= 40; k += 9) {
9978 GemmMicrokernelTester()
9979 .mr(4)
9980 .nr(16)
9981 .kr(1)
9982 .sr(1)
9983 .m(4)
9984 .n(n)
9985 .k(k)
9986 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009987 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009988 }
9989 }
9990 }
9991
9992 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, n_div_16_subtile) {
9993 TEST_REQUIRES_ARM_NEON;
9994 for (uint32_t n = 32; n <= 48; n += 16) {
9995 for (size_t k = 1; k <= 40; k += 9) {
9996 for (uint32_t m = 1; m <= 4; m++) {
9997 GemmMicrokernelTester()
9998 .mr(4)
9999 .nr(16)
10000 .kr(1)
10001 .sr(1)
10002 .m(m)
10003 .n(n)
10004 .k(k)
10005 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010006 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010007 }
10008 }
10009 }
10010 }
10011
10012 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
10013 TEST_REQUIRES_ARM_NEON;
10014 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010015 for (uint32_t n = 1; n <= 16; n++) {
10016 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010017 GemmMicrokernelTester()
10018 .mr(4)
10019 .nr(16)
10020 .kr(1)
10021 .sr(1)
10022 .m(m)
10023 .n(n)
10024 .k(k)
10025 .cm_stride(19)
10026 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010027 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010028 }
10029 }
10030 }
10031 }
10032
10033 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmin) {
10034 TEST_REQUIRES_ARM_NEON;
10035 GemmMicrokernelTester()
10036 .mr(4)
10037 .nr(16)
10038 .kr(1)
10039 .sr(1)
10040 .m(4)
10041 .n(16)
10042 .k(8)
10043 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010044 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010045 }
10046
10047 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, qmax) {
10048 TEST_REQUIRES_ARM_NEON;
10049 GemmMicrokernelTester()
10050 .mr(4)
10051 .nr(16)
10052 .kr(1)
10053 .sr(1)
10054 .m(4)
10055 .n(16)
10056 .k(8)
10057 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010058 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010059 }
10060
10061 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
10062 TEST_REQUIRES_ARM_NEON;
10063 GemmMicrokernelTester()
10064 .mr(4)
10065 .nr(16)
10066 .kr(1)
10067 .sr(1)
10068 .m(4)
10069 .n(16)
10070 .k(8)
10071 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010072 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010073 }
10074
10075 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_a_zero_point) {
10076 TEST_REQUIRES_ARM_NEON;
10077 for (size_t k = 1; k <= 40; k += 9) {
10078 GemmMicrokernelTester()
10079 .mr(4)
10080 .nr(16)
10081 .kr(1)
10082 .sr(1)
10083 .m(4)
10084 .n(16)
10085 .k(k)
10086 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010087 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010088 }
10089 }
10090
10091 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_b_zero_point) {
10092 TEST_REQUIRES_ARM_NEON;
10093 for (size_t k = 1; k <= 40; k += 9) {
10094 GemmMicrokernelTester()
10095 .mr(4)
10096 .nr(16)
10097 .kr(1)
10098 .sr(1)
10099 .m(4)
10100 .n(16)
10101 .k(k)
10102 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010103 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010104 }
10105 }
10106
10107 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_PRFM_LD64, no_zero_point) {
10108 TEST_REQUIRES_ARM_NEON;
10109 for (size_t k = 1; k <= 40; k += 9) {
10110 GemmMicrokernelTester()
10111 .mr(4)
10112 .nr(16)
10113 .kr(1)
10114 .sr(1)
10115 .m(4)
10116 .n(16)
10117 .k(k)
10118 .a_zero_point(0)
10119 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010120 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_prfm_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010121 }
10122 }
10123#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
10124
10125
10126#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
10127 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
10128 TEST_REQUIRES_ARM_NEON;
10129 GemmMicrokernelTester()
10130 .mr(4)
10131 .nr(16)
10132 .kr(1)
10133 .sr(1)
10134 .m(4)
10135 .n(16)
10136 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010137 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010138 }
10139
10140 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
10141 TEST_REQUIRES_ARM_NEON;
10142 GemmMicrokernelTester()
10143 .mr(4)
10144 .nr(16)
10145 .kr(1)
10146 .sr(1)
10147 .m(4)
10148 .n(16)
10149 .k(8)
10150 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010151 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010152 }
10153
10154 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
10155 TEST_REQUIRES_ARM_NEON;
10156 GemmMicrokernelTester()
10157 .mr(4)
10158 .nr(16)
10159 .kr(1)
10160 .sr(1)
10161 .m(4)
10162 .n(16)
10163 .k(8)
10164 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010165 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010166 }
10167
10168 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
10169 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010170 for (uint32_t n = 1; n <= 16; n++) {
10171 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010172 GemmMicrokernelTester()
10173 .mr(4)
10174 .nr(16)
10175 .kr(1)
10176 .sr(1)
10177 .m(m)
10178 .n(n)
10179 .k(8)
10180 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010181 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010182 }
10183 }
10184 }
10185
10186 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
10187 TEST_REQUIRES_ARM_NEON;
10188 for (uint32_t m = 1; m <= 4; m++) {
10189 GemmMicrokernelTester()
10190 .mr(4)
10191 .nr(16)
10192 .kr(1)
10193 .sr(1)
10194 .m(m)
10195 .n(16)
10196 .k(8)
10197 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010198 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010199 }
10200 }
10201
10202 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
10203 TEST_REQUIRES_ARM_NEON;
10204 for (uint32_t n = 1; n <= 16; n++) {
10205 GemmMicrokernelTester()
10206 .mr(4)
10207 .nr(16)
10208 .kr(1)
10209 .sr(1)
10210 .m(4)
10211 .n(n)
10212 .k(8)
10213 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010214 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010215 }
10216 }
10217
10218 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
10219 TEST_REQUIRES_ARM_NEON;
10220 for (size_t k = 1; k < 8; k++) {
10221 GemmMicrokernelTester()
10222 .mr(4)
10223 .nr(16)
10224 .kr(1)
10225 .sr(1)
10226 .m(4)
10227 .n(16)
10228 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010229 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010230 }
10231 }
10232
10233 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
10234 TEST_REQUIRES_ARM_NEON;
10235 for (size_t k = 1; k < 8; k++) {
10236 GemmMicrokernelTester()
10237 .mr(4)
10238 .nr(16)
10239 .kr(1)
10240 .sr(1)
10241 .m(4)
10242 .n(16)
10243 .k(k)
10244 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010245 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010246 }
10247 }
10248
10249 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
10250 TEST_REQUIRES_ARM_NEON;
10251 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010252 for (uint32_t n = 1; n <= 16; n++) {
10253 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010254 GemmMicrokernelTester()
10255 .mr(4)
10256 .nr(16)
10257 .kr(1)
10258 .sr(1)
10259 .m(m)
10260 .n(n)
10261 .k(k)
10262 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010263 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010264 }
10265 }
10266 }
10267 }
10268
10269 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
10270 TEST_REQUIRES_ARM_NEON;
10271 for (size_t k = 9; k < 16; k++) {
10272 GemmMicrokernelTester()
10273 .mr(4)
10274 .nr(16)
10275 .kr(1)
10276 .sr(1)
10277 .m(4)
10278 .n(16)
10279 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010280 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010281 }
10282 }
10283
10284 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
10285 TEST_REQUIRES_ARM_NEON;
10286 for (size_t k = 9; k < 16; k++) {
10287 GemmMicrokernelTester()
10288 .mr(4)
10289 .nr(16)
10290 .kr(1)
10291 .sr(1)
10292 .m(4)
10293 .n(16)
10294 .k(k)
10295 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010296 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010297 }
10298 }
10299
10300 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
10301 TEST_REQUIRES_ARM_NEON;
10302 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010303 for (uint32_t n = 1; n <= 16; n++) {
10304 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010305 GemmMicrokernelTester()
10306 .mr(4)
10307 .nr(16)
10308 .kr(1)
10309 .sr(1)
10310 .m(m)
10311 .n(n)
10312 .k(k)
10313 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010314 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010315 }
10316 }
10317 }
10318 }
10319
10320 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
10321 TEST_REQUIRES_ARM_NEON;
10322 for (size_t k = 16; k <= 80; k += 8) {
10323 GemmMicrokernelTester()
10324 .mr(4)
10325 .nr(16)
10326 .kr(1)
10327 .sr(1)
10328 .m(4)
10329 .n(16)
10330 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010331 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010332 }
10333 }
10334
10335 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
10336 TEST_REQUIRES_ARM_NEON;
10337 for (size_t k = 16; k <= 80; k += 8) {
10338 GemmMicrokernelTester()
10339 .mr(4)
10340 .nr(16)
10341 .kr(1)
10342 .sr(1)
10343 .m(4)
10344 .n(16)
10345 .k(k)
10346 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010347 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010348 }
10349 }
10350
10351 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
10352 TEST_REQUIRES_ARM_NEON;
10353 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010354 for (uint32_t n = 1; n <= 16; n++) {
10355 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010356 GemmMicrokernelTester()
10357 .mr(4)
10358 .nr(16)
10359 .kr(1)
10360 .sr(1)
10361 .m(m)
10362 .n(n)
10363 .k(k)
10364 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010365 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010366 }
10367 }
10368 }
10369 }
10370
10371 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16) {
10372 TEST_REQUIRES_ARM_NEON;
10373 for (uint32_t n = 17; n < 32; n++) {
10374 for (size_t k = 1; k <= 40; k += 9) {
10375 GemmMicrokernelTester()
10376 .mr(4)
10377 .nr(16)
10378 .kr(1)
10379 .sr(1)
10380 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010381 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010382 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010383 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010384 }
10385 }
10386 }
10387
10388 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_cn) {
10389 TEST_REQUIRES_ARM_NEON;
10390 for (uint32_t n = 17; n < 32; n++) {
10391 for (size_t k = 1; k <= 40; k += 9) {
10392 GemmMicrokernelTester()
10393 .mr(4)
10394 .nr(16)
10395 .kr(1)
10396 .sr(1)
10397 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010398 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010399 .k(k)
10400 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010401 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010402 }
10403 }
10404 }
10405
10406 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_strided_a) {
10407 TEST_REQUIRES_ARM_NEON;
10408 for (uint32_t n = 17; n < 32; n++) {
10409 for (size_t k = 1; k <= 40; k += 9) {
10410 GemmMicrokernelTester()
10411 .mr(4)
10412 .nr(16)
10413 .kr(1)
10414 .sr(1)
10415 .m(4)
10416 .n(n)
10417 .k(k)
10418 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010419 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010420 }
10421 }
10422 }
10423
10424 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_gt_16_subtile) {
10425 TEST_REQUIRES_ARM_NEON;
10426 for (uint32_t n = 17; n < 32; n++) {
10427 for (size_t k = 1; k <= 40; k += 9) {
10428 for (uint32_t m = 1; m <= 4; m++) {
10429 GemmMicrokernelTester()
10430 .mr(4)
10431 .nr(16)
10432 .kr(1)
10433 .sr(1)
10434 .m(m)
10435 .n(n)
10436 .k(k)
10437 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010438 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010439 }
10440 }
10441 }
10442 }
10443
10444 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16) {
10445 TEST_REQUIRES_ARM_NEON;
10446 for (uint32_t n = 32; n <= 48; n += 16) {
10447 for (size_t k = 1; k <= 40; k += 9) {
10448 GemmMicrokernelTester()
10449 .mr(4)
10450 .nr(16)
10451 .kr(1)
10452 .sr(1)
10453 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010454 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010455 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010456 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010457 }
10458 }
10459 }
10460
10461 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_cn) {
10462 TEST_REQUIRES_ARM_NEON;
10463 for (uint32_t n = 32; n <= 48; n += 16) {
10464 for (size_t k = 1; k <= 40; k += 9) {
10465 GemmMicrokernelTester()
10466 .mr(4)
10467 .nr(16)
10468 .kr(1)
10469 .sr(1)
10470 .m(4)
10471 .n(n)
10472 .k(k)
10473 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010474 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010475 }
10476 }
10477 }
10478
10479 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_strided_a) {
10480 TEST_REQUIRES_ARM_NEON;
10481 for (uint32_t n = 32; n <= 48; n += 16) {
10482 for (size_t k = 1; k <= 40; k += 9) {
10483 GemmMicrokernelTester()
10484 .mr(4)
10485 .nr(16)
10486 .kr(1)
10487 .sr(1)
10488 .m(4)
10489 .n(n)
10490 .k(k)
10491 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010492 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010493 }
10494 }
10495 }
10496
10497 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, n_div_16_subtile) {
10498 TEST_REQUIRES_ARM_NEON;
10499 for (uint32_t n = 32; n <= 48; n += 16) {
10500 for (size_t k = 1; k <= 40; k += 9) {
10501 for (uint32_t m = 1; m <= 4; m++) {
10502 GemmMicrokernelTester()
10503 .mr(4)
10504 .nr(16)
10505 .kr(1)
10506 .sr(1)
10507 .m(m)
10508 .n(n)
10509 .k(k)
10510 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010511 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010512 }
10513 }
10514 }
10515 }
10516
10517 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
10518 TEST_REQUIRES_ARM_NEON;
10519 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010520 for (uint32_t n = 1; n <= 16; n++) {
10521 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010522 GemmMicrokernelTester()
10523 .mr(4)
10524 .nr(16)
10525 .kr(1)
10526 .sr(1)
10527 .m(m)
10528 .n(n)
10529 .k(k)
10530 .cm_stride(19)
10531 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010532 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010533 }
10534 }
10535 }
10536 }
10537
10538 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
10539 TEST_REQUIRES_ARM_NEON;
10540 GemmMicrokernelTester()
10541 .mr(4)
10542 .nr(16)
10543 .kr(1)
10544 .sr(1)
10545 .m(4)
10546 .n(16)
10547 .k(8)
10548 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010549 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010550 }
10551
10552 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
10553 TEST_REQUIRES_ARM_NEON;
10554 GemmMicrokernelTester()
10555 .mr(4)
10556 .nr(16)
10557 .kr(1)
10558 .sr(1)
10559 .m(4)
10560 .n(16)
10561 .k(8)
10562 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010563 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010564 }
10565
10566 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
10567 TEST_REQUIRES_ARM_NEON;
10568 GemmMicrokernelTester()
10569 .mr(4)
10570 .nr(16)
10571 .kr(1)
10572 .sr(1)
10573 .m(4)
10574 .n(16)
10575 .k(8)
10576 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010577 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010578 }
10579
10580 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_a_zero_point) {
10581 TEST_REQUIRES_ARM_NEON;
10582 for (size_t k = 1; k <= 40; k += 9) {
10583 GemmMicrokernelTester()
10584 .mr(4)
10585 .nr(16)
10586 .kr(1)
10587 .sr(1)
10588 .m(4)
10589 .n(16)
10590 .k(k)
10591 .a_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010592 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010593 }
10594 }
10595
10596 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_b_zero_point) {
10597 TEST_REQUIRES_ARM_NEON;
10598 for (size_t k = 1; k <= 40; k += 9) {
10599 GemmMicrokernelTester()
10600 .mr(4)
10601 .nr(16)
10602 .kr(1)
10603 .sr(1)
10604 .m(4)
10605 .n(16)
10606 .k(k)
10607 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010608 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010609 }
10610 }
10611
10612 TEST(QU8_GEMM_MINMAX_RNDNU_4X16__AARCH64_NEON_MLAL_LANE_LD64, no_zero_point) {
10613 TEST_REQUIRES_ARM_NEON;
10614 for (size_t k = 1; k <= 40; k += 9) {
10615 GemmMicrokernelTester()
10616 .mr(4)
10617 .nr(16)
10618 .kr(1)
10619 .sr(1)
10620 .m(4)
10621 .n(16)
10622 .k(k)
10623 .a_zero_point(0)
10624 .b_zero_point(0)
Marat Dukhan50323b82022-01-11 00:12:01 -080010625 .Test(xnn_qu8_gemm_minmax_rndnu_ukernel_4x16__aarch64_neon_mlal_lane_ld64, xnn_init_qu8_conv_minmax_rndnu_neon_params, xnn_qu8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010626 }
10627 }
10628#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY