blob: 0b9b883d0c6bd1ec8872c4b2b8f46b5ae103bd88 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: test/qs8-gemm-minmax-rndnu.yaml
// Generator: tools/generate-gemm-test.py
12
13
14#include <gtest/gtest.h>
15
16#include <xnnpack/allocator.h>
17#include <xnnpack/common.h>
18#include <xnnpack/isa-checks.h>
19
20#include <xnnpack/gemm.h>
21#include <xnnpack/igemm.h>
22#include <xnnpack/ppmm.h>
23#include "gemm-microkernel-tester.h"
24
25
Frank Barchard9e4d2aa2022-02-02 00:31:21 -080026#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
27 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8) {
28 TEST_REQUIRES_ARM_NEON;
29 GemmMicrokernelTester()
30 .mr(4)
31 .nr(8)
32 .kr(1)
33 .sr(1)
34 .m(4)
35 .n(8)
36 .k(8)
37 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
38 }
39
40 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cn) {
41 TEST_REQUIRES_ARM_NEON;
42 GemmMicrokernelTester()
43 .mr(4)
44 .nr(8)
45 .kr(1)
46 .sr(1)
47 .m(4)
48 .n(8)
49 .k(8)
50 .cn_stride(11)
51 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
52 }
53
54 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_strided_a) {
55 TEST_REQUIRES_ARM_NEON;
56 GemmMicrokernelTester()
57 .mr(4)
58 .nr(8)
59 .kr(1)
60 .sr(1)
61 .m(4)
62 .n(8)
63 .k(8)
64 .a_stride(11)
65 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
66 }
67
68 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile) {
69 TEST_REQUIRES_ARM_NEON;
70 for (uint32_t n = 1; n <= 8; n++) {
71 for (uint32_t m = 1; m <= 4; m++) {
72 GemmMicrokernelTester()
73 .mr(4)
74 .nr(8)
75 .kr(1)
76 .sr(1)
77 .m(m)
78 .n(n)
79 .k(8)
80 .iterations(1)
81 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
82 }
83 }
84 }
85
86 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_m) {
87 TEST_REQUIRES_ARM_NEON;
88 for (uint32_t m = 1; m <= 4; m++) {
89 GemmMicrokernelTester()
90 .mr(4)
91 .nr(8)
92 .kr(1)
93 .sr(1)
94 .m(m)
95 .n(8)
96 .k(8)
97 .iterations(1)
98 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
99 }
100 }
101
102 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_eq_8_subtile_n) {
103 TEST_REQUIRES_ARM_NEON;
104 for (uint32_t n = 1; n <= 8; n++) {
105 GemmMicrokernelTester()
106 .mr(4)
107 .nr(8)
108 .kr(1)
109 .sr(1)
110 .m(4)
111 .n(n)
112 .k(8)
113 .iterations(1)
114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
115 }
116 }
117
118 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8) {
119 TEST_REQUIRES_ARM_NEON;
120 for (size_t k = 1; k < 8; k++) {
121 GemmMicrokernelTester()
122 .mr(4)
123 .nr(8)
124 .kr(1)
125 .sr(1)
126 .m(4)
127 .n(8)
128 .k(k)
129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
130 }
131 }
132
133 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_strided_a) {
134 TEST_REQUIRES_ARM_NEON;
135 for (size_t k = 1; k < 8; k++) {
136 GemmMicrokernelTester()
137 .mr(4)
138 .nr(8)
139 .kr(1)
140 .sr(1)
141 .m(4)
142 .n(8)
143 .k(k)
144 .a_stride(11)
145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
146 }
147 }
148
149 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_lt_8_subtile) {
150 TEST_REQUIRES_ARM_NEON;
151 for (size_t k = 1; k < 8; k++) {
152 for (uint32_t n = 1; n <= 8; n++) {
153 for (uint32_t m = 1; m <= 4; m++) {
154 GemmMicrokernelTester()
155 .mr(4)
156 .nr(8)
157 .kr(1)
158 .sr(1)
159 .m(m)
160 .n(n)
161 .k(k)
162 .iterations(1)
163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
164 }
165 }
166 }
167 }
168
169 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8) {
170 TEST_REQUIRES_ARM_NEON;
171 for (size_t k = 9; k < 16; k++) {
172 GemmMicrokernelTester()
173 .mr(4)
174 .nr(8)
175 .kr(1)
176 .sr(1)
177 .m(4)
178 .n(8)
179 .k(k)
180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
181 }
182 }
183
184 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_strided_a) {
185 TEST_REQUIRES_ARM_NEON;
186 for (size_t k = 9; k < 16; k++) {
187 GemmMicrokernelTester()
188 .mr(4)
189 .nr(8)
190 .kr(1)
191 .sr(1)
192 .m(4)
193 .n(8)
194 .k(k)
195 .a_stride(19)
196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
197 }
198 }
199
200 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_gt_8_subtile) {
201 TEST_REQUIRES_ARM_NEON;
202 for (size_t k = 9; k < 16; k++) {
203 for (uint32_t n = 1; n <= 8; n++) {
204 for (uint32_t m = 1; m <= 4; m++) {
205 GemmMicrokernelTester()
206 .mr(4)
207 .nr(8)
208 .kr(1)
209 .sr(1)
210 .m(m)
211 .n(n)
212 .k(k)
213 .iterations(1)
214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
215 }
216 }
217 }
218 }
219
220 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8) {
221 TEST_REQUIRES_ARM_NEON;
222 for (size_t k = 16; k <= 80; k += 8) {
223 GemmMicrokernelTester()
224 .mr(4)
225 .nr(8)
226 .kr(1)
227 .sr(1)
228 .m(4)
229 .n(8)
230 .k(k)
231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
232 }
233 }
234
235 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_strided_a) {
236 TEST_REQUIRES_ARM_NEON;
237 for (size_t k = 16; k <= 80; k += 8) {
238 GemmMicrokernelTester()
239 .mr(4)
240 .nr(8)
241 .kr(1)
242 .sr(1)
243 .m(4)
244 .n(8)
245 .k(k)
246 .a_stride(83)
247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
248 }
249 }
250
251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, k_div_8_subtile) {
252 TEST_REQUIRES_ARM_NEON;
253 for (size_t k = 16; k <= 80; k += 8) {
254 for (uint32_t n = 1; n <= 8; n++) {
255 for (uint32_t m = 1; m <= 4; m++) {
256 GemmMicrokernelTester()
257 .mr(4)
258 .nr(8)
259 .kr(1)
260 .sr(1)
261 .m(m)
262 .n(n)
263 .k(k)
264 .iterations(1)
265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
266 }
267 }
268 }
269 }
270
271 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8) {
272 TEST_REQUIRES_ARM_NEON;
273 for (uint32_t n = 9; n < 16; n++) {
274 for (size_t k = 1; k <= 40; k += 9) {
275 GemmMicrokernelTester()
276 .mr(4)
277 .nr(8)
278 .kr(1)
279 .sr(1)
280 .m(4)
281 .n(n)
282 .k(k)
283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
284 }
285 }
286 }
287
288 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_cn) {
289 TEST_REQUIRES_ARM_NEON;
290 for (uint32_t n = 9; n < 16; n++) {
291 for (size_t k = 1; k <= 40; k += 9) {
292 GemmMicrokernelTester()
293 .mr(4)
294 .nr(8)
295 .kr(1)
296 .sr(1)
297 .m(4)
298 .n(n)
299 .k(k)
300 .cn_stride(11)
301 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
302 }
303 }
304 }
305
306 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_strided_a) {
307 TEST_REQUIRES_ARM_NEON;
308 for (uint32_t n = 9; n < 16; n++) {
309 for (size_t k = 1; k <= 40; k += 9) {
310 GemmMicrokernelTester()
311 .mr(4)
312 .nr(8)
313 .kr(1)
314 .sr(1)
315 .m(4)
316 .n(n)
317 .k(k)
318 .a_stride(43)
319 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
320 }
321 }
322 }
323
324 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_gt_8_subtile) {
325 TEST_REQUIRES_ARM_NEON;
326 for (uint32_t n = 9; n < 16; n++) {
327 for (size_t k = 1; k <= 40; k += 9) {
328 for (uint32_t m = 1; m <= 4; m++) {
329 GemmMicrokernelTester()
330 .mr(4)
331 .nr(8)
332 .kr(1)
333 .sr(1)
334 .m(m)
335 .n(n)
336 .k(k)
337 .iterations(1)
338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
339 }
340 }
341 }
342 }
343
344 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8) {
345 TEST_REQUIRES_ARM_NEON;
346 for (uint32_t n = 16; n <= 24; n += 8) {
347 for (size_t k = 1; k <= 40; k += 9) {
348 GemmMicrokernelTester()
349 .mr(4)
350 .nr(8)
351 .kr(1)
352 .sr(1)
353 .m(4)
354 .n(n)
355 .k(k)
356 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
357 }
358 }
359 }
360
361 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_cn) {
362 TEST_REQUIRES_ARM_NEON;
363 for (uint32_t n = 16; n <= 24; n += 8) {
364 for (size_t k = 1; k <= 40; k += 9) {
365 GemmMicrokernelTester()
366 .mr(4)
367 .nr(8)
368 .kr(1)
369 .sr(1)
370 .m(4)
371 .n(n)
372 .k(k)
373 .cn_stride(11)
374 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
375 }
376 }
377 }
378
379 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_strided_a) {
380 TEST_REQUIRES_ARM_NEON;
381 for (uint32_t n = 16; n <= 24; n += 8) {
382 for (size_t k = 1; k <= 40; k += 9) {
383 GemmMicrokernelTester()
384 .mr(4)
385 .nr(8)
386 .kr(1)
387 .sr(1)
388 .m(4)
389 .n(n)
390 .k(k)
391 .a_stride(43)
392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
393 }
394 }
395 }
396
397 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, n_div_8_subtile) {
398 TEST_REQUIRES_ARM_NEON;
399 for (uint32_t n = 16; n <= 24; n += 8) {
400 for (size_t k = 1; k <= 40; k += 9) {
401 for (uint32_t m = 1; m <= 4; m++) {
402 GemmMicrokernelTester()
403 .mr(4)
404 .nr(8)
405 .kr(1)
406 .sr(1)
407 .m(m)
408 .n(n)
409 .k(k)
410 .iterations(1)
411 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
412 }
413 }
414 }
415 }
416
417 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm_subtile) {
418 TEST_REQUIRES_ARM_NEON;
419 for (size_t k = 1; k <= 40; k += 9) {
420 for (uint32_t n = 1; n <= 8; n++) {
421 for (uint32_t m = 1; m <= 4; m++) {
422 GemmMicrokernelTester()
423 .mr(4)
424 .nr(8)
425 .kr(1)
426 .sr(1)
427 .m(m)
428 .n(n)
429 .k(k)
430 .cm_stride(11)
431 .iterations(1)
432 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
433 }
434 }
435 }
436 }
437
438 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmin) {
439 TEST_REQUIRES_ARM_NEON;
440 GemmMicrokernelTester()
441 .mr(4)
442 .nr(8)
443 .kr(1)
444 .sr(1)
445 .m(4)
446 .n(8)
447 .k(8)
448 .qmin(128)
449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
450 }
451
452 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, qmax) {
453 TEST_REQUIRES_ARM_NEON;
454 GemmMicrokernelTester()
455 .mr(4)
456 .nr(8)
457 .kr(1)
458 .sr(1)
459 .m(4)
460 .n(8)
461 .k(8)
462 .qmax(128)
463 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
464 }
465
466 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_CORTEX_A53, strided_cm) {
467 TEST_REQUIRES_ARM_NEON;
468 GemmMicrokernelTester()
469 .mr(4)
470 .nr(8)
471 .kr(1)
472 .sr(1)
473 .m(4)
474 .n(8)
475 .k(8)
476 .cm_stride(11)
477 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_cortex_a53, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
478 }
479#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
480
481
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800482#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
483 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8) {
484 TEST_REQUIRES_ARM_NEON;
485 GemmMicrokernelTester()
486 .mr(4)
487 .nr(8)
488 .kr(1)
489 .sr(1)
490 .m(4)
491 .n(8)
492 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800494 }
495
496 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, strided_cn) {
497 TEST_REQUIRES_ARM_NEON;
498 GemmMicrokernelTester()
499 .mr(4)
500 .nr(8)
501 .kr(1)
502 .sr(1)
503 .m(4)
504 .n(8)
505 .k(8)
506 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800508 }
509
510 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_strided_a) {
511 TEST_REQUIRES_ARM_NEON;
512 GemmMicrokernelTester()
513 .mr(4)
514 .nr(8)
515 .kr(1)
516 .sr(1)
517 .m(4)
518 .n(8)
519 .k(8)
520 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800521 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800522 }
523
524 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile) {
525 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800526 for (uint32_t n = 1; n <= 8; n++) {
527 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800528 GemmMicrokernelTester()
529 .mr(4)
530 .nr(8)
531 .kr(1)
532 .sr(1)
533 .m(m)
534 .n(n)
535 .k(8)
536 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800537 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800538 }
539 }
540 }
541
542 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_m) {
543 TEST_REQUIRES_ARM_NEON;
544 for (uint32_t m = 1; m <= 4; m++) {
545 GemmMicrokernelTester()
546 .mr(4)
547 .nr(8)
548 .kr(1)
549 .sr(1)
550 .m(m)
551 .n(8)
552 .k(8)
553 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800554 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800555 }
556 }
557
558 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_eq_8_subtile_n) {
559 TEST_REQUIRES_ARM_NEON;
560 for (uint32_t n = 1; n <= 8; n++) {
561 GemmMicrokernelTester()
562 .mr(4)
563 .nr(8)
564 .kr(1)
565 .sr(1)
566 .m(4)
567 .n(n)
568 .k(8)
569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800571 }
572 }
573
574 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8) {
575 TEST_REQUIRES_ARM_NEON;
576 for (size_t k = 1; k < 8; k++) {
577 GemmMicrokernelTester()
578 .mr(4)
579 .nr(8)
580 .kr(1)
581 .sr(1)
582 .m(4)
583 .n(8)
584 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800586 }
587 }
588
589 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_strided_a) {
590 TEST_REQUIRES_ARM_NEON;
591 for (size_t k = 1; k < 8; k++) {
592 GemmMicrokernelTester()
593 .mr(4)
594 .nr(8)
595 .kr(1)
596 .sr(1)
597 .m(4)
598 .n(8)
599 .k(k)
600 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800602 }
603 }
604
605 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_lt_8_subtile) {
606 TEST_REQUIRES_ARM_NEON;
607 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800608 for (uint32_t n = 1; n <= 8; n++) {
609 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800610 GemmMicrokernelTester()
611 .mr(4)
612 .nr(8)
613 .kr(1)
614 .sr(1)
615 .m(m)
616 .n(n)
617 .k(k)
618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800620 }
621 }
622 }
623 }
624
625 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8) {
626 TEST_REQUIRES_ARM_NEON;
627 for (size_t k = 9; k < 16; k++) {
628 GemmMicrokernelTester()
629 .mr(4)
630 .nr(8)
631 .kr(1)
632 .sr(1)
633 .m(4)
634 .n(8)
635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800637 }
638 }
639
640 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_strided_a) {
641 TEST_REQUIRES_ARM_NEON;
642 for (size_t k = 9; k < 16; k++) {
643 GemmMicrokernelTester()
644 .mr(4)
645 .nr(8)
646 .kr(1)
647 .sr(1)
648 .m(4)
649 .n(8)
650 .k(k)
651 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -0800652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800653 }
654 }
655
656 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_gt_8_subtile) {
657 TEST_REQUIRES_ARM_NEON;
658 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800659 for (uint32_t n = 1; n <= 8; n++) {
660 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800661 GemmMicrokernelTester()
662 .mr(4)
663 .nr(8)
664 .kr(1)
665 .sr(1)
666 .m(m)
667 .n(n)
668 .k(k)
669 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800671 }
672 }
673 }
674 }
675
676 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_div_8) {
677 TEST_REQUIRES_ARM_NEON;
678 for (size_t k = 16; k <= 80; k += 8) {
679 GemmMicrokernelTester()
680 .mr(4)
681 .nr(8)
682 .kr(1)
683 .sr(1)
684 .m(4)
685 .n(8)
686 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800688 }
689 }
690
691 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_strided_a) {
692 TEST_REQUIRES_ARM_NEON;
693 for (size_t k = 16; k <= 80; k += 8) {
694 GemmMicrokernelTester()
695 .mr(4)
696 .nr(8)
697 .kr(1)
698 .sr(1)
699 .m(4)
700 .n(8)
701 .k(k)
702 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -0800703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800704 }
705 }
706
707 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, k_div_8_subtile) {
708 TEST_REQUIRES_ARM_NEON;
709 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800710 for (uint32_t n = 1; n <= 8; n++) {
711 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800712 GemmMicrokernelTester()
713 .mr(4)
714 .nr(8)
715 .kr(1)
716 .sr(1)
717 .m(m)
718 .n(n)
719 .k(k)
720 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800722 }
723 }
724 }
725 }
726
727 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8) {
728 TEST_REQUIRES_ARM_NEON;
729 for (uint32_t n = 9; n < 16; n++) {
730 for (size_t k = 1; k <= 40; k += 9) {
731 GemmMicrokernelTester()
732 .mr(4)
733 .nr(8)
734 .kr(1)
735 .sr(1)
736 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800737 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800738 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800740 }
741 }
742 }
743
744 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8_strided_cn) {
745 TEST_REQUIRES_ARM_NEON;
746 for (uint32_t n = 9; n < 16; n++) {
747 for (size_t k = 1; k <= 40; k += 9) {
748 GemmMicrokernelTester()
749 .mr(4)
750 .nr(8)
751 .kr(1)
752 .sr(1)
753 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800754 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800755 .k(k)
756 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800758 }
759 }
760 }
761
762 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8_strided_a) {
763 TEST_REQUIRES_ARM_NEON;
764 for (uint32_t n = 9; n < 16; n++) {
765 for (size_t k = 1; k <= 40; k += 9) {
766 GemmMicrokernelTester()
767 .mr(4)
768 .nr(8)
769 .kr(1)
770 .sr(1)
771 .m(4)
772 .n(n)
773 .k(k)
774 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800776 }
777 }
778 }
779
780 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_gt_8_subtile) {
781 TEST_REQUIRES_ARM_NEON;
782 for (uint32_t n = 9; n < 16; n++) {
783 for (size_t k = 1; k <= 40; k += 9) {
784 for (uint32_t m = 1; m <= 4; m++) {
785 GemmMicrokernelTester()
786 .mr(4)
787 .nr(8)
788 .kr(1)
789 .sr(1)
790 .m(m)
791 .n(n)
792 .k(k)
793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800795 }
796 }
797 }
798 }
799
800 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8) {
801 TEST_REQUIRES_ARM_NEON;
802 for (uint32_t n = 16; n <= 24; n += 8) {
803 for (size_t k = 1; k <= 40; k += 9) {
804 GemmMicrokernelTester()
805 .mr(4)
806 .nr(8)
807 .kr(1)
808 .sr(1)
809 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -0800810 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800811 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -0800812 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800813 }
814 }
815 }
816
817 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8_strided_cn) {
818 TEST_REQUIRES_ARM_NEON;
819 for (uint32_t n = 16; n <= 24; n += 8) {
820 for (size_t k = 1; k <= 40; k += 9) {
821 GemmMicrokernelTester()
822 .mr(4)
823 .nr(8)
824 .kr(1)
825 .sr(1)
826 .m(4)
827 .n(n)
828 .k(k)
829 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800830 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800831 }
832 }
833 }
834
835 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8_strided_a) {
836 TEST_REQUIRES_ARM_NEON;
837 for (uint32_t n = 16; n <= 24; n += 8) {
838 for (size_t k = 1; k <= 40; k += 9) {
839 GemmMicrokernelTester()
840 .mr(4)
841 .nr(8)
842 .kr(1)
843 .sr(1)
844 .m(4)
845 .n(n)
846 .k(k)
847 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -0800848 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800849 }
850 }
851 }
852
853 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, n_div_8_subtile) {
854 TEST_REQUIRES_ARM_NEON;
855 for (uint32_t n = 16; n <= 24; n += 8) {
856 for (size_t k = 1; k <= 40; k += 9) {
857 for (uint32_t m = 1; m <= 4; m++) {
858 GemmMicrokernelTester()
859 .mr(4)
860 .nr(8)
861 .kr(1)
862 .sr(1)
863 .m(m)
864 .n(n)
865 .k(k)
866 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800867 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800868 }
869 }
870 }
871 }
872
873 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, strided_cm_subtile) {
874 TEST_REQUIRES_ARM_NEON;
875 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -0800876 for (uint32_t n = 1; n <= 8; n++) {
877 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800878 GemmMicrokernelTester()
879 .mr(4)
880 .nr(8)
881 .kr(1)
882 .sr(1)
883 .m(m)
884 .n(n)
885 .k(k)
886 .cm_stride(11)
887 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800888 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800889 }
890 }
891 }
892 }
893
894 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, qmin) {
895 TEST_REQUIRES_ARM_NEON;
896 GemmMicrokernelTester()
897 .mr(4)
898 .nr(8)
899 .kr(1)
900 .sr(1)
901 .m(4)
902 .n(8)
903 .k(8)
904 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800906 }
907
908 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, qmax) {
909 TEST_REQUIRES_ARM_NEON;
910 GemmMicrokernelTester()
911 .mr(4)
912 .nr(8)
913 .kr(1)
914 .sr(1)
915 .m(4)
916 .n(8)
917 .k(8)
918 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -0800919 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800920 }
921
922 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__AARCH64_NEON_MLAL_LANE_LD64, strided_cm) {
923 TEST_REQUIRES_ARM_NEON;
924 GemmMicrokernelTester()
925 .mr(4)
926 .nr(8)
927 .kr(1)
928 .sr(1)
929 .m(4)
930 .n(8)
931 .k(8)
932 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800933 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__aarch64_neon_mlal_lane_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800934 }
935#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
936
937
938#if XNN_ARCH_ARM || XNN_ARCH_ARM64
939 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8) {
940 TEST_REQUIRES_ARM_NEON;
941 GemmMicrokernelTester()
942 .mr(1)
943 .nr(8)
944 .kr(2)
945 .sr(1)
946 .m(1)
947 .n(8)
948 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -0800949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800950 }
951
952 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cn) {
953 TEST_REQUIRES_ARM_NEON;
954 GemmMicrokernelTester()
955 .mr(1)
956 .nr(8)
957 .kr(2)
958 .sr(1)
959 .m(1)
960 .n(8)
961 .k(8)
962 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800963 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800964 }
965
966 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
967 TEST_REQUIRES_ARM_NEON;
968 GemmMicrokernelTester()
969 .mr(1)
970 .nr(8)
971 .kr(2)
972 .sr(1)
973 .m(1)
974 .n(8)
975 .k(8)
976 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -0800977 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800978 }
979
980 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
981 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -0800982 for (uint32_t n = 1; n <= 8; n++) {
983 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800984 GemmMicrokernelTester()
985 .mr(1)
986 .nr(8)
987 .kr(2)
988 .sr(1)
989 .m(m)
990 .n(n)
991 .k(8)
992 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -0800993 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -0800994 }
995 }
996 }
997
998 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
999 TEST_REQUIRES_ARM_NEON;
1000 for (uint32_t m = 1; m <= 1; m++) {
1001 GemmMicrokernelTester()
1002 .mr(1)
1003 .nr(8)
1004 .kr(2)
1005 .sr(1)
1006 .m(m)
1007 .n(8)
1008 .k(8)
1009 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001010 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001011 }
1012 }
1013
1014 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
1015 TEST_REQUIRES_ARM_NEON;
1016 for (uint32_t n = 1; n <= 8; n++) {
1017 GemmMicrokernelTester()
1018 .mr(1)
1019 .nr(8)
1020 .kr(2)
1021 .sr(1)
1022 .m(1)
1023 .n(n)
1024 .k(8)
1025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001027 }
1028 }
1029
1030 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8) {
1031 TEST_REQUIRES_ARM_NEON;
1032 for (size_t k = 1; k < 8; k++) {
1033 GemmMicrokernelTester()
1034 .mr(1)
1035 .nr(8)
1036 .kr(2)
1037 .sr(1)
1038 .m(1)
1039 .n(8)
1040 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001042 }
1043 }
1044
1045 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
1046 TEST_REQUIRES_ARM_NEON;
1047 for (size_t k = 1; k < 8; k++) {
1048 GemmMicrokernelTester()
1049 .mr(1)
1050 .nr(8)
1051 .kr(2)
1052 .sr(1)
1053 .m(1)
1054 .n(8)
1055 .k(k)
1056 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001058 }
1059 }
1060
1061 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
1062 TEST_REQUIRES_ARM_NEON;
1063 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001064 for (uint32_t n = 1; n <= 8; n++) {
1065 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001066 GemmMicrokernelTester()
1067 .mr(1)
1068 .nr(8)
1069 .kr(2)
1070 .sr(1)
1071 .m(m)
1072 .n(n)
1073 .k(k)
1074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001076 }
1077 }
1078 }
1079 }
1080
1081 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8) {
1082 TEST_REQUIRES_ARM_NEON;
1083 for (size_t k = 9; k < 16; k++) {
1084 GemmMicrokernelTester()
1085 .mr(1)
1086 .nr(8)
1087 .kr(2)
1088 .sr(1)
1089 .m(1)
1090 .n(8)
1091 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001093 }
1094 }
1095
1096 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
1097 TEST_REQUIRES_ARM_NEON;
1098 for (size_t k = 9; k < 16; k++) {
1099 GemmMicrokernelTester()
1100 .mr(1)
1101 .nr(8)
1102 .kr(2)
1103 .sr(1)
1104 .m(1)
1105 .n(8)
1106 .k(k)
1107 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001109 }
1110 }
1111
1112 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
1113 TEST_REQUIRES_ARM_NEON;
1114 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001115 for (uint32_t n = 1; n <= 8; n++) {
1116 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001117 GemmMicrokernelTester()
1118 .mr(1)
1119 .nr(8)
1120 .kr(2)
1121 .sr(1)
1122 .m(m)
1123 .n(n)
1124 .k(k)
1125 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001127 }
1128 }
1129 }
1130 }
1131
1132 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8) {
1133 TEST_REQUIRES_ARM_NEON;
1134 for (size_t k = 16; k <= 80; k += 8) {
1135 GemmMicrokernelTester()
1136 .mr(1)
1137 .nr(8)
1138 .kr(2)
1139 .sr(1)
1140 .m(1)
1141 .n(8)
1142 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001144 }
1145 }
1146
1147 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8_strided_a) {
1148 TEST_REQUIRES_ARM_NEON;
1149 for (size_t k = 16; k <= 80; k += 8) {
1150 GemmMicrokernelTester()
1151 .mr(1)
1152 .nr(8)
1153 .kr(2)
1154 .sr(1)
1155 .m(1)
1156 .n(8)
1157 .k(k)
1158 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001160 }
1161 }
1162
1163 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
1164 TEST_REQUIRES_ARM_NEON;
1165 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001166 for (uint32_t n = 1; n <= 8; n++) {
1167 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001168 GemmMicrokernelTester()
1169 .mr(1)
1170 .nr(8)
1171 .kr(2)
1172 .sr(1)
1173 .m(m)
1174 .n(n)
1175 .k(k)
1176 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001178 }
1179 }
1180 }
1181 }
1182
1183 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8) {
1184 TEST_REQUIRES_ARM_NEON;
1185 for (uint32_t n = 9; n < 16; n++) {
1186 for (size_t k = 1; k <= 40; k += 9) {
1187 GemmMicrokernelTester()
1188 .mr(1)
1189 .nr(8)
1190 .kr(2)
1191 .sr(1)
1192 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001193 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001194 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001196 }
1197 }
1198 }
1199
1200 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
1201 TEST_REQUIRES_ARM_NEON;
1202 for (uint32_t n = 9; n < 16; n++) {
1203 for (size_t k = 1; k <= 40; k += 9) {
1204 GemmMicrokernelTester()
1205 .mr(1)
1206 .nr(8)
1207 .kr(2)
1208 .sr(1)
1209 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001210 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001211 .k(k)
1212 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001214 }
1215 }
1216 }
1217
1218 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_strided_a) {
1219 TEST_REQUIRES_ARM_NEON;
1220 for (uint32_t n = 9; n < 16; n++) {
1221 for (size_t k = 1; k <= 40; k += 9) {
1222 GemmMicrokernelTester()
1223 .mr(1)
1224 .nr(8)
1225 .kr(2)
1226 .sr(1)
1227 .m(1)
1228 .n(n)
1229 .k(k)
1230 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001232 }
1233 }
1234 }
1235
1236 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
1237 TEST_REQUIRES_ARM_NEON;
1238 for (uint32_t n = 9; n < 16; n++) {
1239 for (size_t k = 1; k <= 40; k += 9) {
1240 for (uint32_t m = 1; m <= 1; m++) {
1241 GemmMicrokernelTester()
1242 .mr(1)
1243 .nr(8)
1244 .kr(2)
1245 .sr(1)
1246 .m(m)
1247 .n(n)
1248 .k(k)
1249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001251 }
1252 }
1253 }
1254 }
1255
1256 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8) {
1257 TEST_REQUIRES_ARM_NEON;
1258 for (uint32_t n = 16; n <= 24; n += 8) {
1259 for (size_t k = 1; k <= 40; k += 9) {
1260 GemmMicrokernelTester()
1261 .mr(1)
1262 .nr(8)
1263 .kr(2)
1264 .sr(1)
1265 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001266 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001267 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001268 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001269 }
1270 }
1271 }
1272
1273 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
1274 TEST_REQUIRES_ARM_NEON;
1275 for (uint32_t n = 16; n <= 24; n += 8) {
1276 for (size_t k = 1; k <= 40; k += 9) {
1277 GemmMicrokernelTester()
1278 .mr(1)
1279 .nr(8)
1280 .kr(2)
1281 .sr(1)
1282 .m(1)
1283 .n(n)
1284 .k(k)
1285 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001286 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001287 }
1288 }
1289 }
1290
1291 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_strided_a) {
1292 TEST_REQUIRES_ARM_NEON;
1293 for (uint32_t n = 16; n <= 24; n += 8) {
1294 for (size_t k = 1; k <= 40; k += 9) {
1295 GemmMicrokernelTester()
1296 .mr(1)
1297 .nr(8)
1298 .kr(2)
1299 .sr(1)
1300 .m(1)
1301 .n(n)
1302 .k(k)
1303 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001304 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001305 }
1306 }
1307 }
1308
1309 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
1310 TEST_REQUIRES_ARM_NEON;
1311 for (uint32_t n = 16; n <= 24; n += 8) {
1312 for (size_t k = 1; k <= 40; k += 9) {
1313 for (uint32_t m = 1; m <= 1; m++) {
1314 GemmMicrokernelTester()
1315 .mr(1)
1316 .nr(8)
1317 .kr(2)
1318 .sr(1)
1319 .m(m)
1320 .n(n)
1321 .k(k)
1322 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001323 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001324 }
1325 }
1326 }
1327 }
1328
1329 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
1330 TEST_REQUIRES_ARM_NEON;
1331 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001332 for (uint32_t n = 1; n <= 8; n++) {
1333 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001334 GemmMicrokernelTester()
1335 .mr(1)
1336 .nr(8)
1337 .kr(2)
1338 .sr(1)
1339 .m(m)
1340 .n(n)
1341 .k(k)
1342 .cm_stride(11)
1343 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001344 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001345 }
1346 }
1347 }
1348 }
1349
1350 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmin) {
1351 TEST_REQUIRES_ARM_NEON;
1352 GemmMicrokernelTester()
1353 .mr(1)
1354 .nr(8)
1355 .kr(2)
1356 .sr(1)
1357 .m(1)
1358 .n(8)
1359 .k(8)
1360 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001362 }
1363
1364 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, qmax) {
1365 TEST_REQUIRES_ARM_NEON;
1366 GemmMicrokernelTester()
1367 .mr(1)
1368 .nr(8)
1369 .kr(2)
1370 .sr(1)
1371 .m(1)
1372 .n(8)
1373 .k(8)
1374 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001375 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001376 }
1377
1378 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MULL_LD1R, strided_cm) {
1379 TEST_REQUIRES_ARM_NEON;
1380 GemmMicrokernelTester()
1381 .mr(1)
1382 .nr(8)
1383 .kr(2)
1384 .sr(1)
1385 .m(1)
1386 .n(8)
1387 .k(8)
1388 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001389 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001390 }
1391#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1392
1393
1394#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1395 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8) {
1396 TEST_REQUIRES_ARM_NEON;
1397 GemmMicrokernelTester()
1398 .mr(4)
1399 .nr(8)
1400 .kr(2)
1401 .sr(1)
1402 .m(4)
1403 .n(8)
1404 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001406 }
1407
1408 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cn) {
1409 TEST_REQUIRES_ARM_NEON;
1410 GemmMicrokernelTester()
1411 .mr(4)
1412 .nr(8)
1413 .kr(2)
1414 .sr(1)
1415 .m(4)
1416 .n(8)
1417 .k(8)
1418 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001420 }
1421
1422 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
1423 TEST_REQUIRES_ARM_NEON;
1424 GemmMicrokernelTester()
1425 .mr(4)
1426 .nr(8)
1427 .kr(2)
1428 .sr(1)
1429 .m(4)
1430 .n(8)
1431 .k(8)
1432 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001433 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001434 }
1435
1436 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile) {
1437 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001438 for (uint32_t n = 1; n <= 8; n++) {
1439 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001440 GemmMicrokernelTester()
1441 .mr(4)
1442 .nr(8)
1443 .kr(2)
1444 .sr(1)
1445 .m(m)
1446 .n(n)
1447 .k(8)
1448 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001450 }
1451 }
1452 }
1453
1454 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
1455 TEST_REQUIRES_ARM_NEON;
1456 for (uint32_t m = 1; m <= 4; m++) {
1457 GemmMicrokernelTester()
1458 .mr(4)
1459 .nr(8)
1460 .kr(2)
1461 .sr(1)
1462 .m(m)
1463 .n(8)
1464 .k(8)
1465 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001466 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001467 }
1468 }
1469
1470 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
1471 TEST_REQUIRES_ARM_NEON;
1472 for (uint32_t n = 1; n <= 8; n++) {
1473 GemmMicrokernelTester()
1474 .mr(4)
1475 .nr(8)
1476 .kr(2)
1477 .sr(1)
1478 .m(4)
1479 .n(n)
1480 .k(8)
1481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001483 }
1484 }
1485
1486 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8) {
1487 TEST_REQUIRES_ARM_NEON;
1488 for (size_t k = 1; k < 8; k++) {
1489 GemmMicrokernelTester()
1490 .mr(4)
1491 .nr(8)
1492 .kr(2)
1493 .sr(1)
1494 .m(4)
1495 .n(8)
1496 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001498 }
1499 }
1500
1501 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
1502 TEST_REQUIRES_ARM_NEON;
1503 for (size_t k = 1; k < 8; k++) {
1504 GemmMicrokernelTester()
1505 .mr(4)
1506 .nr(8)
1507 .kr(2)
1508 .sr(1)
1509 .m(4)
1510 .n(8)
1511 .k(k)
1512 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001514 }
1515 }
1516
1517 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_lt_8_subtile) {
1518 TEST_REQUIRES_ARM_NEON;
1519 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001520 for (uint32_t n = 1; n <= 8; n++) {
1521 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001522 GemmMicrokernelTester()
1523 .mr(4)
1524 .nr(8)
1525 .kr(2)
1526 .sr(1)
1527 .m(m)
1528 .n(n)
1529 .k(k)
1530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001532 }
1533 }
1534 }
1535 }
1536
1537 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8) {
1538 TEST_REQUIRES_ARM_NEON;
1539 for (size_t k = 9; k < 16; k++) {
1540 GemmMicrokernelTester()
1541 .mr(4)
1542 .nr(8)
1543 .kr(2)
1544 .sr(1)
1545 .m(4)
1546 .n(8)
1547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001548 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001549 }
1550 }
1551
1552 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
1553 TEST_REQUIRES_ARM_NEON;
1554 for (size_t k = 9; k < 16; k++) {
1555 GemmMicrokernelTester()
1556 .mr(4)
1557 .nr(8)
1558 .kr(2)
1559 .sr(1)
1560 .m(4)
1561 .n(8)
1562 .k(k)
1563 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001565 }
1566 }
1567
1568 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_gt_8_subtile) {
1569 TEST_REQUIRES_ARM_NEON;
1570 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001571 for (uint32_t n = 1; n <= 8; n++) {
1572 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001573 GemmMicrokernelTester()
1574 .mr(4)
1575 .nr(8)
1576 .kr(2)
1577 .sr(1)
1578 .m(m)
1579 .n(n)
1580 .k(k)
1581 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001583 }
1584 }
1585 }
1586 }
1587
1588 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8) {
1589 TEST_REQUIRES_ARM_NEON;
1590 for (size_t k = 16; k <= 80; k += 8) {
1591 GemmMicrokernelTester()
1592 .mr(4)
1593 .nr(8)
1594 .kr(2)
1595 .sr(1)
1596 .m(4)
1597 .n(8)
1598 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001600 }
1601 }
1602
1603 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8_strided_a) {
1604 TEST_REQUIRES_ARM_NEON;
1605 for (size_t k = 16; k <= 80; k += 8) {
1606 GemmMicrokernelTester()
1607 .mr(4)
1608 .nr(8)
1609 .kr(2)
1610 .sr(1)
1611 .m(4)
1612 .n(8)
1613 .k(k)
1614 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08001615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001616 }
1617 }
1618
1619 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, k_div_8_subtile) {
1620 TEST_REQUIRES_ARM_NEON;
1621 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001622 for (uint32_t n = 1; n <= 8; n++) {
1623 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001624 GemmMicrokernelTester()
1625 .mr(4)
1626 .nr(8)
1627 .kr(2)
1628 .sr(1)
1629 .m(m)
1630 .n(n)
1631 .k(k)
1632 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001634 }
1635 }
1636 }
1637 }
1638
1639 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8) {
1640 TEST_REQUIRES_ARM_NEON;
1641 for (uint32_t n = 9; n < 16; n++) {
1642 for (size_t k = 1; k <= 40; k += 9) {
1643 GemmMicrokernelTester()
1644 .mr(4)
1645 .nr(8)
1646 .kr(2)
1647 .sr(1)
1648 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001649 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001650 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001652 }
1653 }
1654 }
1655
1656 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_strided_cn) {
1657 TEST_REQUIRES_ARM_NEON;
1658 for (uint32_t n = 9; n < 16; n++) {
1659 for (size_t k = 1; k <= 40; k += 9) {
1660 GemmMicrokernelTester()
1661 .mr(4)
1662 .nr(8)
1663 .kr(2)
1664 .sr(1)
1665 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001666 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001667 .k(k)
1668 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001669 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001670 }
1671 }
1672 }
1673
1674 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_strided_a) {
1675 TEST_REQUIRES_ARM_NEON;
1676 for (uint32_t n = 9; n < 16; n++) {
1677 for (size_t k = 1; k <= 40; k += 9) {
1678 GemmMicrokernelTester()
1679 .mr(4)
1680 .nr(8)
1681 .kr(2)
1682 .sr(1)
1683 .m(4)
1684 .n(n)
1685 .k(k)
1686 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001688 }
1689 }
1690 }
1691
1692 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_gt_8_subtile) {
1693 TEST_REQUIRES_ARM_NEON;
1694 for (uint32_t n = 9; n < 16; n++) {
1695 for (size_t k = 1; k <= 40; k += 9) {
1696 for (uint32_t m = 1; m <= 4; m++) {
1697 GemmMicrokernelTester()
1698 .mr(4)
1699 .nr(8)
1700 .kr(2)
1701 .sr(1)
1702 .m(m)
1703 .n(n)
1704 .k(k)
1705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001707 }
1708 }
1709 }
1710 }
1711
1712 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8) {
1713 TEST_REQUIRES_ARM_NEON;
1714 for (uint32_t n = 16; n <= 24; n += 8) {
1715 for (size_t k = 1; k <= 40; k += 9) {
1716 GemmMicrokernelTester()
1717 .mr(4)
1718 .nr(8)
1719 .kr(2)
1720 .sr(1)
1721 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08001722 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001723 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001724 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001725 }
1726 }
1727 }
1728
1729 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_strided_cn) {
1730 TEST_REQUIRES_ARM_NEON;
1731 for (uint32_t n = 16; n <= 24; n += 8) {
1732 for (size_t k = 1; k <= 40; k += 9) {
1733 GemmMicrokernelTester()
1734 .mr(4)
1735 .nr(8)
1736 .kr(2)
1737 .sr(1)
1738 .m(4)
1739 .n(n)
1740 .k(k)
1741 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001742 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001743 }
1744 }
1745 }
1746
1747 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_strided_a) {
1748 TEST_REQUIRES_ARM_NEON;
1749 for (uint32_t n = 16; n <= 24; n += 8) {
1750 for (size_t k = 1; k <= 40; k += 9) {
1751 GemmMicrokernelTester()
1752 .mr(4)
1753 .nr(8)
1754 .kr(2)
1755 .sr(1)
1756 .m(4)
1757 .n(n)
1758 .k(k)
1759 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08001760 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001761 }
1762 }
1763 }
1764
1765 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, n_div_8_subtile) {
1766 TEST_REQUIRES_ARM_NEON;
1767 for (uint32_t n = 16; n <= 24; n += 8) {
1768 for (size_t k = 1; k <= 40; k += 9) {
1769 for (uint32_t m = 1; m <= 4; m++) {
1770 GemmMicrokernelTester()
1771 .mr(4)
1772 .nr(8)
1773 .kr(2)
1774 .sr(1)
1775 .m(m)
1776 .n(n)
1777 .k(k)
1778 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001779 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001780 }
1781 }
1782 }
1783 }
1784
1785 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cm_subtile) {
1786 TEST_REQUIRES_ARM_NEON;
1787 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001788 for (uint32_t n = 1; n <= 8; n++) {
1789 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001790 GemmMicrokernelTester()
1791 .mr(4)
1792 .nr(8)
1793 .kr(2)
1794 .sr(1)
1795 .m(m)
1796 .n(n)
1797 .k(k)
1798 .cm_stride(11)
1799 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001800 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001801 }
1802 }
1803 }
1804 }
1805
1806 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, qmin) {
1807 TEST_REQUIRES_ARM_NEON;
1808 GemmMicrokernelTester()
1809 .mr(4)
1810 .nr(8)
1811 .kr(2)
1812 .sr(1)
1813 .m(4)
1814 .n(8)
1815 .k(8)
1816 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001818 }
1819
1820 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, qmax) {
1821 TEST_REQUIRES_ARM_NEON;
1822 GemmMicrokernelTester()
1823 .mr(4)
1824 .nr(8)
1825 .kr(2)
1826 .sr(1)
1827 .m(4)
1828 .n(8)
1829 .k(8)
1830 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08001831 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001832 }
1833
1834 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD1R, strided_cm) {
1835 TEST_REQUIRES_ARM_NEON;
1836 GemmMicrokernelTester()
1837 .mr(4)
1838 .nr(8)
1839 .kr(2)
1840 .sr(1)
1841 .m(4)
1842 .n(8)
1843 .k(8)
1844 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001845 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001846 }
1847#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1848
1849
1850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
1851 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_eq_8) {
1852 TEST_REQUIRES_ARM_NEON;
1853 GemmMicrokernelTester()
1854 .mr(1)
1855 .nr(16)
1856 .kr(2)
1857 .sr(1)
1858 .m(1)
1859 .n(16)
1860 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08001861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001862 }
1863
1864 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, strided_cn) {
1865 TEST_REQUIRES_ARM_NEON;
1866 GemmMicrokernelTester()
1867 .mr(1)
1868 .nr(16)
1869 .kr(2)
1870 .sr(1)
1871 .m(1)
1872 .n(16)
1873 .k(8)
1874 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08001875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001876 }
1877
1878 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_eq_8_strided_a) {
1879 TEST_REQUIRES_ARM_NEON;
1880 GemmMicrokernelTester()
1881 .mr(1)
1882 .nr(16)
1883 .kr(2)
1884 .sr(1)
1885 .m(1)
1886 .n(16)
1887 .k(8)
1888 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001889 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001890 }
1891
1892 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_eq_8_subtile) {
1893 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08001894 for (uint32_t n = 1; n <= 16; n++) {
1895 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001896 GemmMicrokernelTester()
1897 .mr(1)
1898 .nr(16)
1899 .kr(2)
1900 .sr(1)
1901 .m(m)
1902 .n(n)
1903 .k(8)
1904 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001906 }
1907 }
1908 }
1909
1910 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_eq_8_subtile_m) {
1911 TEST_REQUIRES_ARM_NEON;
1912 for (uint32_t m = 1; m <= 1; m++) {
1913 GemmMicrokernelTester()
1914 .mr(1)
1915 .nr(16)
1916 .kr(2)
1917 .sr(1)
1918 .m(m)
1919 .n(16)
1920 .k(8)
1921 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001922 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001923 }
1924 }
1925
1926 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_eq_8_subtile_n) {
1927 TEST_REQUIRES_ARM_NEON;
1928 for (uint32_t n = 1; n <= 16; n++) {
1929 GemmMicrokernelTester()
1930 .mr(1)
1931 .nr(16)
1932 .kr(2)
1933 .sr(1)
1934 .m(1)
1935 .n(n)
1936 .k(8)
1937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001939 }
1940 }
1941
1942 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_lt_8) {
1943 TEST_REQUIRES_ARM_NEON;
1944 for (size_t k = 1; k < 8; k++) {
1945 GemmMicrokernelTester()
1946 .mr(1)
1947 .nr(16)
1948 .kr(2)
1949 .sr(1)
1950 .m(1)
1951 .n(16)
1952 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08001953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001954 }
1955 }
1956
1957 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_lt_8_strided_a) {
1958 TEST_REQUIRES_ARM_NEON;
1959 for (size_t k = 1; k < 8; k++) {
1960 GemmMicrokernelTester()
1961 .mr(1)
1962 .nr(16)
1963 .kr(2)
1964 .sr(1)
1965 .m(1)
1966 .n(16)
1967 .k(k)
1968 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08001969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001970 }
1971 }
1972
1973 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_lt_8_subtile) {
1974 TEST_REQUIRES_ARM_NEON;
1975 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08001976 for (uint32_t n = 1; n <= 16; n++) {
1977 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001978 GemmMicrokernelTester()
1979 .mr(1)
1980 .nr(16)
1981 .kr(2)
1982 .sr(1)
1983 .m(m)
1984 .n(n)
1985 .k(k)
1986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08001987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08001988 }
1989 }
1990 }
1991 }
1992
1993 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_gt_8) {
1994 TEST_REQUIRES_ARM_NEON;
1995 for (size_t k = 9; k < 16; k++) {
1996 GemmMicrokernelTester()
1997 .mr(1)
1998 .nr(16)
1999 .kr(2)
2000 .sr(1)
2001 .m(1)
2002 .n(16)
2003 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002004 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002005 }
2006 }
2007
2008 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_gt_8_strided_a) {
2009 TEST_REQUIRES_ARM_NEON;
2010 for (size_t k = 9; k < 16; k++) {
2011 GemmMicrokernelTester()
2012 .mr(1)
2013 .nr(16)
2014 .kr(2)
2015 .sr(1)
2016 .m(1)
2017 .n(16)
2018 .k(k)
2019 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002021 }
2022 }
2023
2024 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_gt_8_subtile) {
2025 TEST_REQUIRES_ARM_NEON;
2026 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002027 for (uint32_t n = 1; n <= 16; n++) {
2028 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002029 GemmMicrokernelTester()
2030 .mr(1)
2031 .nr(16)
2032 .kr(2)
2033 .sr(1)
2034 .m(m)
2035 .n(n)
2036 .k(k)
2037 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002038 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002039 }
2040 }
2041 }
2042 }
2043
2044 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_div_8) {
2045 TEST_REQUIRES_ARM_NEON;
2046 for (size_t k = 16; k <= 80; k += 8) {
2047 GemmMicrokernelTester()
2048 .mr(1)
2049 .nr(16)
2050 .kr(2)
2051 .sr(1)
2052 .m(1)
2053 .n(16)
2054 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002056 }
2057 }
2058
2059 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_div_8_strided_a) {
2060 TEST_REQUIRES_ARM_NEON;
2061 for (size_t k = 16; k <= 80; k += 8) {
2062 GemmMicrokernelTester()
2063 .mr(1)
2064 .nr(16)
2065 .kr(2)
2066 .sr(1)
2067 .m(1)
2068 .n(16)
2069 .k(k)
2070 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002072 }
2073 }
2074
2075 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, k_div_8_subtile) {
2076 TEST_REQUIRES_ARM_NEON;
2077 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002078 for (uint32_t n = 1; n <= 16; n++) {
2079 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002080 GemmMicrokernelTester()
2081 .mr(1)
2082 .nr(16)
2083 .kr(2)
2084 .sr(1)
2085 .m(m)
2086 .n(n)
2087 .k(k)
2088 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002090 }
2091 }
2092 }
2093 }
2094
2095 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_gt_16) {
2096 TEST_REQUIRES_ARM_NEON;
2097 for (uint32_t n = 17; n < 32; n++) {
2098 for (size_t k = 1; k <= 40; k += 9) {
2099 GemmMicrokernelTester()
2100 .mr(1)
2101 .nr(16)
2102 .kr(2)
2103 .sr(1)
2104 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002105 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002106 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002108 }
2109 }
2110 }
2111
2112 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_gt_16_strided_cn) {
2113 TEST_REQUIRES_ARM_NEON;
2114 for (uint32_t n = 17; n < 32; n++) {
2115 for (size_t k = 1; k <= 40; k += 9) {
2116 GemmMicrokernelTester()
2117 .mr(1)
2118 .nr(16)
2119 .kr(2)
2120 .sr(1)
2121 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002122 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002123 .k(k)
2124 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002126 }
2127 }
2128 }
2129
2130 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_gt_16_strided_a) {
2131 TEST_REQUIRES_ARM_NEON;
2132 for (uint32_t n = 17; n < 32; n++) {
2133 for (size_t k = 1; k <= 40; k += 9) {
2134 GemmMicrokernelTester()
2135 .mr(1)
2136 .nr(16)
2137 .kr(2)
2138 .sr(1)
2139 .m(1)
2140 .n(n)
2141 .k(k)
2142 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002144 }
2145 }
2146 }
2147
2148 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_gt_16_subtile) {
2149 TEST_REQUIRES_ARM_NEON;
2150 for (uint32_t n = 17; n < 32; n++) {
2151 for (size_t k = 1; k <= 40; k += 9) {
2152 for (uint32_t m = 1; m <= 1; m++) {
2153 GemmMicrokernelTester()
2154 .mr(1)
2155 .nr(16)
2156 .kr(2)
2157 .sr(1)
2158 .m(m)
2159 .n(n)
2160 .k(k)
2161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002163 }
2164 }
2165 }
2166 }
2167
2168 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_div_16) {
2169 TEST_REQUIRES_ARM_NEON;
2170 for (uint32_t n = 32; n <= 48; n += 16) {
2171 for (size_t k = 1; k <= 40; k += 9) {
2172 GemmMicrokernelTester()
2173 .mr(1)
2174 .nr(16)
2175 .kr(2)
2176 .sr(1)
2177 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002178 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002179 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002180 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002181 }
2182 }
2183 }
2184
2185 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_div_16_strided_cn) {
2186 TEST_REQUIRES_ARM_NEON;
2187 for (uint32_t n = 32; n <= 48; n += 16) {
2188 for (size_t k = 1; k <= 40; k += 9) {
2189 GemmMicrokernelTester()
2190 .mr(1)
2191 .nr(16)
2192 .kr(2)
2193 .sr(1)
2194 .m(1)
2195 .n(n)
2196 .k(k)
2197 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002198 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002199 }
2200 }
2201 }
2202
2203 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_div_16_strided_a) {
2204 TEST_REQUIRES_ARM_NEON;
2205 for (uint32_t n = 32; n <= 48; n += 16) {
2206 for (size_t k = 1; k <= 40; k += 9) {
2207 GemmMicrokernelTester()
2208 .mr(1)
2209 .nr(16)
2210 .kr(2)
2211 .sr(1)
2212 .m(1)
2213 .n(n)
2214 .k(k)
2215 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08002216 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002217 }
2218 }
2219 }
2220
2221 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, n_div_16_subtile) {
2222 TEST_REQUIRES_ARM_NEON;
2223 for (uint32_t n = 32; n <= 48; n += 16) {
2224 for (size_t k = 1; k <= 40; k += 9) {
2225 for (uint32_t m = 1; m <= 1; m++) {
2226 GemmMicrokernelTester()
2227 .mr(1)
2228 .nr(16)
2229 .kr(2)
2230 .sr(1)
2231 .m(m)
2232 .n(n)
2233 .k(k)
2234 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002235 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002236 }
2237 }
2238 }
2239 }
2240
2241 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, strided_cm_subtile) {
2242 TEST_REQUIRES_ARM_NEON;
2243 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002244 for (uint32_t n = 1; n <= 16; n++) {
2245 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002246 GemmMicrokernelTester()
2247 .mr(1)
2248 .nr(16)
2249 .kr(2)
2250 .sr(1)
2251 .m(m)
2252 .n(n)
2253 .k(k)
2254 .cm_stride(19)
2255 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002256 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002257 }
2258 }
2259 }
2260 }
2261
2262 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, qmin) {
2263 TEST_REQUIRES_ARM_NEON;
2264 GemmMicrokernelTester()
2265 .mr(1)
2266 .nr(16)
2267 .kr(2)
2268 .sr(1)
2269 .m(1)
2270 .n(16)
2271 .k(8)
2272 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002274 }
2275
2276 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, qmax) {
2277 TEST_REQUIRES_ARM_NEON;
2278 GemmMicrokernelTester()
2279 .mr(1)
2280 .nr(16)
2281 .kr(2)
2282 .sr(1)
2283 .m(1)
2284 .n(16)
2285 .k(8)
2286 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002287 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002288 }
2289
2290 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MULL_LD1R, strided_cm) {
2291 TEST_REQUIRES_ARM_NEON;
2292 GemmMicrokernelTester()
2293 .mr(1)
2294 .nr(16)
2295 .kr(2)
2296 .sr(1)
2297 .m(1)
2298 .n(16)
2299 .k(8)
2300 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002301 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002302 }
2303#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2304
2305
2306#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2307 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_eq_16) {
2308 TEST_REQUIRES_ARM_NEON;
2309 GemmMicrokernelTester()
2310 .mr(4)
2311 .nr(16)
2312 .kr(2)
2313 .sr(1)
2314 .m(4)
2315 .n(16)
2316 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08002317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002318 }
2319
2320 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, strided_cn) {
2321 TEST_REQUIRES_ARM_NEON;
2322 GemmMicrokernelTester()
2323 .mr(4)
2324 .nr(16)
2325 .kr(2)
2326 .sr(1)
2327 .m(4)
2328 .n(16)
2329 .k(16)
2330 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002331 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002332 }
2333
2334 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_eq_16_strided_a) {
2335 TEST_REQUIRES_ARM_NEON;
2336 GemmMicrokernelTester()
2337 .mr(4)
2338 .nr(16)
2339 .kr(2)
2340 .sr(1)
2341 .m(4)
2342 .n(16)
2343 .k(16)
2344 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002345 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002346 }
2347
2348 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_eq_16_subtile) {
2349 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002350 for (uint32_t n = 1; n <= 16; n++) {
2351 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002352 GemmMicrokernelTester()
2353 .mr(4)
2354 .nr(16)
2355 .kr(2)
2356 .sr(1)
2357 .m(m)
2358 .n(n)
2359 .k(16)
2360 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002362 }
2363 }
2364 }
2365
2366 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_m) {
2367 TEST_REQUIRES_ARM_NEON;
2368 for (uint32_t m = 1; m <= 4; m++) {
2369 GemmMicrokernelTester()
2370 .mr(4)
2371 .nr(16)
2372 .kr(2)
2373 .sr(1)
2374 .m(m)
2375 .n(16)
2376 .k(16)
2377 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002378 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002379 }
2380 }
2381
2382 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_eq_16_subtile_n) {
2383 TEST_REQUIRES_ARM_NEON;
2384 for (uint32_t n = 1; n <= 16; n++) {
2385 GemmMicrokernelTester()
2386 .mr(4)
2387 .nr(16)
2388 .kr(2)
2389 .sr(1)
2390 .m(4)
2391 .n(n)
2392 .k(16)
2393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002395 }
2396 }
2397
2398 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_lt_16) {
2399 TEST_REQUIRES_ARM_NEON;
2400 for (size_t k = 1; k < 16; k++) {
2401 GemmMicrokernelTester()
2402 .mr(4)
2403 .nr(16)
2404 .kr(2)
2405 .sr(1)
2406 .m(4)
2407 .n(16)
2408 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002410 }
2411 }
2412
2413 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_lt_16_strided_a) {
2414 TEST_REQUIRES_ARM_NEON;
2415 for (size_t k = 1; k < 16; k++) {
2416 GemmMicrokernelTester()
2417 .mr(4)
2418 .nr(16)
2419 .kr(2)
2420 .sr(1)
2421 .m(4)
2422 .n(16)
2423 .k(k)
2424 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002426 }
2427 }
2428
2429 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_lt_16_subtile) {
2430 TEST_REQUIRES_ARM_NEON;
2431 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002432 for (uint32_t n = 1; n <= 16; n++) {
2433 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002434 GemmMicrokernelTester()
2435 .mr(4)
2436 .nr(16)
2437 .kr(2)
2438 .sr(1)
2439 .m(m)
2440 .n(n)
2441 .k(k)
2442 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002444 }
2445 }
2446 }
2447 }
2448
2449 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_gt_16) {
2450 TEST_REQUIRES_ARM_NEON;
2451 for (size_t k = 17; k < 32; k++) {
2452 GemmMicrokernelTester()
2453 .mr(4)
2454 .nr(16)
2455 .kr(2)
2456 .sr(1)
2457 .m(4)
2458 .n(16)
2459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002461 }
2462 }
2463
2464 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_gt_16_strided_a) {
2465 TEST_REQUIRES_ARM_NEON;
2466 for (size_t k = 17; k < 32; k++) {
2467 GemmMicrokernelTester()
2468 .mr(4)
2469 .nr(16)
2470 .kr(2)
2471 .sr(1)
2472 .m(4)
2473 .n(16)
2474 .k(k)
2475 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08002476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002477 }
2478 }
2479
2480 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_gt_16_subtile) {
2481 TEST_REQUIRES_ARM_NEON;
2482 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002483 for (uint32_t n = 1; n <= 16; n++) {
2484 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002485 GemmMicrokernelTester()
2486 .mr(4)
2487 .nr(16)
2488 .kr(2)
2489 .sr(1)
2490 .m(m)
2491 .n(n)
2492 .k(k)
2493 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002495 }
2496 }
2497 }
2498 }
2499
2500 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_div_16) {
2501 TEST_REQUIRES_ARM_NEON;
2502 for (size_t k = 32; k <= 160; k += 16) {
2503 GemmMicrokernelTester()
2504 .mr(4)
2505 .nr(16)
2506 .kr(2)
2507 .sr(1)
2508 .m(4)
2509 .n(16)
2510 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002512 }
2513 }
2514
2515 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_div_16_strided_a) {
2516 TEST_REQUIRES_ARM_NEON;
2517 for (size_t k = 32; k <= 160; k += 16) {
2518 GemmMicrokernelTester()
2519 .mr(4)
2520 .nr(16)
2521 .kr(2)
2522 .sr(1)
2523 .m(4)
2524 .n(16)
2525 .k(k)
2526 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08002527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002528 }
2529 }
2530
2531 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, k_div_16_subtile) {
2532 TEST_REQUIRES_ARM_NEON;
2533 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002534 for (uint32_t n = 1; n <= 16; n++) {
2535 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002536 GemmMicrokernelTester()
2537 .mr(4)
2538 .nr(16)
2539 .kr(2)
2540 .sr(1)
2541 .m(m)
2542 .n(n)
2543 .k(k)
2544 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002546 }
2547 }
2548 }
2549 }
2550
2551 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_gt_16) {
2552 TEST_REQUIRES_ARM_NEON;
2553 for (uint32_t n = 17; n < 32; n++) {
2554 for (size_t k = 1; k <= 80; k += 17) {
2555 GemmMicrokernelTester()
2556 .mr(4)
2557 .nr(16)
2558 .kr(2)
2559 .sr(1)
2560 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002561 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002562 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002564 }
2565 }
2566 }
2567
2568 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_gt_16_strided_cn) {
2569 TEST_REQUIRES_ARM_NEON;
2570 for (uint32_t n = 17; n < 32; n++) {
2571 for (size_t k = 1; k <= 80; k += 17) {
2572 GemmMicrokernelTester()
2573 .mr(4)
2574 .nr(16)
2575 .kr(2)
2576 .sr(1)
2577 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002578 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002579 .k(k)
2580 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002582 }
2583 }
2584 }
2585
2586 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_gt_16_strided_a) {
2587 TEST_REQUIRES_ARM_NEON;
2588 for (uint32_t n = 17; n < 32; n++) {
2589 for (size_t k = 1; k <= 80; k += 17) {
2590 GemmMicrokernelTester()
2591 .mr(4)
2592 .nr(16)
2593 .kr(2)
2594 .sr(1)
2595 .m(4)
2596 .n(n)
2597 .k(k)
2598 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002600 }
2601 }
2602 }
2603
2604 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_gt_16_subtile) {
2605 TEST_REQUIRES_ARM_NEON;
2606 for (uint32_t n = 17; n < 32; n++) {
2607 for (size_t k = 1; k <= 80; k += 17) {
2608 for (uint32_t m = 1; m <= 4; m++) {
2609 GemmMicrokernelTester()
2610 .mr(4)
2611 .nr(16)
2612 .kr(2)
2613 .sr(1)
2614 .m(m)
2615 .n(n)
2616 .k(k)
2617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002619 }
2620 }
2621 }
2622 }
2623
2624 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_div_16) {
2625 TEST_REQUIRES_ARM_NEON;
2626 for (uint32_t n = 32; n <= 48; n += 16) {
2627 for (size_t k = 1; k <= 80; k += 17) {
2628 GemmMicrokernelTester()
2629 .mr(4)
2630 .nr(16)
2631 .kr(2)
2632 .sr(1)
2633 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08002634 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002635 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002636 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002637 }
2638 }
2639 }
2640
2641 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_div_16_strided_cn) {
2642 TEST_REQUIRES_ARM_NEON;
2643 for (uint32_t n = 32; n <= 48; n += 16) {
2644 for (size_t k = 1; k <= 80; k += 17) {
2645 GemmMicrokernelTester()
2646 .mr(4)
2647 .nr(16)
2648 .kr(2)
2649 .sr(1)
2650 .m(4)
2651 .n(n)
2652 .k(k)
2653 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002654 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002655 }
2656 }
2657 }
2658
2659 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_div_16_strided_a) {
2660 TEST_REQUIRES_ARM_NEON;
2661 for (uint32_t n = 32; n <= 48; n += 16) {
2662 for (size_t k = 1; k <= 80; k += 17) {
2663 GemmMicrokernelTester()
2664 .mr(4)
2665 .nr(16)
2666 .kr(2)
2667 .sr(1)
2668 .m(4)
2669 .n(n)
2670 .k(k)
2671 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002672 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002673 }
2674 }
2675 }
2676
2677 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, n_div_16_subtile) {
2678 TEST_REQUIRES_ARM_NEON;
2679 for (uint32_t n = 32; n <= 48; n += 16) {
2680 for (size_t k = 1; k <= 80; k += 17) {
2681 for (uint32_t m = 1; m <= 4; m++) {
2682 GemmMicrokernelTester()
2683 .mr(4)
2684 .nr(16)
2685 .kr(2)
2686 .sr(1)
2687 .m(m)
2688 .n(n)
2689 .k(k)
2690 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002691 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002692 }
2693 }
2694 }
2695 }
2696
2697 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, strided_cm_subtile) {
2698 TEST_REQUIRES_ARM_NEON;
2699 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002700 for (uint32_t n = 1; n <= 16; n++) {
2701 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002702 GemmMicrokernelTester()
2703 .mr(4)
2704 .nr(16)
2705 .kr(2)
2706 .sr(1)
2707 .m(m)
2708 .n(n)
2709 .k(k)
2710 .cm_stride(19)
2711 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002712 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002713 }
2714 }
2715 }
2716 }
2717
2718 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, qmin) {
2719 TEST_REQUIRES_ARM_NEON;
2720 GemmMicrokernelTester()
2721 .mr(4)
2722 .nr(16)
2723 .kr(2)
2724 .sr(1)
2725 .m(4)
2726 .n(16)
2727 .k(16)
2728 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002730 }
2731
2732 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, qmax) {
2733 TEST_REQUIRES_ARM_NEON;
2734 GemmMicrokernelTester()
2735 .mr(4)
2736 .nr(16)
2737 .kr(2)
2738 .sr(1)
2739 .m(4)
2740 .n(16)
2741 .k(16)
2742 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08002743 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002744 }
2745
2746 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MLAL_LD1R, strided_cm) {
2747 TEST_REQUIRES_ARM_NEON;
2748 GemmMicrokernelTester()
2749 .mr(4)
2750 .nr(16)
2751 .kr(2)
2752 .sr(1)
2753 .m(4)
2754 .n(16)
2755 .k(16)
2756 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002757 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mlal_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002758 }
2759#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2760
2761
2762#if XNN_ARCH_ARM || XNN_ARCH_ARM64
2763 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8) {
2764 TEST_REQUIRES_ARM_NEON;
2765 GemmMicrokernelTester()
2766 .mr(2)
2767 .nr(8)
2768 .kr(2)
2769 .sr(1)
2770 .m(2)
2771 .n(8)
2772 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08002773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002774 }
2775
2776 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, strided_cn) {
2777 TEST_REQUIRES_ARM_NEON;
2778 GemmMicrokernelTester()
2779 .mr(2)
2780 .nr(8)
2781 .kr(2)
2782 .sr(1)
2783 .m(2)
2784 .n(8)
2785 .k(8)
2786 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002788 }
2789
2790 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_strided_a) {
2791 TEST_REQUIRES_ARM_NEON;
2792 GemmMicrokernelTester()
2793 .mr(2)
2794 .nr(8)
2795 .kr(2)
2796 .sr(1)
2797 .m(2)
2798 .n(8)
2799 .k(8)
2800 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002801 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002802 }
2803
2804 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
2805 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08002806 for (uint32_t n = 1; n <= 8; n++) {
2807 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002808 GemmMicrokernelTester()
2809 .mr(2)
2810 .nr(8)
2811 .kr(2)
2812 .sr(1)
2813 .m(m)
2814 .n(n)
2815 .k(8)
2816 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002818 }
2819 }
2820 }
2821
2822 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
2823 TEST_REQUIRES_ARM_NEON;
2824 for (uint32_t m = 1; m <= 2; m++) {
2825 GemmMicrokernelTester()
2826 .mr(2)
2827 .nr(8)
2828 .kr(2)
2829 .sr(1)
2830 .m(m)
2831 .n(8)
2832 .k(8)
2833 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002834 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002835 }
2836 }
2837
2838 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
2839 TEST_REQUIRES_ARM_NEON;
2840 for (uint32_t n = 1; n <= 8; n++) {
2841 GemmMicrokernelTester()
2842 .mr(2)
2843 .nr(8)
2844 .kr(2)
2845 .sr(1)
2846 .m(2)
2847 .n(n)
2848 .k(8)
2849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002851 }
2852 }
2853
2854 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_lt_8) {
2855 TEST_REQUIRES_ARM_NEON;
2856 for (size_t k = 1; k < 8; k++) {
2857 GemmMicrokernelTester()
2858 .mr(2)
2859 .nr(8)
2860 .kr(2)
2861 .sr(1)
2862 .m(2)
2863 .n(8)
2864 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002866 }
2867 }
2868
2869 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_lt_8_strided_a) {
2870 TEST_REQUIRES_ARM_NEON;
2871 for (size_t k = 1; k < 8; k++) {
2872 GemmMicrokernelTester()
2873 .mr(2)
2874 .nr(8)
2875 .kr(2)
2876 .sr(1)
2877 .m(2)
2878 .n(8)
2879 .k(k)
2880 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08002881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002882 }
2883 }
2884
2885 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {
2886 TEST_REQUIRES_ARM_NEON;
2887 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002888 for (uint32_t n = 1; n <= 8; n++) {
2889 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002890 GemmMicrokernelTester()
2891 .mr(2)
2892 .nr(8)
2893 .kr(2)
2894 .sr(1)
2895 .m(m)
2896 .n(n)
2897 .k(k)
2898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002900 }
2901 }
2902 }
2903 }
2904
2905 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_gt_8) {
2906 TEST_REQUIRES_ARM_NEON;
2907 for (size_t k = 9; k < 16; k++) {
2908 GemmMicrokernelTester()
2909 .mr(2)
2910 .nr(8)
2911 .kr(2)
2912 .sr(1)
2913 .m(2)
2914 .n(8)
2915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002917 }
2918 }
2919
2920 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_gt_8_strided_a) {
2921 TEST_REQUIRES_ARM_NEON;
2922 for (size_t k = 9; k < 16; k++) {
2923 GemmMicrokernelTester()
2924 .mr(2)
2925 .nr(8)
2926 .kr(2)
2927 .sr(1)
2928 .m(2)
2929 .n(8)
2930 .k(k)
2931 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08002932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002933 }
2934 }
2935
2936 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {
2937 TEST_REQUIRES_ARM_NEON;
2938 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002939 for (uint32_t n = 1; n <= 8; n++) {
2940 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002941 GemmMicrokernelTester()
2942 .mr(2)
2943 .nr(8)
2944 .kr(2)
2945 .sr(1)
2946 .m(m)
2947 .n(n)
2948 .k(k)
2949 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08002950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002951 }
2952 }
2953 }
2954 }
2955
2956 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_div_8) {
2957 TEST_REQUIRES_ARM_NEON;
2958 for (size_t k = 16; k <= 80; k += 8) {
2959 GemmMicrokernelTester()
2960 .mr(2)
2961 .nr(8)
2962 .kr(2)
2963 .sr(1)
2964 .m(2)
2965 .n(8)
2966 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08002967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002968 }
2969 }
2970
2971 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_div_8_strided_a) {
2972 TEST_REQUIRES_ARM_NEON;
2973 for (size_t k = 16; k <= 80; k += 8) {
2974 GemmMicrokernelTester()
2975 .mr(2)
2976 .nr(8)
2977 .kr(2)
2978 .sr(1)
2979 .m(2)
2980 .n(8)
2981 .k(k)
2982 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08002983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002984 }
2985 }
2986
2987 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, k_div_8_subtile) {
2988 TEST_REQUIRES_ARM_NEON;
2989 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08002990 for (uint32_t n = 1; n <= 8; n++) {
2991 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08002992 GemmMicrokernelTester()
2993 .mr(2)
2994 .nr(8)
2995 .kr(2)
2996 .sr(1)
2997 .m(m)
2998 .n(n)
2999 .k(k)
3000 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003002 }
3003 }
3004 }
3005 }
3006
3007 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8) {
3008 TEST_REQUIRES_ARM_NEON;
3009 for (uint32_t n = 9; n < 16; n++) {
3010 for (size_t k = 1; k <= 40; k += 9) {
3011 GemmMicrokernelTester()
3012 .mr(2)
3013 .nr(8)
3014 .kr(2)
3015 .sr(1)
3016 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003017 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003018 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003020 }
3021 }
3022 }
3023
3024 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {
3025 TEST_REQUIRES_ARM_NEON;
3026 for (uint32_t n = 9; n < 16; n++) {
3027 for (size_t k = 1; k <= 40; k += 9) {
3028 GemmMicrokernelTester()
3029 .mr(2)
3030 .nr(8)
3031 .kr(2)
3032 .sr(1)
3033 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003034 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003035 .k(k)
3036 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003038 }
3039 }
3040 }
3041
3042 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8_strided_a) {
3043 TEST_REQUIRES_ARM_NEON;
3044 for (uint32_t n = 9; n < 16; n++) {
3045 for (size_t k = 1; k <= 40; k += 9) {
3046 GemmMicrokernelTester()
3047 .mr(2)
3048 .nr(8)
3049 .kr(2)
3050 .sr(1)
3051 .m(2)
3052 .n(n)
3053 .k(k)
3054 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003056 }
3057 }
3058 }
3059
3060 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {
3061 TEST_REQUIRES_ARM_NEON;
3062 for (uint32_t n = 9; n < 16; n++) {
3063 for (size_t k = 1; k <= 40; k += 9) {
3064 for (uint32_t m = 1; m <= 2; m++) {
3065 GemmMicrokernelTester()
3066 .mr(2)
3067 .nr(8)
3068 .kr(2)
3069 .sr(1)
3070 .m(m)
3071 .n(n)
3072 .k(k)
3073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003075 }
3076 }
3077 }
3078 }
3079
3080 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8) {
3081 TEST_REQUIRES_ARM_NEON;
3082 for (uint32_t n = 16; n <= 24; n += 8) {
3083 for (size_t k = 1; k <= 40; k += 9) {
3084 GemmMicrokernelTester()
3085 .mr(2)
3086 .nr(8)
3087 .kr(2)
3088 .sr(1)
3089 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003090 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003091 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003092 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003093 }
3094 }
3095 }
3096
3097 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {
3098 TEST_REQUIRES_ARM_NEON;
3099 for (uint32_t n = 16; n <= 24; n += 8) {
3100 for (size_t k = 1; k <= 40; k += 9) {
3101 GemmMicrokernelTester()
3102 .mr(2)
3103 .nr(8)
3104 .kr(2)
3105 .sr(1)
3106 .m(2)
3107 .n(n)
3108 .k(k)
3109 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003110 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003111 }
3112 }
3113 }
3114
3115 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8_strided_a) {
3116 TEST_REQUIRES_ARM_NEON;
3117 for (uint32_t n = 16; n <= 24; n += 8) {
3118 for (size_t k = 1; k <= 40; k += 9) {
3119 GemmMicrokernelTester()
3120 .mr(2)
3121 .nr(8)
3122 .kr(2)
3123 .sr(1)
3124 .m(2)
3125 .n(n)
3126 .k(k)
3127 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003128 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003129 }
3130 }
3131 }
3132
3133 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, n_div_8_subtile) {
3134 TEST_REQUIRES_ARM_NEON;
3135 for (uint32_t n = 16; n <= 24; n += 8) {
3136 for (size_t k = 1; k <= 40; k += 9) {
3137 for (uint32_t m = 1; m <= 2; m++) {
3138 GemmMicrokernelTester()
3139 .mr(2)
3140 .nr(8)
3141 .kr(2)
3142 .sr(1)
3143 .m(m)
3144 .n(n)
3145 .k(k)
3146 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003147 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003148 }
3149 }
3150 }
3151 }
3152
3153 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, strided_cm_subtile) {
3154 TEST_REQUIRES_ARM_NEON;
3155 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003156 for (uint32_t n = 1; n <= 8; n++) {
3157 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003158 GemmMicrokernelTester()
3159 .mr(2)
3160 .nr(8)
3161 .kr(2)
3162 .sr(1)
3163 .m(m)
3164 .n(n)
3165 .k(k)
3166 .cm_stride(11)
3167 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003168 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003169 }
3170 }
3171 }
3172 }
3173
3174 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, qmin) {
3175 TEST_REQUIRES_ARM_NEON;
3176 GemmMicrokernelTester()
3177 .mr(2)
3178 .nr(8)
3179 .kr(2)
3180 .sr(1)
3181 .m(2)
3182 .n(8)
3183 .k(8)
3184 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003186 }
3187
3188 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, qmax) {
3189 TEST_REQUIRES_ARM_NEON;
3190 GemmMicrokernelTester()
3191 .mr(2)
3192 .nr(8)
3193 .kr(2)
3194 .sr(1)
3195 .m(2)
3196 .n(8)
3197 .k(8)
3198 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003199 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003200 }
3201
3202 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MULL_LD2R, strided_cm) {
3203 TEST_REQUIRES_ARM_NEON;
3204 GemmMicrokernelTester()
3205 .mr(2)
3206 .nr(8)
3207 .kr(2)
3208 .sr(1)
3209 .m(2)
3210 .n(8)
3211 .k(8)
3212 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003213 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003214 }
3215#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3216
3217
3218#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3219 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_eq_8) {
3220 TEST_REQUIRES_ARM_NEON;
3221 GemmMicrokernelTester()
3222 .mr(4)
3223 .nr(8)
3224 .kr(2)
3225 .sr(1)
3226 .m(4)
3227 .n(8)
3228 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003230 }
3231
3232 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, strided_cn) {
3233 TEST_REQUIRES_ARM_NEON;
3234 GemmMicrokernelTester()
3235 .mr(4)
3236 .nr(8)
3237 .kr(2)
3238 .sr(1)
3239 .m(4)
3240 .n(8)
3241 .k(8)
3242 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003243 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003244 }
3245
3246 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_eq_8_strided_a) {
3247 TEST_REQUIRES_ARM_NEON;
3248 GemmMicrokernelTester()
3249 .mr(4)
3250 .nr(8)
3251 .kr(2)
3252 .sr(1)
3253 .m(4)
3254 .n(8)
3255 .k(8)
3256 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003257 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003258 }
3259
3260 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_eq_8_subtile) {
3261 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003262 for (uint32_t n = 1; n <= 8; n++) {
3263 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003264 GemmMicrokernelTester()
3265 .mr(4)
3266 .nr(8)
3267 .kr(2)
3268 .sr(1)
3269 .m(m)
3270 .n(n)
3271 .k(8)
3272 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003274 }
3275 }
3276 }
3277
3278 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
3279 TEST_REQUIRES_ARM_NEON;
3280 for (uint32_t m = 1; m <= 4; m++) {
3281 GemmMicrokernelTester()
3282 .mr(4)
3283 .nr(8)
3284 .kr(2)
3285 .sr(1)
3286 .m(m)
3287 .n(8)
3288 .k(8)
3289 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003290 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003291 }
3292 }
3293
3294 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
3295 TEST_REQUIRES_ARM_NEON;
3296 for (uint32_t n = 1; n <= 8; n++) {
3297 GemmMicrokernelTester()
3298 .mr(4)
3299 .nr(8)
3300 .kr(2)
3301 .sr(1)
3302 .m(4)
3303 .n(n)
3304 .k(8)
3305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003307 }
3308 }
3309
3310 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_lt_8) {
3311 TEST_REQUIRES_ARM_NEON;
3312 for (size_t k = 1; k < 8; k++) {
3313 GemmMicrokernelTester()
3314 .mr(4)
3315 .nr(8)
3316 .kr(2)
3317 .sr(1)
3318 .m(4)
3319 .n(8)
3320 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003322 }
3323 }
3324
3325 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_lt_8_strided_a) {
3326 TEST_REQUIRES_ARM_NEON;
3327 for (size_t k = 1; k < 8; k++) {
3328 GemmMicrokernelTester()
3329 .mr(4)
3330 .nr(8)
3331 .kr(2)
3332 .sr(1)
3333 .m(4)
3334 .n(8)
3335 .k(k)
3336 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003338 }
3339 }
3340
3341 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_lt_8_subtile) {
3342 TEST_REQUIRES_ARM_NEON;
3343 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003344 for (uint32_t n = 1; n <= 8; n++) {
3345 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003346 GemmMicrokernelTester()
3347 .mr(4)
3348 .nr(8)
3349 .kr(2)
3350 .sr(1)
3351 .m(m)
3352 .n(n)
3353 .k(k)
3354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003355 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003356 }
3357 }
3358 }
3359 }
3360
3361 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_gt_8) {
3362 TEST_REQUIRES_ARM_NEON;
3363 for (size_t k = 9; k < 16; k++) {
3364 GemmMicrokernelTester()
3365 .mr(4)
3366 .nr(8)
3367 .kr(2)
3368 .sr(1)
3369 .m(4)
3370 .n(8)
3371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003373 }
3374 }
3375
3376 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_gt_8_strided_a) {
3377 TEST_REQUIRES_ARM_NEON;
3378 for (size_t k = 9; k < 16; k++) {
3379 GemmMicrokernelTester()
3380 .mr(4)
3381 .nr(8)
3382 .kr(2)
3383 .sr(1)
3384 .m(4)
3385 .n(8)
3386 .k(k)
3387 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003389 }
3390 }
3391
3392 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_gt_8_subtile) {
3393 TEST_REQUIRES_ARM_NEON;
3394 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003395 for (uint32_t n = 1; n <= 8; n++) {
3396 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003397 GemmMicrokernelTester()
3398 .mr(4)
3399 .nr(8)
3400 .kr(2)
3401 .sr(1)
3402 .m(m)
3403 .n(n)
3404 .k(k)
3405 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003407 }
3408 }
3409 }
3410 }
3411
3412 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_div_8) {
3413 TEST_REQUIRES_ARM_NEON;
3414 for (size_t k = 16; k <= 80; k += 8) {
3415 GemmMicrokernelTester()
3416 .mr(4)
3417 .nr(8)
3418 .kr(2)
3419 .sr(1)
3420 .m(4)
3421 .n(8)
3422 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003424 }
3425 }
3426
3427 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_div_8_strided_a) {
3428 TEST_REQUIRES_ARM_NEON;
3429 for (size_t k = 16; k <= 80; k += 8) {
3430 GemmMicrokernelTester()
3431 .mr(4)
3432 .nr(8)
3433 .kr(2)
3434 .sr(1)
3435 .m(4)
3436 .n(8)
3437 .k(k)
3438 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003440 }
3441 }
3442
3443 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, k_div_8_subtile) {
3444 TEST_REQUIRES_ARM_NEON;
3445 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003446 for (uint32_t n = 1; n <= 8; n++) {
3447 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003448 GemmMicrokernelTester()
3449 .mr(4)
3450 .nr(8)
3451 .kr(2)
3452 .sr(1)
3453 .m(m)
3454 .n(n)
3455 .k(k)
3456 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003458 }
3459 }
3460 }
3461 }
3462
3463 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_gt_8) {
3464 TEST_REQUIRES_ARM_NEON;
3465 for (uint32_t n = 9; n < 16; n++) {
3466 for (size_t k = 1; k <= 40; k += 9) {
3467 GemmMicrokernelTester()
3468 .mr(4)
3469 .nr(8)
3470 .kr(2)
3471 .sr(1)
3472 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003473 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003474 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003476 }
3477 }
3478 }
3479
3480 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_gt_8_strided_cn) {
3481 TEST_REQUIRES_ARM_NEON;
3482 for (uint32_t n = 9; n < 16; n++) {
3483 for (size_t k = 1; k <= 40; k += 9) {
3484 GemmMicrokernelTester()
3485 .mr(4)
3486 .nr(8)
3487 .kr(2)
3488 .sr(1)
3489 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003490 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003491 .k(k)
3492 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003494 }
3495 }
3496 }
3497
3498 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_gt_8_strided_a) {
3499 TEST_REQUIRES_ARM_NEON;
3500 for (uint32_t n = 9; n < 16; n++) {
3501 for (size_t k = 1; k <= 40; k += 9) {
3502 GemmMicrokernelTester()
3503 .mr(4)
3504 .nr(8)
3505 .kr(2)
3506 .sr(1)
3507 .m(4)
3508 .n(n)
3509 .k(k)
3510 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003512 }
3513 }
3514 }
3515
3516 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_gt_8_subtile) {
3517 TEST_REQUIRES_ARM_NEON;
3518 for (uint32_t n = 9; n < 16; n++) {
3519 for (size_t k = 1; k <= 40; k += 9) {
3520 for (uint32_t m = 1; m <= 4; m++) {
3521 GemmMicrokernelTester()
3522 .mr(4)
3523 .nr(8)
3524 .kr(2)
3525 .sr(1)
3526 .m(m)
3527 .n(n)
3528 .k(k)
3529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003531 }
3532 }
3533 }
3534 }
3535
3536 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_div_8) {
3537 TEST_REQUIRES_ARM_NEON;
3538 for (uint32_t n = 16; n <= 24; n += 8) {
3539 for (size_t k = 1; k <= 40; k += 9) {
3540 GemmMicrokernelTester()
3541 .mr(4)
3542 .nr(8)
3543 .kr(2)
3544 .sr(1)
3545 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003546 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003547 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003548 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003549 }
3550 }
3551 }
3552
3553 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_div_8_strided_cn) {
3554 TEST_REQUIRES_ARM_NEON;
3555 for (uint32_t n = 16; n <= 24; n += 8) {
3556 for (size_t k = 1; k <= 40; k += 9) {
3557 GemmMicrokernelTester()
3558 .mr(4)
3559 .nr(8)
3560 .kr(2)
3561 .sr(1)
3562 .m(4)
3563 .n(n)
3564 .k(k)
3565 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003566 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003567 }
3568 }
3569 }
3570
3571 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_div_8_strided_a) {
3572 TEST_REQUIRES_ARM_NEON;
3573 for (uint32_t n = 16; n <= 24; n += 8) {
3574 for (size_t k = 1; k <= 40; k += 9) {
3575 GemmMicrokernelTester()
3576 .mr(4)
3577 .nr(8)
3578 .kr(2)
3579 .sr(1)
3580 .m(4)
3581 .n(n)
3582 .k(k)
3583 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003584 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003585 }
3586 }
3587 }
3588
3589 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, n_div_8_subtile) {
3590 TEST_REQUIRES_ARM_NEON;
3591 for (uint32_t n = 16; n <= 24; n += 8) {
3592 for (size_t k = 1; k <= 40; k += 9) {
3593 for (uint32_t m = 1; m <= 4; m++) {
3594 GemmMicrokernelTester()
3595 .mr(4)
3596 .nr(8)
3597 .kr(2)
3598 .sr(1)
3599 .m(m)
3600 .n(n)
3601 .k(k)
3602 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003603 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003604 }
3605 }
3606 }
3607 }
3608
3609 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, strided_cm_subtile) {
3610 TEST_REQUIRES_ARM_NEON;
3611 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003612 for (uint32_t n = 1; n <= 8; n++) {
3613 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003614 GemmMicrokernelTester()
3615 .mr(4)
3616 .nr(8)
3617 .kr(2)
3618 .sr(1)
3619 .m(m)
3620 .n(n)
3621 .k(k)
3622 .cm_stride(11)
3623 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003624 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003625 }
3626 }
3627 }
3628 }
3629
3630 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, qmin) {
3631 TEST_REQUIRES_ARM_NEON;
3632 GemmMicrokernelTester()
3633 .mr(4)
3634 .nr(8)
3635 .kr(2)
3636 .sr(1)
3637 .m(4)
3638 .n(8)
3639 .k(8)
3640 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003642 }
3643
3644 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, qmax) {
3645 TEST_REQUIRES_ARM_NEON;
3646 GemmMicrokernelTester()
3647 .mr(4)
3648 .nr(8)
3649 .kr(2)
3650 .sr(1)
3651 .m(4)
3652 .n(8)
3653 .k(8)
3654 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08003655 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003656 }
3657
3658 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_LD2R, strided_cm) {
3659 TEST_REQUIRES_ARM_NEON;
3660 GemmMicrokernelTester()
3661 .mr(4)
3662 .nr(8)
3663 .kr(2)
3664 .sr(1)
3665 .m(4)
3666 .n(8)
3667 .k(8)
3668 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003669 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003670 }
3671#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
3672
3673
3674#if XNN_ARCH_ARM || XNN_ARCH_ARM64
3675 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8) {
3676 TEST_REQUIRES_ARM_NEON;
3677 GemmMicrokernelTester()
3678 .mr(3)
3679 .nr(16)
3680 .kr(2)
3681 .sr(1)
3682 .m(3)
3683 .n(16)
3684 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08003685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003686 }
3687
3688 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cn) {
3689 TEST_REQUIRES_ARM_NEON;
3690 GemmMicrokernelTester()
3691 .mr(3)
3692 .nr(16)
3693 .kr(2)
3694 .sr(1)
3695 .m(3)
3696 .n(16)
3697 .k(8)
3698 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003699 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003700 }
3701
3702 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_strided_a) {
3703 TEST_REQUIRES_ARM_NEON;
3704 GemmMicrokernelTester()
3705 .mr(3)
3706 .nr(16)
3707 .kr(2)
3708 .sr(1)
3709 .m(3)
3710 .n(16)
3711 .k(8)
3712 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003713 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003714 }
3715
3716 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile) {
3717 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08003718 for (uint32_t n = 1; n <= 16; n++) {
3719 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003720 GemmMicrokernelTester()
3721 .mr(3)
3722 .nr(16)
3723 .kr(2)
3724 .sr(1)
3725 .m(m)
3726 .n(n)
3727 .k(8)
3728 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003730 }
3731 }
3732 }
3733
3734 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile_m) {
3735 TEST_REQUIRES_ARM_NEON;
3736 for (uint32_t m = 1; m <= 3; m++) {
3737 GemmMicrokernelTester()
3738 .mr(3)
3739 .nr(16)
3740 .kr(2)
3741 .sr(1)
3742 .m(m)
3743 .n(16)
3744 .k(8)
3745 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003746 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003747 }
3748 }
3749
3750 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_eq_8_subtile_n) {
3751 TEST_REQUIRES_ARM_NEON;
3752 for (uint32_t n = 1; n <= 16; n++) {
3753 GemmMicrokernelTester()
3754 .mr(3)
3755 .nr(16)
3756 .kr(2)
3757 .sr(1)
3758 .m(3)
3759 .n(n)
3760 .k(8)
3761 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003763 }
3764 }
3765
3766 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8) {
3767 TEST_REQUIRES_ARM_NEON;
3768 for (size_t k = 1; k < 8; k++) {
3769 GemmMicrokernelTester()
3770 .mr(3)
3771 .nr(16)
3772 .kr(2)
3773 .sr(1)
3774 .m(3)
3775 .n(16)
3776 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003778 }
3779 }
3780
3781 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8_strided_a) {
3782 TEST_REQUIRES_ARM_NEON;
3783 for (size_t k = 1; k < 8; k++) {
3784 GemmMicrokernelTester()
3785 .mr(3)
3786 .nr(16)
3787 .kr(2)
3788 .sr(1)
3789 .m(3)
3790 .n(16)
3791 .k(k)
3792 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08003793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003794 }
3795 }
3796
3797 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_lt_8_subtile) {
3798 TEST_REQUIRES_ARM_NEON;
3799 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003800 for (uint32_t n = 1; n <= 16; n++) {
3801 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003802 GemmMicrokernelTester()
3803 .mr(3)
3804 .nr(16)
3805 .kr(2)
3806 .sr(1)
3807 .m(m)
3808 .n(n)
3809 .k(k)
3810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003812 }
3813 }
3814 }
3815 }
3816
3817 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8) {
3818 TEST_REQUIRES_ARM_NEON;
3819 for (size_t k = 9; k < 16; k++) {
3820 GemmMicrokernelTester()
3821 .mr(3)
3822 .nr(16)
3823 .kr(2)
3824 .sr(1)
3825 .m(3)
3826 .n(16)
3827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003829 }
3830 }
3831
3832 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8_strided_a) {
3833 TEST_REQUIRES_ARM_NEON;
3834 for (size_t k = 9; k < 16; k++) {
3835 GemmMicrokernelTester()
3836 .mr(3)
3837 .nr(16)
3838 .kr(2)
3839 .sr(1)
3840 .m(3)
3841 .n(16)
3842 .k(k)
3843 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003845 }
3846 }
3847
3848 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_gt_8_subtile) {
3849 TEST_REQUIRES_ARM_NEON;
3850 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003851 for (uint32_t n = 1; n <= 16; n++) {
3852 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003853 GemmMicrokernelTester()
3854 .mr(3)
3855 .nr(16)
3856 .kr(2)
3857 .sr(1)
3858 .m(m)
3859 .n(n)
3860 .k(k)
3861 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003863 }
3864 }
3865 }
3866 }
3867
3868 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8) {
3869 TEST_REQUIRES_ARM_NEON;
3870 for (size_t k = 16; k <= 80; k += 8) {
3871 GemmMicrokernelTester()
3872 .mr(3)
3873 .nr(16)
3874 .kr(2)
3875 .sr(1)
3876 .m(3)
3877 .n(16)
3878 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003880 }
3881 }
3882
3883 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8_strided_a) {
3884 TEST_REQUIRES_ARM_NEON;
3885 for (size_t k = 16; k <= 80; k += 8) {
3886 GemmMicrokernelTester()
3887 .mr(3)
3888 .nr(16)
3889 .kr(2)
3890 .sr(1)
3891 .m(3)
3892 .n(16)
3893 .k(k)
3894 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08003895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003896 }
3897 }
3898
3899 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, k_div_8_subtile) {
3900 TEST_REQUIRES_ARM_NEON;
3901 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08003902 for (uint32_t n = 1; n <= 16; n++) {
3903 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003904 GemmMicrokernelTester()
3905 .mr(3)
3906 .nr(16)
3907 .kr(2)
3908 .sr(1)
3909 .m(m)
3910 .n(n)
3911 .k(k)
3912 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003914 }
3915 }
3916 }
3917 }
3918
3919 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16) {
3920 TEST_REQUIRES_ARM_NEON;
3921 for (uint32_t n = 17; n < 32; n++) {
3922 for (size_t k = 1; k <= 40; k += 9) {
3923 GemmMicrokernelTester()
3924 .mr(3)
3925 .nr(16)
3926 .kr(2)
3927 .sr(1)
3928 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003929 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003930 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08003931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003932 }
3933 }
3934 }
3935
3936 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_strided_cn) {
3937 TEST_REQUIRES_ARM_NEON;
3938 for (uint32_t n = 17; n < 32; n++) {
3939 for (size_t k = 1; k <= 40; k += 9) {
3940 GemmMicrokernelTester()
3941 .mr(3)
3942 .nr(16)
3943 .kr(2)
3944 .sr(1)
3945 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08003946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003947 .k(k)
3948 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08003949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003950 }
3951 }
3952 }
3953
3954 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_strided_a) {
3955 TEST_REQUIRES_ARM_NEON;
3956 for (uint32_t n = 17; n < 32; n++) {
3957 for (size_t k = 1; k <= 40; k += 9) {
3958 GemmMicrokernelTester()
3959 .mr(3)
3960 .nr(16)
3961 .kr(2)
3962 .sr(1)
3963 .m(3)
3964 .n(n)
3965 .k(k)
3966 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08003967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003968 }
3969 }
3970 }
3971
3972 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_gt_16_subtile) {
3973 TEST_REQUIRES_ARM_NEON;
3974 for (uint32_t n = 17; n < 32; n++) {
3975 for (size_t k = 1; k <= 40; k += 9) {
3976 for (uint32_t m = 1; m <= 3; m++) {
3977 GemmMicrokernelTester()
3978 .mr(3)
3979 .nr(16)
3980 .kr(2)
3981 .sr(1)
3982 .m(m)
3983 .n(n)
3984 .k(k)
3985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08003986 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08003987 }
3988 }
3989 }
3990 }
3991
3992 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16) {
3993 TEST_REQUIRES_ARM_NEON;
3994 for (uint32_t n = 32; n <= 48; n += 16) {
3995 for (size_t k = 1; k <= 40; k += 9) {
3996 GemmMicrokernelTester()
3997 .mr(3)
3998 .nr(16)
3999 .kr(2)
4000 .sr(1)
4001 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004002 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004003 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004004 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004005 }
4006 }
4007 }
4008
4009 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_strided_cn) {
4010 TEST_REQUIRES_ARM_NEON;
4011 for (uint32_t n = 32; n <= 48; n += 16) {
4012 for (size_t k = 1; k <= 40; k += 9) {
4013 GemmMicrokernelTester()
4014 .mr(3)
4015 .nr(16)
4016 .kr(2)
4017 .sr(1)
4018 .m(3)
4019 .n(n)
4020 .k(k)
4021 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004022 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004023 }
4024 }
4025 }
4026
4027 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_strided_a) {
4028 TEST_REQUIRES_ARM_NEON;
4029 for (uint32_t n = 32; n <= 48; n += 16) {
4030 for (size_t k = 1; k <= 40; k += 9) {
4031 GemmMicrokernelTester()
4032 .mr(3)
4033 .nr(16)
4034 .kr(2)
4035 .sr(1)
4036 .m(3)
4037 .n(n)
4038 .k(k)
4039 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004040 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004041 }
4042 }
4043 }
4044
4045 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, n_div_16_subtile) {
4046 TEST_REQUIRES_ARM_NEON;
4047 for (uint32_t n = 32; n <= 48; n += 16) {
4048 for (size_t k = 1; k <= 40; k += 9) {
4049 for (uint32_t m = 1; m <= 3; m++) {
4050 GemmMicrokernelTester()
4051 .mr(3)
4052 .nr(16)
4053 .kr(2)
4054 .sr(1)
4055 .m(m)
4056 .n(n)
4057 .k(k)
4058 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004059 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004060 }
4061 }
4062 }
4063 }
4064
4065 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cm_subtile) {
4066 TEST_REQUIRES_ARM_NEON;
4067 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004068 for (uint32_t n = 1; n <= 16; n++) {
4069 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004070 GemmMicrokernelTester()
4071 .mr(3)
4072 .nr(16)
4073 .kr(2)
4074 .sr(1)
4075 .m(m)
4076 .n(n)
4077 .k(k)
4078 .cm_stride(19)
4079 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004080 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004081 }
4082 }
4083 }
4084 }
4085
4086 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, qmin) {
4087 TEST_REQUIRES_ARM_NEON;
4088 GemmMicrokernelTester()
4089 .mr(3)
4090 .nr(16)
4091 .kr(2)
4092 .sr(1)
4093 .m(3)
4094 .n(16)
4095 .k(8)
4096 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004098 }
4099
4100 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, qmax) {
4101 TEST_REQUIRES_ARM_NEON;
4102 GemmMicrokernelTester()
4103 .mr(3)
4104 .nr(16)
4105 .kr(2)
4106 .sr(1)
4107 .m(3)
4108 .n(16)
4109 .k(8)
4110 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004111 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004112 }
4113
4114 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_LD2R, strided_cm) {
4115 TEST_REQUIRES_ARM_NEON;
4116 GemmMicrokernelTester()
4117 .mr(3)
4118 .nr(16)
4119 .kr(2)
4120 .sr(1)
4121 .m(3)
4122 .n(16)
4123 .k(8)
4124 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004125 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004126 }
4127#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4128
4129
4130#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4131 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16) {
4132 TEST_REQUIRES_ARM_NEON;
4133 GemmMicrokernelTester()
4134 .mr(1)
4135 .nr(8)
4136 .kr(2)
4137 .sr(1)
4138 .m(1)
4139 .n(8)
4140 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08004141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004142 }
4143
4144 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cn) {
4145 TEST_REQUIRES_ARM_NEON;
4146 GemmMicrokernelTester()
4147 .mr(1)
4148 .nr(8)
4149 .kr(2)
4150 .sr(1)
4151 .m(1)
4152 .n(8)
4153 .k(16)
4154 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004155 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004156 }
4157
4158 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_strided_a) {
4159 TEST_REQUIRES_ARM_NEON;
4160 GemmMicrokernelTester()
4161 .mr(1)
4162 .nr(8)
4163 .kr(2)
4164 .sr(1)
4165 .m(1)
4166 .n(8)
4167 .k(16)
4168 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004169 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004170 }
4171
4172 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile) {
4173 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004174 for (uint32_t n = 1; n <= 8; n++) {
4175 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004176 GemmMicrokernelTester()
4177 .mr(1)
4178 .nr(8)
4179 .kr(2)
4180 .sr(1)
4181 .m(m)
4182 .n(n)
4183 .k(16)
4184 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004186 }
4187 }
4188 }
4189
4190 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
4191 TEST_REQUIRES_ARM_NEON;
4192 for (uint32_t m = 1; m <= 1; m++) {
4193 GemmMicrokernelTester()
4194 .mr(1)
4195 .nr(8)
4196 .kr(2)
4197 .sr(1)
4198 .m(m)
4199 .n(8)
4200 .k(16)
4201 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004202 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004203 }
4204 }
4205
4206 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
4207 TEST_REQUIRES_ARM_NEON;
4208 for (uint32_t n = 1; n <= 8; n++) {
4209 GemmMicrokernelTester()
4210 .mr(1)
4211 .nr(8)
4212 .kr(2)
4213 .sr(1)
4214 .m(1)
4215 .n(n)
4216 .k(16)
4217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004219 }
4220 }
4221
4222 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16) {
4223 TEST_REQUIRES_ARM_NEON;
4224 for (size_t k = 1; k < 16; k++) {
4225 GemmMicrokernelTester()
4226 .mr(1)
4227 .nr(8)
4228 .kr(2)
4229 .sr(1)
4230 .m(1)
4231 .n(8)
4232 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004234 }
4235 }
4236
4237 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16_strided_a) {
4238 TEST_REQUIRES_ARM_NEON;
4239 for (size_t k = 1; k < 16; k++) {
4240 GemmMicrokernelTester()
4241 .mr(1)
4242 .nr(8)
4243 .kr(2)
4244 .sr(1)
4245 .m(1)
4246 .n(8)
4247 .k(k)
4248 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004250 }
4251 }
4252
4253 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_lt_16_subtile) {
4254 TEST_REQUIRES_ARM_NEON;
4255 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004256 for (uint32_t n = 1; n <= 8; n++) {
4257 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004258 GemmMicrokernelTester()
4259 .mr(1)
4260 .nr(8)
4261 .kr(2)
4262 .sr(1)
4263 .m(m)
4264 .n(n)
4265 .k(k)
4266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004267 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004268 }
4269 }
4270 }
4271 }
4272
4273 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16) {
4274 TEST_REQUIRES_ARM_NEON;
4275 for (size_t k = 17; k < 32; k++) {
4276 GemmMicrokernelTester()
4277 .mr(1)
4278 .nr(8)
4279 .kr(2)
4280 .sr(1)
4281 .m(1)
4282 .n(8)
4283 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004284 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004285 }
4286 }
4287
4288 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16_strided_a) {
4289 TEST_REQUIRES_ARM_NEON;
4290 for (size_t k = 17; k < 32; k++) {
4291 GemmMicrokernelTester()
4292 .mr(1)
4293 .nr(8)
4294 .kr(2)
4295 .sr(1)
4296 .m(1)
4297 .n(8)
4298 .k(k)
4299 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08004300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004301 }
4302 }
4303
4304 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_gt_16_subtile) {
4305 TEST_REQUIRES_ARM_NEON;
4306 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004307 for (uint32_t n = 1; n <= 8; n++) {
4308 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004309 GemmMicrokernelTester()
4310 .mr(1)
4311 .nr(8)
4312 .kr(2)
4313 .sr(1)
4314 .m(m)
4315 .n(n)
4316 .k(k)
4317 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004319 }
4320 }
4321 }
4322 }
4323
4324 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16) {
4325 TEST_REQUIRES_ARM_NEON;
4326 for (size_t k = 32; k <= 160; k += 16) {
4327 GemmMicrokernelTester()
4328 .mr(1)
4329 .nr(8)
4330 .kr(2)
4331 .sr(1)
4332 .m(1)
4333 .n(8)
4334 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004336 }
4337 }
4338
4339 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16_strided_a) {
4340 TEST_REQUIRES_ARM_NEON;
4341 for (size_t k = 32; k <= 160; k += 16) {
4342 GemmMicrokernelTester()
4343 .mr(1)
4344 .nr(8)
4345 .kr(2)
4346 .sr(1)
4347 .m(1)
4348 .n(8)
4349 .k(k)
4350 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08004351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004352 }
4353 }
4354
4355 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, k_div_16_subtile) {
4356 TEST_REQUIRES_ARM_NEON;
4357 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004358 for (uint32_t n = 1; n <= 8; n++) {
4359 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004360 GemmMicrokernelTester()
4361 .mr(1)
4362 .nr(8)
4363 .kr(2)
4364 .sr(1)
4365 .m(m)
4366 .n(n)
4367 .k(k)
4368 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004370 }
4371 }
4372 }
4373 }
4374
4375 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8) {
4376 TEST_REQUIRES_ARM_NEON;
4377 for (uint32_t n = 9; n < 16; n++) {
4378 for (size_t k = 1; k <= 80; k += 17) {
4379 GemmMicrokernelTester()
4380 .mr(1)
4381 .nr(8)
4382 .kr(2)
4383 .sr(1)
4384 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004385 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004386 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004388 }
4389 }
4390 }
4391
4392 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_cn) {
4393 TEST_REQUIRES_ARM_NEON;
4394 for (uint32_t n = 9; n < 16; n++) {
4395 for (size_t k = 1; k <= 80; k += 17) {
4396 GemmMicrokernelTester()
4397 .mr(1)
4398 .nr(8)
4399 .kr(2)
4400 .sr(1)
4401 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004402 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004403 .k(k)
4404 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004406 }
4407 }
4408 }
4409
4410 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_strided_a) {
4411 TEST_REQUIRES_ARM_NEON;
4412 for (uint32_t n = 9; n < 16; n++) {
4413 for (size_t k = 1; k <= 80; k += 17) {
4414 GemmMicrokernelTester()
4415 .mr(1)
4416 .nr(8)
4417 .kr(2)
4418 .sr(1)
4419 .m(1)
4420 .n(n)
4421 .k(k)
4422 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004424 }
4425 }
4426 }
4427
4428 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_gt_8_subtile) {
4429 TEST_REQUIRES_ARM_NEON;
4430 for (uint32_t n = 9; n < 16; n++) {
4431 for (size_t k = 1; k <= 80; k += 17) {
4432 for (uint32_t m = 1; m <= 1; m++) {
4433 GemmMicrokernelTester()
4434 .mr(1)
4435 .nr(8)
4436 .kr(2)
4437 .sr(1)
4438 .m(m)
4439 .n(n)
4440 .k(k)
4441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004443 }
4444 }
4445 }
4446 }
4447
4448 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8) {
4449 TEST_REQUIRES_ARM_NEON;
4450 for (uint32_t n = 16; n <= 24; n += 8) {
4451 for (size_t k = 1; k <= 80; k += 17) {
4452 GemmMicrokernelTester()
4453 .mr(1)
4454 .nr(8)
4455 .kr(2)
4456 .sr(1)
4457 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004458 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004459 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004460 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004461 }
4462 }
4463 }
4464
4465 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_cn) {
4466 TEST_REQUIRES_ARM_NEON;
4467 for (uint32_t n = 16; n <= 24; n += 8) {
4468 for (size_t k = 1; k <= 80; k += 17) {
4469 GemmMicrokernelTester()
4470 .mr(1)
4471 .nr(8)
4472 .kr(2)
4473 .sr(1)
4474 .m(1)
4475 .n(n)
4476 .k(k)
4477 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004478 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004479 }
4480 }
4481 }
4482
4483 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_strided_a) {
4484 TEST_REQUIRES_ARM_NEON;
4485 for (uint32_t n = 16; n <= 24; n += 8) {
4486 for (size_t k = 1; k <= 80; k += 17) {
4487 GemmMicrokernelTester()
4488 .mr(1)
4489 .nr(8)
4490 .kr(2)
4491 .sr(1)
4492 .m(1)
4493 .n(n)
4494 .k(k)
4495 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004496 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004497 }
4498 }
4499 }
4500
4501 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, n_div_8_subtile) {
4502 TEST_REQUIRES_ARM_NEON;
4503 for (uint32_t n = 16; n <= 24; n += 8) {
4504 for (size_t k = 1; k <= 80; k += 17) {
4505 for (uint32_t m = 1; m <= 1; m++) {
4506 GemmMicrokernelTester()
4507 .mr(1)
4508 .nr(8)
4509 .kr(2)
4510 .sr(1)
4511 .m(m)
4512 .n(n)
4513 .k(k)
4514 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004515 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004516 }
4517 }
4518 }
4519 }
4520
4521 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm_subtile) {
4522 TEST_REQUIRES_ARM_NEON;
4523 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004524 for (uint32_t n = 1; n <= 8; n++) {
4525 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004526 GemmMicrokernelTester()
4527 .mr(1)
4528 .nr(8)
4529 .kr(2)
4530 .sr(1)
4531 .m(m)
4532 .n(n)
4533 .k(k)
4534 .cm_stride(11)
4535 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004536 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004537 }
4538 }
4539 }
4540 }
4541
4542 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmin) {
4543 TEST_REQUIRES_ARM_NEON;
4544 GemmMicrokernelTester()
4545 .mr(1)
4546 .nr(8)
4547 .kr(2)
4548 .sr(1)
4549 .m(1)
4550 .n(8)
4551 .k(16)
4552 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004554 }
4555
4556 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, qmax) {
4557 TEST_REQUIRES_ARM_NEON;
4558 GemmMicrokernelTester()
4559 .mr(1)
4560 .nr(8)
4561 .kr(2)
4562 .sr(1)
4563 .m(1)
4564 .n(8)
4565 .k(16)
4566 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08004567 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004568 }
4569
4570 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2__NEON_MLAL_LD2R, strided_cm) {
4571 TEST_REQUIRES_ARM_NEON;
4572 GemmMicrokernelTester()
4573 .mr(1)
4574 .nr(8)
4575 .kr(2)
4576 .sr(1)
4577 .m(1)
4578 .n(8)
4579 .k(16)
4580 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004581 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004582 }
4583#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
4584
4585
4586#if XNN_ARCH_ARM || XNN_ARCH_ARM64
4587 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8) {
4588 TEST_REQUIRES_ARM_NEON;
4589 GemmMicrokernelTester()
4590 .mr(1)
4591 .nr(8)
4592 .kr(4)
4593 .sr(2)
4594 .m(1)
4595 .n(8)
4596 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08004597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004598 }
4599
4600 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cn) {
4601 TEST_REQUIRES_ARM_NEON;
4602 GemmMicrokernelTester()
4603 .mr(1)
4604 .nr(8)
4605 .kr(4)
4606 .sr(2)
4607 .m(1)
4608 .n(8)
4609 .k(8)
4610 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004611 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004612 }
4613
4614 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_strided_a) {
4615 TEST_REQUIRES_ARM_NEON;
4616 GemmMicrokernelTester()
4617 .mr(1)
4618 .nr(8)
4619 .kr(4)
4620 .sr(2)
4621 .m(1)
4622 .n(8)
4623 .k(8)
4624 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004625 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004626 }
4627
4628 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile) {
4629 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08004630 for (uint32_t n = 1; n <= 8; n++) {
4631 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004632 GemmMicrokernelTester()
4633 .mr(1)
4634 .nr(8)
4635 .kr(4)
4636 .sr(2)
4637 .m(m)
4638 .n(n)
4639 .k(8)
4640 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004642 }
4643 }
4644 }
4645
4646 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_m) {
4647 TEST_REQUIRES_ARM_NEON;
4648 for (uint32_t m = 1; m <= 1; m++) {
4649 GemmMicrokernelTester()
4650 .mr(1)
4651 .nr(8)
4652 .kr(4)
4653 .sr(2)
4654 .m(m)
4655 .n(8)
4656 .k(8)
4657 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004658 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004659 }
4660 }
4661
4662 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_eq_8_subtile_n) {
4663 TEST_REQUIRES_ARM_NEON;
4664 for (uint32_t n = 1; n <= 8; n++) {
4665 GemmMicrokernelTester()
4666 .mr(1)
4667 .nr(8)
4668 .kr(4)
4669 .sr(2)
4670 .m(1)
4671 .n(n)
4672 .k(8)
4673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004675 }
4676 }
4677
4678 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8) {
4679 TEST_REQUIRES_ARM_NEON;
4680 for (size_t k = 1; k < 8; k++) {
4681 GemmMicrokernelTester()
4682 .mr(1)
4683 .nr(8)
4684 .kr(4)
4685 .sr(2)
4686 .m(1)
4687 .n(8)
4688 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004690 }
4691 }
4692
4693 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8_strided_a) {
4694 TEST_REQUIRES_ARM_NEON;
4695 for (size_t k = 1; k < 8; k++) {
4696 GemmMicrokernelTester()
4697 .mr(1)
4698 .nr(8)
4699 .kr(4)
4700 .sr(2)
4701 .m(1)
4702 .n(8)
4703 .k(k)
4704 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004706 }
4707 }
4708
4709 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_lt_8_subtile) {
4710 TEST_REQUIRES_ARM_NEON;
4711 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004712 for (uint32_t n = 1; n <= 8; n++) {
4713 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004714 GemmMicrokernelTester()
4715 .mr(1)
4716 .nr(8)
4717 .kr(4)
4718 .sr(2)
4719 .m(m)
4720 .n(n)
4721 .k(k)
4722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004724 }
4725 }
4726 }
4727 }
4728
4729 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8) {
4730 TEST_REQUIRES_ARM_NEON;
4731 for (size_t k = 9; k < 16; k++) {
4732 GemmMicrokernelTester()
4733 .mr(1)
4734 .nr(8)
4735 .kr(4)
4736 .sr(2)
4737 .m(1)
4738 .n(8)
4739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004741 }
4742 }
4743
4744 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8_strided_a) {
4745 TEST_REQUIRES_ARM_NEON;
4746 for (size_t k = 9; k < 16; k++) {
4747 GemmMicrokernelTester()
4748 .mr(1)
4749 .nr(8)
4750 .kr(4)
4751 .sr(2)
4752 .m(1)
4753 .n(8)
4754 .k(k)
4755 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08004756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004757 }
4758 }
4759
4760 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_gt_8_subtile) {
4761 TEST_REQUIRES_ARM_NEON;
4762 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004763 for (uint32_t n = 1; n <= 8; n++) {
4764 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004765 GemmMicrokernelTester()
4766 .mr(1)
4767 .nr(8)
4768 .kr(4)
4769 .sr(2)
4770 .m(m)
4771 .n(n)
4772 .k(k)
4773 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004775 }
4776 }
4777 }
4778 }
4779
4780 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8) {
4781 TEST_REQUIRES_ARM_NEON;
4782 for (size_t k = 16; k <= 80; k += 8) {
4783 GemmMicrokernelTester()
4784 .mr(1)
4785 .nr(8)
4786 .kr(4)
4787 .sr(2)
4788 .m(1)
4789 .n(8)
4790 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004792 }
4793 }
4794
4795 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8_strided_a) {
4796 TEST_REQUIRES_ARM_NEON;
4797 for (size_t k = 16; k <= 80; k += 8) {
4798 GemmMicrokernelTester()
4799 .mr(1)
4800 .nr(8)
4801 .kr(4)
4802 .sr(2)
4803 .m(1)
4804 .n(8)
4805 .k(k)
4806 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08004807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004808 }
4809 }
4810
4811 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, k_div_8_subtile) {
4812 TEST_REQUIRES_ARM_NEON;
4813 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004814 for (uint32_t n = 1; n <= 8; n++) {
4815 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004816 GemmMicrokernelTester()
4817 .mr(1)
4818 .nr(8)
4819 .kr(4)
4820 .sr(2)
4821 .m(m)
4822 .n(n)
4823 .k(k)
4824 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004826 }
4827 }
4828 }
4829 }
4830
4831 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8) {
4832 TEST_REQUIRES_ARM_NEON;
4833 for (uint32_t n = 9; n < 16; n++) {
4834 for (size_t k = 1; k <= 40; k += 9) {
4835 GemmMicrokernelTester()
4836 .mr(1)
4837 .nr(8)
4838 .kr(4)
4839 .sr(2)
4840 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004841 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004842 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004844 }
4845 }
4846 }
4847
4848 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_strided_cn) {
4849 TEST_REQUIRES_ARM_NEON;
4850 for (uint32_t n = 9; n < 16; n++) {
4851 for (size_t k = 1; k <= 40; k += 9) {
4852 GemmMicrokernelTester()
4853 .mr(1)
4854 .nr(8)
4855 .kr(4)
4856 .sr(2)
4857 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004858 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004859 .k(k)
4860 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004862 }
4863 }
4864 }
4865
4866 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_strided_a) {
4867 TEST_REQUIRES_ARM_NEON;
4868 for (uint32_t n = 9; n < 16; n++) {
4869 for (size_t k = 1; k <= 40; k += 9) {
4870 GemmMicrokernelTester()
4871 .mr(1)
4872 .nr(8)
4873 .kr(4)
4874 .sr(2)
4875 .m(1)
4876 .n(n)
4877 .k(k)
4878 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004880 }
4881 }
4882 }
4883
4884 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_gt_8_subtile) {
4885 TEST_REQUIRES_ARM_NEON;
4886 for (uint32_t n = 9; n < 16; n++) {
4887 for (size_t k = 1; k <= 40; k += 9) {
4888 for (uint32_t m = 1; m <= 1; m++) {
4889 GemmMicrokernelTester()
4890 .mr(1)
4891 .nr(8)
4892 .kr(4)
4893 .sr(2)
4894 .m(m)
4895 .n(n)
4896 .k(k)
4897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004899 }
4900 }
4901 }
4902 }
4903
4904 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8) {
4905 TEST_REQUIRES_ARM_NEON;
4906 for (uint32_t n = 16; n <= 24; n += 8) {
4907 for (size_t k = 1; k <= 40; k += 9) {
4908 GemmMicrokernelTester()
4909 .mr(1)
4910 .nr(8)
4911 .kr(4)
4912 .sr(2)
4913 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08004914 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004915 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08004916 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004917 }
4918 }
4919 }
4920
4921 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_strided_cn) {
4922 TEST_REQUIRES_ARM_NEON;
4923 for (uint32_t n = 16; n <= 24; n += 8) {
4924 for (size_t k = 1; k <= 40; k += 9) {
4925 GemmMicrokernelTester()
4926 .mr(1)
4927 .nr(8)
4928 .kr(4)
4929 .sr(2)
4930 .m(1)
4931 .n(n)
4932 .k(k)
4933 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08004934 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004935 }
4936 }
4937 }
4938
4939 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_strided_a) {
4940 TEST_REQUIRES_ARM_NEON;
4941 for (uint32_t n = 16; n <= 24; n += 8) {
4942 for (size_t k = 1; k <= 40; k += 9) {
4943 GemmMicrokernelTester()
4944 .mr(1)
4945 .nr(8)
4946 .kr(4)
4947 .sr(2)
4948 .m(1)
4949 .n(n)
4950 .k(k)
4951 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08004952 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004953 }
4954 }
4955 }
4956
4957 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, n_div_8_subtile) {
4958 TEST_REQUIRES_ARM_NEON;
4959 for (uint32_t n = 16; n <= 24; n += 8) {
4960 for (size_t k = 1; k <= 40; k += 9) {
4961 for (uint32_t m = 1; m <= 1; m++) {
4962 GemmMicrokernelTester()
4963 .mr(1)
4964 .nr(8)
4965 .kr(4)
4966 .sr(2)
4967 .m(m)
4968 .n(n)
4969 .k(k)
4970 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004971 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004972 }
4973 }
4974 }
4975 }
4976
4977 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm_subtile) {
4978 TEST_REQUIRES_ARM_NEON;
4979 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08004980 for (uint32_t n = 1; n <= 8; n++) {
4981 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004982 GemmMicrokernelTester()
4983 .mr(1)
4984 .nr(8)
4985 .kr(4)
4986 .sr(2)
4987 .m(m)
4988 .n(n)
4989 .k(k)
4990 .cm_stride(11)
4991 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08004992 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08004993 }
4994 }
4995 }
4996 }
4997
4998 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmin) {
4999 TEST_REQUIRES_ARM_NEON;
5000 GemmMicrokernelTester()
5001 .mr(1)
5002 .nr(8)
5003 .kr(4)
5004 .sr(2)
5005 .m(1)
5006 .n(8)
5007 .k(8)
5008 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005010 }
5011
5012 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, qmax) {
5013 TEST_REQUIRES_ARM_NEON;
5014 GemmMicrokernelTester()
5015 .mr(1)
5016 .nr(8)
5017 .kr(4)
5018 .sr(2)
5019 .m(1)
5020 .n(8)
5021 .k(8)
5022 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005023 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005024 }
5025
5026 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C4S2__NEON_MULL, strided_cm) {
5027 TEST_REQUIRES_ARM_NEON;
5028 GemmMicrokernelTester()
5029 .mr(1)
5030 .nr(8)
5031 .kr(4)
5032 .sr(2)
5033 .m(1)
5034 .n(8)
5035 .k(8)
5036 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005037 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c4s2__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005038 }
5039#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5040
5041
5042#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5043 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16) {
5044 TEST_REQUIRES_ARM_NEON;
5045 GemmMicrokernelTester()
5046 .mr(3)
5047 .nr(8)
5048 .kr(4)
5049 .sr(2)
5050 .m(3)
5051 .n(8)
5052 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005054 }
5055
5056 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cn) {
5057 TEST_REQUIRES_ARM_NEON;
5058 GemmMicrokernelTester()
5059 .mr(3)
5060 .nr(8)
5061 .kr(4)
5062 .sr(2)
5063 .m(3)
5064 .n(8)
5065 .k(16)
5066 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005067 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005068 }
5069
5070 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_strided_a) {
5071 TEST_REQUIRES_ARM_NEON;
5072 GemmMicrokernelTester()
5073 .mr(3)
5074 .nr(8)
5075 .kr(4)
5076 .sr(2)
5077 .m(3)
5078 .n(8)
5079 .k(16)
5080 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005081 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005082 }
5083
5084 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile) {
5085 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005086 for (uint32_t n = 1; n <= 8; n++) {
5087 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005088 GemmMicrokernelTester()
5089 .mr(3)
5090 .nr(8)
5091 .kr(4)
5092 .sr(2)
5093 .m(m)
5094 .n(n)
5095 .k(16)
5096 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005098 }
5099 }
5100 }
5101
5102 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile_m) {
5103 TEST_REQUIRES_ARM_NEON;
5104 for (uint32_t m = 1; m <= 3; m++) {
5105 GemmMicrokernelTester()
5106 .mr(3)
5107 .nr(8)
5108 .kr(4)
5109 .sr(2)
5110 .m(m)
5111 .n(8)
5112 .k(16)
5113 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005114 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005115 }
5116 }
5117
5118 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_eq_16_subtile_n) {
5119 TEST_REQUIRES_ARM_NEON;
5120 for (uint32_t n = 1; n <= 8; n++) {
5121 GemmMicrokernelTester()
5122 .mr(3)
5123 .nr(8)
5124 .kr(4)
5125 .sr(2)
5126 .m(3)
5127 .n(n)
5128 .k(16)
5129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005131 }
5132 }
5133
5134 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16) {
5135 TEST_REQUIRES_ARM_NEON;
5136 for (size_t k = 1; k < 16; k++) {
5137 GemmMicrokernelTester()
5138 .mr(3)
5139 .nr(8)
5140 .kr(4)
5141 .sr(2)
5142 .m(3)
5143 .n(8)
5144 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005146 }
5147 }
5148
5149 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16_strided_a) {
5150 TEST_REQUIRES_ARM_NEON;
5151 for (size_t k = 1; k < 16; k++) {
5152 GemmMicrokernelTester()
5153 .mr(3)
5154 .nr(8)
5155 .kr(4)
5156 .sr(2)
5157 .m(3)
5158 .n(8)
5159 .k(k)
5160 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005162 }
5163 }
5164
5165 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_lt_16_subtile) {
5166 TEST_REQUIRES_ARM_NEON;
5167 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005168 for (uint32_t n = 1; n <= 8; n++) {
5169 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005170 GemmMicrokernelTester()
5171 .mr(3)
5172 .nr(8)
5173 .kr(4)
5174 .sr(2)
5175 .m(m)
5176 .n(n)
5177 .k(k)
5178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005179 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005180 }
5181 }
5182 }
5183 }
5184
5185 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16) {
5186 TEST_REQUIRES_ARM_NEON;
5187 for (size_t k = 17; k < 32; k++) {
5188 GemmMicrokernelTester()
5189 .mr(3)
5190 .nr(8)
5191 .kr(4)
5192 .sr(2)
5193 .m(3)
5194 .n(8)
5195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005197 }
5198 }
5199
5200 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16_strided_a) {
5201 TEST_REQUIRES_ARM_NEON;
5202 for (size_t k = 17; k < 32; k++) {
5203 GemmMicrokernelTester()
5204 .mr(3)
5205 .nr(8)
5206 .kr(4)
5207 .sr(2)
5208 .m(3)
5209 .n(8)
5210 .k(k)
5211 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08005212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005213 }
5214 }
5215
5216 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_gt_16_subtile) {
5217 TEST_REQUIRES_ARM_NEON;
5218 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005219 for (uint32_t n = 1; n <= 8; n++) {
5220 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005221 GemmMicrokernelTester()
5222 .mr(3)
5223 .nr(8)
5224 .kr(4)
5225 .sr(2)
5226 .m(m)
5227 .n(n)
5228 .k(k)
5229 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005231 }
5232 }
5233 }
5234 }
5235
5236 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16) {
5237 TEST_REQUIRES_ARM_NEON;
5238 for (size_t k = 32; k <= 160; k += 16) {
5239 GemmMicrokernelTester()
5240 .mr(3)
5241 .nr(8)
5242 .kr(4)
5243 .sr(2)
5244 .m(3)
5245 .n(8)
5246 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005248 }
5249 }
5250
5251 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16_strided_a) {
5252 TEST_REQUIRES_ARM_NEON;
5253 for (size_t k = 32; k <= 160; k += 16) {
5254 GemmMicrokernelTester()
5255 .mr(3)
5256 .nr(8)
5257 .kr(4)
5258 .sr(2)
5259 .m(3)
5260 .n(8)
5261 .k(k)
5262 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005264 }
5265 }
5266
5267 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, k_div_16_subtile) {
5268 TEST_REQUIRES_ARM_NEON;
5269 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005270 for (uint32_t n = 1; n <= 8; n++) {
5271 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005272 GemmMicrokernelTester()
5273 .mr(3)
5274 .nr(8)
5275 .kr(4)
5276 .sr(2)
5277 .m(m)
5278 .n(n)
5279 .k(k)
5280 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005282 }
5283 }
5284 }
5285 }
5286
5287 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8) {
5288 TEST_REQUIRES_ARM_NEON;
5289 for (uint32_t n = 9; n < 16; n++) {
5290 for (size_t k = 1; k <= 80; k += 17) {
5291 GemmMicrokernelTester()
5292 .mr(3)
5293 .nr(8)
5294 .kr(4)
5295 .sr(2)
5296 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005297 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005298 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005299 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005300 }
5301 }
5302 }
5303
5304 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_strided_cn) {
5305 TEST_REQUIRES_ARM_NEON;
5306 for (uint32_t n = 9; n < 16; n++) {
5307 for (size_t k = 1; k <= 80; k += 17) {
5308 GemmMicrokernelTester()
5309 .mr(3)
5310 .nr(8)
5311 .kr(4)
5312 .sr(2)
5313 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005314 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005315 .k(k)
5316 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005318 }
5319 }
5320 }
5321
5322 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_strided_a) {
5323 TEST_REQUIRES_ARM_NEON;
5324 for (uint32_t n = 9; n < 16; n++) {
5325 for (size_t k = 1; k <= 80; k += 17) {
5326 GemmMicrokernelTester()
5327 .mr(3)
5328 .nr(8)
5329 .kr(4)
5330 .sr(2)
5331 .m(3)
5332 .n(n)
5333 .k(k)
5334 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005336 }
5337 }
5338 }
5339
5340 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_gt_8_subtile) {
5341 TEST_REQUIRES_ARM_NEON;
5342 for (uint32_t n = 9; n < 16; n++) {
5343 for (size_t k = 1; k <= 80; k += 17) {
5344 for (uint32_t m = 1; m <= 3; m++) {
5345 GemmMicrokernelTester()
5346 .mr(3)
5347 .nr(8)
5348 .kr(4)
5349 .sr(2)
5350 .m(m)
5351 .n(n)
5352 .k(k)
5353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005355 }
5356 }
5357 }
5358 }
5359
5360 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8) {
5361 TEST_REQUIRES_ARM_NEON;
5362 for (uint32_t n = 16; n <= 24; n += 8) {
5363 for (size_t k = 1; k <= 80; k += 17) {
5364 GemmMicrokernelTester()
5365 .mr(3)
5366 .nr(8)
5367 .kr(4)
5368 .sr(2)
5369 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005370 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005371 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005372 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005373 }
5374 }
5375 }
5376
5377 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_strided_cn) {
5378 TEST_REQUIRES_ARM_NEON;
5379 for (uint32_t n = 16; n <= 24; n += 8) {
5380 for (size_t k = 1; k <= 80; k += 17) {
5381 GemmMicrokernelTester()
5382 .mr(3)
5383 .nr(8)
5384 .kr(4)
5385 .sr(2)
5386 .m(3)
5387 .n(n)
5388 .k(k)
5389 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005390 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005391 }
5392 }
5393 }
5394
5395 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_strided_a) {
5396 TEST_REQUIRES_ARM_NEON;
5397 for (uint32_t n = 16; n <= 24; n += 8) {
5398 for (size_t k = 1; k <= 80; k += 17) {
5399 GemmMicrokernelTester()
5400 .mr(3)
5401 .nr(8)
5402 .kr(4)
5403 .sr(2)
5404 .m(3)
5405 .n(n)
5406 .k(k)
5407 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005408 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005409 }
5410 }
5411 }
5412
5413 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, n_div_8_subtile) {
5414 TEST_REQUIRES_ARM_NEON;
5415 for (uint32_t n = 16; n <= 24; n += 8) {
5416 for (size_t k = 1; k <= 80; k += 17) {
5417 for (uint32_t m = 1; m <= 3; m++) {
5418 GemmMicrokernelTester()
5419 .mr(3)
5420 .nr(8)
5421 .kr(4)
5422 .sr(2)
5423 .m(m)
5424 .n(n)
5425 .k(k)
5426 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005427 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005428 }
5429 }
5430 }
5431 }
5432
5433 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cm_subtile) {
5434 TEST_REQUIRES_ARM_NEON;
5435 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005436 for (uint32_t n = 1; n <= 8; n++) {
5437 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005438 GemmMicrokernelTester()
5439 .mr(3)
5440 .nr(8)
5441 .kr(4)
5442 .sr(2)
5443 .m(m)
5444 .n(n)
5445 .k(k)
5446 .cm_stride(11)
5447 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005448 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005449 }
5450 }
5451 }
5452 }
5453
5454 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, qmin) {
5455 TEST_REQUIRES_ARM_NEON;
5456 GemmMicrokernelTester()
5457 .mr(3)
5458 .nr(8)
5459 .kr(4)
5460 .sr(2)
5461 .m(3)
5462 .n(8)
5463 .k(16)
5464 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005466 }
5467
5468 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, qmax) {
5469 TEST_REQUIRES_ARM_NEON;
5470 GemmMicrokernelTester()
5471 .mr(3)
5472 .nr(8)
5473 .kr(4)
5474 .sr(2)
5475 .m(3)
5476 .n(8)
5477 .k(16)
5478 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005479 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005480 }
5481
5482 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4S2__NEON_MLAL, strided_cm) {
5483 TEST_REQUIRES_ARM_NEON;
5484 GemmMicrokernelTester()
5485 .mr(3)
5486 .nr(8)
5487 .kr(4)
5488 .sr(2)
5489 .m(3)
5490 .n(8)
5491 .k(16)
5492 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08005493 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005494 }
5495#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5496
5497
5498#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5499 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16) {
5500 TEST_REQUIRES_ARM_NEON;
5501 GemmMicrokernelTester()
5502 .mr(3)
5503 .nr(16)
5504 .kr(4)
5505 .sr(2)
5506 .m(3)
5507 .n(16)
5508 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005510 }
5511
5512 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cn) {
5513 TEST_REQUIRES_ARM_NEON;
5514 GemmMicrokernelTester()
5515 .mr(3)
5516 .nr(16)
5517 .kr(4)
5518 .sr(2)
5519 .m(3)
5520 .n(16)
5521 .k(16)
5522 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005523 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005524 }
5525
5526 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_strided_a) {
5527 TEST_REQUIRES_ARM_NEON;
5528 GemmMicrokernelTester()
5529 .mr(3)
5530 .nr(16)
5531 .kr(4)
5532 .sr(2)
5533 .m(3)
5534 .n(16)
5535 .k(16)
5536 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005537 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005538 }
5539
5540 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile) {
5541 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005542 for (uint32_t n = 1; n <= 16; n++) {
5543 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005544 GemmMicrokernelTester()
5545 .mr(3)
5546 .nr(16)
5547 .kr(4)
5548 .sr(2)
5549 .m(m)
5550 .n(n)
5551 .k(16)
5552 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005554 }
5555 }
5556 }
5557
5558 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
5559 TEST_REQUIRES_ARM_NEON;
5560 for (uint32_t m = 1; m <= 3; m++) {
5561 GemmMicrokernelTester()
5562 .mr(3)
5563 .nr(16)
5564 .kr(4)
5565 .sr(2)
5566 .m(m)
5567 .n(16)
5568 .k(16)
5569 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005570 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005571 }
5572 }
5573
5574 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
5575 TEST_REQUIRES_ARM_NEON;
5576 for (uint32_t n = 1; n <= 16; n++) {
5577 GemmMicrokernelTester()
5578 .mr(3)
5579 .nr(16)
5580 .kr(4)
5581 .sr(2)
5582 .m(3)
5583 .n(n)
5584 .k(16)
5585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005587 }
5588 }
5589
5590 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16) {
5591 TEST_REQUIRES_ARM_NEON;
5592 for (size_t k = 1; k < 16; k++) {
5593 GemmMicrokernelTester()
5594 .mr(3)
5595 .nr(16)
5596 .kr(4)
5597 .sr(2)
5598 .m(3)
5599 .n(16)
5600 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005602 }
5603 }
5604
5605 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16_strided_a) {
5606 TEST_REQUIRES_ARM_NEON;
5607 for (size_t k = 1; k < 16; k++) {
5608 GemmMicrokernelTester()
5609 .mr(3)
5610 .nr(16)
5611 .kr(4)
5612 .sr(2)
5613 .m(3)
5614 .n(16)
5615 .k(k)
5616 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005618 }
5619 }
5620
5621 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_lt_16_subtile) {
5622 TEST_REQUIRES_ARM_NEON;
5623 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005624 for (uint32_t n = 1; n <= 16; n++) {
5625 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005626 GemmMicrokernelTester()
5627 .mr(3)
5628 .nr(16)
5629 .kr(4)
5630 .sr(2)
5631 .m(m)
5632 .n(n)
5633 .k(k)
5634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005635 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005636 }
5637 }
5638 }
5639 }
5640
5641 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16) {
5642 TEST_REQUIRES_ARM_NEON;
5643 for (size_t k = 17; k < 32; k++) {
5644 GemmMicrokernelTester()
5645 .mr(3)
5646 .nr(16)
5647 .kr(4)
5648 .sr(2)
5649 .m(3)
5650 .n(16)
5651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005653 }
5654 }
5655
5656 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16_strided_a) {
5657 TEST_REQUIRES_ARM_NEON;
5658 for (size_t k = 17; k < 32; k++) {
5659 GemmMicrokernelTester()
5660 .mr(3)
5661 .nr(16)
5662 .kr(4)
5663 .sr(2)
5664 .m(3)
5665 .n(16)
5666 .k(k)
5667 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08005668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005669 }
5670 }
5671
5672 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_gt_16_subtile) {
5673 TEST_REQUIRES_ARM_NEON;
5674 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005675 for (uint32_t n = 1; n <= 16; n++) {
5676 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005677 GemmMicrokernelTester()
5678 .mr(3)
5679 .nr(16)
5680 .kr(4)
5681 .sr(2)
5682 .m(m)
5683 .n(n)
5684 .k(k)
5685 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005687 }
5688 }
5689 }
5690 }
5691
5692 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16) {
5693 TEST_REQUIRES_ARM_NEON;
5694 for (size_t k = 32; k <= 160; k += 16) {
5695 GemmMicrokernelTester()
5696 .mr(3)
5697 .nr(16)
5698 .kr(4)
5699 .sr(2)
5700 .m(3)
5701 .n(16)
5702 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005704 }
5705 }
5706
5707 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16_strided_a) {
5708 TEST_REQUIRES_ARM_NEON;
5709 for (size_t k = 32; k <= 160; k += 16) {
5710 GemmMicrokernelTester()
5711 .mr(3)
5712 .nr(16)
5713 .kr(4)
5714 .sr(2)
5715 .m(3)
5716 .n(16)
5717 .k(k)
5718 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08005719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005720 }
5721 }
5722
5723 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, k_div_16_subtile) {
5724 TEST_REQUIRES_ARM_NEON;
5725 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005726 for (uint32_t n = 1; n <= 16; n++) {
5727 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005728 GemmMicrokernelTester()
5729 .mr(3)
5730 .nr(16)
5731 .kr(4)
5732 .sr(2)
5733 .m(m)
5734 .n(n)
5735 .k(k)
5736 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005737 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005738 }
5739 }
5740 }
5741 }
5742
5743 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16) {
5744 TEST_REQUIRES_ARM_NEON;
5745 for (uint32_t n = 17; n < 32; n++) {
5746 for (size_t k = 1; k <= 80; k += 17) {
5747 GemmMicrokernelTester()
5748 .mr(3)
5749 .nr(16)
5750 .kr(4)
5751 .sr(2)
5752 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005753 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005754 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005755 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005756 }
5757 }
5758 }
5759
5760 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
5761 TEST_REQUIRES_ARM_NEON;
5762 for (uint32_t n = 17; n < 32; n++) {
5763 for (size_t k = 1; k <= 80; k += 17) {
5764 GemmMicrokernelTester()
5765 .mr(3)
5766 .nr(16)
5767 .kr(4)
5768 .sr(2)
5769 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005770 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005771 .k(k)
5772 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005774 }
5775 }
5776 }
5777
5778 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_strided_a) {
5779 TEST_REQUIRES_ARM_NEON;
5780 for (uint32_t n = 17; n < 32; n++) {
5781 for (size_t k = 1; k <= 80; k += 17) {
5782 GemmMicrokernelTester()
5783 .mr(3)
5784 .nr(16)
5785 .kr(4)
5786 .sr(2)
5787 .m(3)
5788 .n(n)
5789 .k(k)
5790 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005792 }
5793 }
5794 }
5795
5796 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_gt_16_subtile) {
5797 TEST_REQUIRES_ARM_NEON;
5798 for (uint32_t n = 17; n < 32; n++) {
5799 for (size_t k = 1; k <= 80; k += 17) {
5800 for (uint32_t m = 1; m <= 3; m++) {
5801 GemmMicrokernelTester()
5802 .mr(3)
5803 .nr(16)
5804 .kr(4)
5805 .sr(2)
5806 .m(m)
5807 .n(n)
5808 .k(k)
5809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005811 }
5812 }
5813 }
5814 }
5815
5816 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16) {
5817 TEST_REQUIRES_ARM_NEON;
5818 for (uint32_t n = 32; n <= 48; n += 16) {
5819 for (size_t k = 1; k <= 80; k += 17) {
5820 GemmMicrokernelTester()
5821 .mr(3)
5822 .nr(16)
5823 .kr(4)
5824 .sr(2)
5825 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08005826 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005827 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08005828 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005829 }
5830 }
5831 }
5832
5833 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
5834 TEST_REQUIRES_ARM_NEON;
5835 for (uint32_t n = 32; n <= 48; n += 16) {
5836 for (size_t k = 1; k <= 80; k += 17) {
5837 GemmMicrokernelTester()
5838 .mr(3)
5839 .nr(16)
5840 .kr(4)
5841 .sr(2)
5842 .m(3)
5843 .n(n)
5844 .k(k)
5845 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005846 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005847 }
5848 }
5849 }
5850
5851 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_strided_a) {
5852 TEST_REQUIRES_ARM_NEON;
5853 for (uint32_t n = 32; n <= 48; n += 16) {
5854 for (size_t k = 1; k <= 80; k += 17) {
5855 GemmMicrokernelTester()
5856 .mr(3)
5857 .nr(16)
5858 .kr(4)
5859 .sr(2)
5860 .m(3)
5861 .n(n)
5862 .k(k)
5863 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08005864 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005865 }
5866 }
5867 }
5868
5869 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, n_div_16_subtile) {
5870 TEST_REQUIRES_ARM_NEON;
5871 for (uint32_t n = 32; n <= 48; n += 16) {
5872 for (size_t k = 1; k <= 80; k += 17) {
5873 for (uint32_t m = 1; m <= 3; m++) {
5874 GemmMicrokernelTester()
5875 .mr(3)
5876 .nr(16)
5877 .kr(4)
5878 .sr(2)
5879 .m(m)
5880 .n(n)
5881 .k(k)
5882 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005883 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005884 }
5885 }
5886 }
5887 }
5888
5889 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cm_subtile) {
5890 TEST_REQUIRES_ARM_NEON;
5891 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08005892 for (uint32_t n = 1; n <= 16; n++) {
5893 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005894 GemmMicrokernelTester()
5895 .mr(3)
5896 .nr(16)
5897 .kr(4)
5898 .sr(2)
5899 .m(m)
5900 .n(n)
5901 .k(k)
5902 .cm_stride(19)
5903 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08005904 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005905 }
5906 }
5907 }
5908 }
5909
5910 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, qmin) {
5911 TEST_REQUIRES_ARM_NEON;
5912 GemmMicrokernelTester()
5913 .mr(3)
5914 .nr(16)
5915 .kr(4)
5916 .sr(2)
5917 .m(3)
5918 .n(16)
5919 .k(16)
5920 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005922 }
5923
5924 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, qmax) {
5925 TEST_REQUIRES_ARM_NEON;
5926 GemmMicrokernelTester()
5927 .mr(3)
5928 .nr(16)
5929 .kr(4)
5930 .sr(2)
5931 .m(3)
5932 .n(16)
5933 .k(16)
5934 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08005935 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005936 }
5937
5938 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4S2__NEON_MLAL, strided_cm) {
5939 TEST_REQUIRES_ARM_NEON;
5940 GemmMicrokernelTester()
5941 .mr(3)
5942 .nr(16)
5943 .kr(4)
5944 .sr(2)
5945 .m(3)
5946 .n(16)
5947 .k(16)
5948 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005949 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005950 }
5951#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
5952
5953
5954#if XNN_ARCH_ARM || XNN_ARCH_ARM64
5955 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16) {
5956 TEST_REQUIRES_ARM_NEON;
5957 GemmMicrokernelTester()
5958 .mr(4)
5959 .nr(16)
5960 .kr(4)
5961 .sr(2)
5962 .m(4)
5963 .n(16)
5964 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08005965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005966 }
5967
5968 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cn) {
5969 TEST_REQUIRES_ARM_NEON;
5970 GemmMicrokernelTester()
5971 .mr(4)
5972 .nr(16)
5973 .kr(4)
5974 .sr(2)
5975 .m(4)
5976 .n(16)
5977 .k(16)
5978 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005979 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005980 }
5981
5982 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_strided_a) {
5983 TEST_REQUIRES_ARM_NEON;
5984 GemmMicrokernelTester()
5985 .mr(4)
5986 .nr(16)
5987 .kr(4)
5988 .sr(2)
5989 .m(4)
5990 .n(16)
5991 .k(16)
5992 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08005993 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08005994 }
5995
5996 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile) {
5997 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08005998 for (uint32_t n = 1; n <= 16; n++) {
5999 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006000 GemmMicrokernelTester()
6001 .mr(4)
6002 .nr(16)
6003 .kr(4)
6004 .sr(2)
6005 .m(m)
6006 .n(n)
6007 .k(16)
6008 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006010 }
6011 }
6012 }
6013
6014 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile_m) {
6015 TEST_REQUIRES_ARM_NEON;
6016 for (uint32_t m = 1; m <= 4; m++) {
6017 GemmMicrokernelTester()
6018 .mr(4)
6019 .nr(16)
6020 .kr(4)
6021 .sr(2)
6022 .m(m)
6023 .n(16)
6024 .k(16)
6025 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006026 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006027 }
6028 }
6029
6030 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_eq_16_subtile_n) {
6031 TEST_REQUIRES_ARM_NEON;
6032 for (uint32_t n = 1; n <= 16; n++) {
6033 GemmMicrokernelTester()
6034 .mr(4)
6035 .nr(16)
6036 .kr(4)
6037 .sr(2)
6038 .m(4)
6039 .n(n)
6040 .k(16)
6041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006043 }
6044 }
6045
6046 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16) {
6047 TEST_REQUIRES_ARM_NEON;
6048 for (size_t k = 1; k < 16; k++) {
6049 GemmMicrokernelTester()
6050 .mr(4)
6051 .nr(16)
6052 .kr(4)
6053 .sr(2)
6054 .m(4)
6055 .n(16)
6056 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006058 }
6059 }
6060
6061 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16_strided_a) {
6062 TEST_REQUIRES_ARM_NEON;
6063 for (size_t k = 1; k < 16; k++) {
6064 GemmMicrokernelTester()
6065 .mr(4)
6066 .nr(16)
6067 .kr(4)
6068 .sr(2)
6069 .m(4)
6070 .n(16)
6071 .k(k)
6072 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006074 }
6075 }
6076
6077 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_lt_16_subtile) {
6078 TEST_REQUIRES_ARM_NEON;
6079 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006080 for (uint32_t n = 1; n <= 16; n++) {
6081 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006082 GemmMicrokernelTester()
6083 .mr(4)
6084 .nr(16)
6085 .kr(4)
6086 .sr(2)
6087 .m(m)
6088 .n(n)
6089 .k(k)
6090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006091 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006092 }
6093 }
6094 }
6095 }
6096
6097 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16) {
6098 TEST_REQUIRES_ARM_NEON;
6099 for (size_t k = 17; k < 32; k++) {
6100 GemmMicrokernelTester()
6101 .mr(4)
6102 .nr(16)
6103 .kr(4)
6104 .sr(2)
6105 .m(4)
6106 .n(16)
6107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006109 }
6110 }
6111
6112 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16_strided_a) {
6113 TEST_REQUIRES_ARM_NEON;
6114 for (size_t k = 17; k < 32; k++) {
6115 GemmMicrokernelTester()
6116 .mr(4)
6117 .nr(16)
6118 .kr(4)
6119 .sr(2)
6120 .m(4)
6121 .n(16)
6122 .k(k)
6123 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08006124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006125 }
6126 }
6127
6128 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_gt_16_subtile) {
6129 TEST_REQUIRES_ARM_NEON;
6130 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006131 for (uint32_t n = 1; n <= 16; n++) {
6132 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006133 GemmMicrokernelTester()
6134 .mr(4)
6135 .nr(16)
6136 .kr(4)
6137 .sr(2)
6138 .m(m)
6139 .n(n)
6140 .k(k)
6141 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006143 }
6144 }
6145 }
6146 }
6147
6148 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16) {
6149 TEST_REQUIRES_ARM_NEON;
6150 for (size_t k = 32; k <= 160; k += 16) {
6151 GemmMicrokernelTester()
6152 .mr(4)
6153 .nr(16)
6154 .kr(4)
6155 .sr(2)
6156 .m(4)
6157 .n(16)
6158 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006160 }
6161 }
6162
6163 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16_strided_a) {
6164 TEST_REQUIRES_ARM_NEON;
6165 for (size_t k = 32; k <= 160; k += 16) {
6166 GemmMicrokernelTester()
6167 .mr(4)
6168 .nr(16)
6169 .kr(4)
6170 .sr(2)
6171 .m(4)
6172 .n(16)
6173 .k(k)
6174 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08006175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006176 }
6177 }
6178
6179 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, k_div_16_subtile) {
6180 TEST_REQUIRES_ARM_NEON;
6181 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006182 for (uint32_t n = 1; n <= 16; n++) {
6183 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006184 GemmMicrokernelTester()
6185 .mr(4)
6186 .nr(16)
6187 .kr(4)
6188 .sr(2)
6189 .m(m)
6190 .n(n)
6191 .k(k)
6192 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006193 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006194 }
6195 }
6196 }
6197 }
6198
6199 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16) {
6200 TEST_REQUIRES_ARM_NEON;
6201 for (uint32_t n = 17; n < 32; n++) {
6202 for (size_t k = 1; k <= 80; k += 17) {
6203 GemmMicrokernelTester()
6204 .mr(4)
6205 .nr(16)
6206 .kr(4)
6207 .sr(2)
6208 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006209 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006210 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006211 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006212 }
6213 }
6214 }
6215
6216 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_strided_cn) {
6217 TEST_REQUIRES_ARM_NEON;
6218 for (uint32_t n = 17; n < 32; n++) {
6219 for (size_t k = 1; k <= 80; k += 17) {
6220 GemmMicrokernelTester()
6221 .mr(4)
6222 .nr(16)
6223 .kr(4)
6224 .sr(2)
6225 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006226 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006227 .k(k)
6228 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006230 }
6231 }
6232 }
6233
6234 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_strided_a) {
6235 TEST_REQUIRES_ARM_NEON;
6236 for (uint32_t n = 17; n < 32; n++) {
6237 for (size_t k = 1; k <= 80; k += 17) {
6238 GemmMicrokernelTester()
6239 .mr(4)
6240 .nr(16)
6241 .kr(4)
6242 .sr(2)
6243 .m(4)
6244 .n(n)
6245 .k(k)
6246 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006248 }
6249 }
6250 }
6251
6252 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_gt_16_subtile) {
6253 TEST_REQUIRES_ARM_NEON;
6254 for (uint32_t n = 17; n < 32; n++) {
6255 for (size_t k = 1; k <= 80; k += 17) {
6256 for (uint32_t m = 1; m <= 4; m++) {
6257 GemmMicrokernelTester()
6258 .mr(4)
6259 .nr(16)
6260 .kr(4)
6261 .sr(2)
6262 .m(m)
6263 .n(n)
6264 .k(k)
6265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006267 }
6268 }
6269 }
6270 }
6271
6272 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16) {
6273 TEST_REQUIRES_ARM_NEON;
6274 for (uint32_t n = 32; n <= 48; n += 16) {
6275 for (size_t k = 1; k <= 80; k += 17) {
6276 GemmMicrokernelTester()
6277 .mr(4)
6278 .nr(16)
6279 .kr(4)
6280 .sr(2)
6281 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006282 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006283 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006284 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006285 }
6286 }
6287 }
6288
6289 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_strided_cn) {
6290 TEST_REQUIRES_ARM_NEON;
6291 for (uint32_t n = 32; n <= 48; n += 16) {
6292 for (size_t k = 1; k <= 80; k += 17) {
6293 GemmMicrokernelTester()
6294 .mr(4)
6295 .nr(16)
6296 .kr(4)
6297 .sr(2)
6298 .m(4)
6299 .n(n)
6300 .k(k)
6301 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006302 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006303 }
6304 }
6305 }
6306
6307 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_strided_a) {
6308 TEST_REQUIRES_ARM_NEON;
6309 for (uint32_t n = 32; n <= 48; n += 16) {
6310 for (size_t k = 1; k <= 80; k += 17) {
6311 GemmMicrokernelTester()
6312 .mr(4)
6313 .nr(16)
6314 .kr(4)
6315 .sr(2)
6316 .m(4)
6317 .n(n)
6318 .k(k)
6319 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006320 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006321 }
6322 }
6323 }
6324
6325 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, n_div_16_subtile) {
6326 TEST_REQUIRES_ARM_NEON;
6327 for (uint32_t n = 32; n <= 48; n += 16) {
6328 for (size_t k = 1; k <= 80; k += 17) {
6329 for (uint32_t m = 1; m <= 4; m++) {
6330 GemmMicrokernelTester()
6331 .mr(4)
6332 .nr(16)
6333 .kr(4)
6334 .sr(2)
6335 .m(m)
6336 .n(n)
6337 .k(k)
6338 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006339 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006340 }
6341 }
6342 }
6343 }
6344
6345 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cm_subtile) {
6346 TEST_REQUIRES_ARM_NEON;
6347 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006348 for (uint32_t n = 1; n <= 16; n++) {
6349 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006350 GemmMicrokernelTester()
6351 .mr(4)
6352 .nr(16)
6353 .kr(4)
6354 .sr(2)
6355 .m(m)
6356 .n(n)
6357 .k(k)
6358 .cm_stride(19)
6359 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006360 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006361 }
6362 }
6363 }
6364 }
6365
6366 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, qmin) {
6367 TEST_REQUIRES_ARM_NEON;
6368 GemmMicrokernelTester()
6369 .mr(4)
6370 .nr(16)
6371 .kr(4)
6372 .sr(2)
6373 .m(4)
6374 .n(16)
6375 .k(16)
6376 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006378 }
6379
6380 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, qmax) {
6381 TEST_REQUIRES_ARM_NEON;
6382 GemmMicrokernelTester()
6383 .mr(4)
6384 .nr(16)
6385 .kr(4)
6386 .sr(2)
6387 .m(4)
6388 .n(16)
6389 .k(16)
6390 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006391 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006392 }
6393
6394 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4S2__NEON_MLAL, strided_cm) {
6395 TEST_REQUIRES_ARM_NEON;
6396 GemmMicrokernelTester()
6397 .mr(4)
6398 .nr(16)
6399 .kr(4)
6400 .sr(2)
6401 .m(4)
6402 .n(16)
6403 .k(16)
6404 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006405 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4s2__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006406 }
6407#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6408
6409
6410#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6411 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8) {
6412 TEST_REQUIRES_ARM_NEON;
6413 GemmMicrokernelTester()
6414 .mr(1)
6415 .nr(8)
6416 .kr(2)
6417 .sr(4)
6418 .m(1)
6419 .n(8)
6420 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006422 }
6423
6424 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cn) {
6425 TEST_REQUIRES_ARM_NEON;
6426 GemmMicrokernelTester()
6427 .mr(1)
6428 .nr(8)
6429 .kr(2)
6430 .sr(4)
6431 .m(1)
6432 .n(8)
6433 .k(8)
6434 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006435 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006436 }
6437
6438 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_strided_a) {
6439 TEST_REQUIRES_ARM_NEON;
6440 GemmMicrokernelTester()
6441 .mr(1)
6442 .nr(8)
6443 .kr(2)
6444 .sr(4)
6445 .m(1)
6446 .n(8)
6447 .k(8)
6448 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006449 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006450 }
6451
6452 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile) {
6453 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006454 for (uint32_t n = 1; n <= 8; n++) {
6455 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006456 GemmMicrokernelTester()
6457 .mr(1)
6458 .nr(8)
6459 .kr(2)
6460 .sr(4)
6461 .m(m)
6462 .n(n)
6463 .k(8)
6464 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006466 }
6467 }
6468 }
6469
6470 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile_m) {
6471 TEST_REQUIRES_ARM_NEON;
6472 for (uint32_t m = 1; m <= 1; m++) {
6473 GemmMicrokernelTester()
6474 .mr(1)
6475 .nr(8)
6476 .kr(2)
6477 .sr(4)
6478 .m(m)
6479 .n(8)
6480 .k(8)
6481 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006482 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006483 }
6484 }
6485
6486 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_eq_8_subtile_n) {
6487 TEST_REQUIRES_ARM_NEON;
6488 for (uint32_t n = 1; n <= 8; n++) {
6489 GemmMicrokernelTester()
6490 .mr(1)
6491 .nr(8)
6492 .kr(2)
6493 .sr(4)
6494 .m(1)
6495 .n(n)
6496 .k(8)
6497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006499 }
6500 }
6501
6502 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8) {
6503 TEST_REQUIRES_ARM_NEON;
6504 for (size_t k = 1; k < 8; k++) {
6505 GemmMicrokernelTester()
6506 .mr(1)
6507 .nr(8)
6508 .kr(2)
6509 .sr(4)
6510 .m(1)
6511 .n(8)
6512 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006514 }
6515 }
6516
6517 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8_strided_a) {
6518 TEST_REQUIRES_ARM_NEON;
6519 for (size_t k = 1; k < 8; k++) {
6520 GemmMicrokernelTester()
6521 .mr(1)
6522 .nr(8)
6523 .kr(2)
6524 .sr(4)
6525 .m(1)
6526 .n(8)
6527 .k(k)
6528 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006530 }
6531 }
6532
6533 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_lt_8_subtile) {
6534 TEST_REQUIRES_ARM_NEON;
6535 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006536 for (uint32_t n = 1; n <= 8; n++) {
6537 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006538 GemmMicrokernelTester()
6539 .mr(1)
6540 .nr(8)
6541 .kr(2)
6542 .sr(4)
6543 .m(m)
6544 .n(n)
6545 .k(k)
6546 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006548 }
6549 }
6550 }
6551 }
6552
6553 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8) {
6554 TEST_REQUIRES_ARM_NEON;
6555 for (size_t k = 9; k < 16; k++) {
6556 GemmMicrokernelTester()
6557 .mr(1)
6558 .nr(8)
6559 .kr(2)
6560 .sr(4)
6561 .m(1)
6562 .n(8)
6563 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006565 }
6566 }
6567
6568 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8_strided_a) {
6569 TEST_REQUIRES_ARM_NEON;
6570 for (size_t k = 9; k < 16; k++) {
6571 GemmMicrokernelTester()
6572 .mr(1)
6573 .nr(8)
6574 .kr(2)
6575 .sr(4)
6576 .m(1)
6577 .n(8)
6578 .k(k)
6579 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08006580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006581 }
6582 }
6583
6584 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_gt_8_subtile) {
6585 TEST_REQUIRES_ARM_NEON;
6586 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006587 for (uint32_t n = 1; n <= 8; n++) {
6588 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006589 GemmMicrokernelTester()
6590 .mr(1)
6591 .nr(8)
6592 .kr(2)
6593 .sr(4)
6594 .m(m)
6595 .n(n)
6596 .k(k)
6597 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006599 }
6600 }
6601 }
6602 }
6603
6604 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8) {
6605 TEST_REQUIRES_ARM_NEON;
6606 for (size_t k = 16; k <= 80; k += 8) {
6607 GemmMicrokernelTester()
6608 .mr(1)
6609 .nr(8)
6610 .kr(2)
6611 .sr(4)
6612 .m(1)
6613 .n(8)
6614 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006616 }
6617 }
6618
6619 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8_strided_a) {
6620 TEST_REQUIRES_ARM_NEON;
6621 for (size_t k = 16; k <= 80; k += 8) {
6622 GemmMicrokernelTester()
6623 .mr(1)
6624 .nr(8)
6625 .kr(2)
6626 .sr(4)
6627 .m(1)
6628 .n(8)
6629 .k(k)
6630 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08006631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006632 }
6633 }
6634
6635 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, k_div_8_subtile) {
6636 TEST_REQUIRES_ARM_NEON;
6637 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006638 for (uint32_t n = 1; n <= 8; n++) {
6639 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006640 GemmMicrokernelTester()
6641 .mr(1)
6642 .nr(8)
6643 .kr(2)
6644 .sr(4)
6645 .m(m)
6646 .n(n)
6647 .k(k)
6648 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006650 }
6651 }
6652 }
6653 }
6654
6655 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8) {
6656 TEST_REQUIRES_ARM_NEON;
6657 for (uint32_t n = 9; n < 16; n++) {
6658 for (size_t k = 1; k <= 40; k += 9) {
6659 GemmMicrokernelTester()
6660 .mr(1)
6661 .nr(8)
6662 .kr(2)
6663 .sr(4)
6664 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006665 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006666 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006668 }
6669 }
6670 }
6671
6672 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_strided_cn) {
6673 TEST_REQUIRES_ARM_NEON;
6674 for (uint32_t n = 9; n < 16; n++) {
6675 for (size_t k = 1; k <= 40; k += 9) {
6676 GemmMicrokernelTester()
6677 .mr(1)
6678 .nr(8)
6679 .kr(2)
6680 .sr(4)
6681 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006682 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006683 .k(k)
6684 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006686 }
6687 }
6688 }
6689
6690 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_strided_a) {
6691 TEST_REQUIRES_ARM_NEON;
6692 for (uint32_t n = 9; n < 16; n++) {
6693 for (size_t k = 1; k <= 40; k += 9) {
6694 GemmMicrokernelTester()
6695 .mr(1)
6696 .nr(8)
6697 .kr(2)
6698 .sr(4)
6699 .m(1)
6700 .n(n)
6701 .k(k)
6702 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006704 }
6705 }
6706 }
6707
6708 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_gt_8_subtile) {
6709 TEST_REQUIRES_ARM_NEON;
6710 for (uint32_t n = 9; n < 16; n++) {
6711 for (size_t k = 1; k <= 40; k += 9) {
6712 for (uint32_t m = 1; m <= 1; m++) {
6713 GemmMicrokernelTester()
6714 .mr(1)
6715 .nr(8)
6716 .kr(2)
6717 .sr(4)
6718 .m(m)
6719 .n(n)
6720 .k(k)
6721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006723 }
6724 }
6725 }
6726 }
6727
6728 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8) {
6729 TEST_REQUIRES_ARM_NEON;
6730 for (uint32_t n = 16; n <= 24; n += 8) {
6731 for (size_t k = 1; k <= 40; k += 9) {
6732 GemmMicrokernelTester()
6733 .mr(1)
6734 .nr(8)
6735 .kr(2)
6736 .sr(4)
6737 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08006738 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006739 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006740 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006741 }
6742 }
6743 }
6744
6745 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_strided_cn) {
6746 TEST_REQUIRES_ARM_NEON;
6747 for (uint32_t n = 16; n <= 24; n += 8) {
6748 for (size_t k = 1; k <= 40; k += 9) {
6749 GemmMicrokernelTester()
6750 .mr(1)
6751 .nr(8)
6752 .kr(2)
6753 .sr(4)
6754 .m(1)
6755 .n(n)
6756 .k(k)
6757 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006758 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006759 }
6760 }
6761 }
6762
6763 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_strided_a) {
6764 TEST_REQUIRES_ARM_NEON;
6765 for (uint32_t n = 16; n <= 24; n += 8) {
6766 for (size_t k = 1; k <= 40; k += 9) {
6767 GemmMicrokernelTester()
6768 .mr(1)
6769 .nr(8)
6770 .kr(2)
6771 .sr(4)
6772 .m(1)
6773 .n(n)
6774 .k(k)
6775 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08006776 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006777 }
6778 }
6779 }
6780
6781 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, n_div_8_subtile) {
6782 TEST_REQUIRES_ARM_NEON;
6783 for (uint32_t n = 16; n <= 24; n += 8) {
6784 for (size_t k = 1; k <= 40; k += 9) {
6785 for (uint32_t m = 1; m <= 1; m++) {
6786 GemmMicrokernelTester()
6787 .mr(1)
6788 .nr(8)
6789 .kr(2)
6790 .sr(4)
6791 .m(m)
6792 .n(n)
6793 .k(k)
6794 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006795 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006796 }
6797 }
6798 }
6799 }
6800
6801 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cm_subtile) {
6802 TEST_REQUIRES_ARM_NEON;
6803 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006804 for (uint32_t n = 1; n <= 8; n++) {
6805 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006806 GemmMicrokernelTester()
6807 .mr(1)
6808 .nr(8)
6809 .kr(2)
6810 .sr(4)
6811 .m(m)
6812 .n(n)
6813 .k(k)
6814 .cm_stride(11)
6815 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006816 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006817 }
6818 }
6819 }
6820 }
6821
6822 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, qmin) {
6823 TEST_REQUIRES_ARM_NEON;
6824 GemmMicrokernelTester()
6825 .mr(1)
6826 .nr(8)
6827 .kr(2)
6828 .sr(4)
6829 .m(1)
6830 .n(8)
6831 .k(8)
6832 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006834 }
6835
6836 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, qmax) {
6837 TEST_REQUIRES_ARM_NEON;
6838 GemmMicrokernelTester()
6839 .mr(1)
6840 .nr(8)
6841 .kr(2)
6842 .sr(4)
6843 .m(1)
6844 .n(8)
6845 .k(8)
6846 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08006847 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006848 }
6849
6850 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C2S4__NEON_MULL, strided_cm) {
6851 TEST_REQUIRES_ARM_NEON;
6852 GemmMicrokernelTester()
6853 .mr(1)
6854 .nr(8)
6855 .kr(2)
6856 .sr(4)
6857 .m(1)
6858 .n(8)
6859 .k(8)
6860 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006861 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006862 }
6863#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
6864
6865
6866#if XNN_ARCH_ARM || XNN_ARCH_ARM64
6867 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8) {
6868 TEST_REQUIRES_ARM_NEON;
6869 GemmMicrokernelTester()
6870 .mr(4)
6871 .nr(8)
6872 .kr(2)
6873 .sr(4)
6874 .m(4)
6875 .n(8)
6876 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08006877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006878 }
6879
6880 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cn) {
6881 TEST_REQUIRES_ARM_NEON;
6882 GemmMicrokernelTester()
6883 .mr(4)
6884 .nr(8)
6885 .kr(2)
6886 .sr(4)
6887 .m(4)
6888 .n(8)
6889 .k(8)
6890 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006891 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006892 }
6893
6894 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_strided_a) {
6895 TEST_REQUIRES_ARM_NEON;
6896 GemmMicrokernelTester()
6897 .mr(4)
6898 .nr(8)
6899 .kr(2)
6900 .sr(4)
6901 .m(4)
6902 .n(8)
6903 .k(8)
6904 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006905 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006906 }
6907
6908 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile) {
6909 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08006910 for (uint32_t n = 1; n <= 8; n++) {
6911 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006912 GemmMicrokernelTester()
6913 .mr(4)
6914 .nr(8)
6915 .kr(2)
6916 .sr(4)
6917 .m(m)
6918 .n(n)
6919 .k(8)
6920 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006922 }
6923 }
6924 }
6925
6926 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile_m) {
6927 TEST_REQUIRES_ARM_NEON;
6928 for (uint32_t m = 1; m <= 4; m++) {
6929 GemmMicrokernelTester()
6930 .mr(4)
6931 .nr(8)
6932 .kr(2)
6933 .sr(4)
6934 .m(m)
6935 .n(8)
6936 .k(8)
6937 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006938 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006939 }
6940 }
6941
6942 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_eq_8_subtile_n) {
6943 TEST_REQUIRES_ARM_NEON;
6944 for (uint32_t n = 1; n <= 8; n++) {
6945 GemmMicrokernelTester()
6946 .mr(4)
6947 .nr(8)
6948 .kr(2)
6949 .sr(4)
6950 .m(4)
6951 .n(n)
6952 .k(8)
6953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08006954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006955 }
6956 }
6957
6958 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8) {
6959 TEST_REQUIRES_ARM_NEON;
6960 for (size_t k = 1; k < 8; k++) {
6961 GemmMicrokernelTester()
6962 .mr(4)
6963 .nr(8)
6964 .kr(2)
6965 .sr(4)
6966 .m(4)
6967 .n(8)
6968 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08006969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006970 }
6971 }
6972
6973 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8_strided_a) {
6974 TEST_REQUIRES_ARM_NEON;
6975 for (size_t k = 1; k < 8; k++) {
6976 GemmMicrokernelTester()
6977 .mr(4)
6978 .nr(8)
6979 .kr(2)
6980 .sr(4)
6981 .m(4)
6982 .n(8)
6983 .k(k)
6984 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08006985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006986 }
6987 }
6988
6989 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_lt_8_subtile) {
6990 TEST_REQUIRES_ARM_NEON;
6991 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08006992 for (uint32_t n = 1; n <= 8; n++) {
6993 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08006994 GemmMicrokernelTester()
6995 .mr(4)
6996 .nr(8)
6997 .kr(2)
6998 .sr(4)
6999 .m(m)
7000 .n(n)
7001 .k(k)
7002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007003 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007004 }
7005 }
7006 }
7007 }
7008
7009 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8) {
7010 TEST_REQUIRES_ARM_NEON;
7011 for (size_t k = 9; k < 16; k++) {
7012 GemmMicrokernelTester()
7013 .mr(4)
7014 .nr(8)
7015 .kr(2)
7016 .sr(4)
7017 .m(4)
7018 .n(8)
7019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007021 }
7022 }
7023
7024 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8_strided_a) {
7025 TEST_REQUIRES_ARM_NEON;
7026 for (size_t k = 9; k < 16; k++) {
7027 GemmMicrokernelTester()
7028 .mr(4)
7029 .nr(8)
7030 .kr(2)
7031 .sr(4)
7032 .m(4)
7033 .n(8)
7034 .k(k)
7035 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007037 }
7038 }
7039
7040 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_gt_8_subtile) {
7041 TEST_REQUIRES_ARM_NEON;
7042 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007043 for (uint32_t n = 1; n <= 8; n++) {
7044 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007045 GemmMicrokernelTester()
7046 .mr(4)
7047 .nr(8)
7048 .kr(2)
7049 .sr(4)
7050 .m(m)
7051 .n(n)
7052 .k(k)
7053 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007055 }
7056 }
7057 }
7058 }
7059
7060 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8) {
7061 TEST_REQUIRES_ARM_NEON;
7062 for (size_t k = 16; k <= 80; k += 8) {
7063 GemmMicrokernelTester()
7064 .mr(4)
7065 .nr(8)
7066 .kr(2)
7067 .sr(4)
7068 .m(4)
7069 .n(8)
7070 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007072 }
7073 }
7074
7075 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8_strided_a) {
7076 TEST_REQUIRES_ARM_NEON;
7077 for (size_t k = 16; k <= 80; k += 8) {
7078 GemmMicrokernelTester()
7079 .mr(4)
7080 .nr(8)
7081 .kr(2)
7082 .sr(4)
7083 .m(4)
7084 .n(8)
7085 .k(k)
7086 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007088 }
7089 }
7090
7091 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, k_div_8_subtile) {
7092 TEST_REQUIRES_ARM_NEON;
7093 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007094 for (uint32_t n = 1; n <= 8; n++) {
7095 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007096 GemmMicrokernelTester()
7097 .mr(4)
7098 .nr(8)
7099 .kr(2)
7100 .sr(4)
7101 .m(m)
7102 .n(n)
7103 .k(k)
7104 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007106 }
7107 }
7108 }
7109 }
7110
7111 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8) {
7112 TEST_REQUIRES_ARM_NEON;
7113 for (uint32_t n = 9; n < 16; n++) {
7114 for (size_t k = 1; k <= 40; k += 9) {
7115 GemmMicrokernelTester()
7116 .mr(4)
7117 .nr(8)
7118 .kr(2)
7119 .sr(4)
7120 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007121 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007122 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007124 }
7125 }
7126 }
7127
7128 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_strided_cn) {
7129 TEST_REQUIRES_ARM_NEON;
7130 for (uint32_t n = 9; n < 16; n++) {
7131 for (size_t k = 1; k <= 40; k += 9) {
7132 GemmMicrokernelTester()
7133 .mr(4)
7134 .nr(8)
7135 .kr(2)
7136 .sr(4)
7137 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007138 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007139 .k(k)
7140 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007142 }
7143 }
7144 }
7145
7146 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_strided_a) {
7147 TEST_REQUIRES_ARM_NEON;
7148 for (uint32_t n = 9; n < 16; n++) {
7149 for (size_t k = 1; k <= 40; k += 9) {
7150 GemmMicrokernelTester()
7151 .mr(4)
7152 .nr(8)
7153 .kr(2)
7154 .sr(4)
7155 .m(4)
7156 .n(n)
7157 .k(k)
7158 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007160 }
7161 }
7162 }
7163
7164 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_gt_8_subtile) {
7165 TEST_REQUIRES_ARM_NEON;
7166 for (uint32_t n = 9; n < 16; n++) {
7167 for (size_t k = 1; k <= 40; k += 9) {
7168 for (uint32_t m = 1; m <= 4; m++) {
7169 GemmMicrokernelTester()
7170 .mr(4)
7171 .nr(8)
7172 .kr(2)
7173 .sr(4)
7174 .m(m)
7175 .n(n)
7176 .k(k)
7177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007179 }
7180 }
7181 }
7182 }
7183
7184 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8) {
7185 TEST_REQUIRES_ARM_NEON;
7186 for (uint32_t n = 16; n <= 24; n += 8) {
7187 for (size_t k = 1; k <= 40; k += 9) {
7188 GemmMicrokernelTester()
7189 .mr(4)
7190 .nr(8)
7191 .kr(2)
7192 .sr(4)
7193 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007194 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007195 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007196 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007197 }
7198 }
7199 }
7200
7201 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_strided_cn) {
7202 TEST_REQUIRES_ARM_NEON;
7203 for (uint32_t n = 16; n <= 24; n += 8) {
7204 for (size_t k = 1; k <= 40; k += 9) {
7205 GemmMicrokernelTester()
7206 .mr(4)
7207 .nr(8)
7208 .kr(2)
7209 .sr(4)
7210 .m(4)
7211 .n(n)
7212 .k(k)
7213 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007214 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007215 }
7216 }
7217 }
7218
7219 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_strided_a) {
7220 TEST_REQUIRES_ARM_NEON;
7221 for (uint32_t n = 16; n <= 24; n += 8) {
7222 for (size_t k = 1; k <= 40; k += 9) {
7223 GemmMicrokernelTester()
7224 .mr(4)
7225 .nr(8)
7226 .kr(2)
7227 .sr(4)
7228 .m(4)
7229 .n(n)
7230 .k(k)
7231 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007232 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007233 }
7234 }
7235 }
7236
7237 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, n_div_8_subtile) {
7238 TEST_REQUIRES_ARM_NEON;
7239 for (uint32_t n = 16; n <= 24; n += 8) {
7240 for (size_t k = 1; k <= 40; k += 9) {
7241 for (uint32_t m = 1; m <= 4; m++) {
7242 GemmMicrokernelTester()
7243 .mr(4)
7244 .nr(8)
7245 .kr(2)
7246 .sr(4)
7247 .m(m)
7248 .n(n)
7249 .k(k)
7250 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007251 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007252 }
7253 }
7254 }
7255 }
7256
7257 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cm_subtile) {
7258 TEST_REQUIRES_ARM_NEON;
7259 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007260 for (uint32_t n = 1; n <= 8; n++) {
7261 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007262 GemmMicrokernelTester()
7263 .mr(4)
7264 .nr(8)
7265 .kr(2)
7266 .sr(4)
7267 .m(m)
7268 .n(n)
7269 .k(k)
7270 .cm_stride(11)
7271 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007272 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007273 }
7274 }
7275 }
7276 }
7277
7278 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, qmin) {
7279 TEST_REQUIRES_ARM_NEON;
7280 GemmMicrokernelTester()
7281 .mr(4)
7282 .nr(8)
7283 .kr(2)
7284 .sr(4)
7285 .m(4)
7286 .n(8)
7287 .k(8)
7288 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007290 }
7291
7292 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, qmax) {
7293 TEST_REQUIRES_ARM_NEON;
7294 GemmMicrokernelTester()
7295 .mr(4)
7296 .nr(8)
7297 .kr(2)
7298 .sr(4)
7299 .m(4)
7300 .n(8)
7301 .k(8)
7302 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007303 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007304 }
7305
7306 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2S4__NEON_MULL, strided_cm) {
7307 TEST_REQUIRES_ARM_NEON;
7308 GemmMicrokernelTester()
7309 .mr(4)
7310 .nr(8)
7311 .kr(2)
7312 .sr(4)
7313 .m(4)
7314 .n(8)
7315 .k(8)
7316 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007317 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007318 }
7319#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7320
7321
7322#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7323 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8) {
7324 TEST_REQUIRES_ARM_NEON;
7325 GemmMicrokernelTester()
7326 .mr(3)
7327 .nr(16)
7328 .kr(2)
7329 .sr(4)
7330 .m(3)
7331 .n(16)
7332 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007334 }
7335
7336 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cn) {
7337 TEST_REQUIRES_ARM_NEON;
7338 GemmMicrokernelTester()
7339 .mr(3)
7340 .nr(16)
7341 .kr(2)
7342 .sr(4)
7343 .m(3)
7344 .n(16)
7345 .k(8)
7346 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007347 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007348 }
7349
7350 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_strided_a) {
7351 TEST_REQUIRES_ARM_NEON;
7352 GemmMicrokernelTester()
7353 .mr(3)
7354 .nr(16)
7355 .kr(2)
7356 .sr(4)
7357 .m(3)
7358 .n(16)
7359 .k(8)
7360 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007361 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007362 }
7363
7364 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile) {
7365 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007366 for (uint32_t n = 1; n <= 16; n++) {
7367 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007368 GemmMicrokernelTester()
7369 .mr(3)
7370 .nr(16)
7371 .kr(2)
7372 .sr(4)
7373 .m(m)
7374 .n(n)
7375 .k(8)
7376 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007378 }
7379 }
7380 }
7381
7382 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile_m) {
7383 TEST_REQUIRES_ARM_NEON;
7384 for (uint32_t m = 1; m <= 3; m++) {
7385 GemmMicrokernelTester()
7386 .mr(3)
7387 .nr(16)
7388 .kr(2)
7389 .sr(4)
7390 .m(m)
7391 .n(16)
7392 .k(8)
7393 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007394 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007395 }
7396 }
7397
7398 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_eq_8_subtile_n) {
7399 TEST_REQUIRES_ARM_NEON;
7400 for (uint32_t n = 1; n <= 16; n++) {
7401 GemmMicrokernelTester()
7402 .mr(3)
7403 .nr(16)
7404 .kr(2)
7405 .sr(4)
7406 .m(3)
7407 .n(n)
7408 .k(8)
7409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007411 }
7412 }
7413
7414 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8) {
7415 TEST_REQUIRES_ARM_NEON;
7416 for (size_t k = 1; k < 8; k++) {
7417 GemmMicrokernelTester()
7418 .mr(3)
7419 .nr(16)
7420 .kr(2)
7421 .sr(4)
7422 .m(3)
7423 .n(16)
7424 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007426 }
7427 }
7428
7429 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8_strided_a) {
7430 TEST_REQUIRES_ARM_NEON;
7431 for (size_t k = 1; k < 8; k++) {
7432 GemmMicrokernelTester()
7433 .mr(3)
7434 .nr(16)
7435 .kr(2)
7436 .sr(4)
7437 .m(3)
7438 .n(16)
7439 .k(k)
7440 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007441 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007442 }
7443 }
7444
7445 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_lt_8_subtile) {
7446 TEST_REQUIRES_ARM_NEON;
7447 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007448 for (uint32_t n = 1; n <= 16; n++) {
7449 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007450 GemmMicrokernelTester()
7451 .mr(3)
7452 .nr(16)
7453 .kr(2)
7454 .sr(4)
7455 .m(m)
7456 .n(n)
7457 .k(k)
7458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007459 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007460 }
7461 }
7462 }
7463 }
7464
7465 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8) {
7466 TEST_REQUIRES_ARM_NEON;
7467 for (size_t k = 9; k < 16; k++) {
7468 GemmMicrokernelTester()
7469 .mr(3)
7470 .nr(16)
7471 .kr(2)
7472 .sr(4)
7473 .m(3)
7474 .n(16)
7475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007477 }
7478 }
7479
7480 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8_strided_a) {
7481 TEST_REQUIRES_ARM_NEON;
7482 for (size_t k = 9; k < 16; k++) {
7483 GemmMicrokernelTester()
7484 .mr(3)
7485 .nr(16)
7486 .kr(2)
7487 .sr(4)
7488 .m(3)
7489 .n(16)
7490 .k(k)
7491 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007493 }
7494 }
7495
7496 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_gt_8_subtile) {
7497 TEST_REQUIRES_ARM_NEON;
7498 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007499 for (uint32_t n = 1; n <= 16; n++) {
7500 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007501 GemmMicrokernelTester()
7502 .mr(3)
7503 .nr(16)
7504 .kr(2)
7505 .sr(4)
7506 .m(m)
7507 .n(n)
7508 .k(k)
7509 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007511 }
7512 }
7513 }
7514 }
7515
7516 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8) {
7517 TEST_REQUIRES_ARM_NEON;
7518 for (size_t k = 16; k <= 80; k += 8) {
7519 GemmMicrokernelTester()
7520 .mr(3)
7521 .nr(16)
7522 .kr(2)
7523 .sr(4)
7524 .m(3)
7525 .n(16)
7526 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007528 }
7529 }
7530
7531 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8_strided_a) {
7532 TEST_REQUIRES_ARM_NEON;
7533 for (size_t k = 16; k <= 80; k += 8) {
7534 GemmMicrokernelTester()
7535 .mr(3)
7536 .nr(16)
7537 .kr(2)
7538 .sr(4)
7539 .m(3)
7540 .n(16)
7541 .k(k)
7542 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007544 }
7545 }
7546
7547 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, k_div_8_subtile) {
7548 TEST_REQUIRES_ARM_NEON;
7549 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007550 for (uint32_t n = 1; n <= 16; n++) {
7551 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007552 GemmMicrokernelTester()
7553 .mr(3)
7554 .nr(16)
7555 .kr(2)
7556 .sr(4)
7557 .m(m)
7558 .n(n)
7559 .k(k)
7560 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007561 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007562 }
7563 }
7564 }
7565 }
7566
7567 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16) {
7568 TEST_REQUIRES_ARM_NEON;
7569 for (uint32_t n = 17; n < 32; n++) {
7570 for (size_t k = 1; k <= 40; k += 9) {
7571 GemmMicrokernelTester()
7572 .mr(3)
7573 .nr(16)
7574 .kr(2)
7575 .sr(4)
7576 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007577 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007578 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007579 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007580 }
7581 }
7582 }
7583
7584 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_strided_cn) {
7585 TEST_REQUIRES_ARM_NEON;
7586 for (uint32_t n = 17; n < 32; n++) {
7587 for (size_t k = 1; k <= 40; k += 9) {
7588 GemmMicrokernelTester()
7589 .mr(3)
7590 .nr(16)
7591 .kr(2)
7592 .sr(4)
7593 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007594 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007595 .k(k)
7596 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007598 }
7599 }
7600 }
7601
7602 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_strided_a) {
7603 TEST_REQUIRES_ARM_NEON;
7604 for (uint32_t n = 17; n < 32; n++) {
7605 for (size_t k = 1; k <= 40; k += 9) {
7606 GemmMicrokernelTester()
7607 .mr(3)
7608 .nr(16)
7609 .kr(2)
7610 .sr(4)
7611 .m(3)
7612 .n(n)
7613 .k(k)
7614 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007616 }
7617 }
7618 }
7619
7620 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_gt_16_subtile) {
7621 TEST_REQUIRES_ARM_NEON;
7622 for (uint32_t n = 17; n < 32; n++) {
7623 for (size_t k = 1; k <= 40; k += 9) {
7624 for (uint32_t m = 1; m <= 3; m++) {
7625 GemmMicrokernelTester()
7626 .mr(3)
7627 .nr(16)
7628 .kr(2)
7629 .sr(4)
7630 .m(m)
7631 .n(n)
7632 .k(k)
7633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007635 }
7636 }
7637 }
7638 }
7639
7640 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16) {
7641 TEST_REQUIRES_ARM_NEON;
7642 for (uint32_t n = 32; n <= 48; n += 16) {
7643 for (size_t k = 1; k <= 40; k += 9) {
7644 GemmMicrokernelTester()
7645 .mr(3)
7646 .nr(16)
7647 .kr(2)
7648 .sr(4)
7649 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08007650 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007651 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007652 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007653 }
7654 }
7655 }
7656
7657 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_strided_cn) {
7658 TEST_REQUIRES_ARM_NEON;
7659 for (uint32_t n = 32; n <= 48; n += 16) {
7660 for (size_t k = 1; k <= 40; k += 9) {
7661 GemmMicrokernelTester()
7662 .mr(3)
7663 .nr(16)
7664 .kr(2)
7665 .sr(4)
7666 .m(3)
7667 .n(n)
7668 .k(k)
7669 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007670 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007671 }
7672 }
7673 }
7674
7675 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_strided_a) {
7676 TEST_REQUIRES_ARM_NEON;
7677 for (uint32_t n = 32; n <= 48; n += 16) {
7678 for (size_t k = 1; k <= 40; k += 9) {
7679 GemmMicrokernelTester()
7680 .mr(3)
7681 .nr(16)
7682 .kr(2)
7683 .sr(4)
7684 .m(3)
7685 .n(n)
7686 .k(k)
7687 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08007688 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007689 }
7690 }
7691 }
7692
7693 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, n_div_16_subtile) {
7694 TEST_REQUIRES_ARM_NEON;
7695 for (uint32_t n = 32; n <= 48; n += 16) {
7696 for (size_t k = 1; k <= 40; k += 9) {
7697 for (uint32_t m = 1; m <= 3; m++) {
7698 GemmMicrokernelTester()
7699 .mr(3)
7700 .nr(16)
7701 .kr(2)
7702 .sr(4)
7703 .m(m)
7704 .n(n)
7705 .k(k)
7706 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007707 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007708 }
7709 }
7710 }
7711 }
7712
7713 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cm_subtile) {
7714 TEST_REQUIRES_ARM_NEON;
7715 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007716 for (uint32_t n = 1; n <= 16; n++) {
7717 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007718 GemmMicrokernelTester()
7719 .mr(3)
7720 .nr(16)
7721 .kr(2)
7722 .sr(4)
7723 .m(m)
7724 .n(n)
7725 .k(k)
7726 .cm_stride(19)
7727 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007728 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007729 }
7730 }
7731 }
7732 }
7733
7734 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, qmin) {
7735 TEST_REQUIRES_ARM_NEON;
7736 GemmMicrokernelTester()
7737 .mr(3)
7738 .nr(16)
7739 .kr(2)
7740 .sr(4)
7741 .m(3)
7742 .n(16)
7743 .k(8)
7744 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007746 }
7747
7748 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, qmax) {
7749 TEST_REQUIRES_ARM_NEON;
7750 GemmMicrokernelTester()
7751 .mr(3)
7752 .nr(16)
7753 .kr(2)
7754 .sr(4)
7755 .m(3)
7756 .n(16)
7757 .k(8)
7758 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08007759 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007760 }
7761
7762 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2S4__NEON_MULL, strided_cm) {
7763 TEST_REQUIRES_ARM_NEON;
7764 GemmMicrokernelTester()
7765 .mr(3)
7766 .nr(16)
7767 .kr(2)
7768 .sr(4)
7769 .m(3)
7770 .n(16)
7771 .k(8)
7772 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007773 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2s4__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007774 }
7775#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
7776
7777
7778#if XNN_ARCH_ARM || XNN_ARCH_ARM64
7779 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8) {
7780 TEST_REQUIRES_ARM_NEON;
7781 GemmMicrokernelTester()
7782 .mr(1)
7783 .nr(16)
7784 .kr(4)
7785 .sr(1)
7786 .m(1)
7787 .n(16)
7788 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08007789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007790 }
7791
7792 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cn) {
7793 TEST_REQUIRES_ARM_NEON;
7794 GemmMicrokernelTester()
7795 .mr(1)
7796 .nr(16)
7797 .kr(4)
7798 .sr(1)
7799 .m(1)
7800 .n(16)
7801 .k(8)
7802 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007803 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007804 }
7805
7806 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_strided_a) {
7807 TEST_REQUIRES_ARM_NEON;
7808 GemmMicrokernelTester()
7809 .mr(1)
7810 .nr(16)
7811 .kr(4)
7812 .sr(1)
7813 .m(1)
7814 .n(16)
7815 .k(8)
7816 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007817 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007818 }
7819
7820 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile) {
7821 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08007822 for (uint32_t n = 1; n <= 16; n++) {
7823 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007824 GemmMicrokernelTester()
7825 .mr(1)
7826 .nr(16)
7827 .kr(4)
7828 .sr(1)
7829 .m(m)
7830 .n(n)
7831 .k(8)
7832 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007834 }
7835 }
7836 }
7837
7838 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_m) {
7839 TEST_REQUIRES_ARM_NEON;
7840 for (uint32_t m = 1; m <= 1; m++) {
7841 GemmMicrokernelTester()
7842 .mr(1)
7843 .nr(16)
7844 .kr(4)
7845 .sr(1)
7846 .m(m)
7847 .n(16)
7848 .k(8)
7849 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007850 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007851 }
7852 }
7853
7854 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_eq_8_subtile_n) {
7855 TEST_REQUIRES_ARM_NEON;
7856 for (uint32_t n = 1; n <= 16; n++) {
7857 GemmMicrokernelTester()
7858 .mr(1)
7859 .nr(16)
7860 .kr(4)
7861 .sr(1)
7862 .m(1)
7863 .n(n)
7864 .k(8)
7865 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007867 }
7868 }
7869
7870 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8) {
7871 TEST_REQUIRES_ARM_NEON;
7872 for (size_t k = 1; k < 8; k++) {
7873 GemmMicrokernelTester()
7874 .mr(1)
7875 .nr(16)
7876 .kr(4)
7877 .sr(1)
7878 .m(1)
7879 .n(16)
7880 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007881 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007882 }
7883 }
7884
7885 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8_strided_a) {
7886 TEST_REQUIRES_ARM_NEON;
7887 for (size_t k = 1; k < 8; k++) {
7888 GemmMicrokernelTester()
7889 .mr(1)
7890 .nr(16)
7891 .kr(4)
7892 .sr(1)
7893 .m(1)
7894 .n(16)
7895 .k(k)
7896 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08007897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007898 }
7899 }
7900
7901 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_lt_8_subtile) {
7902 TEST_REQUIRES_ARM_NEON;
7903 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007904 for (uint32_t n = 1; n <= 16; n++) {
7905 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007906 GemmMicrokernelTester()
7907 .mr(1)
7908 .nr(16)
7909 .kr(4)
7910 .sr(1)
7911 .m(m)
7912 .n(n)
7913 .k(k)
7914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007916 }
7917 }
7918 }
7919 }
7920
7921 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8) {
7922 TEST_REQUIRES_ARM_NEON;
7923 for (size_t k = 9; k < 16; k++) {
7924 GemmMicrokernelTester()
7925 .mr(1)
7926 .nr(16)
7927 .kr(4)
7928 .sr(1)
7929 .m(1)
7930 .n(16)
7931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007933 }
7934 }
7935
7936 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8_strided_a) {
7937 TEST_REQUIRES_ARM_NEON;
7938 for (size_t k = 9; k < 16; k++) {
7939 GemmMicrokernelTester()
7940 .mr(1)
7941 .nr(16)
7942 .kr(4)
7943 .sr(1)
7944 .m(1)
7945 .n(16)
7946 .k(k)
7947 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08007948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007949 }
7950 }
7951
7952 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_gt_8_subtile) {
7953 TEST_REQUIRES_ARM_NEON;
7954 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08007955 for (uint32_t n = 1; n <= 16; n++) {
7956 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007957 GemmMicrokernelTester()
7958 .mr(1)
7959 .nr(16)
7960 .kr(4)
7961 .sr(1)
7962 .m(m)
7963 .n(n)
7964 .k(k)
7965 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08007966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007967 }
7968 }
7969 }
7970 }
7971
7972 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8) {
7973 TEST_REQUIRES_ARM_NEON;
7974 for (size_t k = 16; k <= 80; k += 8) {
7975 GemmMicrokernelTester()
7976 .mr(1)
7977 .nr(16)
7978 .kr(4)
7979 .sr(1)
7980 .m(1)
7981 .n(16)
7982 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08007983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08007984 }
7985 }
7986
7987 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8_strided_a) {
7988 TEST_REQUIRES_ARM_NEON;
7989 for (size_t k = 16; k <= 80; k += 8) {
7990 GemmMicrokernelTester()
7991 .mr(1)
7992 .nr(16)
7993 .kr(4)
7994 .sr(1)
7995 .m(1)
7996 .n(16)
7997 .k(k)
7998 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08007999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008000 }
8001 }
8002
8003 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, k_div_8_subtile) {
8004 TEST_REQUIRES_ARM_NEON;
8005 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008006 for (uint32_t n = 1; n <= 16; n++) {
8007 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008008 GemmMicrokernelTester()
8009 .mr(1)
8010 .nr(16)
8011 .kr(4)
8012 .sr(1)
8013 .m(m)
8014 .n(n)
8015 .k(k)
8016 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008017 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008018 }
8019 }
8020 }
8021 }
8022
8023 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16) {
8024 TEST_REQUIRES_ARM_NEON;
8025 for (uint32_t n = 17; n < 32; n++) {
8026 for (size_t k = 1; k <= 40; k += 9) {
8027 GemmMicrokernelTester()
8028 .mr(1)
8029 .nr(16)
8030 .kr(4)
8031 .sr(1)
8032 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008033 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008034 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008035 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008036 }
8037 }
8038 }
8039
8040 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_strided_cn) {
8041 TEST_REQUIRES_ARM_NEON;
8042 for (uint32_t n = 17; n < 32; n++) {
8043 for (size_t k = 1; k <= 40; k += 9) {
8044 GemmMicrokernelTester()
8045 .mr(1)
8046 .nr(16)
8047 .kr(4)
8048 .sr(1)
8049 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008050 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008051 .k(k)
8052 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008054 }
8055 }
8056 }
8057
8058 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_strided_a) {
8059 TEST_REQUIRES_ARM_NEON;
8060 for (uint32_t n = 17; n < 32; n++) {
8061 for (size_t k = 1; k <= 40; k += 9) {
8062 GemmMicrokernelTester()
8063 .mr(1)
8064 .nr(16)
8065 .kr(4)
8066 .sr(1)
8067 .m(1)
8068 .n(n)
8069 .k(k)
8070 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008072 }
8073 }
8074 }
8075
8076 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_gt_16_subtile) {
8077 TEST_REQUIRES_ARM_NEON;
8078 for (uint32_t n = 17; n < 32; n++) {
8079 for (size_t k = 1; k <= 40; k += 9) {
8080 for (uint32_t m = 1; m <= 1; m++) {
8081 GemmMicrokernelTester()
8082 .mr(1)
8083 .nr(16)
8084 .kr(4)
8085 .sr(1)
8086 .m(m)
8087 .n(n)
8088 .k(k)
8089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008091 }
8092 }
8093 }
8094 }
8095
8096 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16) {
8097 TEST_REQUIRES_ARM_NEON;
8098 for (uint32_t n = 32; n <= 48; n += 16) {
8099 for (size_t k = 1; k <= 40; k += 9) {
8100 GemmMicrokernelTester()
8101 .mr(1)
8102 .nr(16)
8103 .kr(4)
8104 .sr(1)
8105 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008106 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008107 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008108 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008109 }
8110 }
8111 }
8112
8113 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_strided_cn) {
8114 TEST_REQUIRES_ARM_NEON;
8115 for (uint32_t n = 32; n <= 48; n += 16) {
8116 for (size_t k = 1; k <= 40; k += 9) {
8117 GemmMicrokernelTester()
8118 .mr(1)
8119 .nr(16)
8120 .kr(4)
8121 .sr(1)
8122 .m(1)
8123 .n(n)
8124 .k(k)
8125 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008126 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008127 }
8128 }
8129 }
8130
8131 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_strided_a) {
8132 TEST_REQUIRES_ARM_NEON;
8133 for (uint32_t n = 32; n <= 48; n += 16) {
8134 for (size_t k = 1; k <= 40; k += 9) {
8135 GemmMicrokernelTester()
8136 .mr(1)
8137 .nr(16)
8138 .kr(4)
8139 .sr(1)
8140 .m(1)
8141 .n(n)
8142 .k(k)
8143 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008144 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008145 }
8146 }
8147 }
8148
8149 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, n_div_16_subtile) {
8150 TEST_REQUIRES_ARM_NEON;
8151 for (uint32_t n = 32; n <= 48; n += 16) {
8152 for (size_t k = 1; k <= 40; k += 9) {
8153 for (uint32_t m = 1; m <= 1; m++) {
8154 GemmMicrokernelTester()
8155 .mr(1)
8156 .nr(16)
8157 .kr(4)
8158 .sr(1)
8159 .m(m)
8160 .n(n)
8161 .k(k)
8162 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008163 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008164 }
8165 }
8166 }
8167 }
8168
8169 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm_subtile) {
8170 TEST_REQUIRES_ARM_NEON;
8171 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008172 for (uint32_t n = 1; n <= 16; n++) {
8173 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008174 GemmMicrokernelTester()
8175 .mr(1)
8176 .nr(16)
8177 .kr(4)
8178 .sr(1)
8179 .m(m)
8180 .n(n)
8181 .k(k)
8182 .cm_stride(19)
8183 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008184 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008185 }
8186 }
8187 }
8188 }
8189
8190 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmin) {
8191 TEST_REQUIRES_ARM_NEON;
8192 GemmMicrokernelTester()
8193 .mr(1)
8194 .nr(16)
8195 .kr(4)
8196 .sr(1)
8197 .m(1)
8198 .n(16)
8199 .k(8)
8200 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008202 }
8203
8204 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, qmax) {
8205 TEST_REQUIRES_ARM_NEON;
8206 GemmMicrokernelTester()
8207 .mr(1)
8208 .nr(16)
8209 .kr(4)
8210 .sr(1)
8211 .m(1)
8212 .n(16)
8213 .k(8)
8214 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008215 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008216 }
8217
8218 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C4__NEON_MULL_DUP, strided_cm) {
8219 TEST_REQUIRES_ARM_NEON;
8220 GemmMicrokernelTester()
8221 .mr(1)
8222 .nr(16)
8223 .kr(4)
8224 .sr(1)
8225 .m(1)
8226 .n(16)
8227 .k(8)
8228 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008229 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c4__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008230 }
8231#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8232
8233
8234#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8235 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16) {
8236 TEST_REQUIRES_ARM_NEON;
8237 GemmMicrokernelTester()
8238 .mr(3)
8239 .nr(8)
8240 .kr(4)
8241 .sr(1)
8242 .m(3)
8243 .n(8)
8244 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -08008245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008246 }
8247
8248 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, strided_cn) {
8249 TEST_REQUIRES_ARM_NEON;
8250 GemmMicrokernelTester()
8251 .mr(3)
8252 .nr(8)
8253 .kr(4)
8254 .sr(1)
8255 .m(3)
8256 .n(8)
8257 .k(16)
8258 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008259 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008260 }
8261
8262 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_strided_a) {
8263 TEST_REQUIRES_ARM_NEON;
8264 GemmMicrokernelTester()
8265 .mr(3)
8266 .nr(8)
8267 .kr(4)
8268 .sr(1)
8269 .m(3)
8270 .n(8)
8271 .k(16)
8272 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008273 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008274 }
8275
8276 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_subtile) {
8277 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008278 for (uint32_t n = 1; n <= 8; n++) {
8279 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008280 GemmMicrokernelTester()
8281 .mr(3)
8282 .nr(8)
8283 .kr(4)
8284 .sr(1)
8285 .m(m)
8286 .n(n)
8287 .k(16)
8288 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008290 }
8291 }
8292 }
8293
8294 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_subtile_m) {
8295 TEST_REQUIRES_ARM_NEON;
8296 for (uint32_t m = 1; m <= 3; m++) {
8297 GemmMicrokernelTester()
8298 .mr(3)
8299 .nr(8)
8300 .kr(4)
8301 .sr(1)
8302 .m(m)
8303 .n(8)
8304 .k(16)
8305 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008306 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008307 }
8308 }
8309
8310 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_eq_16_subtile_n) {
8311 TEST_REQUIRES_ARM_NEON;
8312 for (uint32_t n = 1; n <= 8; n++) {
8313 GemmMicrokernelTester()
8314 .mr(3)
8315 .nr(8)
8316 .kr(4)
8317 .sr(1)
8318 .m(3)
8319 .n(n)
8320 .k(16)
8321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008323 }
8324 }
8325
8326 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_lt_16) {
8327 TEST_REQUIRES_ARM_NEON;
8328 for (size_t k = 1; k < 16; k++) {
8329 GemmMicrokernelTester()
8330 .mr(3)
8331 .nr(8)
8332 .kr(4)
8333 .sr(1)
8334 .m(3)
8335 .n(8)
8336 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008337 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008338 }
8339 }
8340
8341 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_lt_16_strided_a) {
8342 TEST_REQUIRES_ARM_NEON;
8343 for (size_t k = 1; k < 16; k++) {
8344 GemmMicrokernelTester()
8345 .mr(3)
8346 .nr(8)
8347 .kr(4)
8348 .sr(1)
8349 .m(3)
8350 .n(8)
8351 .k(k)
8352 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008354 }
8355 }
8356
8357 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_lt_16_subtile) {
8358 TEST_REQUIRES_ARM_NEON;
8359 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008360 for (uint32_t n = 1; n <= 8; n++) {
8361 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008362 GemmMicrokernelTester()
8363 .mr(3)
8364 .nr(8)
8365 .kr(4)
8366 .sr(1)
8367 .m(m)
8368 .n(n)
8369 .k(k)
8370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008372 }
8373 }
8374 }
8375 }
8376
8377 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_gt_16) {
8378 TEST_REQUIRES_ARM_NEON;
8379 for (size_t k = 17; k < 32; k++) {
8380 GemmMicrokernelTester()
8381 .mr(3)
8382 .nr(8)
8383 .kr(4)
8384 .sr(1)
8385 .m(3)
8386 .n(8)
8387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008389 }
8390 }
8391
8392 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_gt_16_strided_a) {
8393 TEST_REQUIRES_ARM_NEON;
8394 for (size_t k = 17; k < 32; k++) {
8395 GemmMicrokernelTester()
8396 .mr(3)
8397 .nr(8)
8398 .kr(4)
8399 .sr(1)
8400 .m(3)
8401 .n(8)
8402 .k(k)
8403 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -08008404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008405 }
8406 }
8407
8408 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_gt_16_subtile) {
8409 TEST_REQUIRES_ARM_NEON;
8410 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008411 for (uint32_t n = 1; n <= 8; n++) {
8412 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008413 GemmMicrokernelTester()
8414 .mr(3)
8415 .nr(8)
8416 .kr(4)
8417 .sr(1)
8418 .m(m)
8419 .n(n)
8420 .k(k)
8421 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008423 }
8424 }
8425 }
8426 }
8427
8428 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_div_16) {
8429 TEST_REQUIRES_ARM_NEON;
8430 for (size_t k = 32; k <= 160; k += 16) {
8431 GemmMicrokernelTester()
8432 .mr(3)
8433 .nr(8)
8434 .kr(4)
8435 .sr(1)
8436 .m(3)
8437 .n(8)
8438 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008440 }
8441 }
8442
8443 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_div_16_strided_a) {
8444 TEST_REQUIRES_ARM_NEON;
8445 for (size_t k = 32; k <= 160; k += 16) {
8446 GemmMicrokernelTester()
8447 .mr(3)
8448 .nr(8)
8449 .kr(4)
8450 .sr(1)
8451 .m(3)
8452 .n(8)
8453 .k(k)
8454 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -08008455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008456 }
8457 }
8458
8459 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, k_div_16_subtile) {
8460 TEST_REQUIRES_ARM_NEON;
8461 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008462 for (uint32_t n = 1; n <= 8; n++) {
8463 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008464 GemmMicrokernelTester()
8465 .mr(3)
8466 .nr(8)
8467 .kr(4)
8468 .sr(1)
8469 .m(m)
8470 .n(n)
8471 .k(k)
8472 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008474 }
8475 }
8476 }
8477 }
8478
8479 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8) {
8480 TEST_REQUIRES_ARM_NEON;
8481 for (uint32_t n = 9; n < 16; n++) {
8482 for (size_t k = 1; k <= 80; k += 17) {
8483 GemmMicrokernelTester()
8484 .mr(3)
8485 .nr(8)
8486 .kr(4)
8487 .sr(1)
8488 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008489 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008490 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008492 }
8493 }
8494 }
8495
8496 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8_strided_cn) {
8497 TEST_REQUIRES_ARM_NEON;
8498 for (uint32_t n = 9; n < 16; n++) {
8499 for (size_t k = 1; k <= 80; k += 17) {
8500 GemmMicrokernelTester()
8501 .mr(3)
8502 .nr(8)
8503 .kr(4)
8504 .sr(1)
8505 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008506 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008507 .k(k)
8508 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008510 }
8511 }
8512 }
8513
8514 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8_strided_a) {
8515 TEST_REQUIRES_ARM_NEON;
8516 for (uint32_t n = 9; n < 16; n++) {
8517 for (size_t k = 1; k <= 80; k += 17) {
8518 GemmMicrokernelTester()
8519 .mr(3)
8520 .nr(8)
8521 .kr(4)
8522 .sr(1)
8523 .m(3)
8524 .n(n)
8525 .k(k)
8526 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008528 }
8529 }
8530 }
8531
8532 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_gt_8_subtile) {
8533 TEST_REQUIRES_ARM_NEON;
8534 for (uint32_t n = 9; n < 16; n++) {
8535 for (size_t k = 1; k <= 80; k += 17) {
8536 for (uint32_t m = 1; m <= 3; m++) {
8537 GemmMicrokernelTester()
8538 .mr(3)
8539 .nr(8)
8540 .kr(4)
8541 .sr(1)
8542 .m(m)
8543 .n(n)
8544 .k(k)
8545 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008546 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008547 }
8548 }
8549 }
8550 }
8551
8552 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8) {
8553 TEST_REQUIRES_ARM_NEON;
8554 for (uint32_t n = 16; n <= 24; n += 8) {
8555 for (size_t k = 1; k <= 80; k += 17) {
8556 GemmMicrokernelTester()
8557 .mr(3)
8558 .nr(8)
8559 .kr(4)
8560 .sr(1)
8561 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008562 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008563 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008564 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008565 }
8566 }
8567 }
8568
8569 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8_strided_cn) {
8570 TEST_REQUIRES_ARM_NEON;
8571 for (uint32_t n = 16; n <= 24; n += 8) {
8572 for (size_t k = 1; k <= 80; k += 17) {
8573 GemmMicrokernelTester()
8574 .mr(3)
8575 .nr(8)
8576 .kr(4)
8577 .sr(1)
8578 .m(3)
8579 .n(n)
8580 .k(k)
8581 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008582 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008583 }
8584 }
8585 }
8586
8587 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8_strided_a) {
8588 TEST_REQUIRES_ARM_NEON;
8589 for (uint32_t n = 16; n <= 24; n += 8) {
8590 for (size_t k = 1; k <= 80; k += 17) {
8591 GemmMicrokernelTester()
8592 .mr(3)
8593 .nr(8)
8594 .kr(4)
8595 .sr(1)
8596 .m(3)
8597 .n(n)
8598 .k(k)
8599 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008600 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008601 }
8602 }
8603 }
8604
8605 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, n_div_8_subtile) {
8606 TEST_REQUIRES_ARM_NEON;
8607 for (uint32_t n = 16; n <= 24; n += 8) {
8608 for (size_t k = 1; k <= 80; k += 17) {
8609 for (uint32_t m = 1; m <= 3; m++) {
8610 GemmMicrokernelTester()
8611 .mr(3)
8612 .nr(8)
8613 .kr(4)
8614 .sr(1)
8615 .m(m)
8616 .n(n)
8617 .k(k)
8618 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008619 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008620 }
8621 }
8622 }
8623 }
8624
8625 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, strided_cm_subtile) {
8626 TEST_REQUIRES_ARM_NEON;
8627 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008628 for (uint32_t n = 1; n <= 8; n++) {
8629 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008630 GemmMicrokernelTester()
8631 .mr(3)
8632 .nr(8)
8633 .kr(4)
8634 .sr(1)
8635 .m(m)
8636 .n(n)
8637 .k(k)
8638 .cm_stride(11)
8639 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008640 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008641 }
8642 }
8643 }
8644 }
8645
8646 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, qmin) {
8647 TEST_REQUIRES_ARM_NEON;
8648 GemmMicrokernelTester()
8649 .mr(3)
8650 .nr(8)
8651 .kr(4)
8652 .sr(1)
8653 .m(3)
8654 .n(8)
8655 .k(16)
8656 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008658 }
8659
8660 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, qmax) {
8661 TEST_REQUIRES_ARM_NEON;
8662 GemmMicrokernelTester()
8663 .mr(3)
8664 .nr(8)
8665 .kr(4)
8666 .sr(1)
8667 .m(3)
8668 .n(8)
8669 .k(16)
8670 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08008671 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008672 }
8673
8674 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MLAL_DUP, strided_cm) {
8675 TEST_REQUIRES_ARM_NEON;
8676 GemmMicrokernelTester()
8677 .mr(3)
8678 .nr(8)
8679 .kr(4)
8680 .sr(1)
8681 .m(3)
8682 .n(8)
8683 .k(16)
8684 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008685 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008686 }
8687#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
8688
8689
8690#if XNN_ARCH_ARM || XNN_ARCH_ARM64
8691 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8) {
8692 TEST_REQUIRES_ARM_NEON;
8693 GemmMicrokernelTester()
8694 .mr(2)
8695 .nr(8)
8696 .kr(4)
8697 .sr(1)
8698 .m(2)
8699 .n(8)
8700 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08008701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008702 }
8703
8704 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cn) {
8705 TEST_REQUIRES_ARM_NEON;
8706 GemmMicrokernelTester()
8707 .mr(2)
8708 .nr(8)
8709 .kr(4)
8710 .sr(1)
8711 .m(2)
8712 .n(8)
8713 .k(8)
8714 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008715 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008716 }
8717
8718 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_strided_a) {
8719 TEST_REQUIRES_ARM_NEON;
8720 GemmMicrokernelTester()
8721 .mr(2)
8722 .nr(8)
8723 .kr(4)
8724 .sr(1)
8725 .m(2)
8726 .n(8)
8727 .k(8)
8728 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008729 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008730 }
8731
8732 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
8733 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08008734 for (uint32_t n = 1; n <= 8; n++) {
8735 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008736 GemmMicrokernelTester()
8737 .mr(2)
8738 .nr(8)
8739 .kr(4)
8740 .sr(1)
8741 .m(m)
8742 .n(n)
8743 .k(8)
8744 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008746 }
8747 }
8748 }
8749
8750 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
8751 TEST_REQUIRES_ARM_NEON;
8752 for (uint32_t m = 1; m <= 2; m++) {
8753 GemmMicrokernelTester()
8754 .mr(2)
8755 .nr(8)
8756 .kr(4)
8757 .sr(1)
8758 .m(m)
8759 .n(8)
8760 .k(8)
8761 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008762 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008763 }
8764 }
8765
8766 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
8767 TEST_REQUIRES_ARM_NEON;
8768 for (uint32_t n = 1; n <= 8; n++) {
8769 GemmMicrokernelTester()
8770 .mr(2)
8771 .nr(8)
8772 .kr(4)
8773 .sr(1)
8774 .m(2)
8775 .n(n)
8776 .k(8)
8777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008779 }
8780 }
8781
8782 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8) {
8783 TEST_REQUIRES_ARM_NEON;
8784 for (size_t k = 1; k < 8; k++) {
8785 GemmMicrokernelTester()
8786 .mr(2)
8787 .nr(8)
8788 .kr(4)
8789 .sr(1)
8790 .m(2)
8791 .n(8)
8792 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008793 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008794 }
8795 }
8796
8797 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8_strided_a) {
8798 TEST_REQUIRES_ARM_NEON;
8799 for (size_t k = 1; k < 8; k++) {
8800 GemmMicrokernelTester()
8801 .mr(2)
8802 .nr(8)
8803 .kr(4)
8804 .sr(1)
8805 .m(2)
8806 .n(8)
8807 .k(k)
8808 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008810 }
8811 }
8812
8813 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
8814 TEST_REQUIRES_ARM_NEON;
8815 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008816 for (uint32_t n = 1; n <= 8; n++) {
8817 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008818 GemmMicrokernelTester()
8819 .mr(2)
8820 .nr(8)
8821 .kr(4)
8822 .sr(1)
8823 .m(m)
8824 .n(n)
8825 .k(k)
8826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008827 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008828 }
8829 }
8830 }
8831 }
8832
8833 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8) {
8834 TEST_REQUIRES_ARM_NEON;
8835 for (size_t k = 9; k < 16; k++) {
8836 GemmMicrokernelTester()
8837 .mr(2)
8838 .nr(8)
8839 .kr(4)
8840 .sr(1)
8841 .m(2)
8842 .n(8)
8843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008845 }
8846 }
8847
8848 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8_strided_a) {
8849 TEST_REQUIRES_ARM_NEON;
8850 for (size_t k = 9; k < 16; k++) {
8851 GemmMicrokernelTester()
8852 .mr(2)
8853 .nr(8)
8854 .kr(4)
8855 .sr(1)
8856 .m(2)
8857 .n(8)
8858 .k(k)
8859 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08008860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008861 }
8862 }
8863
8864 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
8865 TEST_REQUIRES_ARM_NEON;
8866 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008867 for (uint32_t n = 1; n <= 8; n++) {
8868 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008869 GemmMicrokernelTester()
8870 .mr(2)
8871 .nr(8)
8872 .kr(4)
8873 .sr(1)
8874 .m(m)
8875 .n(n)
8876 .k(k)
8877 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008879 }
8880 }
8881 }
8882 }
8883
8884 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8) {
8885 TEST_REQUIRES_ARM_NEON;
8886 for (size_t k = 16; k <= 80; k += 8) {
8887 GemmMicrokernelTester()
8888 .mr(2)
8889 .nr(8)
8890 .kr(4)
8891 .sr(1)
8892 .m(2)
8893 .n(8)
8894 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008896 }
8897 }
8898
8899 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8_strided_a) {
8900 TEST_REQUIRES_ARM_NEON;
8901 for (size_t k = 16; k <= 80; k += 8) {
8902 GemmMicrokernelTester()
8903 .mr(2)
8904 .nr(8)
8905 .kr(4)
8906 .sr(1)
8907 .m(2)
8908 .n(8)
8909 .k(k)
8910 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08008911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008912 }
8913 }
8914
8915 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
8916 TEST_REQUIRES_ARM_NEON;
8917 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08008918 for (uint32_t n = 1; n <= 8; n++) {
8919 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008920 GemmMicrokernelTester()
8921 .mr(2)
8922 .nr(8)
8923 .kr(4)
8924 .sr(1)
8925 .m(m)
8926 .n(n)
8927 .k(k)
8928 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08008929 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008930 }
8931 }
8932 }
8933 }
8934
8935 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8) {
8936 TEST_REQUIRES_ARM_NEON;
8937 for (uint32_t n = 9; n < 16; n++) {
8938 for (size_t k = 1; k <= 40; k += 9) {
8939 GemmMicrokernelTester()
8940 .mr(2)
8941 .nr(8)
8942 .kr(4)
8943 .sr(1)
8944 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008945 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008946 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08008947 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008948 }
8949 }
8950 }
8951
8952 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
8953 TEST_REQUIRES_ARM_NEON;
8954 for (uint32_t n = 9; n < 16; n++) {
8955 for (size_t k = 1; k <= 40; k += 9) {
8956 GemmMicrokernelTester()
8957 .mr(2)
8958 .nr(8)
8959 .kr(4)
8960 .sr(1)
8961 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08008962 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008963 .k(k)
8964 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08008965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008966 }
8967 }
8968 }
8969
8970 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_strided_a) {
8971 TEST_REQUIRES_ARM_NEON;
8972 for (uint32_t n = 9; n < 16; n++) {
8973 for (size_t k = 1; k <= 40; k += 9) {
8974 GemmMicrokernelTester()
8975 .mr(2)
8976 .nr(8)
8977 .kr(4)
8978 .sr(1)
8979 .m(2)
8980 .n(n)
8981 .k(k)
8982 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08008983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08008984 }
8985 }
8986 }
8987
8988 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
8989 TEST_REQUIRES_ARM_NEON;
8990 for (uint32_t n = 9; n < 16; n++) {
8991 for (size_t k = 1; k <= 40; k += 9) {
8992 for (uint32_t m = 1; m <= 2; m++) {
8993 GemmMicrokernelTester()
8994 .mr(2)
8995 .nr(8)
8996 .kr(4)
8997 .sr(1)
8998 .m(m)
8999 .n(n)
9000 .k(k)
9001 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009002 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009003 }
9004 }
9005 }
9006 }
9007
9008 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8) {
9009 TEST_REQUIRES_ARM_NEON;
9010 for (uint32_t n = 16; n <= 24; n += 8) {
9011 for (size_t k = 1; k <= 40; k += 9) {
9012 GemmMicrokernelTester()
9013 .mr(2)
9014 .nr(8)
9015 .kr(4)
9016 .sr(1)
9017 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009018 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009019 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009020 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009021 }
9022 }
9023 }
9024
9025 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
9026 TEST_REQUIRES_ARM_NEON;
9027 for (uint32_t n = 16; n <= 24; n += 8) {
9028 for (size_t k = 1; k <= 40; k += 9) {
9029 GemmMicrokernelTester()
9030 .mr(2)
9031 .nr(8)
9032 .kr(4)
9033 .sr(1)
9034 .m(2)
9035 .n(n)
9036 .k(k)
9037 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009038 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009039 }
9040 }
9041 }
9042
9043 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_strided_a) {
9044 TEST_REQUIRES_ARM_NEON;
9045 for (uint32_t n = 16; n <= 24; n += 8) {
9046 for (size_t k = 1; k <= 40; k += 9) {
9047 GemmMicrokernelTester()
9048 .mr(2)
9049 .nr(8)
9050 .kr(4)
9051 .sr(1)
9052 .m(2)
9053 .n(n)
9054 .k(k)
9055 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009056 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009057 }
9058 }
9059 }
9060
9061 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
9062 TEST_REQUIRES_ARM_NEON;
9063 for (uint32_t n = 16; n <= 24; n += 8) {
9064 for (size_t k = 1; k <= 40; k += 9) {
9065 for (uint32_t m = 1; m <= 2; m++) {
9066 GemmMicrokernelTester()
9067 .mr(2)
9068 .nr(8)
9069 .kr(4)
9070 .sr(1)
9071 .m(m)
9072 .n(n)
9073 .k(k)
9074 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009075 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009076 }
9077 }
9078 }
9079 }
9080
9081 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
9082 TEST_REQUIRES_ARM_NEON;
9083 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009084 for (uint32_t n = 1; n <= 8; n++) {
9085 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009086 GemmMicrokernelTester()
9087 .mr(2)
9088 .nr(8)
9089 .kr(4)
9090 .sr(1)
9091 .m(m)
9092 .n(n)
9093 .k(k)
9094 .cm_stride(11)
9095 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009096 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009097 }
9098 }
9099 }
9100 }
9101
9102 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmin) {
9103 TEST_REQUIRES_ARM_NEON;
9104 GemmMicrokernelTester()
9105 .mr(2)
9106 .nr(8)
9107 .kr(4)
9108 .sr(1)
9109 .m(2)
9110 .n(8)
9111 .k(8)
9112 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009114 }
9115
9116 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, qmax) {
9117 TEST_REQUIRES_ARM_NEON;
9118 GemmMicrokernelTester()
9119 .mr(2)
9120 .nr(8)
9121 .kr(4)
9122 .sr(1)
9123 .m(2)
9124 .n(8)
9125 .k(8)
9126 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009127 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009128 }
9129
9130 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C4__NEON_MULL_LD1R, strided_cm) {
9131 TEST_REQUIRES_ARM_NEON;
9132 GemmMicrokernelTester()
9133 .mr(2)
9134 .nr(8)
9135 .kr(4)
9136 .sr(1)
9137 .m(2)
9138 .n(8)
9139 .k(8)
9140 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009141 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009142 }
9143#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9144
9145
9146#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9147 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8) {
9148 TEST_REQUIRES_ARM_NEON;
9149 GemmMicrokernelTester()
9150 .mr(4)
9151 .nr(8)
9152 .kr(4)
9153 .sr(1)
9154 .m(4)
9155 .n(8)
9156 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009158 }
9159
9160 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cn) {
9161 TEST_REQUIRES_ARM_NEON;
9162 GemmMicrokernelTester()
9163 .mr(4)
9164 .nr(8)
9165 .kr(4)
9166 .sr(1)
9167 .m(4)
9168 .n(8)
9169 .k(8)
9170 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009171 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009172 }
9173
9174 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_strided_a) {
9175 TEST_REQUIRES_ARM_NEON;
9176 GemmMicrokernelTester()
9177 .mr(4)
9178 .nr(8)
9179 .kr(4)
9180 .sr(1)
9181 .m(4)
9182 .n(8)
9183 .k(8)
9184 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009185 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009186 }
9187
9188 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile) {
9189 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009190 for (uint32_t n = 1; n <= 8; n++) {
9191 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009192 GemmMicrokernelTester()
9193 .mr(4)
9194 .nr(8)
9195 .kr(4)
9196 .sr(1)
9197 .m(m)
9198 .n(n)
9199 .k(8)
9200 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009202 }
9203 }
9204 }
9205
9206 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
9207 TEST_REQUIRES_ARM_NEON;
9208 for (uint32_t m = 1; m <= 4; m++) {
9209 GemmMicrokernelTester()
9210 .mr(4)
9211 .nr(8)
9212 .kr(4)
9213 .sr(1)
9214 .m(m)
9215 .n(8)
9216 .k(8)
9217 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009218 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009219 }
9220 }
9221
9222 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
9223 TEST_REQUIRES_ARM_NEON;
9224 for (uint32_t n = 1; n <= 8; n++) {
9225 GemmMicrokernelTester()
9226 .mr(4)
9227 .nr(8)
9228 .kr(4)
9229 .sr(1)
9230 .m(4)
9231 .n(n)
9232 .k(8)
9233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009235 }
9236 }
9237
9238 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8) {
9239 TEST_REQUIRES_ARM_NEON;
9240 for (size_t k = 1; k < 8; k++) {
9241 GemmMicrokernelTester()
9242 .mr(4)
9243 .nr(8)
9244 .kr(4)
9245 .sr(1)
9246 .m(4)
9247 .n(8)
9248 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009249 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009250 }
9251 }
9252
9253 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8_strided_a) {
9254 TEST_REQUIRES_ARM_NEON;
9255 for (size_t k = 1; k < 8; k++) {
9256 GemmMicrokernelTester()
9257 .mr(4)
9258 .nr(8)
9259 .kr(4)
9260 .sr(1)
9261 .m(4)
9262 .n(8)
9263 .k(k)
9264 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009266 }
9267 }
9268
9269 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_lt_8_subtile) {
9270 TEST_REQUIRES_ARM_NEON;
9271 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009272 for (uint32_t n = 1; n <= 8; n++) {
9273 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009274 GemmMicrokernelTester()
9275 .mr(4)
9276 .nr(8)
9277 .kr(4)
9278 .sr(1)
9279 .m(m)
9280 .n(n)
9281 .k(k)
9282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009284 }
9285 }
9286 }
9287 }
9288
9289 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8) {
9290 TEST_REQUIRES_ARM_NEON;
9291 for (size_t k = 9; k < 16; k++) {
9292 GemmMicrokernelTester()
9293 .mr(4)
9294 .nr(8)
9295 .kr(4)
9296 .sr(1)
9297 .m(4)
9298 .n(8)
9299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009301 }
9302 }
9303
9304 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8_strided_a) {
9305 TEST_REQUIRES_ARM_NEON;
9306 for (size_t k = 9; k < 16; k++) {
9307 GemmMicrokernelTester()
9308 .mr(4)
9309 .nr(8)
9310 .kr(4)
9311 .sr(1)
9312 .m(4)
9313 .n(8)
9314 .k(k)
9315 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009317 }
9318 }
9319
9320 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_gt_8_subtile) {
9321 TEST_REQUIRES_ARM_NEON;
9322 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009323 for (uint32_t n = 1; n <= 8; n++) {
9324 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009325 GemmMicrokernelTester()
9326 .mr(4)
9327 .nr(8)
9328 .kr(4)
9329 .sr(1)
9330 .m(m)
9331 .n(n)
9332 .k(k)
9333 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009335 }
9336 }
9337 }
9338 }
9339
9340 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8) {
9341 TEST_REQUIRES_ARM_NEON;
9342 for (size_t k = 16; k <= 80; k += 8) {
9343 GemmMicrokernelTester()
9344 .mr(4)
9345 .nr(8)
9346 .kr(4)
9347 .sr(1)
9348 .m(4)
9349 .n(8)
9350 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009352 }
9353 }
9354
9355 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8_strided_a) {
9356 TEST_REQUIRES_ARM_NEON;
9357 for (size_t k = 16; k <= 80; k += 8) {
9358 GemmMicrokernelTester()
9359 .mr(4)
9360 .nr(8)
9361 .kr(4)
9362 .sr(1)
9363 .m(4)
9364 .n(8)
9365 .k(k)
9366 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009368 }
9369 }
9370
9371 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, k_div_8_subtile) {
9372 TEST_REQUIRES_ARM_NEON;
9373 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009374 for (uint32_t n = 1; n <= 8; n++) {
9375 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009376 GemmMicrokernelTester()
9377 .mr(4)
9378 .nr(8)
9379 .kr(4)
9380 .sr(1)
9381 .m(m)
9382 .n(n)
9383 .k(k)
9384 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009386 }
9387 }
9388 }
9389 }
9390
9391 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8) {
9392 TEST_REQUIRES_ARM_NEON;
9393 for (uint32_t n = 9; n < 16; n++) {
9394 for (size_t k = 1; k <= 40; k += 9) {
9395 GemmMicrokernelTester()
9396 .mr(4)
9397 .nr(8)
9398 .kr(4)
9399 .sr(1)
9400 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009401 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009402 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009403 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009404 }
9405 }
9406 }
9407
9408 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_strided_cn) {
9409 TEST_REQUIRES_ARM_NEON;
9410 for (uint32_t n = 9; n < 16; n++) {
9411 for (size_t k = 1; k <= 40; k += 9) {
9412 GemmMicrokernelTester()
9413 .mr(4)
9414 .nr(8)
9415 .kr(4)
9416 .sr(1)
9417 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009418 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009419 .k(k)
9420 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009422 }
9423 }
9424 }
9425
9426 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_strided_a) {
9427 TEST_REQUIRES_ARM_NEON;
9428 for (uint32_t n = 9; n < 16; n++) {
9429 for (size_t k = 1; k <= 40; k += 9) {
9430 GemmMicrokernelTester()
9431 .mr(4)
9432 .nr(8)
9433 .kr(4)
9434 .sr(1)
9435 .m(4)
9436 .n(n)
9437 .k(k)
9438 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009440 }
9441 }
9442 }
9443
9444 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_gt_8_subtile) {
9445 TEST_REQUIRES_ARM_NEON;
9446 for (uint32_t n = 9; n < 16; n++) {
9447 for (size_t k = 1; k <= 40; k += 9) {
9448 for (uint32_t m = 1; m <= 4; m++) {
9449 GemmMicrokernelTester()
9450 .mr(4)
9451 .nr(8)
9452 .kr(4)
9453 .sr(1)
9454 .m(m)
9455 .n(n)
9456 .k(k)
9457 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009458 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009459 }
9460 }
9461 }
9462 }
9463
9464 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8) {
9465 TEST_REQUIRES_ARM_NEON;
9466 for (uint32_t n = 16; n <= 24; n += 8) {
9467 for (size_t k = 1; k <= 40; k += 9) {
9468 GemmMicrokernelTester()
9469 .mr(4)
9470 .nr(8)
9471 .kr(4)
9472 .sr(1)
9473 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009474 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009475 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009476 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009477 }
9478 }
9479 }
9480
9481 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_strided_cn) {
9482 TEST_REQUIRES_ARM_NEON;
9483 for (uint32_t n = 16; n <= 24; n += 8) {
9484 for (size_t k = 1; k <= 40; k += 9) {
9485 GemmMicrokernelTester()
9486 .mr(4)
9487 .nr(8)
9488 .kr(4)
9489 .sr(1)
9490 .m(4)
9491 .n(n)
9492 .k(k)
9493 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009494 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009495 }
9496 }
9497 }
9498
9499 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_strided_a) {
9500 TEST_REQUIRES_ARM_NEON;
9501 for (uint32_t n = 16; n <= 24; n += 8) {
9502 for (size_t k = 1; k <= 40; k += 9) {
9503 GemmMicrokernelTester()
9504 .mr(4)
9505 .nr(8)
9506 .kr(4)
9507 .sr(1)
9508 .m(4)
9509 .n(n)
9510 .k(k)
9511 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009512 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009513 }
9514 }
9515 }
9516
9517 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, n_div_8_subtile) {
9518 TEST_REQUIRES_ARM_NEON;
9519 for (uint32_t n = 16; n <= 24; n += 8) {
9520 for (size_t k = 1; k <= 40; k += 9) {
9521 for (uint32_t m = 1; m <= 4; m++) {
9522 GemmMicrokernelTester()
9523 .mr(4)
9524 .nr(8)
9525 .kr(4)
9526 .sr(1)
9527 .m(m)
9528 .n(n)
9529 .k(k)
9530 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009531 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009532 }
9533 }
9534 }
9535 }
9536
9537 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cm_subtile) {
9538 TEST_REQUIRES_ARM_NEON;
9539 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009540 for (uint32_t n = 1; n <= 8; n++) {
9541 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009542 GemmMicrokernelTester()
9543 .mr(4)
9544 .nr(8)
9545 .kr(4)
9546 .sr(1)
9547 .m(m)
9548 .n(n)
9549 .k(k)
9550 .cm_stride(11)
9551 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009552 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009553 }
9554 }
9555 }
9556 }
9557
9558 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, qmin) {
9559 TEST_REQUIRES_ARM_NEON;
9560 GemmMicrokernelTester()
9561 .mr(4)
9562 .nr(8)
9563 .kr(4)
9564 .sr(1)
9565 .m(4)
9566 .n(8)
9567 .k(8)
9568 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009570 }
9571
9572 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, qmax) {
9573 TEST_REQUIRES_ARM_NEON;
9574 GemmMicrokernelTester()
9575 .mr(4)
9576 .nr(8)
9577 .kr(4)
9578 .sr(1)
9579 .m(4)
9580 .n(8)
9581 .k(8)
9582 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -08009583 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009584 }
9585
9586 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C4__NEON_MULL_LD1R, strided_cm) {
9587 TEST_REQUIRES_ARM_NEON;
9588 GemmMicrokernelTester()
9589 .mr(4)
9590 .nr(8)
9591 .kr(4)
9592 .sr(1)
9593 .m(4)
9594 .n(8)
9595 .k(8)
9596 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009597 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009598 }
9599#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
9600
9601
9602#if XNN_ARCH_ARM || XNN_ARCH_ARM64
9603 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_eq_8) {
9604 TEST_REQUIRES_ARM_NEON;
9605 GemmMicrokernelTester()
9606 .mr(3)
9607 .nr(16)
9608 .kr(4)
9609 .sr(1)
9610 .m(3)
9611 .n(16)
9612 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -08009613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009614 }
9615
9616 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, strided_cn) {
9617 TEST_REQUIRES_ARM_NEON;
9618 GemmMicrokernelTester()
9619 .mr(3)
9620 .nr(16)
9621 .kr(4)
9622 .sr(1)
9623 .m(3)
9624 .n(16)
9625 .k(8)
9626 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009627 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009628 }
9629
9630 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_eq_8_strided_a) {
9631 TEST_REQUIRES_ARM_NEON;
9632 GemmMicrokernelTester()
9633 .mr(3)
9634 .nr(16)
9635 .kr(4)
9636 .sr(1)
9637 .m(3)
9638 .n(16)
9639 .k(8)
9640 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009641 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009642 }
9643
9644 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_eq_8_subtile) {
9645 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -08009646 for (uint32_t n = 1; n <= 16; n++) {
9647 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009648 GemmMicrokernelTester()
9649 .mr(3)
9650 .nr(16)
9651 .kr(4)
9652 .sr(1)
9653 .m(m)
9654 .n(n)
9655 .k(8)
9656 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009658 }
9659 }
9660 }
9661
9662 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_eq_8_subtile_m) {
9663 TEST_REQUIRES_ARM_NEON;
9664 for (uint32_t m = 1; m <= 3; m++) {
9665 GemmMicrokernelTester()
9666 .mr(3)
9667 .nr(16)
9668 .kr(4)
9669 .sr(1)
9670 .m(m)
9671 .n(16)
9672 .k(8)
9673 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009674 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009675 }
9676 }
9677
9678 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_eq_8_subtile_n) {
9679 TEST_REQUIRES_ARM_NEON;
9680 for (uint32_t n = 1; n <= 16; n++) {
9681 GemmMicrokernelTester()
9682 .mr(3)
9683 .nr(16)
9684 .kr(4)
9685 .sr(1)
9686 .m(3)
9687 .n(n)
9688 .k(8)
9689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009691 }
9692 }
9693
9694 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_lt_8) {
9695 TEST_REQUIRES_ARM_NEON;
9696 for (size_t k = 1; k < 8; k++) {
9697 GemmMicrokernelTester()
9698 .mr(3)
9699 .nr(16)
9700 .kr(4)
9701 .sr(1)
9702 .m(3)
9703 .n(16)
9704 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009705 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009706 }
9707 }
9708
9709 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_lt_8_strided_a) {
9710 TEST_REQUIRES_ARM_NEON;
9711 for (size_t k = 1; k < 8; k++) {
9712 GemmMicrokernelTester()
9713 .mr(3)
9714 .nr(16)
9715 .kr(4)
9716 .sr(1)
9717 .m(3)
9718 .n(16)
9719 .k(k)
9720 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -08009721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009722 }
9723 }
9724
9725 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_lt_8_subtile) {
9726 TEST_REQUIRES_ARM_NEON;
9727 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009728 for (uint32_t n = 1; n <= 16; n++) {
9729 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009730 GemmMicrokernelTester()
9731 .mr(3)
9732 .nr(16)
9733 .kr(4)
9734 .sr(1)
9735 .m(m)
9736 .n(n)
9737 .k(k)
9738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009740 }
9741 }
9742 }
9743 }
9744
9745 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_gt_8) {
9746 TEST_REQUIRES_ARM_NEON;
9747 for (size_t k = 9; k < 16; k++) {
9748 GemmMicrokernelTester()
9749 .mr(3)
9750 .nr(16)
9751 .kr(4)
9752 .sr(1)
9753 .m(3)
9754 .n(16)
9755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009757 }
9758 }
9759
9760 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_gt_8_strided_a) {
9761 TEST_REQUIRES_ARM_NEON;
9762 for (size_t k = 9; k < 16; k++) {
9763 GemmMicrokernelTester()
9764 .mr(3)
9765 .nr(16)
9766 .kr(4)
9767 .sr(1)
9768 .m(3)
9769 .n(16)
9770 .k(k)
9771 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009773 }
9774 }
9775
9776 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_gt_8_subtile) {
9777 TEST_REQUIRES_ARM_NEON;
9778 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009779 for (uint32_t n = 1; n <= 16; n++) {
9780 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009781 GemmMicrokernelTester()
9782 .mr(3)
9783 .nr(16)
9784 .kr(4)
9785 .sr(1)
9786 .m(m)
9787 .n(n)
9788 .k(k)
9789 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009791 }
9792 }
9793 }
9794 }
9795
9796 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_div_8) {
9797 TEST_REQUIRES_ARM_NEON;
9798 for (size_t k = 16; k <= 80; k += 8) {
9799 GemmMicrokernelTester()
9800 .mr(3)
9801 .nr(16)
9802 .kr(4)
9803 .sr(1)
9804 .m(3)
9805 .n(16)
9806 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009808 }
9809 }
9810
9811 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_div_8_strided_a) {
9812 TEST_REQUIRES_ARM_NEON;
9813 for (size_t k = 16; k <= 80; k += 8) {
9814 GemmMicrokernelTester()
9815 .mr(3)
9816 .nr(16)
9817 .kr(4)
9818 .sr(1)
9819 .m(3)
9820 .n(16)
9821 .k(k)
9822 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -08009823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009824 }
9825 }
9826
9827 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, k_div_8_subtile) {
9828 TEST_REQUIRES_ARM_NEON;
9829 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009830 for (uint32_t n = 1; n <= 16; n++) {
9831 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009832 GemmMicrokernelTester()
9833 .mr(3)
9834 .nr(16)
9835 .kr(4)
9836 .sr(1)
9837 .m(m)
9838 .n(n)
9839 .k(k)
9840 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009841 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009842 }
9843 }
9844 }
9845 }
9846
9847 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_gt_16) {
9848 TEST_REQUIRES_ARM_NEON;
9849 for (uint32_t n = 17; n < 32; n++) {
9850 for (size_t k = 1; k <= 40; k += 9) {
9851 GemmMicrokernelTester()
9852 .mr(3)
9853 .nr(16)
9854 .kr(4)
9855 .sr(1)
9856 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009857 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009858 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009859 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009860 }
9861 }
9862 }
9863
9864 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_gt_16_strided_cn) {
9865 TEST_REQUIRES_ARM_NEON;
9866 for (uint32_t n = 17; n < 32; n++) {
9867 for (size_t k = 1; k <= 40; k += 9) {
9868 GemmMicrokernelTester()
9869 .mr(3)
9870 .nr(16)
9871 .kr(4)
9872 .sr(1)
9873 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009874 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009875 .k(k)
9876 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009878 }
9879 }
9880 }
9881
9882 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_gt_16_strided_a) {
9883 TEST_REQUIRES_ARM_NEON;
9884 for (uint32_t n = 17; n < 32; n++) {
9885 for (size_t k = 1; k <= 40; k += 9) {
9886 GemmMicrokernelTester()
9887 .mr(3)
9888 .nr(16)
9889 .kr(4)
9890 .sr(1)
9891 .m(3)
9892 .n(n)
9893 .k(k)
9894 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009895 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009896 }
9897 }
9898 }
9899
9900 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_gt_16_subtile) {
9901 TEST_REQUIRES_ARM_NEON;
9902 for (uint32_t n = 17; n < 32; n++) {
9903 for (size_t k = 1; k <= 40; k += 9) {
9904 for (uint32_t m = 1; m <= 3; m++) {
9905 GemmMicrokernelTester()
9906 .mr(3)
9907 .nr(16)
9908 .kr(4)
9909 .sr(1)
9910 .m(m)
9911 .n(n)
9912 .k(k)
9913 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009914 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009915 }
9916 }
9917 }
9918 }
9919
9920 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_div_16) {
9921 TEST_REQUIRES_ARM_NEON;
9922 for (uint32_t n = 32; n <= 48; n += 16) {
9923 for (size_t k = 1; k <= 40; k += 9) {
9924 GemmMicrokernelTester()
9925 .mr(3)
9926 .nr(16)
9927 .kr(4)
9928 .sr(1)
9929 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -08009930 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009931 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -08009932 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009933 }
9934 }
9935 }
9936
9937 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_div_16_strided_cn) {
9938 TEST_REQUIRES_ARM_NEON;
9939 for (uint32_t n = 32; n <= 48; n += 16) {
9940 for (size_t k = 1; k <= 40; k += 9) {
9941 GemmMicrokernelTester()
9942 .mr(3)
9943 .nr(16)
9944 .kr(4)
9945 .sr(1)
9946 .m(3)
9947 .n(n)
9948 .k(k)
9949 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -08009950 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009951 }
9952 }
9953 }
9954
9955 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_div_16_strided_a) {
9956 TEST_REQUIRES_ARM_NEON;
9957 for (uint32_t n = 32; n <= 48; n += 16) {
9958 for (size_t k = 1; k <= 40; k += 9) {
9959 GemmMicrokernelTester()
9960 .mr(3)
9961 .nr(16)
9962 .kr(4)
9963 .sr(1)
9964 .m(3)
9965 .n(n)
9966 .k(k)
9967 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -08009968 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009969 }
9970 }
9971 }
9972
9973 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, n_div_16_subtile) {
9974 TEST_REQUIRES_ARM_NEON;
9975 for (uint32_t n = 32; n <= 48; n += 16) {
9976 for (size_t k = 1; k <= 40; k += 9) {
9977 for (uint32_t m = 1; m <= 3; m++) {
9978 GemmMicrokernelTester()
9979 .mr(3)
9980 .nr(16)
9981 .kr(4)
9982 .sr(1)
9983 .m(m)
9984 .n(n)
9985 .k(k)
9986 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -08009987 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009988 }
9989 }
9990 }
9991 }
9992
9993 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, strided_cm_subtile) {
9994 TEST_REQUIRES_ARM_NEON;
9995 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -08009996 for (uint32_t n = 1; n <= 16; n++) {
9997 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -08009998 GemmMicrokernelTester()
9999 .mr(3)
10000 .nr(16)
10001 .kr(4)
10002 .sr(1)
10003 .m(m)
10004 .n(n)
10005 .k(k)
10006 .cm_stride(19)
10007 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010008 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010009 }
10010 }
10011 }
10012 }
10013
10014 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, qmin) {
10015 TEST_REQUIRES_ARM_NEON;
10016 GemmMicrokernelTester()
10017 .mr(3)
10018 .nr(16)
10019 .kr(4)
10020 .sr(1)
10021 .m(3)
10022 .n(16)
10023 .k(8)
10024 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010026 }
10027
10028 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, qmax) {
10029 TEST_REQUIRES_ARM_NEON;
10030 GemmMicrokernelTester()
10031 .mr(3)
10032 .nr(16)
10033 .kr(4)
10034 .sr(1)
10035 .m(3)
10036 .n(16)
10037 .k(8)
10038 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010039 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010040 }
10041
10042 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C4__NEON_MULL_LD1R, strided_cm) {
10043 TEST_REQUIRES_ARM_NEON;
10044 GemmMicrokernelTester()
10045 .mr(3)
10046 .nr(16)
10047 .kr(4)
10048 .sr(1)
10049 .m(3)
10050 .n(16)
10051 .k(8)
10052 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010053 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c4__neon_mull_ld1r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010054 }
10055#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10056
10057
10058#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10059 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_eq_8) {
10060 TEST_REQUIRES_ARM_NEON;
10061 GemmMicrokernelTester()
10062 .mr(3)
10063 .nr(8)
10064 .kr(4)
10065 .sr(1)
10066 .m(3)
10067 .n(8)
10068 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010070 }
10071
10072 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, strided_cn) {
10073 TEST_REQUIRES_ARM_NEON;
10074 GemmMicrokernelTester()
10075 .mr(3)
10076 .nr(8)
10077 .kr(4)
10078 .sr(1)
10079 .m(3)
10080 .n(8)
10081 .k(8)
10082 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010083 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010084 }
10085
10086 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_eq_8_strided_a) {
10087 TEST_REQUIRES_ARM_NEON;
10088 GemmMicrokernelTester()
10089 .mr(3)
10090 .nr(8)
10091 .kr(4)
10092 .sr(1)
10093 .m(3)
10094 .n(8)
10095 .k(8)
10096 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010097 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010098 }
10099
10100 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_eq_8_subtile) {
10101 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010102 for (uint32_t n = 1; n <= 8; n++) {
10103 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010104 GemmMicrokernelTester()
10105 .mr(3)
10106 .nr(8)
10107 .kr(4)
10108 .sr(1)
10109 .m(m)
10110 .n(n)
10111 .k(8)
10112 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010114 }
10115 }
10116 }
10117
10118 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_eq_8_subtile_m) {
10119 TEST_REQUIRES_ARM_NEON;
10120 for (uint32_t m = 1; m <= 3; m++) {
10121 GemmMicrokernelTester()
10122 .mr(3)
10123 .nr(8)
10124 .kr(4)
10125 .sr(1)
10126 .m(m)
10127 .n(8)
10128 .k(8)
10129 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010130 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010131 }
10132 }
10133
10134 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_eq_8_subtile_n) {
10135 TEST_REQUIRES_ARM_NEON;
10136 for (uint32_t n = 1; n <= 8; n++) {
10137 GemmMicrokernelTester()
10138 .mr(3)
10139 .nr(8)
10140 .kr(4)
10141 .sr(1)
10142 .m(3)
10143 .n(n)
10144 .k(8)
10145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010147 }
10148 }
10149
10150 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_lt_8) {
10151 TEST_REQUIRES_ARM_NEON;
10152 for (size_t k = 1; k < 8; k++) {
10153 GemmMicrokernelTester()
10154 .mr(3)
10155 .nr(8)
10156 .kr(4)
10157 .sr(1)
10158 .m(3)
10159 .n(8)
10160 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010161 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010162 }
10163 }
10164
10165 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_lt_8_strided_a) {
10166 TEST_REQUIRES_ARM_NEON;
10167 for (size_t k = 1; k < 8; k++) {
10168 GemmMicrokernelTester()
10169 .mr(3)
10170 .nr(8)
10171 .kr(4)
10172 .sr(1)
10173 .m(3)
10174 .n(8)
10175 .k(k)
10176 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010178 }
10179 }
10180
10181 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_lt_8_subtile) {
10182 TEST_REQUIRES_ARM_NEON;
10183 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010184 for (uint32_t n = 1; n <= 8; n++) {
10185 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010186 GemmMicrokernelTester()
10187 .mr(3)
10188 .nr(8)
10189 .kr(4)
10190 .sr(1)
10191 .m(m)
10192 .n(n)
10193 .k(k)
10194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010196 }
10197 }
10198 }
10199 }
10200
10201 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_gt_8) {
10202 TEST_REQUIRES_ARM_NEON;
10203 for (size_t k = 9; k < 16; k++) {
10204 GemmMicrokernelTester()
10205 .mr(3)
10206 .nr(8)
10207 .kr(4)
10208 .sr(1)
10209 .m(3)
10210 .n(8)
10211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010213 }
10214 }
10215
10216 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_gt_8_strided_a) {
10217 TEST_REQUIRES_ARM_NEON;
10218 for (size_t k = 9; k < 16; k++) {
10219 GemmMicrokernelTester()
10220 .mr(3)
10221 .nr(8)
10222 .kr(4)
10223 .sr(1)
10224 .m(3)
10225 .n(8)
10226 .k(k)
10227 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010229 }
10230 }
10231
10232 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_gt_8_subtile) {
10233 TEST_REQUIRES_ARM_NEON;
10234 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010235 for (uint32_t n = 1; n <= 8; n++) {
10236 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010237 GemmMicrokernelTester()
10238 .mr(3)
10239 .nr(8)
10240 .kr(4)
10241 .sr(1)
10242 .m(m)
10243 .n(n)
10244 .k(k)
10245 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010247 }
10248 }
10249 }
10250 }
10251
10252 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_div_8) {
10253 TEST_REQUIRES_ARM_NEON;
10254 for (size_t k = 16; k <= 80; k += 8) {
10255 GemmMicrokernelTester()
10256 .mr(3)
10257 .nr(8)
10258 .kr(4)
10259 .sr(1)
10260 .m(3)
10261 .n(8)
10262 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010264 }
10265 }
10266
10267 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_div_8_strided_a) {
10268 TEST_REQUIRES_ARM_NEON;
10269 for (size_t k = 16; k <= 80; k += 8) {
10270 GemmMicrokernelTester()
10271 .mr(3)
10272 .nr(8)
10273 .kr(4)
10274 .sr(1)
10275 .m(3)
10276 .n(8)
10277 .k(k)
10278 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010280 }
10281 }
10282
10283 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, k_div_8_subtile) {
10284 TEST_REQUIRES_ARM_NEON;
10285 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010286 for (uint32_t n = 1; n <= 8; n++) {
10287 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010288 GemmMicrokernelTester()
10289 .mr(3)
10290 .nr(8)
10291 .kr(4)
10292 .sr(1)
10293 .m(m)
10294 .n(n)
10295 .k(k)
10296 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010297 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010298 }
10299 }
10300 }
10301 }
10302
10303 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_gt_8) {
10304 TEST_REQUIRES_ARM_NEON;
10305 for (uint32_t n = 9; n < 16; n++) {
10306 for (size_t k = 1; k <= 40; k += 9) {
10307 GemmMicrokernelTester()
10308 .mr(3)
10309 .nr(8)
10310 .kr(4)
10311 .sr(1)
10312 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010313 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010314 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010315 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010316 }
10317 }
10318 }
10319
10320 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_gt_8_strided_cn) {
10321 TEST_REQUIRES_ARM_NEON;
10322 for (uint32_t n = 9; n < 16; n++) {
10323 for (size_t k = 1; k <= 40; k += 9) {
10324 GemmMicrokernelTester()
10325 .mr(3)
10326 .nr(8)
10327 .kr(4)
10328 .sr(1)
10329 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010330 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010331 .k(k)
10332 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010334 }
10335 }
10336 }
10337
10338 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_gt_8_strided_a) {
10339 TEST_REQUIRES_ARM_NEON;
10340 for (uint32_t n = 9; n < 16; n++) {
10341 for (size_t k = 1; k <= 40; k += 9) {
10342 GemmMicrokernelTester()
10343 .mr(3)
10344 .nr(8)
10345 .kr(4)
10346 .sr(1)
10347 .m(3)
10348 .n(n)
10349 .k(k)
10350 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010351 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010352 }
10353 }
10354 }
10355
10356 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_gt_8_subtile) {
10357 TEST_REQUIRES_ARM_NEON;
10358 for (uint32_t n = 9; n < 16; n++) {
10359 for (size_t k = 1; k <= 40; k += 9) {
10360 for (uint32_t m = 1; m <= 3; m++) {
10361 GemmMicrokernelTester()
10362 .mr(3)
10363 .nr(8)
10364 .kr(4)
10365 .sr(1)
10366 .m(m)
10367 .n(n)
10368 .k(k)
10369 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010370 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010371 }
10372 }
10373 }
10374 }
10375
10376 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_div_8) {
10377 TEST_REQUIRES_ARM_NEON;
10378 for (uint32_t n = 16; n <= 24; n += 8) {
10379 for (size_t k = 1; k <= 40; k += 9) {
10380 GemmMicrokernelTester()
10381 .mr(3)
10382 .nr(8)
10383 .kr(4)
10384 .sr(1)
10385 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010386 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010387 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010388 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010389 }
10390 }
10391 }
10392
10393 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_div_8_strided_cn) {
10394 TEST_REQUIRES_ARM_NEON;
10395 for (uint32_t n = 16; n <= 24; n += 8) {
10396 for (size_t k = 1; k <= 40; k += 9) {
10397 GemmMicrokernelTester()
10398 .mr(3)
10399 .nr(8)
10400 .kr(4)
10401 .sr(1)
10402 .m(3)
10403 .n(n)
10404 .k(k)
10405 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010406 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010407 }
10408 }
10409 }
10410
10411 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_div_8_strided_a) {
10412 TEST_REQUIRES_ARM_NEON;
10413 for (uint32_t n = 16; n <= 24; n += 8) {
10414 for (size_t k = 1; k <= 40; k += 9) {
10415 GemmMicrokernelTester()
10416 .mr(3)
10417 .nr(8)
10418 .kr(4)
10419 .sr(1)
10420 .m(3)
10421 .n(n)
10422 .k(k)
10423 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080010424 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010425 }
10426 }
10427 }
10428
10429 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, n_div_8_subtile) {
10430 TEST_REQUIRES_ARM_NEON;
10431 for (uint32_t n = 16; n <= 24; n += 8) {
10432 for (size_t k = 1; k <= 40; k += 9) {
10433 for (uint32_t m = 1; m <= 3; m++) {
10434 GemmMicrokernelTester()
10435 .mr(3)
10436 .nr(8)
10437 .kr(4)
10438 .sr(1)
10439 .m(m)
10440 .n(n)
10441 .k(k)
10442 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010443 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010444 }
10445 }
10446 }
10447 }
10448
10449 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, strided_cm_subtile) {
10450 TEST_REQUIRES_ARM_NEON;
10451 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010452 for (uint32_t n = 1; n <= 8; n++) {
10453 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010454 GemmMicrokernelTester()
10455 .mr(3)
10456 .nr(8)
10457 .kr(4)
10458 .sr(1)
10459 .m(m)
10460 .n(n)
10461 .k(k)
10462 .cm_stride(11)
10463 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010464 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010465 }
10466 }
10467 }
10468 }
10469
10470 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, qmin) {
10471 TEST_REQUIRES_ARM_NEON;
10472 GemmMicrokernelTester()
10473 .mr(3)
10474 .nr(8)
10475 .kr(4)
10476 .sr(1)
10477 .m(3)
10478 .n(8)
10479 .k(8)
10480 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010482 }
10483
10484 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, qmax) {
10485 TEST_REQUIRES_ARM_NEON;
10486 GemmMicrokernelTester()
10487 .mr(3)
10488 .nr(8)
10489 .kr(4)
10490 .sr(1)
10491 .m(3)
10492 .n(8)
10493 .k(8)
10494 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010495 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010496 }
10497
10498 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C4__NEON_MULL_LD2R, strided_cm) {
10499 TEST_REQUIRES_ARM_NEON;
10500 GemmMicrokernelTester()
10501 .mr(3)
10502 .nr(8)
10503 .kr(4)
10504 .sr(1)
10505 .m(3)
10506 .n(8)
10507 .k(8)
10508 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010509 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c4__neon_mull_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010510 }
10511#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10512
10513
10514#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10515 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16) {
10516 TEST_REQUIRES_ARM_NEON;
10517 GemmMicrokernelTester()
10518 .mr(2)
10519 .nr(16)
10520 .kr(4)
10521 .sr(1)
10522 .m(2)
10523 .n(16)
10524 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080010525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010526 }
10527
10528 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cn) {
10529 TEST_REQUIRES_ARM_NEON;
10530 GemmMicrokernelTester()
10531 .mr(2)
10532 .nr(16)
10533 .kr(4)
10534 .sr(1)
10535 .m(2)
10536 .n(16)
10537 .k(16)
10538 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010539 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010540 }
10541
10542 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_strided_a) {
10543 TEST_REQUIRES_ARM_NEON;
10544 GemmMicrokernelTester()
10545 .mr(2)
10546 .nr(16)
10547 .kr(4)
10548 .sr(1)
10549 .m(2)
10550 .n(16)
10551 .k(16)
10552 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010553 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010554 }
10555
10556 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile) {
10557 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080010558 for (uint32_t n = 1; n <= 16; n++) {
10559 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010560 GemmMicrokernelTester()
10561 .mr(2)
10562 .nr(16)
10563 .kr(4)
10564 .sr(1)
10565 .m(m)
10566 .n(n)
10567 .k(16)
10568 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010570 }
10571 }
10572 }
10573
10574 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_m) {
10575 TEST_REQUIRES_ARM_NEON;
10576 for (uint32_t m = 1; m <= 2; m++) {
10577 GemmMicrokernelTester()
10578 .mr(2)
10579 .nr(16)
10580 .kr(4)
10581 .sr(1)
10582 .m(m)
10583 .n(16)
10584 .k(16)
10585 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010586 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010587 }
10588 }
10589
10590 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_eq_16_subtile_n) {
10591 TEST_REQUIRES_ARM_NEON;
10592 for (uint32_t n = 1; n <= 16; n++) {
10593 GemmMicrokernelTester()
10594 .mr(2)
10595 .nr(16)
10596 .kr(4)
10597 .sr(1)
10598 .m(2)
10599 .n(n)
10600 .k(16)
10601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010603 }
10604 }
10605
10606 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16) {
10607 TEST_REQUIRES_ARM_NEON;
10608 for (size_t k = 1; k < 16; k++) {
10609 GemmMicrokernelTester()
10610 .mr(2)
10611 .nr(16)
10612 .kr(4)
10613 .sr(1)
10614 .m(2)
10615 .n(16)
10616 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010617 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010618 }
10619 }
10620
10621 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16_strided_a) {
10622 TEST_REQUIRES_ARM_NEON;
10623 for (size_t k = 1; k < 16; k++) {
10624 GemmMicrokernelTester()
10625 .mr(2)
10626 .nr(16)
10627 .kr(4)
10628 .sr(1)
10629 .m(2)
10630 .n(16)
10631 .k(k)
10632 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010634 }
10635 }
10636
10637 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_lt_16_subtile) {
10638 TEST_REQUIRES_ARM_NEON;
10639 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010640 for (uint32_t n = 1; n <= 16; n++) {
10641 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010642 GemmMicrokernelTester()
10643 .mr(2)
10644 .nr(16)
10645 .kr(4)
10646 .sr(1)
10647 .m(m)
10648 .n(n)
10649 .k(k)
10650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010652 }
10653 }
10654 }
10655 }
10656
10657 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16) {
10658 TEST_REQUIRES_ARM_NEON;
10659 for (size_t k = 17; k < 32; k++) {
10660 GemmMicrokernelTester()
10661 .mr(2)
10662 .nr(16)
10663 .kr(4)
10664 .sr(1)
10665 .m(2)
10666 .n(16)
10667 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010669 }
10670 }
10671
10672 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16_strided_a) {
10673 TEST_REQUIRES_ARM_NEON;
10674 for (size_t k = 17; k < 32; k++) {
10675 GemmMicrokernelTester()
10676 .mr(2)
10677 .nr(16)
10678 .kr(4)
10679 .sr(1)
10680 .m(2)
10681 .n(16)
10682 .k(k)
10683 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080010684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010685 }
10686 }
10687
10688 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_gt_16_subtile) {
10689 TEST_REQUIRES_ARM_NEON;
10690 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010691 for (uint32_t n = 1; n <= 16; n++) {
10692 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010693 GemmMicrokernelTester()
10694 .mr(2)
10695 .nr(16)
10696 .kr(4)
10697 .sr(1)
10698 .m(m)
10699 .n(n)
10700 .k(k)
10701 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010703 }
10704 }
10705 }
10706 }
10707
10708 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16) {
10709 TEST_REQUIRES_ARM_NEON;
10710 for (size_t k = 32; k <= 160; k += 16) {
10711 GemmMicrokernelTester()
10712 .mr(2)
10713 .nr(16)
10714 .kr(4)
10715 .sr(1)
10716 .m(2)
10717 .n(16)
10718 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010720 }
10721 }
10722
10723 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16_strided_a) {
10724 TEST_REQUIRES_ARM_NEON;
10725 for (size_t k = 32; k <= 160; k += 16) {
10726 GemmMicrokernelTester()
10727 .mr(2)
10728 .nr(16)
10729 .kr(4)
10730 .sr(1)
10731 .m(2)
10732 .n(16)
10733 .k(k)
10734 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080010735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010736 }
10737 }
10738
10739 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, k_div_16_subtile) {
10740 TEST_REQUIRES_ARM_NEON;
10741 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010742 for (uint32_t n = 1; n <= 16; n++) {
10743 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010744 GemmMicrokernelTester()
10745 .mr(2)
10746 .nr(16)
10747 .kr(4)
10748 .sr(1)
10749 .m(m)
10750 .n(n)
10751 .k(k)
10752 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010753 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010754 }
10755 }
10756 }
10757 }
10758
10759 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16) {
10760 TEST_REQUIRES_ARM_NEON;
10761 for (uint32_t n = 17; n < 32; n++) {
10762 for (size_t k = 1; k <= 80; k += 17) {
10763 GemmMicrokernelTester()
10764 .mr(2)
10765 .nr(16)
10766 .kr(4)
10767 .sr(1)
10768 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010769 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010770 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010771 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010772 }
10773 }
10774 }
10775
10776 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_strided_cn) {
10777 TEST_REQUIRES_ARM_NEON;
10778 for (uint32_t n = 17; n < 32; n++) {
10779 for (size_t k = 1; k <= 80; k += 17) {
10780 GemmMicrokernelTester()
10781 .mr(2)
10782 .nr(16)
10783 .kr(4)
10784 .sr(1)
10785 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010786 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010787 .k(k)
10788 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010790 }
10791 }
10792 }
10793
10794 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_strided_a) {
10795 TEST_REQUIRES_ARM_NEON;
10796 for (uint32_t n = 17; n < 32; n++) {
10797 for (size_t k = 1; k <= 80; k += 17) {
10798 GemmMicrokernelTester()
10799 .mr(2)
10800 .nr(16)
10801 .kr(4)
10802 .sr(1)
10803 .m(2)
10804 .n(n)
10805 .k(k)
10806 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010807 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010808 }
10809 }
10810 }
10811
10812 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_gt_16_subtile) {
10813 TEST_REQUIRES_ARM_NEON;
10814 for (uint32_t n = 17; n < 32; n++) {
10815 for (size_t k = 1; k <= 80; k += 17) {
10816 for (uint32_t m = 1; m <= 2; m++) {
10817 GemmMicrokernelTester()
10818 .mr(2)
10819 .nr(16)
10820 .kr(4)
10821 .sr(1)
10822 .m(m)
10823 .n(n)
10824 .k(k)
10825 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010826 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010827 }
10828 }
10829 }
10830 }
10831
10832 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16) {
10833 TEST_REQUIRES_ARM_NEON;
10834 for (uint32_t n = 32; n <= 48; n += 16) {
10835 for (size_t k = 1; k <= 80; k += 17) {
10836 GemmMicrokernelTester()
10837 .mr(2)
10838 .nr(16)
10839 .kr(4)
10840 .sr(1)
10841 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080010842 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010843 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080010844 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010845 }
10846 }
10847 }
10848
10849 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_strided_cn) {
10850 TEST_REQUIRES_ARM_NEON;
10851 for (uint32_t n = 32; n <= 48; n += 16) {
10852 for (size_t k = 1; k <= 80; k += 17) {
10853 GemmMicrokernelTester()
10854 .mr(2)
10855 .nr(16)
10856 .kr(4)
10857 .sr(1)
10858 .m(2)
10859 .n(n)
10860 .k(k)
10861 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010862 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010863 }
10864 }
10865 }
10866
10867 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_strided_a) {
10868 TEST_REQUIRES_ARM_NEON;
10869 for (uint32_t n = 32; n <= 48; n += 16) {
10870 for (size_t k = 1; k <= 80; k += 17) {
10871 GemmMicrokernelTester()
10872 .mr(2)
10873 .nr(16)
10874 .kr(4)
10875 .sr(1)
10876 .m(2)
10877 .n(n)
10878 .k(k)
10879 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080010880 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010881 }
10882 }
10883 }
10884
10885 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, n_div_16_subtile) {
10886 TEST_REQUIRES_ARM_NEON;
10887 for (uint32_t n = 32; n <= 48; n += 16) {
10888 for (size_t k = 1; k <= 80; k += 17) {
10889 for (uint32_t m = 1; m <= 2; m++) {
10890 GemmMicrokernelTester()
10891 .mr(2)
10892 .nr(16)
10893 .kr(4)
10894 .sr(1)
10895 .m(m)
10896 .n(n)
10897 .k(k)
10898 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010899 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010900 }
10901 }
10902 }
10903 }
10904
10905 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cm_subtile) {
10906 TEST_REQUIRES_ARM_NEON;
10907 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080010908 for (uint32_t n = 1; n <= 16; n++) {
10909 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010910 GemmMicrokernelTester()
10911 .mr(2)
10912 .nr(16)
10913 .kr(4)
10914 .sr(1)
10915 .m(m)
10916 .n(n)
10917 .k(k)
10918 .cm_stride(19)
10919 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080010920 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010921 }
10922 }
10923 }
10924 }
10925
10926 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, qmin) {
10927 TEST_REQUIRES_ARM_NEON;
10928 GemmMicrokernelTester()
10929 .mr(2)
10930 .nr(16)
10931 .kr(4)
10932 .sr(1)
10933 .m(2)
10934 .n(16)
10935 .k(16)
10936 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010938 }
10939
10940 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, qmax) {
10941 TEST_REQUIRES_ARM_NEON;
10942 GemmMicrokernelTester()
10943 .mr(2)
10944 .nr(16)
10945 .kr(4)
10946 .sr(1)
10947 .m(2)
10948 .n(16)
10949 .k(16)
10950 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080010951 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010952 }
10953
10954 TEST(QS8_GEMM_MINMAX_RNDNU_2X16C4__NEON_MLAL_LD2R, strided_cm) {
10955 TEST_REQUIRES_ARM_NEON;
10956 GemmMicrokernelTester()
10957 .mr(2)
10958 .nr(16)
10959 .kr(4)
10960 .sr(1)
10961 .m(2)
10962 .n(16)
10963 .k(16)
10964 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080010965 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16c4__neon_mlal_ld2r, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010966 }
10967#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
10968
10969
10970#if XNN_ARCH_ARM || XNN_ARCH_ARM64
10971 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8) {
10972 TEST_REQUIRES_ARM_NEON;
10973 GemmMicrokernelTester()
10974 .mr(4)
10975 .nr(8)
10976 .kr(2)
10977 .sr(1)
10978 .m(4)
10979 .n(8)
10980 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080010981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010982 }
10983
10984 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cn) {
10985 TEST_REQUIRES_ARM_NEON;
10986 GemmMicrokernelTester()
10987 .mr(4)
10988 .nr(8)
10989 .kr(2)
10990 .sr(1)
10991 .m(4)
10992 .n(8)
10993 .k(8)
10994 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080010995 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080010996 }
10997
10998 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_strided_a) {
10999 TEST_REQUIRES_ARM_NEON;
11000 GemmMicrokernelTester()
11001 .mr(4)
11002 .nr(8)
11003 .kr(2)
11004 .sr(1)
11005 .m(4)
11006 .n(8)
11007 .k(8)
11008 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011009 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011010 }
11011
11012 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile) {
11013 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011014 for (uint32_t n = 1; n <= 8; n++) {
11015 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011016 GemmMicrokernelTester()
11017 .mr(4)
11018 .nr(8)
11019 .kr(2)
11020 .sr(1)
11021 .m(m)
11022 .n(n)
11023 .k(8)
11024 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011026 }
11027 }
11028 }
11029
11030 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
11031 TEST_REQUIRES_ARM_NEON;
11032 for (uint32_t m = 1; m <= 4; m++) {
11033 GemmMicrokernelTester()
11034 .mr(4)
11035 .nr(8)
11036 .kr(2)
11037 .sr(1)
11038 .m(m)
11039 .n(8)
11040 .k(8)
11041 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011042 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011043 }
11044 }
11045
11046 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
11047 TEST_REQUIRES_ARM_NEON;
11048 for (uint32_t n = 1; n <= 8; n++) {
11049 GemmMicrokernelTester()
11050 .mr(4)
11051 .nr(8)
11052 .kr(2)
11053 .sr(1)
11054 .m(4)
11055 .n(n)
11056 .k(8)
11057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011059 }
11060 }
11061
11062 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8) {
11063 TEST_REQUIRES_ARM_NEON;
11064 for (size_t k = 1; k < 8; k++) {
11065 GemmMicrokernelTester()
11066 .mr(4)
11067 .nr(8)
11068 .kr(2)
11069 .sr(1)
11070 .m(4)
11071 .n(8)
11072 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011073 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011074 }
11075 }
11076
11077 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8_strided_a) {
11078 TEST_REQUIRES_ARM_NEON;
11079 for (size_t k = 1; k < 8; k++) {
11080 GemmMicrokernelTester()
11081 .mr(4)
11082 .nr(8)
11083 .kr(2)
11084 .sr(1)
11085 .m(4)
11086 .n(8)
11087 .k(k)
11088 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011090 }
11091 }
11092
11093 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_lt_8_subtile) {
11094 TEST_REQUIRES_ARM_NEON;
11095 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011096 for (uint32_t n = 1; n <= 8; n++) {
11097 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011098 GemmMicrokernelTester()
11099 .mr(4)
11100 .nr(8)
11101 .kr(2)
11102 .sr(1)
11103 .m(m)
11104 .n(n)
11105 .k(k)
11106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011108 }
11109 }
11110 }
11111 }
11112
11113 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8) {
11114 TEST_REQUIRES_ARM_NEON;
11115 for (size_t k = 9; k < 16; k++) {
11116 GemmMicrokernelTester()
11117 .mr(4)
11118 .nr(8)
11119 .kr(2)
11120 .sr(1)
11121 .m(4)
11122 .n(8)
11123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011125 }
11126 }
11127
11128 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8_strided_a) {
11129 TEST_REQUIRES_ARM_NEON;
11130 for (size_t k = 9; k < 16; k++) {
11131 GemmMicrokernelTester()
11132 .mr(4)
11133 .nr(8)
11134 .kr(2)
11135 .sr(1)
11136 .m(4)
11137 .n(8)
11138 .k(k)
11139 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011141 }
11142 }
11143
11144 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_gt_8_subtile) {
11145 TEST_REQUIRES_ARM_NEON;
11146 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011147 for (uint32_t n = 1; n <= 8; n++) {
11148 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011149 GemmMicrokernelTester()
11150 .mr(4)
11151 .nr(8)
11152 .kr(2)
11153 .sr(1)
11154 .m(m)
11155 .n(n)
11156 .k(k)
11157 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011159 }
11160 }
11161 }
11162 }
11163
11164 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8) {
11165 TEST_REQUIRES_ARM_NEON;
11166 for (size_t k = 16; k <= 80; k += 8) {
11167 GemmMicrokernelTester()
11168 .mr(4)
11169 .nr(8)
11170 .kr(2)
11171 .sr(1)
11172 .m(4)
11173 .n(8)
11174 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011176 }
11177 }
11178
11179 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8_strided_a) {
11180 TEST_REQUIRES_ARM_NEON;
11181 for (size_t k = 16; k <= 80; k += 8) {
11182 GemmMicrokernelTester()
11183 .mr(4)
11184 .nr(8)
11185 .kr(2)
11186 .sr(1)
11187 .m(4)
11188 .n(8)
11189 .k(k)
11190 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011192 }
11193 }
11194
11195 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, k_div_8_subtile) {
11196 TEST_REQUIRES_ARM_NEON;
11197 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011198 for (uint32_t n = 1; n <= 8; n++) {
11199 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011200 GemmMicrokernelTester()
11201 .mr(4)
11202 .nr(8)
11203 .kr(2)
11204 .sr(1)
11205 .m(m)
11206 .n(n)
11207 .k(k)
11208 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011209 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011210 }
11211 }
11212 }
11213 }
11214
11215 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8) {
11216 TEST_REQUIRES_ARM_NEON;
11217 for (uint32_t n = 9; n < 16; n++) {
11218 for (size_t k = 1; k <= 40; k += 9) {
11219 GemmMicrokernelTester()
11220 .mr(4)
11221 .nr(8)
11222 .kr(2)
11223 .sr(1)
11224 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011225 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011226 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011227 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011228 }
11229 }
11230 }
11231
11232 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_strided_cn) {
11233 TEST_REQUIRES_ARM_NEON;
11234 for (uint32_t n = 9; n < 16; n++) {
11235 for (size_t k = 1; k <= 40; k += 9) {
11236 GemmMicrokernelTester()
11237 .mr(4)
11238 .nr(8)
11239 .kr(2)
11240 .sr(1)
11241 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011242 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011243 .k(k)
11244 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011246 }
11247 }
11248 }
11249
11250 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_strided_a) {
11251 TEST_REQUIRES_ARM_NEON;
11252 for (uint32_t n = 9; n < 16; n++) {
11253 for (size_t k = 1; k <= 40; k += 9) {
11254 GemmMicrokernelTester()
11255 .mr(4)
11256 .nr(8)
11257 .kr(2)
11258 .sr(1)
11259 .m(4)
11260 .n(n)
11261 .k(k)
11262 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011263 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011264 }
11265 }
11266 }
11267
11268 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_gt_8_subtile) {
11269 TEST_REQUIRES_ARM_NEON;
11270 for (uint32_t n = 9; n < 16; n++) {
11271 for (size_t k = 1; k <= 40; k += 9) {
11272 for (uint32_t m = 1; m <= 4; m++) {
11273 GemmMicrokernelTester()
11274 .mr(4)
11275 .nr(8)
11276 .kr(2)
11277 .sr(1)
11278 .m(m)
11279 .n(n)
11280 .k(k)
11281 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011282 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011283 }
11284 }
11285 }
11286 }
11287
11288 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8) {
11289 TEST_REQUIRES_ARM_NEON;
11290 for (uint32_t n = 16; n <= 24; n += 8) {
11291 for (size_t k = 1; k <= 40; k += 9) {
11292 GemmMicrokernelTester()
11293 .mr(4)
11294 .nr(8)
11295 .kr(2)
11296 .sr(1)
11297 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011298 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011299 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011300 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011301 }
11302 }
11303 }
11304
11305 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_strided_cn) {
11306 TEST_REQUIRES_ARM_NEON;
11307 for (uint32_t n = 16; n <= 24; n += 8) {
11308 for (size_t k = 1; k <= 40; k += 9) {
11309 GemmMicrokernelTester()
11310 .mr(4)
11311 .nr(8)
11312 .kr(2)
11313 .sr(1)
11314 .m(4)
11315 .n(n)
11316 .k(k)
11317 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011318 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011319 }
11320 }
11321 }
11322
11323 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_strided_a) {
11324 TEST_REQUIRES_ARM_NEON;
11325 for (uint32_t n = 16; n <= 24; n += 8) {
11326 for (size_t k = 1; k <= 40; k += 9) {
11327 GemmMicrokernelTester()
11328 .mr(4)
11329 .nr(8)
11330 .kr(2)
11331 .sr(1)
11332 .m(4)
11333 .n(n)
11334 .k(k)
11335 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011336 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011337 }
11338 }
11339 }
11340
11341 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, n_div_8_subtile) {
11342 TEST_REQUIRES_ARM_NEON;
11343 for (uint32_t n = 16; n <= 24; n += 8) {
11344 for (size_t k = 1; k <= 40; k += 9) {
11345 for (uint32_t m = 1; m <= 4; m++) {
11346 GemmMicrokernelTester()
11347 .mr(4)
11348 .nr(8)
11349 .kr(2)
11350 .sr(1)
11351 .m(m)
11352 .n(n)
11353 .k(k)
11354 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011355 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011356 }
11357 }
11358 }
11359 }
11360
11361 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm_subtile) {
11362 TEST_REQUIRES_ARM_NEON;
11363 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011364 for (uint32_t n = 1; n <= 8; n++) {
11365 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011366 GemmMicrokernelTester()
11367 .mr(4)
11368 .nr(8)
11369 .kr(2)
11370 .sr(1)
11371 .m(m)
11372 .n(n)
11373 .k(k)
11374 .cm_stride(11)
11375 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011376 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011377 }
11378 }
11379 }
11380 }
11381
11382 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmin) {
11383 TEST_REQUIRES_ARM_NEON;
11384 GemmMicrokernelTester()
11385 .mr(4)
11386 .nr(8)
11387 .kr(2)
11388 .sr(1)
11389 .m(4)
11390 .n(8)
11391 .k(8)
11392 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011394 }
11395
11396 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, qmax) {
11397 TEST_REQUIRES_ARM_NEON;
11398 GemmMicrokernelTester()
11399 .mr(4)
11400 .nr(8)
11401 .kr(2)
11402 .sr(1)
11403 .m(4)
11404 .n(8)
11405 .k(8)
11406 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011407 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011408 }
11409
11410 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MULL_DUP, strided_cm) {
11411 TEST_REQUIRES_ARM_NEON;
11412 GemmMicrokernelTester()
11413 .mr(4)
11414 .nr(8)
11415 .kr(2)
11416 .sr(1)
11417 .m(4)
11418 .n(8)
11419 .k(8)
11420 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011421 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011422 }
11423#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11424
11425
11426#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11427 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_eq_8) {
11428 TEST_REQUIRES_ARM_NEON;
11429 GemmMicrokernelTester()
11430 .mr(3)
11431 .nr(16)
11432 .kr(2)
11433 .sr(1)
11434 .m(3)
11435 .n(16)
11436 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011438 }
11439
11440 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, strided_cn) {
11441 TEST_REQUIRES_ARM_NEON;
11442 GemmMicrokernelTester()
11443 .mr(3)
11444 .nr(16)
11445 .kr(2)
11446 .sr(1)
11447 .m(3)
11448 .n(16)
11449 .k(8)
11450 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011451 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011452 }
11453
11454 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_eq_8_strided_a) {
11455 TEST_REQUIRES_ARM_NEON;
11456 GemmMicrokernelTester()
11457 .mr(3)
11458 .nr(16)
11459 .kr(2)
11460 .sr(1)
11461 .m(3)
11462 .n(16)
11463 .k(8)
11464 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011465 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011466 }
11467
11468 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_eq_8_subtile) {
11469 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011470 for (uint32_t n = 1; n <= 16; n++) {
11471 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011472 GemmMicrokernelTester()
11473 .mr(3)
11474 .nr(16)
11475 .kr(2)
11476 .sr(1)
11477 .m(m)
11478 .n(n)
11479 .k(8)
11480 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011482 }
11483 }
11484 }
11485
11486 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
11487 TEST_REQUIRES_ARM_NEON;
11488 for (uint32_t m = 1; m <= 3; m++) {
11489 GemmMicrokernelTester()
11490 .mr(3)
11491 .nr(16)
11492 .kr(2)
11493 .sr(1)
11494 .m(m)
11495 .n(16)
11496 .k(8)
11497 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011498 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011499 }
11500 }
11501
11502 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
11503 TEST_REQUIRES_ARM_NEON;
11504 for (uint32_t n = 1; n <= 16; n++) {
11505 GemmMicrokernelTester()
11506 .mr(3)
11507 .nr(16)
11508 .kr(2)
11509 .sr(1)
11510 .m(3)
11511 .n(n)
11512 .k(8)
11513 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011514 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011515 }
11516 }
11517
11518 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_lt_8) {
11519 TEST_REQUIRES_ARM_NEON;
11520 for (size_t k = 1; k < 8; k++) {
11521 GemmMicrokernelTester()
11522 .mr(3)
11523 .nr(16)
11524 .kr(2)
11525 .sr(1)
11526 .m(3)
11527 .n(16)
11528 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011529 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011530 }
11531 }
11532
11533 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_lt_8_strided_a) {
11534 TEST_REQUIRES_ARM_NEON;
11535 for (size_t k = 1; k < 8; k++) {
11536 GemmMicrokernelTester()
11537 .mr(3)
11538 .nr(16)
11539 .kr(2)
11540 .sr(1)
11541 .m(3)
11542 .n(16)
11543 .k(k)
11544 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011546 }
11547 }
11548
11549 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
11550 TEST_REQUIRES_ARM_NEON;
11551 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011552 for (uint32_t n = 1; n <= 16; n++) {
11553 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011554 GemmMicrokernelTester()
11555 .mr(3)
11556 .nr(16)
11557 .kr(2)
11558 .sr(1)
11559 .m(m)
11560 .n(n)
11561 .k(k)
11562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011564 }
11565 }
11566 }
11567 }
11568
11569 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_gt_8) {
11570 TEST_REQUIRES_ARM_NEON;
11571 for (size_t k = 9; k < 16; k++) {
11572 GemmMicrokernelTester()
11573 .mr(3)
11574 .nr(16)
11575 .kr(2)
11576 .sr(1)
11577 .m(3)
11578 .n(16)
11579 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011581 }
11582 }
11583
11584 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_gt_8_strided_a) {
11585 TEST_REQUIRES_ARM_NEON;
11586 for (size_t k = 9; k < 16; k++) {
11587 GemmMicrokernelTester()
11588 .mr(3)
11589 .nr(16)
11590 .kr(2)
11591 .sr(1)
11592 .m(3)
11593 .n(16)
11594 .k(k)
11595 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011597 }
11598 }
11599
11600 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
11601 TEST_REQUIRES_ARM_NEON;
11602 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011603 for (uint32_t n = 1; n <= 16; n++) {
11604 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011605 GemmMicrokernelTester()
11606 .mr(3)
11607 .nr(16)
11608 .kr(2)
11609 .sr(1)
11610 .m(m)
11611 .n(n)
11612 .k(k)
11613 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011615 }
11616 }
11617 }
11618 }
11619
11620 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_div_8) {
11621 TEST_REQUIRES_ARM_NEON;
11622 for (size_t k = 16; k <= 80; k += 8) {
11623 GemmMicrokernelTester()
11624 .mr(3)
11625 .nr(16)
11626 .kr(2)
11627 .sr(1)
11628 .m(3)
11629 .n(16)
11630 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011632 }
11633 }
11634
11635 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_div_8_strided_a) {
11636 TEST_REQUIRES_ARM_NEON;
11637 for (size_t k = 16; k <= 80; k += 8) {
11638 GemmMicrokernelTester()
11639 .mr(3)
11640 .nr(16)
11641 .kr(2)
11642 .sr(1)
11643 .m(3)
11644 .n(16)
11645 .k(k)
11646 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080011647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011648 }
11649 }
11650
11651 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, k_div_8_subtile) {
11652 TEST_REQUIRES_ARM_NEON;
11653 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011654 for (uint32_t n = 1; n <= 16; n++) {
11655 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011656 GemmMicrokernelTester()
11657 .mr(3)
11658 .nr(16)
11659 .kr(2)
11660 .sr(1)
11661 .m(m)
11662 .n(n)
11663 .k(k)
11664 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011665 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011666 }
11667 }
11668 }
11669 }
11670
11671 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_gt_16) {
11672 TEST_REQUIRES_ARM_NEON;
11673 for (uint32_t n = 17; n < 32; n++) {
11674 for (size_t k = 1; k <= 40; k += 9) {
11675 GemmMicrokernelTester()
11676 .mr(3)
11677 .nr(16)
11678 .kr(2)
11679 .sr(1)
11680 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011681 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011682 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011683 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011684 }
11685 }
11686 }
11687
11688 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
11689 TEST_REQUIRES_ARM_NEON;
11690 for (uint32_t n = 17; n < 32; n++) {
11691 for (size_t k = 1; k <= 40; k += 9) {
11692 GemmMicrokernelTester()
11693 .mr(3)
11694 .nr(16)
11695 .kr(2)
11696 .sr(1)
11697 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011698 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011699 .k(k)
11700 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011702 }
11703 }
11704 }
11705
11706 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_gt_16_strided_a) {
11707 TEST_REQUIRES_ARM_NEON;
11708 for (uint32_t n = 17; n < 32; n++) {
11709 for (size_t k = 1; k <= 40; k += 9) {
11710 GemmMicrokernelTester()
11711 .mr(3)
11712 .nr(16)
11713 .kr(2)
11714 .sr(1)
11715 .m(3)
11716 .n(n)
11717 .k(k)
11718 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011719 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011720 }
11721 }
11722 }
11723
11724 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
11725 TEST_REQUIRES_ARM_NEON;
11726 for (uint32_t n = 17; n < 32; n++) {
11727 for (size_t k = 1; k <= 40; k += 9) {
11728 for (uint32_t m = 1; m <= 3; m++) {
11729 GemmMicrokernelTester()
11730 .mr(3)
11731 .nr(16)
11732 .kr(2)
11733 .sr(1)
11734 .m(m)
11735 .n(n)
11736 .k(k)
11737 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011738 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011739 }
11740 }
11741 }
11742 }
11743
11744 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_div_16) {
11745 TEST_REQUIRES_ARM_NEON;
11746 for (uint32_t n = 32; n <= 48; n += 16) {
11747 for (size_t k = 1; k <= 40; k += 9) {
11748 GemmMicrokernelTester()
11749 .mr(3)
11750 .nr(16)
11751 .kr(2)
11752 .sr(1)
11753 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080011754 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011755 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011756 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011757 }
11758 }
11759 }
11760
11761 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
11762 TEST_REQUIRES_ARM_NEON;
11763 for (uint32_t n = 32; n <= 48; n += 16) {
11764 for (size_t k = 1; k <= 40; k += 9) {
11765 GemmMicrokernelTester()
11766 .mr(3)
11767 .nr(16)
11768 .kr(2)
11769 .sr(1)
11770 .m(3)
11771 .n(n)
11772 .k(k)
11773 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011774 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011775 }
11776 }
11777 }
11778
11779 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_div_16_strided_a) {
11780 TEST_REQUIRES_ARM_NEON;
11781 for (uint32_t n = 32; n <= 48; n += 16) {
11782 for (size_t k = 1; k <= 40; k += 9) {
11783 GemmMicrokernelTester()
11784 .mr(3)
11785 .nr(16)
11786 .kr(2)
11787 .sr(1)
11788 .m(3)
11789 .n(n)
11790 .k(k)
11791 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080011792 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011793 }
11794 }
11795 }
11796
11797 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, n_div_16_subtile) {
11798 TEST_REQUIRES_ARM_NEON;
11799 for (uint32_t n = 32; n <= 48; n += 16) {
11800 for (size_t k = 1; k <= 40; k += 9) {
11801 for (uint32_t m = 1; m <= 3; m++) {
11802 GemmMicrokernelTester()
11803 .mr(3)
11804 .nr(16)
11805 .kr(2)
11806 .sr(1)
11807 .m(m)
11808 .n(n)
11809 .k(k)
11810 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011811 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011812 }
11813 }
11814 }
11815 }
11816
11817 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, strided_cm_subtile) {
11818 TEST_REQUIRES_ARM_NEON;
11819 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080011820 for (uint32_t n = 1; n <= 16; n++) {
11821 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011822 GemmMicrokernelTester()
11823 .mr(3)
11824 .nr(16)
11825 .kr(2)
11826 .sr(1)
11827 .m(m)
11828 .n(n)
11829 .k(k)
11830 .cm_stride(19)
11831 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011832 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011833 }
11834 }
11835 }
11836 }
11837
11838 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, qmin) {
11839 TEST_REQUIRES_ARM_NEON;
11840 GemmMicrokernelTester()
11841 .mr(3)
11842 .nr(16)
11843 .kr(2)
11844 .sr(1)
11845 .m(3)
11846 .n(16)
11847 .k(8)
11848 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011850 }
11851
11852 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, qmax) {
11853 TEST_REQUIRES_ARM_NEON;
11854 GemmMicrokernelTester()
11855 .mr(3)
11856 .nr(16)
11857 .kr(2)
11858 .sr(1)
11859 .m(3)
11860 .n(16)
11861 .k(8)
11862 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080011863 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011864 }
11865
11866 TEST(QS8_GEMM_MINMAX_RNDNU_3X16C2__NEON_MULL_DUP, strided_cm) {
11867 TEST_REQUIRES_ARM_NEON;
11868 GemmMicrokernelTester()
11869 .mr(3)
11870 .nr(16)
11871 .kr(2)
11872 .sr(1)
11873 .m(3)
11874 .n(16)
11875 .k(8)
11876 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011877 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011878 }
11879#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
11880
11881
11882#if XNN_ARCH_ARM || XNN_ARCH_ARM64
11883 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8) {
11884 TEST_REQUIRES_ARM_NEON;
11885 GemmMicrokernelTester()
11886 .mr(4)
11887 .nr(16)
11888 .kr(2)
11889 .sr(1)
11890 .m(4)
11891 .n(16)
11892 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080011893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011894 }
11895
11896 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cn) {
11897 TEST_REQUIRES_ARM_NEON;
11898 GemmMicrokernelTester()
11899 .mr(4)
11900 .nr(16)
11901 .kr(2)
11902 .sr(1)
11903 .m(4)
11904 .n(16)
11905 .k(8)
11906 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080011907 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011908 }
11909
11910 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_strided_a) {
11911 TEST_REQUIRES_ARM_NEON;
11912 GemmMicrokernelTester()
11913 .mr(4)
11914 .nr(16)
11915 .kr(2)
11916 .sr(1)
11917 .m(4)
11918 .n(16)
11919 .k(8)
11920 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080011921 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011922 }
11923
11924 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile) {
11925 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080011926 for (uint32_t n = 1; n <= 16; n++) {
11927 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011928 GemmMicrokernelTester()
11929 .mr(4)
11930 .nr(16)
11931 .kr(2)
11932 .sr(1)
11933 .m(m)
11934 .n(n)
11935 .k(8)
11936 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011938 }
11939 }
11940 }
11941
11942 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile_m) {
11943 TEST_REQUIRES_ARM_NEON;
11944 for (uint32_t m = 1; m <= 4; m++) {
11945 GemmMicrokernelTester()
11946 .mr(4)
11947 .nr(16)
11948 .kr(2)
11949 .sr(1)
11950 .m(m)
11951 .n(16)
11952 .k(8)
11953 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011954 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011955 }
11956 }
11957
11958 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_eq_8_subtile_n) {
11959 TEST_REQUIRES_ARM_NEON;
11960 for (uint32_t n = 1; n <= 16; n++) {
11961 GemmMicrokernelTester()
11962 .mr(4)
11963 .nr(16)
11964 .kr(2)
11965 .sr(1)
11966 .m(4)
11967 .n(n)
11968 .k(8)
11969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080011970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011971 }
11972 }
11973
11974 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8) {
11975 TEST_REQUIRES_ARM_NEON;
11976 for (size_t k = 1; k < 8; k++) {
11977 GemmMicrokernelTester()
11978 .mr(4)
11979 .nr(16)
11980 .kr(2)
11981 .sr(1)
11982 .m(4)
11983 .n(16)
11984 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080011985 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080011986 }
11987 }
11988
11989 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8_strided_a) {
11990 TEST_REQUIRES_ARM_NEON;
11991 for (size_t k = 1; k < 8; k++) {
11992 GemmMicrokernelTester()
11993 .mr(4)
11994 .nr(16)
11995 .kr(2)
11996 .sr(1)
11997 .m(4)
11998 .n(16)
11999 .k(k)
12000 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012002 }
12003 }
12004
12005 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_lt_8_subtile) {
12006 TEST_REQUIRES_ARM_NEON;
12007 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012008 for (uint32_t n = 1; n <= 16; n++) {
12009 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012010 GemmMicrokernelTester()
12011 .mr(4)
12012 .nr(16)
12013 .kr(2)
12014 .sr(1)
12015 .m(m)
12016 .n(n)
12017 .k(k)
12018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012020 }
12021 }
12022 }
12023 }
12024
12025 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8) {
12026 TEST_REQUIRES_ARM_NEON;
12027 for (size_t k = 9; k < 16; k++) {
12028 GemmMicrokernelTester()
12029 .mr(4)
12030 .nr(16)
12031 .kr(2)
12032 .sr(1)
12033 .m(4)
12034 .n(16)
12035 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012037 }
12038 }
12039
12040 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8_strided_a) {
12041 TEST_REQUIRES_ARM_NEON;
12042 for (size_t k = 9; k < 16; k++) {
12043 GemmMicrokernelTester()
12044 .mr(4)
12045 .nr(16)
12046 .kr(2)
12047 .sr(1)
12048 .m(4)
12049 .n(16)
12050 .k(k)
12051 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012053 }
12054 }
12055
12056 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_gt_8_subtile) {
12057 TEST_REQUIRES_ARM_NEON;
12058 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012059 for (uint32_t n = 1; n <= 16; n++) {
12060 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012061 GemmMicrokernelTester()
12062 .mr(4)
12063 .nr(16)
12064 .kr(2)
12065 .sr(1)
12066 .m(m)
12067 .n(n)
12068 .k(k)
12069 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012071 }
12072 }
12073 }
12074 }
12075
12076 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8) {
12077 TEST_REQUIRES_ARM_NEON;
12078 for (size_t k = 16; k <= 80; k += 8) {
12079 GemmMicrokernelTester()
12080 .mr(4)
12081 .nr(16)
12082 .kr(2)
12083 .sr(1)
12084 .m(4)
12085 .n(16)
12086 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012088 }
12089 }
12090
12091 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8_strided_a) {
12092 TEST_REQUIRES_ARM_NEON;
12093 for (size_t k = 16; k <= 80; k += 8) {
12094 GemmMicrokernelTester()
12095 .mr(4)
12096 .nr(16)
12097 .kr(2)
12098 .sr(1)
12099 .m(4)
12100 .n(16)
12101 .k(k)
12102 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012104 }
12105 }
12106
12107 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, k_div_8_subtile) {
12108 TEST_REQUIRES_ARM_NEON;
12109 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012110 for (uint32_t n = 1; n <= 16; n++) {
12111 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012112 GemmMicrokernelTester()
12113 .mr(4)
12114 .nr(16)
12115 .kr(2)
12116 .sr(1)
12117 .m(m)
12118 .n(n)
12119 .k(k)
12120 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012121 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012122 }
12123 }
12124 }
12125 }
12126
12127 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16) {
12128 TEST_REQUIRES_ARM_NEON;
12129 for (uint32_t n = 17; n < 32; n++) {
12130 for (size_t k = 1; k <= 40; k += 9) {
12131 GemmMicrokernelTester()
12132 .mr(4)
12133 .nr(16)
12134 .kr(2)
12135 .sr(1)
12136 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012137 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012138 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012139 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012140 }
12141 }
12142 }
12143
12144 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_strided_cn) {
12145 TEST_REQUIRES_ARM_NEON;
12146 for (uint32_t n = 17; n < 32; n++) {
12147 for (size_t k = 1; k <= 40; k += 9) {
12148 GemmMicrokernelTester()
12149 .mr(4)
12150 .nr(16)
12151 .kr(2)
12152 .sr(1)
12153 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012154 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012155 .k(k)
12156 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012158 }
12159 }
12160 }
12161
12162 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_strided_a) {
12163 TEST_REQUIRES_ARM_NEON;
12164 for (uint32_t n = 17; n < 32; n++) {
12165 for (size_t k = 1; k <= 40; k += 9) {
12166 GemmMicrokernelTester()
12167 .mr(4)
12168 .nr(16)
12169 .kr(2)
12170 .sr(1)
12171 .m(4)
12172 .n(n)
12173 .k(k)
12174 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012175 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012176 }
12177 }
12178 }
12179
12180 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_gt_16_subtile) {
12181 TEST_REQUIRES_ARM_NEON;
12182 for (uint32_t n = 17; n < 32; n++) {
12183 for (size_t k = 1; k <= 40; k += 9) {
12184 for (uint32_t m = 1; m <= 4; m++) {
12185 GemmMicrokernelTester()
12186 .mr(4)
12187 .nr(16)
12188 .kr(2)
12189 .sr(1)
12190 .m(m)
12191 .n(n)
12192 .k(k)
12193 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012194 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012195 }
12196 }
12197 }
12198 }
12199
12200 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16) {
12201 TEST_REQUIRES_ARM_NEON;
12202 for (uint32_t n = 32; n <= 48; n += 16) {
12203 for (size_t k = 1; k <= 40; k += 9) {
12204 GemmMicrokernelTester()
12205 .mr(4)
12206 .nr(16)
12207 .kr(2)
12208 .sr(1)
12209 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012210 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012211 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012212 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012213 }
12214 }
12215 }
12216
12217 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_strided_cn) {
12218 TEST_REQUIRES_ARM_NEON;
12219 for (uint32_t n = 32; n <= 48; n += 16) {
12220 for (size_t k = 1; k <= 40; k += 9) {
12221 GemmMicrokernelTester()
12222 .mr(4)
12223 .nr(16)
12224 .kr(2)
12225 .sr(1)
12226 .m(4)
12227 .n(n)
12228 .k(k)
12229 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012230 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012231 }
12232 }
12233 }
12234
12235 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_strided_a) {
12236 TEST_REQUIRES_ARM_NEON;
12237 for (uint32_t n = 32; n <= 48; n += 16) {
12238 for (size_t k = 1; k <= 40; k += 9) {
12239 GemmMicrokernelTester()
12240 .mr(4)
12241 .nr(16)
12242 .kr(2)
12243 .sr(1)
12244 .m(4)
12245 .n(n)
12246 .k(k)
12247 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080012248 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012249 }
12250 }
12251 }
12252
12253 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, n_div_16_subtile) {
12254 TEST_REQUIRES_ARM_NEON;
12255 for (uint32_t n = 32; n <= 48; n += 16) {
12256 for (size_t k = 1; k <= 40; k += 9) {
12257 for (uint32_t m = 1; m <= 4; m++) {
12258 GemmMicrokernelTester()
12259 .mr(4)
12260 .nr(16)
12261 .kr(2)
12262 .sr(1)
12263 .m(m)
12264 .n(n)
12265 .k(k)
12266 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012267 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012268 }
12269 }
12270 }
12271 }
12272
12273 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cm_subtile) {
12274 TEST_REQUIRES_ARM_NEON;
12275 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012276 for (uint32_t n = 1; n <= 16; n++) {
12277 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012278 GemmMicrokernelTester()
12279 .mr(4)
12280 .nr(16)
12281 .kr(2)
12282 .sr(1)
12283 .m(m)
12284 .n(n)
12285 .k(k)
12286 .cm_stride(19)
12287 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012288 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012289 }
12290 }
12291 }
12292 }
12293
12294 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, qmin) {
12295 TEST_REQUIRES_ARM_NEON;
12296 GemmMicrokernelTester()
12297 .mr(4)
12298 .nr(16)
12299 .kr(2)
12300 .sr(1)
12301 .m(4)
12302 .n(16)
12303 .k(8)
12304 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012306 }
12307
12308 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, qmax) {
12309 TEST_REQUIRES_ARM_NEON;
12310 GemmMicrokernelTester()
12311 .mr(4)
12312 .nr(16)
12313 .kr(2)
12314 .sr(1)
12315 .m(4)
12316 .n(16)
12317 .k(8)
12318 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012319 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012320 }
12321
12322 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C2__NEON_MULL_DUP, strided_cm) {
12323 TEST_REQUIRES_ARM_NEON;
12324 GemmMicrokernelTester()
12325 .mr(4)
12326 .nr(16)
12327 .kr(2)
12328 .sr(1)
12329 .m(4)
12330 .n(16)
12331 .k(8)
12332 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012333 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c2__neon_mull_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012334 }
12335#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12336
12337
12338#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12339 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16) {
12340 TEST_REQUIRES_ARM_NEON;
12341 GemmMicrokernelTester()
12342 .mr(2)
12343 .nr(8)
12344 .kr(2)
12345 .sr(1)
12346 .m(2)
12347 .n(8)
12348 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012350 }
12351
12352 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cn) {
12353 TEST_REQUIRES_ARM_NEON;
12354 GemmMicrokernelTester()
12355 .mr(2)
12356 .nr(8)
12357 .kr(2)
12358 .sr(1)
12359 .m(2)
12360 .n(8)
12361 .k(16)
12362 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012363 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012364 }
12365
12366 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
12367 TEST_REQUIRES_ARM_NEON;
12368 GemmMicrokernelTester()
12369 .mr(2)
12370 .nr(8)
12371 .kr(2)
12372 .sr(1)
12373 .m(2)
12374 .n(8)
12375 .k(16)
12376 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012377 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012378 }
12379
12380 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
12381 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012382 for (uint32_t n = 1; n <= 8; n++) {
12383 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012384 GemmMicrokernelTester()
12385 .mr(2)
12386 .nr(8)
12387 .kr(2)
12388 .sr(1)
12389 .m(m)
12390 .n(n)
12391 .k(16)
12392 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012394 }
12395 }
12396 }
12397
12398 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
12399 TEST_REQUIRES_ARM_NEON;
12400 for (uint32_t m = 1; m <= 2; m++) {
12401 GemmMicrokernelTester()
12402 .mr(2)
12403 .nr(8)
12404 .kr(2)
12405 .sr(1)
12406 .m(m)
12407 .n(8)
12408 .k(16)
12409 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012410 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012411 }
12412 }
12413
12414 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
12415 TEST_REQUIRES_ARM_NEON;
12416 for (uint32_t n = 1; n <= 8; n++) {
12417 GemmMicrokernelTester()
12418 .mr(2)
12419 .nr(8)
12420 .kr(2)
12421 .sr(1)
12422 .m(2)
12423 .n(n)
12424 .k(16)
12425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012427 }
12428 }
12429
12430 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16) {
12431 TEST_REQUIRES_ARM_NEON;
12432 for (size_t k = 1; k < 16; k++) {
12433 GemmMicrokernelTester()
12434 .mr(2)
12435 .nr(8)
12436 .kr(2)
12437 .sr(1)
12438 .m(2)
12439 .n(8)
12440 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012441 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012442 }
12443 }
12444
12445 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
12446 TEST_REQUIRES_ARM_NEON;
12447 for (size_t k = 1; k < 16; k++) {
12448 GemmMicrokernelTester()
12449 .mr(2)
12450 .nr(8)
12451 .kr(2)
12452 .sr(1)
12453 .m(2)
12454 .n(8)
12455 .k(k)
12456 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012458 }
12459 }
12460
12461 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
12462 TEST_REQUIRES_ARM_NEON;
12463 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012464 for (uint32_t n = 1; n <= 8; n++) {
12465 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012466 GemmMicrokernelTester()
12467 .mr(2)
12468 .nr(8)
12469 .kr(2)
12470 .sr(1)
12471 .m(m)
12472 .n(n)
12473 .k(k)
12474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012476 }
12477 }
12478 }
12479 }
12480
12481 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16) {
12482 TEST_REQUIRES_ARM_NEON;
12483 for (size_t k = 17; k < 32; k++) {
12484 GemmMicrokernelTester()
12485 .mr(2)
12486 .nr(8)
12487 .kr(2)
12488 .sr(1)
12489 .m(2)
12490 .n(8)
12491 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012493 }
12494 }
12495
12496 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
12497 TEST_REQUIRES_ARM_NEON;
12498 for (size_t k = 17; k < 32; k++) {
12499 GemmMicrokernelTester()
12500 .mr(2)
12501 .nr(8)
12502 .kr(2)
12503 .sr(1)
12504 .m(2)
12505 .n(8)
12506 .k(k)
12507 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080012508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012509 }
12510 }
12511
12512 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
12513 TEST_REQUIRES_ARM_NEON;
12514 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012515 for (uint32_t n = 1; n <= 8; n++) {
12516 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012517 GemmMicrokernelTester()
12518 .mr(2)
12519 .nr(8)
12520 .kr(2)
12521 .sr(1)
12522 .m(m)
12523 .n(n)
12524 .k(k)
12525 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012527 }
12528 }
12529 }
12530 }
12531
12532 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16) {
12533 TEST_REQUIRES_ARM_NEON;
12534 for (size_t k = 32; k <= 160; k += 16) {
12535 GemmMicrokernelTester()
12536 .mr(2)
12537 .nr(8)
12538 .kr(2)
12539 .sr(1)
12540 .m(2)
12541 .n(8)
12542 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012544 }
12545 }
12546
12547 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16_strided_a) {
12548 TEST_REQUIRES_ARM_NEON;
12549 for (size_t k = 32; k <= 160; k += 16) {
12550 GemmMicrokernelTester()
12551 .mr(2)
12552 .nr(8)
12553 .kr(2)
12554 .sr(1)
12555 .m(2)
12556 .n(8)
12557 .k(k)
12558 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080012559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012560 }
12561 }
12562
12563 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
12564 TEST_REQUIRES_ARM_NEON;
12565 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012566 for (uint32_t n = 1; n <= 8; n++) {
12567 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012568 GemmMicrokernelTester()
12569 .mr(2)
12570 .nr(8)
12571 .kr(2)
12572 .sr(1)
12573 .m(m)
12574 .n(n)
12575 .k(k)
12576 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012577 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012578 }
12579 }
12580 }
12581 }
12582
12583 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8) {
12584 TEST_REQUIRES_ARM_NEON;
12585 for (uint32_t n = 9; n < 16; n++) {
12586 for (size_t k = 1; k <= 80; k += 17) {
12587 GemmMicrokernelTester()
12588 .mr(2)
12589 .nr(8)
12590 .kr(2)
12591 .sr(1)
12592 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012593 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012594 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012595 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012596 }
12597 }
12598 }
12599
12600 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
12601 TEST_REQUIRES_ARM_NEON;
12602 for (uint32_t n = 9; n < 16; n++) {
12603 for (size_t k = 1; k <= 80; k += 17) {
12604 GemmMicrokernelTester()
12605 .mr(2)
12606 .nr(8)
12607 .kr(2)
12608 .sr(1)
12609 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012610 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012611 .k(k)
12612 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012614 }
12615 }
12616 }
12617
12618 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_strided_a) {
12619 TEST_REQUIRES_ARM_NEON;
12620 for (uint32_t n = 9; n < 16; n++) {
12621 for (size_t k = 1; k <= 80; k += 17) {
12622 GemmMicrokernelTester()
12623 .mr(2)
12624 .nr(8)
12625 .kr(2)
12626 .sr(1)
12627 .m(2)
12628 .n(n)
12629 .k(k)
12630 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012631 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012632 }
12633 }
12634 }
12635
12636 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
12637 TEST_REQUIRES_ARM_NEON;
12638 for (uint32_t n = 9; n < 16; n++) {
12639 for (size_t k = 1; k <= 80; k += 17) {
12640 for (uint32_t m = 1; m <= 2; m++) {
12641 GemmMicrokernelTester()
12642 .mr(2)
12643 .nr(8)
12644 .kr(2)
12645 .sr(1)
12646 .m(m)
12647 .n(n)
12648 .k(k)
12649 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012650 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012651 }
12652 }
12653 }
12654 }
12655
12656 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8) {
12657 TEST_REQUIRES_ARM_NEON;
12658 for (uint32_t n = 16; n <= 24; n += 8) {
12659 for (size_t k = 1; k <= 80; k += 17) {
12660 GemmMicrokernelTester()
12661 .mr(2)
12662 .nr(8)
12663 .kr(2)
12664 .sr(1)
12665 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080012666 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012667 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012668 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012669 }
12670 }
12671 }
12672
12673 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
12674 TEST_REQUIRES_ARM_NEON;
12675 for (uint32_t n = 16; n <= 24; n += 8) {
12676 for (size_t k = 1; k <= 80; k += 17) {
12677 GemmMicrokernelTester()
12678 .mr(2)
12679 .nr(8)
12680 .kr(2)
12681 .sr(1)
12682 .m(2)
12683 .n(n)
12684 .k(k)
12685 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012686 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012687 }
12688 }
12689 }
12690
12691 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_strided_a) {
12692 TEST_REQUIRES_ARM_NEON;
12693 for (uint32_t n = 16; n <= 24; n += 8) {
12694 for (size_t k = 1; k <= 80; k += 17) {
12695 GemmMicrokernelTester()
12696 .mr(2)
12697 .nr(8)
12698 .kr(2)
12699 .sr(1)
12700 .m(2)
12701 .n(n)
12702 .k(k)
12703 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080012704 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012705 }
12706 }
12707 }
12708
12709 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
12710 TEST_REQUIRES_ARM_NEON;
12711 for (uint32_t n = 16; n <= 24; n += 8) {
12712 for (size_t k = 1; k <= 80; k += 17) {
12713 for (uint32_t m = 1; m <= 2; m++) {
12714 GemmMicrokernelTester()
12715 .mr(2)
12716 .nr(8)
12717 .kr(2)
12718 .sr(1)
12719 .m(m)
12720 .n(n)
12721 .k(k)
12722 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012723 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012724 }
12725 }
12726 }
12727 }
12728
12729 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
12730 TEST_REQUIRES_ARM_NEON;
12731 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012732 for (uint32_t n = 1; n <= 8; n++) {
12733 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012734 GemmMicrokernelTester()
12735 .mr(2)
12736 .nr(8)
12737 .kr(2)
12738 .sr(1)
12739 .m(m)
12740 .n(n)
12741 .k(k)
12742 .cm_stride(11)
12743 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012744 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012745 }
12746 }
12747 }
12748 }
12749
12750 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, qmin) {
12751 TEST_REQUIRES_ARM_NEON;
12752 GemmMicrokernelTester()
12753 .mr(2)
12754 .nr(8)
12755 .kr(2)
12756 .sr(1)
12757 .m(2)
12758 .n(8)
12759 .k(16)
12760 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012762 }
12763
12764 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, qmax) {
12765 TEST_REQUIRES_ARM_NEON;
12766 GemmMicrokernelTester()
12767 .mr(2)
12768 .nr(8)
12769 .kr(2)
12770 .sr(1)
12771 .m(2)
12772 .n(8)
12773 .k(16)
12774 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080012775 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012776 }
12777
12778 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C2__NEON_MLAL_DUP, strided_cm) {
12779 TEST_REQUIRES_ARM_NEON;
12780 GemmMicrokernelTester()
12781 .mr(2)
12782 .nr(8)
12783 .kr(2)
12784 .sr(1)
12785 .m(2)
12786 .n(8)
12787 .k(16)
12788 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012789 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012790 }
12791#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
12792
12793
12794#if XNN_ARCH_ARM || XNN_ARCH_ARM64
12795 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16) {
12796 TEST_REQUIRES_ARM_NEON;
12797 GemmMicrokernelTester()
12798 .mr(3)
12799 .nr(8)
12800 .kr(2)
12801 .sr(1)
12802 .m(3)
12803 .n(8)
12804 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080012805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012806 }
12807
12808 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cn) {
12809 TEST_REQUIRES_ARM_NEON;
12810 GemmMicrokernelTester()
12811 .mr(3)
12812 .nr(8)
12813 .kr(2)
12814 .sr(1)
12815 .m(3)
12816 .n(8)
12817 .k(16)
12818 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080012819 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012820 }
12821
12822 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
12823 TEST_REQUIRES_ARM_NEON;
12824 GemmMicrokernelTester()
12825 .mr(3)
12826 .nr(8)
12827 .kr(2)
12828 .sr(1)
12829 .m(3)
12830 .n(8)
12831 .k(16)
12832 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012833 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012834 }
12835
12836 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
12837 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080012838 for (uint32_t n = 1; n <= 8; n++) {
12839 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012840 GemmMicrokernelTester()
12841 .mr(3)
12842 .nr(8)
12843 .kr(2)
12844 .sr(1)
12845 .m(m)
12846 .n(n)
12847 .k(16)
12848 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012850 }
12851 }
12852 }
12853
12854 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
12855 TEST_REQUIRES_ARM_NEON;
12856 for (uint32_t m = 1; m <= 3; m++) {
12857 GemmMicrokernelTester()
12858 .mr(3)
12859 .nr(8)
12860 .kr(2)
12861 .sr(1)
12862 .m(m)
12863 .n(8)
12864 .k(16)
12865 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012866 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012867 }
12868 }
12869
12870 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
12871 TEST_REQUIRES_ARM_NEON;
12872 for (uint32_t n = 1; n <= 8; n++) {
12873 GemmMicrokernelTester()
12874 .mr(3)
12875 .nr(8)
12876 .kr(2)
12877 .sr(1)
12878 .m(3)
12879 .n(n)
12880 .k(16)
12881 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012882 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012883 }
12884 }
12885
12886 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16) {
12887 TEST_REQUIRES_ARM_NEON;
12888 for (size_t k = 1; k < 16; k++) {
12889 GemmMicrokernelTester()
12890 .mr(3)
12891 .nr(8)
12892 .kr(2)
12893 .sr(1)
12894 .m(3)
12895 .n(8)
12896 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012897 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012898 }
12899 }
12900
12901 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
12902 TEST_REQUIRES_ARM_NEON;
12903 for (size_t k = 1; k < 16; k++) {
12904 GemmMicrokernelTester()
12905 .mr(3)
12906 .nr(8)
12907 .kr(2)
12908 .sr(1)
12909 .m(3)
12910 .n(8)
12911 .k(k)
12912 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080012913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012914 }
12915 }
12916
12917 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
12918 TEST_REQUIRES_ARM_NEON;
12919 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012920 for (uint32_t n = 1; n <= 8; n++) {
12921 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012922 GemmMicrokernelTester()
12923 .mr(3)
12924 .nr(8)
12925 .kr(2)
12926 .sr(1)
12927 .m(m)
12928 .n(n)
12929 .k(k)
12930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012932 }
12933 }
12934 }
12935 }
12936
12937 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16) {
12938 TEST_REQUIRES_ARM_NEON;
12939 for (size_t k = 17; k < 32; k++) {
12940 GemmMicrokernelTester()
12941 .mr(3)
12942 .nr(8)
12943 .kr(2)
12944 .sr(1)
12945 .m(3)
12946 .n(8)
12947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012949 }
12950 }
12951
12952 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
12953 TEST_REQUIRES_ARM_NEON;
12954 for (size_t k = 17; k < 32; k++) {
12955 GemmMicrokernelTester()
12956 .mr(3)
12957 .nr(8)
12958 .kr(2)
12959 .sr(1)
12960 .m(3)
12961 .n(8)
12962 .k(k)
12963 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080012964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012965 }
12966 }
12967
12968 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
12969 TEST_REQUIRES_ARM_NEON;
12970 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080012971 for (uint32_t n = 1; n <= 8; n++) {
12972 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012973 GemmMicrokernelTester()
12974 .mr(3)
12975 .nr(8)
12976 .kr(2)
12977 .sr(1)
12978 .m(m)
12979 .n(n)
12980 .k(k)
12981 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080012982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080012983 }
12984 }
12985 }
12986 }
12987
12988 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16) {
12989 TEST_REQUIRES_ARM_NEON;
12990 for (size_t k = 32; k <= 160; k += 16) {
12991 GemmMicrokernelTester()
12992 .mr(3)
12993 .nr(8)
12994 .kr(2)
12995 .sr(1)
12996 .m(3)
12997 .n(8)
12998 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080012999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013000 }
13001 }
13002
13003 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16_strided_a) {
13004 TEST_REQUIRES_ARM_NEON;
13005 for (size_t k = 32; k <= 160; k += 16) {
13006 GemmMicrokernelTester()
13007 .mr(3)
13008 .nr(8)
13009 .kr(2)
13010 .sr(1)
13011 .m(3)
13012 .n(8)
13013 .k(k)
13014 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080013015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013016 }
13017 }
13018
13019 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
13020 TEST_REQUIRES_ARM_NEON;
13021 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013022 for (uint32_t n = 1; n <= 8; n++) {
13023 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013024 GemmMicrokernelTester()
13025 .mr(3)
13026 .nr(8)
13027 .kr(2)
13028 .sr(1)
13029 .m(m)
13030 .n(n)
13031 .k(k)
13032 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013033 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013034 }
13035 }
13036 }
13037 }
13038
13039 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8) {
13040 TEST_REQUIRES_ARM_NEON;
13041 for (uint32_t n = 9; n < 16; n++) {
13042 for (size_t k = 1; k <= 80; k += 17) {
13043 GemmMicrokernelTester()
13044 .mr(3)
13045 .nr(8)
13046 .kr(2)
13047 .sr(1)
13048 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013049 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013050 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013051 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013052 }
13053 }
13054 }
13055
13056 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
13057 TEST_REQUIRES_ARM_NEON;
13058 for (uint32_t n = 9; n < 16; n++) {
13059 for (size_t k = 1; k <= 80; k += 17) {
13060 GemmMicrokernelTester()
13061 .mr(3)
13062 .nr(8)
13063 .kr(2)
13064 .sr(1)
13065 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013066 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013067 .k(k)
13068 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013070 }
13071 }
13072 }
13073
13074 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_strided_a) {
13075 TEST_REQUIRES_ARM_NEON;
13076 for (uint32_t n = 9; n < 16; n++) {
13077 for (size_t k = 1; k <= 80; k += 17) {
13078 GemmMicrokernelTester()
13079 .mr(3)
13080 .nr(8)
13081 .kr(2)
13082 .sr(1)
13083 .m(3)
13084 .n(n)
13085 .k(k)
13086 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013087 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013088 }
13089 }
13090 }
13091
13092 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
13093 TEST_REQUIRES_ARM_NEON;
13094 for (uint32_t n = 9; n < 16; n++) {
13095 for (size_t k = 1; k <= 80; k += 17) {
13096 for (uint32_t m = 1; m <= 3; m++) {
13097 GemmMicrokernelTester()
13098 .mr(3)
13099 .nr(8)
13100 .kr(2)
13101 .sr(1)
13102 .m(m)
13103 .n(n)
13104 .k(k)
13105 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013106 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013107 }
13108 }
13109 }
13110 }
13111
13112 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8) {
13113 TEST_REQUIRES_ARM_NEON;
13114 for (uint32_t n = 16; n <= 24; n += 8) {
13115 for (size_t k = 1; k <= 80; k += 17) {
13116 GemmMicrokernelTester()
13117 .mr(3)
13118 .nr(8)
13119 .kr(2)
13120 .sr(1)
13121 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013122 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013123 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013124 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013125 }
13126 }
13127 }
13128
13129 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
13130 TEST_REQUIRES_ARM_NEON;
13131 for (uint32_t n = 16; n <= 24; n += 8) {
13132 for (size_t k = 1; k <= 80; k += 17) {
13133 GemmMicrokernelTester()
13134 .mr(3)
13135 .nr(8)
13136 .kr(2)
13137 .sr(1)
13138 .m(3)
13139 .n(n)
13140 .k(k)
13141 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013142 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013143 }
13144 }
13145 }
13146
13147 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_strided_a) {
13148 TEST_REQUIRES_ARM_NEON;
13149 for (uint32_t n = 16; n <= 24; n += 8) {
13150 for (size_t k = 1; k <= 80; k += 17) {
13151 GemmMicrokernelTester()
13152 .mr(3)
13153 .nr(8)
13154 .kr(2)
13155 .sr(1)
13156 .m(3)
13157 .n(n)
13158 .k(k)
13159 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013160 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013161 }
13162 }
13163 }
13164
13165 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
13166 TEST_REQUIRES_ARM_NEON;
13167 for (uint32_t n = 16; n <= 24; n += 8) {
13168 for (size_t k = 1; k <= 80; k += 17) {
13169 for (uint32_t m = 1; m <= 3; m++) {
13170 GemmMicrokernelTester()
13171 .mr(3)
13172 .nr(8)
13173 .kr(2)
13174 .sr(1)
13175 .m(m)
13176 .n(n)
13177 .k(k)
13178 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013179 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013180 }
13181 }
13182 }
13183 }
13184
13185 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
13186 TEST_REQUIRES_ARM_NEON;
13187 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013188 for (uint32_t n = 1; n <= 8; n++) {
13189 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013190 GemmMicrokernelTester()
13191 .mr(3)
13192 .nr(8)
13193 .kr(2)
13194 .sr(1)
13195 .m(m)
13196 .n(n)
13197 .k(k)
13198 .cm_stride(11)
13199 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013200 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013201 }
13202 }
13203 }
13204 }
13205
13206 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmin) {
13207 TEST_REQUIRES_ARM_NEON;
13208 GemmMicrokernelTester()
13209 .mr(3)
13210 .nr(8)
13211 .kr(2)
13212 .sr(1)
13213 .m(3)
13214 .n(8)
13215 .k(16)
13216 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013218 }
13219
13220 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, qmax) {
13221 TEST_REQUIRES_ARM_NEON;
13222 GemmMicrokernelTester()
13223 .mr(3)
13224 .nr(8)
13225 .kr(2)
13226 .sr(1)
13227 .m(3)
13228 .n(8)
13229 .k(16)
13230 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013231 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013232 }
13233
13234 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C2__NEON_MLAL_DUP, strided_cm) {
13235 TEST_REQUIRES_ARM_NEON;
13236 GemmMicrokernelTester()
13237 .mr(3)
13238 .nr(8)
13239 .kr(2)
13240 .sr(1)
13241 .m(3)
13242 .n(8)
13243 .k(16)
13244 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013245 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013246 }
13247#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13248
13249
13250#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13251 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16) {
13252 TEST_REQUIRES_ARM_NEON;
13253 GemmMicrokernelTester()
13254 .mr(4)
13255 .nr(8)
13256 .kr(2)
13257 .sr(1)
13258 .m(4)
13259 .n(8)
13260 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013262 }
13263
13264 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cn) {
13265 TEST_REQUIRES_ARM_NEON;
13266 GemmMicrokernelTester()
13267 .mr(4)
13268 .nr(8)
13269 .kr(2)
13270 .sr(1)
13271 .m(4)
13272 .n(8)
13273 .k(16)
13274 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013275 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013276 }
13277
13278 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
13279 TEST_REQUIRES_ARM_NEON;
13280 GemmMicrokernelTester()
13281 .mr(4)
13282 .nr(8)
13283 .kr(2)
13284 .sr(1)
13285 .m(4)
13286 .n(8)
13287 .k(16)
13288 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013289 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013290 }
13291
13292 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile) {
13293 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013294 for (uint32_t n = 1; n <= 8; n++) {
13295 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013296 GemmMicrokernelTester()
13297 .mr(4)
13298 .nr(8)
13299 .kr(2)
13300 .sr(1)
13301 .m(m)
13302 .n(n)
13303 .k(16)
13304 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013306 }
13307 }
13308 }
13309
13310 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
13311 TEST_REQUIRES_ARM_NEON;
13312 for (uint32_t m = 1; m <= 4; m++) {
13313 GemmMicrokernelTester()
13314 .mr(4)
13315 .nr(8)
13316 .kr(2)
13317 .sr(1)
13318 .m(m)
13319 .n(8)
13320 .k(16)
13321 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013322 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013323 }
13324 }
13325
13326 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
13327 TEST_REQUIRES_ARM_NEON;
13328 for (uint32_t n = 1; n <= 8; n++) {
13329 GemmMicrokernelTester()
13330 .mr(4)
13331 .nr(8)
13332 .kr(2)
13333 .sr(1)
13334 .m(4)
13335 .n(n)
13336 .k(16)
13337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013339 }
13340 }
13341
13342 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16) {
13343 TEST_REQUIRES_ARM_NEON;
13344 for (size_t k = 1; k < 16; k++) {
13345 GemmMicrokernelTester()
13346 .mr(4)
13347 .nr(8)
13348 .kr(2)
13349 .sr(1)
13350 .m(4)
13351 .n(8)
13352 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013353 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013354 }
13355 }
13356
13357 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
13358 TEST_REQUIRES_ARM_NEON;
13359 for (size_t k = 1; k < 16; k++) {
13360 GemmMicrokernelTester()
13361 .mr(4)
13362 .nr(8)
13363 .kr(2)
13364 .sr(1)
13365 .m(4)
13366 .n(8)
13367 .k(k)
13368 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013370 }
13371 }
13372
13373 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_lt_16_subtile) {
13374 TEST_REQUIRES_ARM_NEON;
13375 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013376 for (uint32_t n = 1; n <= 8; n++) {
13377 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013378 GemmMicrokernelTester()
13379 .mr(4)
13380 .nr(8)
13381 .kr(2)
13382 .sr(1)
13383 .m(m)
13384 .n(n)
13385 .k(k)
13386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013388 }
13389 }
13390 }
13391 }
13392
13393 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16) {
13394 TEST_REQUIRES_ARM_NEON;
13395 for (size_t k = 17; k < 32; k++) {
13396 GemmMicrokernelTester()
13397 .mr(4)
13398 .nr(8)
13399 .kr(2)
13400 .sr(1)
13401 .m(4)
13402 .n(8)
13403 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013405 }
13406 }
13407
13408 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
13409 TEST_REQUIRES_ARM_NEON;
13410 for (size_t k = 17; k < 32; k++) {
13411 GemmMicrokernelTester()
13412 .mr(4)
13413 .nr(8)
13414 .kr(2)
13415 .sr(1)
13416 .m(4)
13417 .n(8)
13418 .k(k)
13419 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080013420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013421 }
13422 }
13423
13424 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_gt_16_subtile) {
13425 TEST_REQUIRES_ARM_NEON;
13426 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013427 for (uint32_t n = 1; n <= 8; n++) {
13428 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013429 GemmMicrokernelTester()
13430 .mr(4)
13431 .nr(8)
13432 .kr(2)
13433 .sr(1)
13434 .m(m)
13435 .n(n)
13436 .k(k)
13437 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013439 }
13440 }
13441 }
13442 }
13443
13444 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16) {
13445 TEST_REQUIRES_ARM_NEON;
13446 for (size_t k = 32; k <= 160; k += 16) {
13447 GemmMicrokernelTester()
13448 .mr(4)
13449 .nr(8)
13450 .kr(2)
13451 .sr(1)
13452 .m(4)
13453 .n(8)
13454 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013456 }
13457 }
13458
13459 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16_strided_a) {
13460 TEST_REQUIRES_ARM_NEON;
13461 for (size_t k = 32; k <= 160; k += 16) {
13462 GemmMicrokernelTester()
13463 .mr(4)
13464 .nr(8)
13465 .kr(2)
13466 .sr(1)
13467 .m(4)
13468 .n(8)
13469 .k(k)
13470 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080013471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013472 }
13473 }
13474
13475 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, k_div_16_subtile) {
13476 TEST_REQUIRES_ARM_NEON;
13477 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013478 for (uint32_t n = 1; n <= 8; n++) {
13479 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013480 GemmMicrokernelTester()
13481 .mr(4)
13482 .nr(8)
13483 .kr(2)
13484 .sr(1)
13485 .m(m)
13486 .n(n)
13487 .k(k)
13488 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013489 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013490 }
13491 }
13492 }
13493 }
13494
13495 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8) {
13496 TEST_REQUIRES_ARM_NEON;
13497 for (uint32_t n = 9; n < 16; n++) {
13498 for (size_t k = 1; k <= 80; k += 17) {
13499 GemmMicrokernelTester()
13500 .mr(4)
13501 .nr(8)
13502 .kr(2)
13503 .sr(1)
13504 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013505 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013506 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013507 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013508 }
13509 }
13510 }
13511
13512 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_strided_cn) {
13513 TEST_REQUIRES_ARM_NEON;
13514 for (uint32_t n = 9; n < 16; n++) {
13515 for (size_t k = 1; k <= 80; k += 17) {
13516 GemmMicrokernelTester()
13517 .mr(4)
13518 .nr(8)
13519 .kr(2)
13520 .sr(1)
13521 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013522 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013523 .k(k)
13524 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013526 }
13527 }
13528 }
13529
13530 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_strided_a) {
13531 TEST_REQUIRES_ARM_NEON;
13532 for (uint32_t n = 9; n < 16; n++) {
13533 for (size_t k = 1; k <= 80; k += 17) {
13534 GemmMicrokernelTester()
13535 .mr(4)
13536 .nr(8)
13537 .kr(2)
13538 .sr(1)
13539 .m(4)
13540 .n(n)
13541 .k(k)
13542 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013543 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013544 }
13545 }
13546 }
13547
13548 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_gt_8_subtile) {
13549 TEST_REQUIRES_ARM_NEON;
13550 for (uint32_t n = 9; n < 16; n++) {
13551 for (size_t k = 1; k <= 80; k += 17) {
13552 for (uint32_t m = 1; m <= 4; m++) {
13553 GemmMicrokernelTester()
13554 .mr(4)
13555 .nr(8)
13556 .kr(2)
13557 .sr(1)
13558 .m(m)
13559 .n(n)
13560 .k(k)
13561 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013562 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013563 }
13564 }
13565 }
13566 }
13567
13568 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8) {
13569 TEST_REQUIRES_ARM_NEON;
13570 for (uint32_t n = 16; n <= 24; n += 8) {
13571 for (size_t k = 1; k <= 80; k += 17) {
13572 GemmMicrokernelTester()
13573 .mr(4)
13574 .nr(8)
13575 .kr(2)
13576 .sr(1)
13577 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013578 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013579 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013580 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013581 }
13582 }
13583 }
13584
13585 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_strided_cn) {
13586 TEST_REQUIRES_ARM_NEON;
13587 for (uint32_t n = 16; n <= 24; n += 8) {
13588 for (size_t k = 1; k <= 80; k += 17) {
13589 GemmMicrokernelTester()
13590 .mr(4)
13591 .nr(8)
13592 .kr(2)
13593 .sr(1)
13594 .m(4)
13595 .n(n)
13596 .k(k)
13597 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013598 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013599 }
13600 }
13601 }
13602
13603 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_strided_a) {
13604 TEST_REQUIRES_ARM_NEON;
13605 for (uint32_t n = 16; n <= 24; n += 8) {
13606 for (size_t k = 1; k <= 80; k += 17) {
13607 GemmMicrokernelTester()
13608 .mr(4)
13609 .nr(8)
13610 .kr(2)
13611 .sr(1)
13612 .m(4)
13613 .n(n)
13614 .k(k)
13615 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013616 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013617 }
13618 }
13619 }
13620
13621 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, n_div_8_subtile) {
13622 TEST_REQUIRES_ARM_NEON;
13623 for (uint32_t n = 16; n <= 24; n += 8) {
13624 for (size_t k = 1; k <= 80; k += 17) {
13625 for (uint32_t m = 1; m <= 4; m++) {
13626 GemmMicrokernelTester()
13627 .mr(4)
13628 .nr(8)
13629 .kr(2)
13630 .sr(1)
13631 .m(m)
13632 .n(n)
13633 .k(k)
13634 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013635 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013636 }
13637 }
13638 }
13639 }
13640
13641 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm_subtile) {
13642 TEST_REQUIRES_ARM_NEON;
13643 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013644 for (uint32_t n = 1; n <= 8; n++) {
13645 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013646 GemmMicrokernelTester()
13647 .mr(4)
13648 .nr(8)
13649 .kr(2)
13650 .sr(1)
13651 .m(m)
13652 .n(n)
13653 .k(k)
13654 .cm_stride(11)
13655 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013656 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013657 }
13658 }
13659 }
13660 }
13661
13662 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmin) {
13663 TEST_REQUIRES_ARM_NEON;
13664 GemmMicrokernelTester()
13665 .mr(4)
13666 .nr(8)
13667 .kr(2)
13668 .sr(1)
13669 .m(4)
13670 .n(8)
13671 .k(16)
13672 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013674 }
13675
13676 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, qmax) {
13677 TEST_REQUIRES_ARM_NEON;
13678 GemmMicrokernelTester()
13679 .mr(4)
13680 .nr(8)
13681 .kr(2)
13682 .sr(1)
13683 .m(4)
13684 .n(8)
13685 .k(16)
13686 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080013687 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013688 }
13689
13690 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C2__NEON_MLAL_DUP, strided_cm) {
13691 TEST_REQUIRES_ARM_NEON;
13692 GemmMicrokernelTester()
13693 .mr(4)
13694 .nr(8)
13695 .kr(2)
13696 .sr(1)
13697 .m(4)
13698 .n(8)
13699 .k(16)
13700 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080013701 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013702 }
13703#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
13704
13705
13706#if XNN_ARCH_ARM || XNN_ARCH_ARM64
13707 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16) {
13708 TEST_REQUIRES_ARM_NEON;
13709 GemmMicrokernelTester()
13710 .mr(1)
13711 .nr(16)
13712 .kr(2)
13713 .sr(1)
13714 .m(1)
13715 .n(16)
13716 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080013717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013718 }
13719
13720 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, strided_cn) {
13721 TEST_REQUIRES_ARM_NEON;
13722 GemmMicrokernelTester()
13723 .mr(1)
13724 .nr(16)
13725 .kr(2)
13726 .sr(1)
13727 .m(1)
13728 .n(16)
13729 .k(16)
13730 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013731 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013732 }
13733
13734 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_strided_a) {
13735 TEST_REQUIRES_ARM_NEON;
13736 GemmMicrokernelTester()
13737 .mr(1)
13738 .nr(16)
13739 .kr(2)
13740 .sr(1)
13741 .m(1)
13742 .n(16)
13743 .k(16)
13744 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013745 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013746 }
13747
13748 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_subtile) {
13749 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080013750 for (uint32_t n = 1; n <= 16; n++) {
13751 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013752 GemmMicrokernelTester()
13753 .mr(1)
13754 .nr(16)
13755 .kr(2)
13756 .sr(1)
13757 .m(m)
13758 .n(n)
13759 .k(16)
13760 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013762 }
13763 }
13764 }
13765
13766 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_subtile_m) {
13767 TEST_REQUIRES_ARM_NEON;
13768 for (uint32_t m = 1; m <= 1; m++) {
13769 GemmMicrokernelTester()
13770 .mr(1)
13771 .nr(16)
13772 .kr(2)
13773 .sr(1)
13774 .m(m)
13775 .n(16)
13776 .k(16)
13777 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013778 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013779 }
13780 }
13781
13782 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_eq_16_subtile_n) {
13783 TEST_REQUIRES_ARM_NEON;
13784 for (uint32_t n = 1; n <= 16; n++) {
13785 GemmMicrokernelTester()
13786 .mr(1)
13787 .nr(16)
13788 .kr(2)
13789 .sr(1)
13790 .m(1)
13791 .n(n)
13792 .k(16)
13793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013795 }
13796 }
13797
13798 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_lt_16) {
13799 TEST_REQUIRES_ARM_NEON;
13800 for (size_t k = 1; k < 16; k++) {
13801 GemmMicrokernelTester()
13802 .mr(1)
13803 .nr(16)
13804 .kr(2)
13805 .sr(1)
13806 .m(1)
13807 .n(16)
13808 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013809 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013810 }
13811 }
13812
13813 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_lt_16_strided_a) {
13814 TEST_REQUIRES_ARM_NEON;
13815 for (size_t k = 1; k < 16; k++) {
13816 GemmMicrokernelTester()
13817 .mr(1)
13818 .nr(16)
13819 .kr(2)
13820 .sr(1)
13821 .m(1)
13822 .n(16)
13823 .k(k)
13824 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013826 }
13827 }
13828
13829 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_lt_16_subtile) {
13830 TEST_REQUIRES_ARM_NEON;
13831 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013832 for (uint32_t n = 1; n <= 16; n++) {
13833 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013834 GemmMicrokernelTester()
13835 .mr(1)
13836 .nr(16)
13837 .kr(2)
13838 .sr(1)
13839 .m(m)
13840 .n(n)
13841 .k(k)
13842 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013843 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013844 }
13845 }
13846 }
13847 }
13848
13849 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_gt_16) {
13850 TEST_REQUIRES_ARM_NEON;
13851 for (size_t k = 17; k < 32; k++) {
13852 GemmMicrokernelTester()
13853 .mr(1)
13854 .nr(16)
13855 .kr(2)
13856 .sr(1)
13857 .m(1)
13858 .n(16)
13859 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013861 }
13862 }
13863
13864 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_gt_16_strided_a) {
13865 TEST_REQUIRES_ARM_NEON;
13866 for (size_t k = 17; k < 32; k++) {
13867 GemmMicrokernelTester()
13868 .mr(1)
13869 .nr(16)
13870 .kr(2)
13871 .sr(1)
13872 .m(1)
13873 .n(16)
13874 .k(k)
13875 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080013876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013877 }
13878 }
13879
13880 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_gt_16_subtile) {
13881 TEST_REQUIRES_ARM_NEON;
13882 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013883 for (uint32_t n = 1; n <= 16; n++) {
13884 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013885 GemmMicrokernelTester()
13886 .mr(1)
13887 .nr(16)
13888 .kr(2)
13889 .sr(1)
13890 .m(m)
13891 .n(n)
13892 .k(k)
13893 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013895 }
13896 }
13897 }
13898 }
13899
13900 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_div_16) {
13901 TEST_REQUIRES_ARM_NEON;
13902 for (size_t k = 32; k <= 160; k += 16) {
13903 GemmMicrokernelTester()
13904 .mr(1)
13905 .nr(16)
13906 .kr(2)
13907 .sr(1)
13908 .m(1)
13909 .n(16)
13910 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013912 }
13913 }
13914
13915 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_div_16_strided_a) {
13916 TEST_REQUIRES_ARM_NEON;
13917 for (size_t k = 32; k <= 160; k += 16) {
13918 GemmMicrokernelTester()
13919 .mr(1)
13920 .nr(16)
13921 .kr(2)
13922 .sr(1)
13923 .m(1)
13924 .n(16)
13925 .k(k)
13926 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080013927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013928 }
13929 }
13930
13931 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, k_div_16_subtile) {
13932 TEST_REQUIRES_ARM_NEON;
13933 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080013934 for (uint32_t n = 1; n <= 16; n++) {
13935 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013936 GemmMicrokernelTester()
13937 .mr(1)
13938 .nr(16)
13939 .kr(2)
13940 .sr(1)
13941 .m(m)
13942 .n(n)
13943 .k(k)
13944 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080013945 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013946 }
13947 }
13948 }
13949 }
13950
13951 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16) {
13952 TEST_REQUIRES_ARM_NEON;
13953 for (uint32_t n = 17; n < 32; n++) {
13954 for (size_t k = 1; k <= 80; k += 17) {
13955 GemmMicrokernelTester()
13956 .mr(1)
13957 .nr(16)
13958 .kr(2)
13959 .sr(1)
13960 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013961 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013962 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080013963 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013964 }
13965 }
13966 }
13967
13968 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16_strided_cn) {
13969 TEST_REQUIRES_ARM_NEON;
13970 for (uint32_t n = 17; n < 32; n++) {
13971 for (size_t k = 1; k <= 80; k += 17) {
13972 GemmMicrokernelTester()
13973 .mr(1)
13974 .nr(16)
13975 .kr(2)
13976 .sr(1)
13977 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080013978 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013979 .k(k)
13980 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080013981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080013982 }
13983 }
13984 }
13985
13986 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16_strided_a) {
13987 TEST_REQUIRES_ARM_NEON;
13988 for (uint32_t n = 17; n < 32; n++) {
13989 for (size_t k = 1; k <= 80; k += 17) {
13990 GemmMicrokernelTester()
13991 .mr(1)
13992 .nr(16)
13993 .kr(2)
13994 .sr(1)
13995 .m(1)
13996 .n(n)
13997 .k(k)
13998 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080013999 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014000 }
14001 }
14002 }
14003
14004 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_gt_16_subtile) {
14005 TEST_REQUIRES_ARM_NEON;
14006 for (uint32_t n = 17; n < 32; n++) {
14007 for (size_t k = 1; k <= 80; k += 17) {
14008 for (uint32_t m = 1; m <= 1; m++) {
14009 GemmMicrokernelTester()
14010 .mr(1)
14011 .nr(16)
14012 .kr(2)
14013 .sr(1)
14014 .m(m)
14015 .n(n)
14016 .k(k)
14017 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014018 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014019 }
14020 }
14021 }
14022 }
14023
14024 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16) {
14025 TEST_REQUIRES_ARM_NEON;
14026 for (uint32_t n = 32; n <= 48; n += 16) {
14027 for (size_t k = 1; k <= 80; k += 17) {
14028 GemmMicrokernelTester()
14029 .mr(1)
14030 .nr(16)
14031 .kr(2)
14032 .sr(1)
14033 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014034 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014035 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014036 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014037 }
14038 }
14039 }
14040
14041 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16_strided_cn) {
14042 TEST_REQUIRES_ARM_NEON;
14043 for (uint32_t n = 32; n <= 48; n += 16) {
14044 for (size_t k = 1; k <= 80; k += 17) {
14045 GemmMicrokernelTester()
14046 .mr(1)
14047 .nr(16)
14048 .kr(2)
14049 .sr(1)
14050 .m(1)
14051 .n(n)
14052 .k(k)
14053 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014054 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014055 }
14056 }
14057 }
14058
14059 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16_strided_a) {
14060 TEST_REQUIRES_ARM_NEON;
14061 for (uint32_t n = 32; n <= 48; n += 16) {
14062 for (size_t k = 1; k <= 80; k += 17) {
14063 GemmMicrokernelTester()
14064 .mr(1)
14065 .nr(16)
14066 .kr(2)
14067 .sr(1)
14068 .m(1)
14069 .n(n)
14070 .k(k)
14071 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014072 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014073 }
14074 }
14075 }
14076
14077 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, n_div_16_subtile) {
14078 TEST_REQUIRES_ARM_NEON;
14079 for (uint32_t n = 32; n <= 48; n += 16) {
14080 for (size_t k = 1; k <= 80; k += 17) {
14081 for (uint32_t m = 1; m <= 1; m++) {
14082 GemmMicrokernelTester()
14083 .mr(1)
14084 .nr(16)
14085 .kr(2)
14086 .sr(1)
14087 .m(m)
14088 .n(n)
14089 .k(k)
14090 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014091 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014092 }
14093 }
14094 }
14095 }
14096
14097 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, strided_cm_subtile) {
14098 TEST_REQUIRES_ARM_NEON;
14099 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014100 for (uint32_t n = 1; n <= 16; n++) {
14101 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014102 GemmMicrokernelTester()
14103 .mr(1)
14104 .nr(16)
14105 .kr(2)
14106 .sr(1)
14107 .m(m)
14108 .n(n)
14109 .k(k)
14110 .cm_stride(19)
14111 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014112 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014113 }
14114 }
14115 }
14116 }
14117
14118 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, qmin) {
14119 TEST_REQUIRES_ARM_NEON;
14120 GemmMicrokernelTester()
14121 .mr(1)
14122 .nr(16)
14123 .kr(2)
14124 .sr(1)
14125 .m(1)
14126 .n(16)
14127 .k(16)
14128 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014130 }
14131
14132 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, qmax) {
14133 TEST_REQUIRES_ARM_NEON;
14134 GemmMicrokernelTester()
14135 .mr(1)
14136 .nr(16)
14137 .kr(2)
14138 .sr(1)
14139 .m(1)
14140 .n(16)
14141 .k(16)
14142 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014143 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014144 }
14145
14146 TEST(QS8_GEMM_MINMAX_RNDNU_1X16C2__NEON_MLAL_DUP, strided_cm) {
14147 TEST_REQUIRES_ARM_NEON;
14148 GemmMicrokernelTester()
14149 .mr(1)
14150 .nr(16)
14151 .kr(2)
14152 .sr(1)
14153 .m(1)
14154 .n(16)
14155 .k(16)
14156 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014157 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16c2__neon_mlal_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014158 }
14159#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
14160
14161
14162#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
14163 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16) {
14164 TEST_REQUIRES_ARM_NEON;
14165 GemmMicrokernelTester()
14166 .mr(2)
14167 .nr(8)
14168 .kr(8)
14169 .sr(1)
14170 .m(2)
14171 .n(8)
14172 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080014173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014174 }
14175
14176 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cn) {
14177 TEST_REQUIRES_ARM_NEON;
14178 GemmMicrokernelTester()
14179 .mr(2)
14180 .nr(8)
14181 .kr(8)
14182 .sr(1)
14183 .m(2)
14184 .n(8)
14185 .k(16)
14186 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014187 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014188 }
14189
14190 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_strided_a) {
14191 TEST_REQUIRES_ARM_NEON;
14192 GemmMicrokernelTester()
14193 .mr(2)
14194 .nr(8)
14195 .kr(8)
14196 .sr(1)
14197 .m(2)
14198 .n(8)
14199 .k(16)
14200 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014201 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014202 }
14203
14204 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile) {
14205 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014206 for (uint32_t n = 1; n <= 8; n++) {
14207 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014208 GemmMicrokernelTester()
14209 .mr(2)
14210 .nr(8)
14211 .kr(8)
14212 .sr(1)
14213 .m(m)
14214 .n(n)
14215 .k(16)
14216 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014218 }
14219 }
14220 }
14221
14222 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_m) {
14223 TEST_REQUIRES_ARM_NEON;
14224 for (uint32_t m = 1; m <= 2; m++) {
14225 GemmMicrokernelTester()
14226 .mr(2)
14227 .nr(8)
14228 .kr(8)
14229 .sr(1)
14230 .m(m)
14231 .n(8)
14232 .k(16)
14233 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014234 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014235 }
14236 }
14237
14238 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_eq_16_subtile_n) {
14239 TEST_REQUIRES_ARM_NEON;
14240 for (uint32_t n = 1; n <= 8; n++) {
14241 GemmMicrokernelTester()
14242 .mr(2)
14243 .nr(8)
14244 .kr(8)
14245 .sr(1)
14246 .m(2)
14247 .n(n)
14248 .k(16)
14249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014251 }
14252 }
14253
14254 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16) {
14255 TEST_REQUIRES_ARM_NEON;
14256 for (size_t k = 1; k < 16; k++) {
14257 GemmMicrokernelTester()
14258 .mr(2)
14259 .nr(8)
14260 .kr(8)
14261 .sr(1)
14262 .m(2)
14263 .n(8)
14264 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014265 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014266 }
14267 }
14268
14269 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_strided_a) {
14270 TEST_REQUIRES_ARM_NEON;
14271 for (size_t k = 1; k < 16; k++) {
14272 GemmMicrokernelTester()
14273 .mr(2)
14274 .nr(8)
14275 .kr(8)
14276 .sr(1)
14277 .m(2)
14278 .n(8)
14279 .k(k)
14280 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014282 }
14283 }
14284
14285 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_lt_16_subtile) {
14286 TEST_REQUIRES_ARM_NEON;
14287 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014288 for (uint32_t n = 1; n <= 8; n++) {
14289 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014290 GemmMicrokernelTester()
14291 .mr(2)
14292 .nr(8)
14293 .kr(8)
14294 .sr(1)
14295 .m(m)
14296 .n(n)
14297 .k(k)
14298 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014299 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014300 }
14301 }
14302 }
14303 }
14304
14305 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16) {
14306 TEST_REQUIRES_ARM_NEON;
14307 for (size_t k = 17; k < 32; k++) {
14308 GemmMicrokernelTester()
14309 .mr(2)
14310 .nr(8)
14311 .kr(8)
14312 .sr(1)
14313 .m(2)
14314 .n(8)
14315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014317 }
14318 }
14319
14320 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_strided_a) {
14321 TEST_REQUIRES_ARM_NEON;
14322 for (size_t k = 17; k < 32; k++) {
14323 GemmMicrokernelTester()
14324 .mr(2)
14325 .nr(8)
14326 .kr(8)
14327 .sr(1)
14328 .m(2)
14329 .n(8)
14330 .k(k)
14331 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080014332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014333 }
14334 }
14335
14336 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_gt_16_subtile) {
14337 TEST_REQUIRES_ARM_NEON;
14338 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014339 for (uint32_t n = 1; n <= 8; n++) {
14340 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014341 GemmMicrokernelTester()
14342 .mr(2)
14343 .nr(8)
14344 .kr(8)
14345 .sr(1)
14346 .m(m)
14347 .n(n)
14348 .k(k)
14349 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014350 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014351 }
14352 }
14353 }
14354 }
14355
14356 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16) {
14357 TEST_REQUIRES_ARM_NEON;
14358 for (size_t k = 32; k <= 160; k += 16) {
14359 GemmMicrokernelTester()
14360 .mr(2)
14361 .nr(8)
14362 .kr(8)
14363 .sr(1)
14364 .m(2)
14365 .n(8)
14366 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014368 }
14369 }
14370
14371 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_strided_a) {
14372 TEST_REQUIRES_ARM_NEON;
14373 for (size_t k = 32; k <= 160; k += 16) {
14374 GemmMicrokernelTester()
14375 .mr(2)
14376 .nr(8)
14377 .kr(8)
14378 .sr(1)
14379 .m(2)
14380 .n(8)
14381 .k(k)
14382 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080014383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014384 }
14385 }
14386
14387 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, k_div_16_subtile) {
14388 TEST_REQUIRES_ARM_NEON;
14389 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014390 for (uint32_t n = 1; n <= 8; n++) {
14391 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014392 GemmMicrokernelTester()
14393 .mr(2)
14394 .nr(8)
14395 .kr(8)
14396 .sr(1)
14397 .m(m)
14398 .n(n)
14399 .k(k)
14400 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014401 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014402 }
14403 }
14404 }
14405 }
14406
14407 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8) {
14408 TEST_REQUIRES_ARM_NEON;
14409 for (uint32_t n = 9; n < 16; n++) {
14410 for (size_t k = 1; k <= 80; k += 17) {
14411 GemmMicrokernelTester()
14412 .mr(2)
14413 .nr(8)
14414 .kr(8)
14415 .sr(1)
14416 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014417 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014418 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014419 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014420 }
14421 }
14422 }
14423
14424 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_cn) {
14425 TEST_REQUIRES_ARM_NEON;
14426 for (uint32_t n = 9; n < 16; n++) {
14427 for (size_t k = 1; k <= 80; k += 17) {
14428 GemmMicrokernelTester()
14429 .mr(2)
14430 .nr(8)
14431 .kr(8)
14432 .sr(1)
14433 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014434 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014435 .k(k)
14436 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014438 }
14439 }
14440 }
14441
14442 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_strided_a) {
14443 TEST_REQUIRES_ARM_NEON;
14444 for (uint32_t n = 9; n < 16; n++) {
14445 for (size_t k = 1; k <= 80; k += 17) {
14446 GemmMicrokernelTester()
14447 .mr(2)
14448 .nr(8)
14449 .kr(8)
14450 .sr(1)
14451 .m(2)
14452 .n(n)
14453 .k(k)
14454 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014455 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014456 }
14457 }
14458 }
14459
14460 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_gt_8_subtile) {
14461 TEST_REQUIRES_ARM_NEON;
14462 for (uint32_t n = 9; n < 16; n++) {
14463 for (size_t k = 1; k <= 80; k += 17) {
14464 for (uint32_t m = 1; m <= 2; m++) {
14465 GemmMicrokernelTester()
14466 .mr(2)
14467 .nr(8)
14468 .kr(8)
14469 .sr(1)
14470 .m(m)
14471 .n(n)
14472 .k(k)
14473 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014474 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014475 }
14476 }
14477 }
14478 }
14479
14480 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8) {
14481 TEST_REQUIRES_ARM_NEON;
14482 for (uint32_t n = 16; n <= 24; n += 8) {
14483 for (size_t k = 1; k <= 80; k += 17) {
14484 GemmMicrokernelTester()
14485 .mr(2)
14486 .nr(8)
14487 .kr(8)
14488 .sr(1)
14489 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014490 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014491 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014492 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014493 }
14494 }
14495 }
14496
14497 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_cn) {
14498 TEST_REQUIRES_ARM_NEON;
14499 for (uint32_t n = 16; n <= 24; n += 8) {
14500 for (size_t k = 1; k <= 80; k += 17) {
14501 GemmMicrokernelTester()
14502 .mr(2)
14503 .nr(8)
14504 .kr(8)
14505 .sr(1)
14506 .m(2)
14507 .n(n)
14508 .k(k)
14509 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014510 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014511 }
14512 }
14513 }
14514
14515 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_strided_a) {
14516 TEST_REQUIRES_ARM_NEON;
14517 for (uint32_t n = 16; n <= 24; n += 8) {
14518 for (size_t k = 1; k <= 80; k += 17) {
14519 GemmMicrokernelTester()
14520 .mr(2)
14521 .nr(8)
14522 .kr(8)
14523 .sr(1)
14524 .m(2)
14525 .n(n)
14526 .k(k)
14527 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014528 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014529 }
14530 }
14531 }
14532
14533 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, n_div_8_subtile) {
14534 TEST_REQUIRES_ARM_NEON;
14535 for (uint32_t n = 16; n <= 24; n += 8) {
14536 for (size_t k = 1; k <= 80; k += 17) {
14537 for (uint32_t m = 1; m <= 2; m++) {
14538 GemmMicrokernelTester()
14539 .mr(2)
14540 .nr(8)
14541 .kr(8)
14542 .sr(1)
14543 .m(m)
14544 .n(n)
14545 .k(k)
14546 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014547 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014548 }
14549 }
14550 }
14551 }
14552
14553 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm_subtile) {
14554 TEST_REQUIRES_ARM_NEON;
14555 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014556 for (uint32_t n = 1; n <= 8; n++) {
14557 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014558 GemmMicrokernelTester()
14559 .mr(2)
14560 .nr(8)
14561 .kr(8)
14562 .sr(1)
14563 .m(m)
14564 .n(n)
14565 .k(k)
14566 .cm_stride(11)
14567 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014568 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014569 }
14570 }
14571 }
14572 }
14573
14574 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, qmin) {
14575 TEST_REQUIRES_ARM_NEON;
14576 GemmMicrokernelTester()
14577 .mr(2)
14578 .nr(8)
14579 .kr(8)
14580 .sr(1)
14581 .m(2)
14582 .n(8)
14583 .k(16)
14584 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014586 }
14587
14588 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, qmax) {
14589 TEST_REQUIRES_ARM_NEON;
14590 GemmMicrokernelTester()
14591 .mr(2)
14592 .nr(8)
14593 .kr(8)
14594 .sr(1)
14595 .m(2)
14596 .n(8)
14597 .k(16)
14598 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080014599 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014600 }
14601
14602 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MLAL_PRFM, strided_cm) {
14603 TEST_REQUIRES_ARM_NEON;
14604 GemmMicrokernelTester()
14605 .mr(2)
14606 .nr(8)
14607 .kr(8)
14608 .sr(1)
14609 .m(2)
14610 .n(8)
14611 .k(16)
14612 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014613 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mlal_prfm, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014614 }
14615#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
14616
14617
14618#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
14619 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_eq_8) {
14620 TEST_REQUIRES_ARM_NEON;
14621 GemmMicrokernelTester()
14622 .mr(2)
14623 .nr(8)
14624 .kr(8)
14625 .sr(1)
14626 .m(2)
14627 .n(8)
14628 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080014629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014630 }
14631
14632 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, strided_cn) {
14633 TEST_REQUIRES_ARM_NEON;
14634 GemmMicrokernelTester()
14635 .mr(2)
14636 .nr(8)
14637 .kr(8)
14638 .sr(1)
14639 .m(2)
14640 .n(8)
14641 .k(8)
14642 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014643 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014644 }
14645
14646 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_eq_8_strided_a) {
14647 TEST_REQUIRES_ARM_NEON;
14648 GemmMicrokernelTester()
14649 .mr(2)
14650 .nr(8)
14651 .kr(8)
14652 .sr(1)
14653 .m(2)
14654 .n(8)
14655 .k(8)
14656 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014657 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014658 }
14659
14660 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_eq_8_subtile) {
14661 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080014662 for (uint32_t n = 1; n <= 8; n++) {
14663 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014664 GemmMicrokernelTester()
14665 .mr(2)
14666 .nr(8)
14667 .kr(8)
14668 .sr(1)
14669 .m(m)
14670 .n(n)
14671 .k(8)
14672 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014674 }
14675 }
14676 }
14677
14678 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_eq_8_subtile_m) {
14679 TEST_REQUIRES_ARM_NEON;
14680 for (uint32_t m = 1; m <= 2; m++) {
14681 GemmMicrokernelTester()
14682 .mr(2)
14683 .nr(8)
14684 .kr(8)
14685 .sr(1)
14686 .m(m)
14687 .n(8)
14688 .k(8)
14689 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014690 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014691 }
14692 }
14693
14694 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_eq_8_subtile_n) {
14695 TEST_REQUIRES_ARM_NEON;
14696 for (uint32_t n = 1; n <= 8; n++) {
14697 GemmMicrokernelTester()
14698 .mr(2)
14699 .nr(8)
14700 .kr(8)
14701 .sr(1)
14702 .m(2)
14703 .n(n)
14704 .k(8)
14705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014707 }
14708 }
14709
14710 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_lt_8) {
14711 TEST_REQUIRES_ARM_NEON;
14712 for (size_t k = 1; k < 8; k++) {
14713 GemmMicrokernelTester()
14714 .mr(2)
14715 .nr(8)
14716 .kr(8)
14717 .sr(1)
14718 .m(2)
14719 .n(8)
14720 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014721 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014722 }
14723 }
14724
14725 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_lt_8_strided_a) {
14726 TEST_REQUIRES_ARM_NEON;
14727 for (size_t k = 1; k < 8; k++) {
14728 GemmMicrokernelTester()
14729 .mr(2)
14730 .nr(8)
14731 .kr(8)
14732 .sr(1)
14733 .m(2)
14734 .n(8)
14735 .k(k)
14736 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014737 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014738 }
14739 }
14740
14741 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_lt_8_subtile) {
14742 TEST_REQUIRES_ARM_NEON;
14743 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014744 for (uint32_t n = 1; n <= 8; n++) {
14745 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014746 GemmMicrokernelTester()
14747 .mr(2)
14748 .nr(8)
14749 .kr(8)
14750 .sr(1)
14751 .m(m)
14752 .n(n)
14753 .k(k)
14754 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014755 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014756 }
14757 }
14758 }
14759 }
14760
14761 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_gt_8) {
14762 TEST_REQUIRES_ARM_NEON;
14763 for (size_t k = 9; k < 16; k++) {
14764 GemmMicrokernelTester()
14765 .mr(2)
14766 .nr(8)
14767 .kr(8)
14768 .sr(1)
14769 .m(2)
14770 .n(8)
14771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014773 }
14774 }
14775
14776 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_gt_8_strided_a) {
14777 TEST_REQUIRES_ARM_NEON;
14778 for (size_t k = 9; k < 16; k++) {
14779 GemmMicrokernelTester()
14780 .mr(2)
14781 .nr(8)
14782 .kr(8)
14783 .sr(1)
14784 .m(2)
14785 .n(8)
14786 .k(k)
14787 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080014788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014789 }
14790 }
14791
14792 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_gt_8_subtile) {
14793 TEST_REQUIRES_ARM_NEON;
14794 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014795 for (uint32_t n = 1; n <= 8; n++) {
14796 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014797 GemmMicrokernelTester()
14798 .mr(2)
14799 .nr(8)
14800 .kr(8)
14801 .sr(1)
14802 .m(m)
14803 .n(n)
14804 .k(k)
14805 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014806 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014807 }
14808 }
14809 }
14810 }
14811
14812 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_div_8) {
14813 TEST_REQUIRES_ARM_NEON;
14814 for (size_t k = 16; k <= 80; k += 8) {
14815 GemmMicrokernelTester()
14816 .mr(2)
14817 .nr(8)
14818 .kr(8)
14819 .sr(1)
14820 .m(2)
14821 .n(8)
14822 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014824 }
14825 }
14826
14827 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_div_8_strided_a) {
14828 TEST_REQUIRES_ARM_NEON;
14829 for (size_t k = 16; k <= 80; k += 8) {
14830 GemmMicrokernelTester()
14831 .mr(2)
14832 .nr(8)
14833 .kr(8)
14834 .sr(1)
14835 .m(2)
14836 .n(8)
14837 .k(k)
14838 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080014839 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014840 }
14841 }
14842
14843 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, k_div_8_subtile) {
14844 TEST_REQUIRES_ARM_NEON;
14845 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080014846 for (uint32_t n = 1; n <= 8; n++) {
14847 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014848 GemmMicrokernelTester()
14849 .mr(2)
14850 .nr(8)
14851 .kr(8)
14852 .sr(1)
14853 .m(m)
14854 .n(n)
14855 .k(k)
14856 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014857 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014858 }
14859 }
14860 }
14861 }
14862
14863 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_gt_8) {
14864 TEST_REQUIRES_ARM_NEON;
14865 for (uint32_t n = 9; n < 16; n++) {
14866 for (size_t k = 1; k <= 40; k += 9) {
14867 GemmMicrokernelTester()
14868 .mr(2)
14869 .nr(8)
14870 .kr(8)
14871 .sr(1)
14872 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014873 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014874 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014875 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014876 }
14877 }
14878 }
14879
14880 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_gt_8_strided_cn) {
14881 TEST_REQUIRES_ARM_NEON;
14882 for (uint32_t n = 9; n < 16; n++) {
14883 for (size_t k = 1; k <= 40; k += 9) {
14884 GemmMicrokernelTester()
14885 .mr(2)
14886 .nr(8)
14887 .kr(8)
14888 .sr(1)
14889 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014890 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014891 .k(k)
14892 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014894 }
14895 }
14896 }
14897
14898 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_gt_8_strided_a) {
14899 TEST_REQUIRES_ARM_NEON;
14900 for (uint32_t n = 9; n < 16; n++) {
14901 for (size_t k = 1; k <= 40; k += 9) {
14902 GemmMicrokernelTester()
14903 .mr(2)
14904 .nr(8)
14905 .kr(8)
14906 .sr(1)
14907 .m(2)
14908 .n(n)
14909 .k(k)
14910 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014911 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014912 }
14913 }
14914 }
14915
14916 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_gt_8_subtile) {
14917 TEST_REQUIRES_ARM_NEON;
14918 for (uint32_t n = 9; n < 16; n++) {
14919 for (size_t k = 1; k <= 40; k += 9) {
14920 for (uint32_t m = 1; m <= 2; m++) {
14921 GemmMicrokernelTester()
14922 .mr(2)
14923 .nr(8)
14924 .kr(8)
14925 .sr(1)
14926 .m(m)
14927 .n(n)
14928 .k(k)
14929 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080014930 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014931 }
14932 }
14933 }
14934 }
14935
14936 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_div_8) {
14937 TEST_REQUIRES_ARM_NEON;
14938 for (uint32_t n = 16; n <= 24; n += 8) {
14939 for (size_t k = 1; k <= 40; k += 9) {
14940 GemmMicrokernelTester()
14941 .mr(2)
14942 .nr(8)
14943 .kr(8)
14944 .sr(1)
14945 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080014946 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014947 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080014948 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014949 }
14950 }
14951 }
14952
14953 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_div_8_strided_cn) {
14954 TEST_REQUIRES_ARM_NEON;
14955 for (uint32_t n = 16; n <= 24; n += 8) {
14956 for (size_t k = 1; k <= 40; k += 9) {
14957 GemmMicrokernelTester()
14958 .mr(2)
14959 .nr(8)
14960 .kr(8)
14961 .sr(1)
14962 .m(2)
14963 .n(n)
14964 .k(k)
14965 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080014966 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014967 }
14968 }
14969 }
14970
14971 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_div_8_strided_a) {
14972 TEST_REQUIRES_ARM_NEON;
14973 for (uint32_t n = 16; n <= 24; n += 8) {
14974 for (size_t k = 1; k <= 40; k += 9) {
14975 GemmMicrokernelTester()
14976 .mr(2)
14977 .nr(8)
14978 .kr(8)
14979 .sr(1)
14980 .m(2)
14981 .n(n)
14982 .k(k)
14983 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080014984 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080014985 }
14986 }
14987 }
14988
14989 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, n_div_8_subtile) {
14990 TEST_REQUIRES_ARM_NEON;
14991 for (uint32_t n = 16; n <= 24; n += 8) {
14992 for (size_t k = 1; k <= 40; k += 9) {
14993 for (uint32_t m = 1; m <= 2; m++) {
14994 GemmMicrokernelTester()
14995 .mr(2)
14996 .nr(8)
14997 .kr(8)
14998 .sr(1)
14999 .m(m)
15000 .n(n)
15001 .k(k)
15002 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015003 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015004 }
15005 }
15006 }
15007 }
15008
15009 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, strided_cm_subtile) {
15010 TEST_REQUIRES_ARM_NEON;
15011 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015012 for (uint32_t n = 1; n <= 8; n++) {
15013 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015014 GemmMicrokernelTester()
15015 .mr(2)
15016 .nr(8)
15017 .kr(8)
15018 .sr(1)
15019 .m(m)
15020 .n(n)
15021 .k(k)
15022 .cm_stride(11)
15023 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015024 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015025 }
15026 }
15027 }
15028 }
15029
15030 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, qmin) {
15031 TEST_REQUIRES_ARM_NEON;
15032 GemmMicrokernelTester()
15033 .mr(2)
15034 .nr(8)
15035 .kr(8)
15036 .sr(1)
15037 .m(2)
15038 .n(8)
15039 .k(8)
15040 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015042 }
15043
15044 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, qmax) {
15045 TEST_REQUIRES_ARM_NEON;
15046 GemmMicrokernelTester()
15047 .mr(2)
15048 .nr(8)
15049 .kr(8)
15050 .sr(1)
15051 .m(2)
15052 .n(8)
15053 .k(8)
15054 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015055 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015056 }
15057
15058 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__AARCH64_NEON_MULL, strided_cm) {
15059 TEST_REQUIRES_ARM_NEON;
15060 GemmMicrokernelTester()
15061 .mr(2)
15062 .nr(8)
15063 .kr(8)
15064 .sr(1)
15065 .m(2)
15066 .n(8)
15067 .k(8)
15068 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015069 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__aarch64_neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015070 }
15071#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
15072
15073
15074#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
15075 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16) {
15076 TEST_REQUIRES_ARM_NEON_DOT;
15077 GemmMicrokernelTester()
15078 .mr(4)
15079 .nr(16)
15080 .kr(4)
15081 .sr(1)
15082 .m(4)
15083 .n(16)
15084 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080015085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015086 }
15087
15088 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cn) {
15089 TEST_REQUIRES_ARM_NEON_DOT;
15090 GemmMicrokernelTester()
15091 .mr(4)
15092 .nr(16)
15093 .kr(4)
15094 .sr(1)
15095 .m(4)
15096 .n(16)
15097 .k(16)
15098 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015099 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015100 }
15101
15102 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_strided_a) {
15103 TEST_REQUIRES_ARM_NEON_DOT;
15104 GemmMicrokernelTester()
15105 .mr(4)
15106 .nr(16)
15107 .kr(4)
15108 .sr(1)
15109 .m(4)
15110 .n(16)
15111 .k(16)
15112 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015113 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015114 }
15115
15116 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile) {
15117 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015118 for (uint32_t n = 1; n <= 16; n++) {
15119 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015120 GemmMicrokernelTester()
15121 .mr(4)
15122 .nr(16)
15123 .kr(4)
15124 .sr(1)
15125 .m(m)
15126 .n(n)
15127 .k(16)
15128 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015130 }
15131 }
15132 }
15133
15134 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_m) {
15135 TEST_REQUIRES_ARM_NEON_DOT;
15136 for (uint32_t m = 1; m <= 4; m++) {
15137 GemmMicrokernelTester()
15138 .mr(4)
15139 .nr(16)
15140 .kr(4)
15141 .sr(1)
15142 .m(m)
15143 .n(16)
15144 .k(16)
15145 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015146 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015147 }
15148 }
15149
15150 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_eq_16_subtile_n) {
15151 TEST_REQUIRES_ARM_NEON_DOT;
15152 for (uint32_t n = 1; n <= 16; n++) {
15153 GemmMicrokernelTester()
15154 .mr(4)
15155 .nr(16)
15156 .kr(4)
15157 .sr(1)
15158 .m(4)
15159 .n(n)
15160 .k(16)
15161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015163 }
15164 }
15165
15166 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16) {
15167 TEST_REQUIRES_ARM_NEON_DOT;
15168 for (size_t k = 1; k < 16; k++) {
15169 GemmMicrokernelTester()
15170 .mr(4)
15171 .nr(16)
15172 .kr(4)
15173 .sr(1)
15174 .m(4)
15175 .n(16)
15176 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015177 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015178 }
15179 }
15180
15181 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_strided_a) {
15182 TEST_REQUIRES_ARM_NEON_DOT;
15183 for (size_t k = 1; k < 16; k++) {
15184 GemmMicrokernelTester()
15185 .mr(4)
15186 .nr(16)
15187 .kr(4)
15188 .sr(1)
15189 .m(4)
15190 .n(16)
15191 .k(k)
15192 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015193 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015194 }
15195 }
15196
15197 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_lt_16_subtile) {
15198 TEST_REQUIRES_ARM_NEON_DOT;
15199 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015200 for (uint32_t n = 1; n <= 16; n++) {
15201 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015202 GemmMicrokernelTester()
15203 .mr(4)
15204 .nr(16)
15205 .kr(4)
15206 .sr(1)
15207 .m(m)
15208 .n(n)
15209 .k(k)
15210 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015211 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015212 }
15213 }
15214 }
15215 }
15216
15217 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16) {
15218 TEST_REQUIRES_ARM_NEON_DOT;
15219 for (size_t k = 17; k < 32; k++) {
15220 GemmMicrokernelTester()
15221 .mr(4)
15222 .nr(16)
15223 .kr(4)
15224 .sr(1)
15225 .m(4)
15226 .n(16)
15227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015229 }
15230 }
15231
15232 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_strided_a) {
15233 TEST_REQUIRES_ARM_NEON_DOT;
15234 for (size_t k = 17; k < 32; k++) {
15235 GemmMicrokernelTester()
15236 .mr(4)
15237 .nr(16)
15238 .kr(4)
15239 .sr(1)
15240 .m(4)
15241 .n(16)
15242 .k(k)
15243 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080015244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015245 }
15246 }
15247
15248 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_gt_16_subtile) {
15249 TEST_REQUIRES_ARM_NEON_DOT;
15250 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015251 for (uint32_t n = 1; n <= 16; n++) {
15252 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015253 GemmMicrokernelTester()
15254 .mr(4)
15255 .nr(16)
15256 .kr(4)
15257 .sr(1)
15258 .m(m)
15259 .n(n)
15260 .k(k)
15261 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015262 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015263 }
15264 }
15265 }
15266 }
15267
15268 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16) {
15269 TEST_REQUIRES_ARM_NEON_DOT;
15270 for (size_t k = 32; k <= 160; k += 16) {
15271 GemmMicrokernelTester()
15272 .mr(4)
15273 .nr(16)
15274 .kr(4)
15275 .sr(1)
15276 .m(4)
15277 .n(16)
15278 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015280 }
15281 }
15282
15283 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_strided_a) {
15284 TEST_REQUIRES_ARM_NEON_DOT;
15285 for (size_t k = 32; k <= 160; k += 16) {
15286 GemmMicrokernelTester()
15287 .mr(4)
15288 .nr(16)
15289 .kr(4)
15290 .sr(1)
15291 .m(4)
15292 .n(16)
15293 .k(k)
15294 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080015295 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015296 }
15297 }
15298
15299 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, k_div_16_subtile) {
15300 TEST_REQUIRES_ARM_NEON_DOT;
15301 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015302 for (uint32_t n = 1; n <= 16; n++) {
15303 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015304 GemmMicrokernelTester()
15305 .mr(4)
15306 .nr(16)
15307 .kr(4)
15308 .sr(1)
15309 .m(m)
15310 .n(n)
15311 .k(k)
15312 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015313 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015314 }
15315 }
15316 }
15317 }
15318
15319 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16) {
15320 TEST_REQUIRES_ARM_NEON_DOT;
15321 for (uint32_t n = 17; n < 32; n++) {
15322 for (size_t k = 1; k <= 80; k += 17) {
15323 GemmMicrokernelTester()
15324 .mr(4)
15325 .nr(16)
15326 .kr(4)
15327 .sr(1)
15328 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015329 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015330 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015331 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015332 }
15333 }
15334 }
15335
15336 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_cn) {
15337 TEST_REQUIRES_ARM_NEON_DOT;
15338 for (uint32_t n = 17; n < 32; n++) {
15339 for (size_t k = 1; k <= 80; k += 17) {
15340 GemmMicrokernelTester()
15341 .mr(4)
15342 .nr(16)
15343 .kr(4)
15344 .sr(1)
15345 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015346 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015347 .k(k)
15348 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015350 }
15351 }
15352 }
15353
15354 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_strided_a) {
15355 TEST_REQUIRES_ARM_NEON_DOT;
15356 for (uint32_t n = 17; n < 32; n++) {
15357 for (size_t k = 1; k <= 80; k += 17) {
15358 GemmMicrokernelTester()
15359 .mr(4)
15360 .nr(16)
15361 .kr(4)
15362 .sr(1)
15363 .m(4)
15364 .n(n)
15365 .k(k)
15366 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015367 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015368 }
15369 }
15370 }
15371
15372 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_gt_16_subtile) {
15373 TEST_REQUIRES_ARM_NEON_DOT;
15374 for (uint32_t n = 17; n < 32; n++) {
15375 for (size_t k = 1; k <= 80; k += 17) {
15376 for (uint32_t m = 1; m <= 4; m++) {
15377 GemmMicrokernelTester()
15378 .mr(4)
15379 .nr(16)
15380 .kr(4)
15381 .sr(1)
15382 .m(m)
15383 .n(n)
15384 .k(k)
15385 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015386 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015387 }
15388 }
15389 }
15390 }
15391
15392 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16) {
15393 TEST_REQUIRES_ARM_NEON_DOT;
15394 for (uint32_t n = 32; n <= 48; n += 16) {
15395 for (size_t k = 1; k <= 80; k += 17) {
15396 GemmMicrokernelTester()
15397 .mr(4)
15398 .nr(16)
15399 .kr(4)
15400 .sr(1)
15401 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015402 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015403 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015404 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015405 }
15406 }
15407 }
15408
15409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_cn) {
15410 TEST_REQUIRES_ARM_NEON_DOT;
15411 for (uint32_t n = 32; n <= 48; n += 16) {
15412 for (size_t k = 1; k <= 80; k += 17) {
15413 GemmMicrokernelTester()
15414 .mr(4)
15415 .nr(16)
15416 .kr(4)
15417 .sr(1)
15418 .m(4)
15419 .n(n)
15420 .k(k)
15421 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015422 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015423 }
15424 }
15425 }
15426
15427 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_strided_a) {
15428 TEST_REQUIRES_ARM_NEON_DOT;
15429 for (uint32_t n = 32; n <= 48; n += 16) {
15430 for (size_t k = 1; k <= 80; k += 17) {
15431 GemmMicrokernelTester()
15432 .mr(4)
15433 .nr(16)
15434 .kr(4)
15435 .sr(1)
15436 .m(4)
15437 .n(n)
15438 .k(k)
15439 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015440 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015441 }
15442 }
15443 }
15444
15445 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, n_div_16_subtile) {
15446 TEST_REQUIRES_ARM_NEON_DOT;
15447 for (uint32_t n = 32; n <= 48; n += 16) {
15448 for (size_t k = 1; k <= 80; k += 17) {
15449 for (uint32_t m = 1; m <= 4; m++) {
15450 GemmMicrokernelTester()
15451 .mr(4)
15452 .nr(16)
15453 .kr(4)
15454 .sr(1)
15455 .m(m)
15456 .n(n)
15457 .k(k)
15458 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015459 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015460 }
15461 }
15462 }
15463 }
15464
15465 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm_subtile) {
15466 TEST_REQUIRES_ARM_NEON_DOT;
15467 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015468 for (uint32_t n = 1; n <= 16; n++) {
15469 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015470 GemmMicrokernelTester()
15471 .mr(4)
15472 .nr(16)
15473 .kr(4)
15474 .sr(1)
15475 .m(m)
15476 .n(n)
15477 .k(k)
15478 .cm_stride(19)
15479 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015480 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015481 }
15482 }
15483 }
15484 }
15485
15486 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmin) {
15487 TEST_REQUIRES_ARM_NEON_DOT;
15488 GemmMicrokernelTester()
15489 .mr(4)
15490 .nr(16)
15491 .kr(4)
15492 .sr(1)
15493 .m(4)
15494 .n(16)
15495 .k(16)
15496 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015498 }
15499
15500 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, qmax) {
15501 TEST_REQUIRES_ARM_NEON_DOT;
15502 GemmMicrokernelTester()
15503 .mr(4)
15504 .nr(16)
15505 .kr(4)
15506 .sr(1)
15507 .m(4)
15508 .n(16)
15509 .k(16)
15510 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015511 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015512 }
15513
15514 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__AARCH64_NEONDOT_CORTEX_A55, strided_cm) {
15515 TEST_REQUIRES_ARM_NEON_DOT;
15516 GemmMicrokernelTester()
15517 .mr(4)
15518 .nr(16)
15519 .kr(4)
15520 .sr(1)
15521 .m(4)
15522 .n(16)
15523 .k(16)
15524 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015525 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__aarch64_neondot_cortex_a55, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015526 }
15527#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
15528
15529
15530#if XNN_ARCH_ARM || XNN_ARCH_ARM64
15531 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8) {
15532 TEST_REQUIRES_ARM_NEON;
15533 GemmMicrokernelTester()
15534 .mr(2)
15535 .nr(8)
15536 .kr(8)
15537 .sr(1)
15538 .m(2)
15539 .n(8)
15540 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015542 }
15543
15544 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cn) {
15545 TEST_REQUIRES_ARM_NEON;
15546 GemmMicrokernelTester()
15547 .mr(2)
15548 .nr(8)
15549 .kr(8)
15550 .sr(1)
15551 .m(2)
15552 .n(8)
15553 .k(8)
15554 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015555 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015556 }
15557
15558 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_strided_a) {
15559 TEST_REQUIRES_ARM_NEON;
15560 GemmMicrokernelTester()
15561 .mr(2)
15562 .nr(8)
15563 .kr(8)
15564 .sr(1)
15565 .m(2)
15566 .n(8)
15567 .k(8)
15568 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015569 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015570 }
15571
15572 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile) {
15573 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080015574 for (uint32_t n = 1; n <= 8; n++) {
15575 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015576 GemmMicrokernelTester()
15577 .mr(2)
15578 .nr(8)
15579 .kr(8)
15580 .sr(1)
15581 .m(m)
15582 .n(n)
15583 .k(8)
15584 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015586 }
15587 }
15588 }
15589
15590 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile_m) {
15591 TEST_REQUIRES_ARM_NEON;
15592 for (uint32_t m = 1; m <= 2; m++) {
15593 GemmMicrokernelTester()
15594 .mr(2)
15595 .nr(8)
15596 .kr(8)
15597 .sr(1)
15598 .m(m)
15599 .n(8)
15600 .k(8)
15601 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015602 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015603 }
15604 }
15605
15606 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_eq_8_subtile_n) {
15607 TEST_REQUIRES_ARM_NEON;
15608 for (uint32_t n = 1; n <= 8; n++) {
15609 GemmMicrokernelTester()
15610 .mr(2)
15611 .nr(8)
15612 .kr(8)
15613 .sr(1)
15614 .m(2)
15615 .n(n)
15616 .k(8)
15617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015619 }
15620 }
15621
15622 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8) {
15623 TEST_REQUIRES_ARM_NEON;
15624 for (size_t k = 1; k < 8; k++) {
15625 GemmMicrokernelTester()
15626 .mr(2)
15627 .nr(8)
15628 .kr(8)
15629 .sr(1)
15630 .m(2)
15631 .n(8)
15632 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015633 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015634 }
15635 }
15636
15637 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8_strided_a) {
15638 TEST_REQUIRES_ARM_NEON;
15639 for (size_t k = 1; k < 8; k++) {
15640 GemmMicrokernelTester()
15641 .mr(2)
15642 .nr(8)
15643 .kr(8)
15644 .sr(1)
15645 .m(2)
15646 .n(8)
15647 .k(k)
15648 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015650 }
15651 }
15652
15653 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_lt_8_subtile) {
15654 TEST_REQUIRES_ARM_NEON;
15655 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015656 for (uint32_t n = 1; n <= 8; n++) {
15657 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015658 GemmMicrokernelTester()
15659 .mr(2)
15660 .nr(8)
15661 .kr(8)
15662 .sr(1)
15663 .m(m)
15664 .n(n)
15665 .k(k)
15666 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015667 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015668 }
15669 }
15670 }
15671 }
15672
15673 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8) {
15674 TEST_REQUIRES_ARM_NEON;
15675 for (size_t k = 9; k < 16; k++) {
15676 GemmMicrokernelTester()
15677 .mr(2)
15678 .nr(8)
15679 .kr(8)
15680 .sr(1)
15681 .m(2)
15682 .n(8)
15683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015685 }
15686 }
15687
15688 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8_strided_a) {
15689 TEST_REQUIRES_ARM_NEON;
15690 for (size_t k = 9; k < 16; k++) {
15691 GemmMicrokernelTester()
15692 .mr(2)
15693 .nr(8)
15694 .kr(8)
15695 .sr(1)
15696 .m(2)
15697 .n(8)
15698 .k(k)
15699 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080015700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015701 }
15702 }
15703
15704 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_gt_8_subtile) {
15705 TEST_REQUIRES_ARM_NEON;
15706 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015707 for (uint32_t n = 1; n <= 8; n++) {
15708 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015709 GemmMicrokernelTester()
15710 .mr(2)
15711 .nr(8)
15712 .kr(8)
15713 .sr(1)
15714 .m(m)
15715 .n(n)
15716 .k(k)
15717 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015718 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015719 }
15720 }
15721 }
15722 }
15723
15724 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8) {
15725 TEST_REQUIRES_ARM_NEON;
15726 for (size_t k = 16; k <= 80; k += 8) {
15727 GemmMicrokernelTester()
15728 .mr(2)
15729 .nr(8)
15730 .kr(8)
15731 .sr(1)
15732 .m(2)
15733 .n(8)
15734 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015736 }
15737 }
15738
15739 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8_strided_a) {
15740 TEST_REQUIRES_ARM_NEON;
15741 for (size_t k = 16; k <= 80; k += 8) {
15742 GemmMicrokernelTester()
15743 .mr(2)
15744 .nr(8)
15745 .kr(8)
15746 .sr(1)
15747 .m(2)
15748 .n(8)
15749 .k(k)
15750 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080015751 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015752 }
15753 }
15754
15755 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, k_div_8_subtile) {
15756 TEST_REQUIRES_ARM_NEON;
15757 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015758 for (uint32_t n = 1; n <= 8; n++) {
15759 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015760 GemmMicrokernelTester()
15761 .mr(2)
15762 .nr(8)
15763 .kr(8)
15764 .sr(1)
15765 .m(m)
15766 .n(n)
15767 .k(k)
15768 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015769 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015770 }
15771 }
15772 }
15773 }
15774
15775 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8) {
15776 TEST_REQUIRES_ARM_NEON;
15777 for (uint32_t n = 9; n < 16; n++) {
15778 for (size_t k = 1; k <= 40; k += 9) {
15779 GemmMicrokernelTester()
15780 .mr(2)
15781 .nr(8)
15782 .kr(8)
15783 .sr(1)
15784 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015785 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015786 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015787 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015788 }
15789 }
15790 }
15791
15792 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_strided_cn) {
15793 TEST_REQUIRES_ARM_NEON;
15794 for (uint32_t n = 9; n < 16; n++) {
15795 for (size_t k = 1; k <= 40; k += 9) {
15796 GemmMicrokernelTester()
15797 .mr(2)
15798 .nr(8)
15799 .kr(8)
15800 .sr(1)
15801 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015802 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015803 .k(k)
15804 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015806 }
15807 }
15808 }
15809
15810 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_strided_a) {
15811 TEST_REQUIRES_ARM_NEON;
15812 for (uint32_t n = 9; n < 16; n++) {
15813 for (size_t k = 1; k <= 40; k += 9) {
15814 GemmMicrokernelTester()
15815 .mr(2)
15816 .nr(8)
15817 .kr(8)
15818 .sr(1)
15819 .m(2)
15820 .n(n)
15821 .k(k)
15822 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015823 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015824 }
15825 }
15826 }
15827
15828 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_gt_8_subtile) {
15829 TEST_REQUIRES_ARM_NEON;
15830 for (uint32_t n = 9; n < 16; n++) {
15831 for (size_t k = 1; k <= 40; k += 9) {
15832 for (uint32_t m = 1; m <= 2; m++) {
15833 GemmMicrokernelTester()
15834 .mr(2)
15835 .nr(8)
15836 .kr(8)
15837 .sr(1)
15838 .m(m)
15839 .n(n)
15840 .k(k)
15841 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015842 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015843 }
15844 }
15845 }
15846 }
15847
15848 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8) {
15849 TEST_REQUIRES_ARM_NEON;
15850 for (uint32_t n = 16; n <= 24; n += 8) {
15851 for (size_t k = 1; k <= 40; k += 9) {
15852 GemmMicrokernelTester()
15853 .mr(2)
15854 .nr(8)
15855 .kr(8)
15856 .sr(1)
15857 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080015858 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015859 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080015860 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015861 }
15862 }
15863 }
15864
15865 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_strided_cn) {
15866 TEST_REQUIRES_ARM_NEON;
15867 for (uint32_t n = 16; n <= 24; n += 8) {
15868 for (size_t k = 1; k <= 40; k += 9) {
15869 GemmMicrokernelTester()
15870 .mr(2)
15871 .nr(8)
15872 .kr(8)
15873 .sr(1)
15874 .m(2)
15875 .n(n)
15876 .k(k)
15877 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015878 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015879 }
15880 }
15881 }
15882
15883 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_strided_a) {
15884 TEST_REQUIRES_ARM_NEON;
15885 for (uint32_t n = 16; n <= 24; n += 8) {
15886 for (size_t k = 1; k <= 40; k += 9) {
15887 GemmMicrokernelTester()
15888 .mr(2)
15889 .nr(8)
15890 .kr(8)
15891 .sr(1)
15892 .m(2)
15893 .n(n)
15894 .k(k)
15895 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080015896 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015897 }
15898 }
15899 }
15900
15901 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, n_div_8_subtile) {
15902 TEST_REQUIRES_ARM_NEON;
15903 for (uint32_t n = 16; n <= 24; n += 8) {
15904 for (size_t k = 1; k <= 40; k += 9) {
15905 for (uint32_t m = 1; m <= 2; m++) {
15906 GemmMicrokernelTester()
15907 .mr(2)
15908 .nr(8)
15909 .kr(8)
15910 .sr(1)
15911 .m(m)
15912 .n(n)
15913 .k(k)
15914 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015915 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015916 }
15917 }
15918 }
15919 }
15920
15921 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cm_subtile) {
15922 TEST_REQUIRES_ARM_NEON;
15923 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080015924 for (uint32_t n = 1; n <= 8; n++) {
15925 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015926 GemmMicrokernelTester()
15927 .mr(2)
15928 .nr(8)
15929 .kr(8)
15930 .sr(1)
15931 .m(m)
15932 .n(n)
15933 .k(k)
15934 .cm_stride(11)
15935 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080015936 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015937 }
15938 }
15939 }
15940 }
15941
15942 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, qmin) {
15943 TEST_REQUIRES_ARM_NEON;
15944 GemmMicrokernelTester()
15945 .mr(2)
15946 .nr(8)
15947 .kr(8)
15948 .sr(1)
15949 .m(2)
15950 .n(8)
15951 .k(8)
15952 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015954 }
15955
15956 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, qmax) {
15957 TEST_REQUIRES_ARM_NEON;
15958 GemmMicrokernelTester()
15959 .mr(2)
15960 .nr(8)
15961 .kr(8)
15962 .sr(1)
15963 .m(2)
15964 .n(8)
15965 .k(8)
15966 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080015967 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015968 }
15969
15970 TEST(QS8_GEMM_MINMAX_RNDNU_2X8C8__NEON_MULL, strided_cm) {
15971 TEST_REQUIRES_ARM_NEON;
15972 GemmMicrokernelTester()
15973 .mr(2)
15974 .nr(8)
15975 .kr(8)
15976 .sr(1)
15977 .m(2)
15978 .n(8)
15979 .k(8)
15980 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080015981 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015982 }
15983#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
15984
15985
15986#if XNN_ARCH_ARM || XNN_ARCH_ARM64
15987 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8) {
15988 TEST_REQUIRES_ARM_NEON;
15989 GemmMicrokernelTester()
15990 .mr(3)
15991 .nr(8)
15992 .kr(8)
15993 .sr(1)
15994 .m(3)
15995 .n(8)
15996 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080015997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080015998 }
15999
16000 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cn) {
16001 TEST_REQUIRES_ARM_NEON;
16002 GemmMicrokernelTester()
16003 .mr(3)
16004 .nr(8)
16005 .kr(8)
16006 .sr(1)
16007 .m(3)
16008 .n(8)
16009 .k(8)
16010 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016011 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016012 }
16013
16014 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_strided_a) {
16015 TEST_REQUIRES_ARM_NEON;
16016 GemmMicrokernelTester()
16017 .mr(3)
16018 .nr(8)
16019 .kr(8)
16020 .sr(1)
16021 .m(3)
16022 .n(8)
16023 .k(8)
16024 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016025 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016026 }
16027
16028 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile) {
16029 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016030 for (uint32_t n = 1; n <= 8; n++) {
16031 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016032 GemmMicrokernelTester()
16033 .mr(3)
16034 .nr(8)
16035 .kr(8)
16036 .sr(1)
16037 .m(m)
16038 .n(n)
16039 .k(8)
16040 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016042 }
16043 }
16044 }
16045
16046 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_m) {
16047 TEST_REQUIRES_ARM_NEON;
16048 for (uint32_t m = 1; m <= 3; m++) {
16049 GemmMicrokernelTester()
16050 .mr(3)
16051 .nr(8)
16052 .kr(8)
16053 .sr(1)
16054 .m(m)
16055 .n(8)
16056 .k(8)
16057 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016058 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016059 }
16060 }
16061
16062 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_eq_8_subtile_n) {
16063 TEST_REQUIRES_ARM_NEON;
16064 for (uint32_t n = 1; n <= 8; n++) {
16065 GemmMicrokernelTester()
16066 .mr(3)
16067 .nr(8)
16068 .kr(8)
16069 .sr(1)
16070 .m(3)
16071 .n(n)
16072 .k(8)
16073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016075 }
16076 }
16077
16078 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8) {
16079 TEST_REQUIRES_ARM_NEON;
16080 for (size_t k = 1; k < 8; k++) {
16081 GemmMicrokernelTester()
16082 .mr(3)
16083 .nr(8)
16084 .kr(8)
16085 .sr(1)
16086 .m(3)
16087 .n(8)
16088 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016089 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016090 }
16091 }
16092
16093 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8_strided_a) {
16094 TEST_REQUIRES_ARM_NEON;
16095 for (size_t k = 1; k < 8; k++) {
16096 GemmMicrokernelTester()
16097 .mr(3)
16098 .nr(8)
16099 .kr(8)
16100 .sr(1)
16101 .m(3)
16102 .n(8)
16103 .k(k)
16104 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016106 }
16107 }
16108
16109 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_lt_8_subtile) {
16110 TEST_REQUIRES_ARM_NEON;
16111 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016112 for (uint32_t n = 1; n <= 8; n++) {
16113 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016114 GemmMicrokernelTester()
16115 .mr(3)
16116 .nr(8)
16117 .kr(8)
16118 .sr(1)
16119 .m(m)
16120 .n(n)
16121 .k(k)
16122 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016123 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016124 }
16125 }
16126 }
16127 }
16128
16129 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8) {
16130 TEST_REQUIRES_ARM_NEON;
16131 for (size_t k = 9; k < 16; k++) {
16132 GemmMicrokernelTester()
16133 .mr(3)
16134 .nr(8)
16135 .kr(8)
16136 .sr(1)
16137 .m(3)
16138 .n(8)
16139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016141 }
16142 }
16143
16144 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8_strided_a) {
16145 TEST_REQUIRES_ARM_NEON;
16146 for (size_t k = 9; k < 16; k++) {
16147 GemmMicrokernelTester()
16148 .mr(3)
16149 .nr(8)
16150 .kr(8)
16151 .sr(1)
16152 .m(3)
16153 .n(8)
16154 .k(k)
16155 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016157 }
16158 }
16159
16160 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_gt_8_subtile) {
16161 TEST_REQUIRES_ARM_NEON;
16162 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016163 for (uint32_t n = 1; n <= 8; n++) {
16164 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016165 GemmMicrokernelTester()
16166 .mr(3)
16167 .nr(8)
16168 .kr(8)
16169 .sr(1)
16170 .m(m)
16171 .n(n)
16172 .k(k)
16173 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016174 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016175 }
16176 }
16177 }
16178 }
16179
16180 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8) {
16181 TEST_REQUIRES_ARM_NEON;
16182 for (size_t k = 16; k <= 80; k += 8) {
16183 GemmMicrokernelTester()
16184 .mr(3)
16185 .nr(8)
16186 .kr(8)
16187 .sr(1)
16188 .m(3)
16189 .n(8)
16190 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016192 }
16193 }
16194
16195 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8_strided_a) {
16196 TEST_REQUIRES_ARM_NEON;
16197 for (size_t k = 16; k <= 80; k += 8) {
16198 GemmMicrokernelTester()
16199 .mr(3)
16200 .nr(8)
16201 .kr(8)
16202 .sr(1)
16203 .m(3)
16204 .n(8)
16205 .k(k)
16206 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016207 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016208 }
16209 }
16210
16211 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, k_div_8_subtile) {
16212 TEST_REQUIRES_ARM_NEON;
16213 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016214 for (uint32_t n = 1; n <= 8; n++) {
16215 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016216 GemmMicrokernelTester()
16217 .mr(3)
16218 .nr(8)
16219 .kr(8)
16220 .sr(1)
16221 .m(m)
16222 .n(n)
16223 .k(k)
16224 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016225 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016226 }
16227 }
16228 }
16229 }
16230
16231 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8) {
16232 TEST_REQUIRES_ARM_NEON;
16233 for (uint32_t n = 9; n < 16; n++) {
16234 for (size_t k = 1; k <= 40; k += 9) {
16235 GemmMicrokernelTester()
16236 .mr(3)
16237 .nr(8)
16238 .kr(8)
16239 .sr(1)
16240 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016241 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016242 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016243 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016244 }
16245 }
16246 }
16247
16248 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_strided_cn) {
16249 TEST_REQUIRES_ARM_NEON;
16250 for (uint32_t n = 9; n < 16; n++) {
16251 for (size_t k = 1; k <= 40; k += 9) {
16252 GemmMicrokernelTester()
16253 .mr(3)
16254 .nr(8)
16255 .kr(8)
16256 .sr(1)
16257 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016258 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016259 .k(k)
16260 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016262 }
16263 }
16264 }
16265
16266 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_strided_a) {
16267 TEST_REQUIRES_ARM_NEON;
16268 for (uint32_t n = 9; n < 16; n++) {
16269 for (size_t k = 1; k <= 40; k += 9) {
16270 GemmMicrokernelTester()
16271 .mr(3)
16272 .nr(8)
16273 .kr(8)
16274 .sr(1)
16275 .m(3)
16276 .n(n)
16277 .k(k)
16278 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016279 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016280 }
16281 }
16282 }
16283
16284 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_gt_8_subtile) {
16285 TEST_REQUIRES_ARM_NEON;
16286 for (uint32_t n = 9; n < 16; n++) {
16287 for (size_t k = 1; k <= 40; k += 9) {
16288 for (uint32_t m = 1; m <= 3; m++) {
16289 GemmMicrokernelTester()
16290 .mr(3)
16291 .nr(8)
16292 .kr(8)
16293 .sr(1)
16294 .m(m)
16295 .n(n)
16296 .k(k)
16297 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016298 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016299 }
16300 }
16301 }
16302 }
16303
16304 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8) {
16305 TEST_REQUIRES_ARM_NEON;
16306 for (uint32_t n = 16; n <= 24; n += 8) {
16307 for (size_t k = 1; k <= 40; k += 9) {
16308 GemmMicrokernelTester()
16309 .mr(3)
16310 .nr(8)
16311 .kr(8)
16312 .sr(1)
16313 .m(3)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016314 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016315 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016316 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016317 }
16318 }
16319 }
16320
16321 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_strided_cn) {
16322 TEST_REQUIRES_ARM_NEON;
16323 for (uint32_t n = 16; n <= 24; n += 8) {
16324 for (size_t k = 1; k <= 40; k += 9) {
16325 GemmMicrokernelTester()
16326 .mr(3)
16327 .nr(8)
16328 .kr(8)
16329 .sr(1)
16330 .m(3)
16331 .n(n)
16332 .k(k)
16333 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016334 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016335 }
16336 }
16337 }
16338
16339 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_strided_a) {
16340 TEST_REQUIRES_ARM_NEON;
16341 for (uint32_t n = 16; n <= 24; n += 8) {
16342 for (size_t k = 1; k <= 40; k += 9) {
16343 GemmMicrokernelTester()
16344 .mr(3)
16345 .nr(8)
16346 .kr(8)
16347 .sr(1)
16348 .m(3)
16349 .n(n)
16350 .k(k)
16351 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016352 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016353 }
16354 }
16355 }
16356
16357 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, n_div_8_subtile) {
16358 TEST_REQUIRES_ARM_NEON;
16359 for (uint32_t n = 16; n <= 24; n += 8) {
16360 for (size_t k = 1; k <= 40; k += 9) {
16361 for (uint32_t m = 1; m <= 3; m++) {
16362 GemmMicrokernelTester()
16363 .mr(3)
16364 .nr(8)
16365 .kr(8)
16366 .sr(1)
16367 .m(m)
16368 .n(n)
16369 .k(k)
16370 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016371 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016372 }
16373 }
16374 }
16375 }
16376
16377 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm_subtile) {
16378 TEST_REQUIRES_ARM_NEON;
16379 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016380 for (uint32_t n = 1; n <= 8; n++) {
16381 for (uint32_t m = 1; m <= 3; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016382 GemmMicrokernelTester()
16383 .mr(3)
16384 .nr(8)
16385 .kr(8)
16386 .sr(1)
16387 .m(m)
16388 .n(n)
16389 .k(k)
16390 .cm_stride(11)
16391 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016392 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016393 }
16394 }
16395 }
16396 }
16397
16398 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmin) {
16399 TEST_REQUIRES_ARM_NEON;
16400 GemmMicrokernelTester()
16401 .mr(3)
16402 .nr(8)
16403 .kr(8)
16404 .sr(1)
16405 .m(3)
16406 .n(8)
16407 .k(8)
16408 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016410 }
16411
16412 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, qmax) {
16413 TEST_REQUIRES_ARM_NEON;
16414 GemmMicrokernelTester()
16415 .mr(3)
16416 .nr(8)
16417 .kr(8)
16418 .sr(1)
16419 .m(3)
16420 .n(8)
16421 .k(8)
16422 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016423 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016424 }
16425
16426 TEST(QS8_GEMM_MINMAX_RNDNU_3X8C8__NEON_MULL, strided_cm) {
16427 TEST_REQUIRES_ARM_NEON;
16428 GemmMicrokernelTester()
16429 .mr(3)
16430 .nr(8)
16431 .kr(8)
16432 .sr(1)
16433 .m(3)
16434 .n(8)
16435 .k(8)
16436 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016437 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_3x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016438 }
16439#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16440
16441
16442#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16443 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8) {
16444 TEST_REQUIRES_ARM_NEON;
16445 GemmMicrokernelTester()
16446 .mr(4)
16447 .nr(8)
16448 .kr(8)
16449 .sr(1)
16450 .m(4)
16451 .n(8)
16452 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080016453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016454 }
16455
16456 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cn) {
16457 TEST_REQUIRES_ARM_NEON;
16458 GemmMicrokernelTester()
16459 .mr(4)
16460 .nr(8)
16461 .kr(8)
16462 .sr(1)
16463 .m(4)
16464 .n(8)
16465 .k(8)
16466 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016467 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016468 }
16469
16470 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_strided_a) {
16471 TEST_REQUIRES_ARM_NEON;
16472 GemmMicrokernelTester()
16473 .mr(4)
16474 .nr(8)
16475 .kr(8)
16476 .sr(1)
16477 .m(4)
16478 .n(8)
16479 .k(8)
16480 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016481 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016482 }
16483
16484 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile) {
16485 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016486 for (uint32_t n = 1; n <= 8; n++) {
16487 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016488 GemmMicrokernelTester()
16489 .mr(4)
16490 .nr(8)
16491 .kr(8)
16492 .sr(1)
16493 .m(m)
16494 .n(n)
16495 .k(8)
16496 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016497 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016498 }
16499 }
16500 }
16501
16502 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile_m) {
16503 TEST_REQUIRES_ARM_NEON;
16504 for (uint32_t m = 1; m <= 4; m++) {
16505 GemmMicrokernelTester()
16506 .mr(4)
16507 .nr(8)
16508 .kr(8)
16509 .sr(1)
16510 .m(m)
16511 .n(8)
16512 .k(8)
16513 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016514 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016515 }
16516 }
16517
16518 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_eq_8_subtile_n) {
16519 TEST_REQUIRES_ARM_NEON;
16520 for (uint32_t n = 1; n <= 8; n++) {
16521 GemmMicrokernelTester()
16522 .mr(4)
16523 .nr(8)
16524 .kr(8)
16525 .sr(1)
16526 .m(4)
16527 .n(n)
16528 .k(8)
16529 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016530 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016531 }
16532 }
16533
16534 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8) {
16535 TEST_REQUIRES_ARM_NEON;
16536 for (size_t k = 1; k < 8; k++) {
16537 GemmMicrokernelTester()
16538 .mr(4)
16539 .nr(8)
16540 .kr(8)
16541 .sr(1)
16542 .m(4)
16543 .n(8)
16544 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016545 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016546 }
16547 }
16548
16549 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8_strided_a) {
16550 TEST_REQUIRES_ARM_NEON;
16551 for (size_t k = 1; k < 8; k++) {
16552 GemmMicrokernelTester()
16553 .mr(4)
16554 .nr(8)
16555 .kr(8)
16556 .sr(1)
16557 .m(4)
16558 .n(8)
16559 .k(k)
16560 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016561 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016562 }
16563 }
16564
16565 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_lt_8_subtile) {
16566 TEST_REQUIRES_ARM_NEON;
16567 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016568 for (uint32_t n = 1; n <= 8; n++) {
16569 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016570 GemmMicrokernelTester()
16571 .mr(4)
16572 .nr(8)
16573 .kr(8)
16574 .sr(1)
16575 .m(m)
16576 .n(n)
16577 .k(k)
16578 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016579 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016580 }
16581 }
16582 }
16583 }
16584
16585 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8) {
16586 TEST_REQUIRES_ARM_NEON;
16587 for (size_t k = 9; k < 16; k++) {
16588 GemmMicrokernelTester()
16589 .mr(4)
16590 .nr(8)
16591 .kr(8)
16592 .sr(1)
16593 .m(4)
16594 .n(8)
16595 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016597 }
16598 }
16599
16600 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8_strided_a) {
16601 TEST_REQUIRES_ARM_NEON;
16602 for (size_t k = 9; k < 16; k++) {
16603 GemmMicrokernelTester()
16604 .mr(4)
16605 .nr(8)
16606 .kr(8)
16607 .sr(1)
16608 .m(4)
16609 .n(8)
16610 .k(k)
16611 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016612 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016613 }
16614 }
16615
16616 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_gt_8_subtile) {
16617 TEST_REQUIRES_ARM_NEON;
16618 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016619 for (uint32_t n = 1; n <= 8; n++) {
16620 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016621 GemmMicrokernelTester()
16622 .mr(4)
16623 .nr(8)
16624 .kr(8)
16625 .sr(1)
16626 .m(m)
16627 .n(n)
16628 .k(k)
16629 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016630 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016631 }
16632 }
16633 }
16634 }
16635
16636 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8) {
16637 TEST_REQUIRES_ARM_NEON;
16638 for (size_t k = 16; k <= 80; k += 8) {
16639 GemmMicrokernelTester()
16640 .mr(4)
16641 .nr(8)
16642 .kr(8)
16643 .sr(1)
16644 .m(4)
16645 .n(8)
16646 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016648 }
16649 }
16650
16651 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8_strided_a) {
16652 TEST_REQUIRES_ARM_NEON;
16653 for (size_t k = 16; k <= 80; k += 8) {
16654 GemmMicrokernelTester()
16655 .mr(4)
16656 .nr(8)
16657 .kr(8)
16658 .sr(1)
16659 .m(4)
16660 .n(8)
16661 .k(k)
16662 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080016663 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016664 }
16665 }
16666
16667 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, k_div_8_subtile) {
16668 TEST_REQUIRES_ARM_NEON;
16669 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016670 for (uint32_t n = 1; n <= 8; n++) {
16671 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016672 GemmMicrokernelTester()
16673 .mr(4)
16674 .nr(8)
16675 .kr(8)
16676 .sr(1)
16677 .m(m)
16678 .n(n)
16679 .k(k)
16680 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016681 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016682 }
16683 }
16684 }
16685 }
16686
16687 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8) {
16688 TEST_REQUIRES_ARM_NEON;
16689 for (uint32_t n = 9; n < 16; n++) {
16690 for (size_t k = 1; k <= 40; k += 9) {
16691 GemmMicrokernelTester()
16692 .mr(4)
16693 .nr(8)
16694 .kr(8)
16695 .sr(1)
16696 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016697 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016698 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016699 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016700 }
16701 }
16702 }
16703
16704 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_strided_cn) {
16705 TEST_REQUIRES_ARM_NEON;
16706 for (uint32_t n = 9; n < 16; n++) {
16707 for (size_t k = 1; k <= 40; k += 9) {
16708 GemmMicrokernelTester()
16709 .mr(4)
16710 .nr(8)
16711 .kr(8)
16712 .sr(1)
16713 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016714 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016715 .k(k)
16716 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016718 }
16719 }
16720 }
16721
16722 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_strided_a) {
16723 TEST_REQUIRES_ARM_NEON;
16724 for (uint32_t n = 9; n < 16; n++) {
16725 for (size_t k = 1; k <= 40; k += 9) {
16726 GemmMicrokernelTester()
16727 .mr(4)
16728 .nr(8)
16729 .kr(8)
16730 .sr(1)
16731 .m(4)
16732 .n(n)
16733 .k(k)
16734 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016735 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016736 }
16737 }
16738 }
16739
16740 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_gt_8_subtile) {
16741 TEST_REQUIRES_ARM_NEON;
16742 for (uint32_t n = 9; n < 16; n++) {
16743 for (size_t k = 1; k <= 40; k += 9) {
16744 for (uint32_t m = 1; m <= 4; m++) {
16745 GemmMicrokernelTester()
16746 .mr(4)
16747 .nr(8)
16748 .kr(8)
16749 .sr(1)
16750 .m(m)
16751 .n(n)
16752 .k(k)
16753 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016754 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016755 }
16756 }
16757 }
16758 }
16759
16760 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8) {
16761 TEST_REQUIRES_ARM_NEON;
16762 for (uint32_t n = 16; n <= 24; n += 8) {
16763 for (size_t k = 1; k <= 40; k += 9) {
16764 GemmMicrokernelTester()
16765 .mr(4)
16766 .nr(8)
16767 .kr(8)
16768 .sr(1)
16769 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080016770 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016771 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080016772 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016773 }
16774 }
16775 }
16776
16777 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_strided_cn) {
16778 TEST_REQUIRES_ARM_NEON;
16779 for (uint32_t n = 16; n <= 24; n += 8) {
16780 for (size_t k = 1; k <= 40; k += 9) {
16781 GemmMicrokernelTester()
16782 .mr(4)
16783 .nr(8)
16784 .kr(8)
16785 .sr(1)
16786 .m(4)
16787 .n(n)
16788 .k(k)
16789 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016790 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016791 }
16792 }
16793 }
16794
16795 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_strided_a) {
16796 TEST_REQUIRES_ARM_NEON;
16797 for (uint32_t n = 16; n <= 24; n += 8) {
16798 for (size_t k = 1; k <= 40; k += 9) {
16799 GemmMicrokernelTester()
16800 .mr(4)
16801 .nr(8)
16802 .kr(8)
16803 .sr(1)
16804 .m(4)
16805 .n(n)
16806 .k(k)
16807 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080016808 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016809 }
16810 }
16811 }
16812
16813 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, n_div_8_subtile) {
16814 TEST_REQUIRES_ARM_NEON;
16815 for (uint32_t n = 16; n <= 24; n += 8) {
16816 for (size_t k = 1; k <= 40; k += 9) {
16817 for (uint32_t m = 1; m <= 4; m++) {
16818 GemmMicrokernelTester()
16819 .mr(4)
16820 .nr(8)
16821 .kr(8)
16822 .sr(1)
16823 .m(m)
16824 .n(n)
16825 .k(k)
16826 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016827 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016828 }
16829 }
16830 }
16831 }
16832
16833 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cm_subtile) {
16834 TEST_REQUIRES_ARM_NEON;
16835 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080016836 for (uint32_t n = 1; n <= 8; n++) {
16837 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016838 GemmMicrokernelTester()
16839 .mr(4)
16840 .nr(8)
16841 .kr(8)
16842 .sr(1)
16843 .m(m)
16844 .n(n)
16845 .k(k)
16846 .cm_stride(11)
16847 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016848 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016849 }
16850 }
16851 }
16852 }
16853
16854 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, qmin) {
16855 TEST_REQUIRES_ARM_NEON;
16856 GemmMicrokernelTester()
16857 .mr(4)
16858 .nr(8)
16859 .kr(8)
16860 .sr(1)
16861 .m(4)
16862 .n(8)
16863 .k(8)
16864 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016866 }
16867
16868 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, qmax) {
16869 TEST_REQUIRES_ARM_NEON;
16870 GemmMicrokernelTester()
16871 .mr(4)
16872 .nr(8)
16873 .kr(8)
16874 .sr(1)
16875 .m(4)
16876 .n(8)
16877 .k(8)
16878 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080016879 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016880 }
16881
16882 TEST(QS8_GEMM_MINMAX_RNDNU_4X8C8__NEON_MULL, strided_cm) {
16883 TEST_REQUIRES_ARM_NEON;
16884 GemmMicrokernelTester()
16885 .mr(4)
16886 .nr(8)
16887 .kr(8)
16888 .sr(1)
16889 .m(4)
16890 .n(8)
16891 .k(8)
16892 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016893 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8c8__neon_mull, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016894 }
16895#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
16896
16897
16898#if XNN_ARCH_ARM || XNN_ARCH_ARM64
16899 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16) {
16900 TEST_REQUIRES_ARM_NEON;
16901 GemmMicrokernelTester()
16902 .mr(1)
16903 .nr(8)
16904 .kr(8)
16905 .sr(1)
16906 .m(1)
16907 .n(8)
16908 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080016909 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016910 }
16911
16912 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cn) {
16913 TEST_REQUIRES_ARM_NEON;
16914 GemmMicrokernelTester()
16915 .mr(1)
16916 .nr(8)
16917 .kr(8)
16918 .sr(1)
16919 .m(1)
16920 .n(8)
16921 .k(16)
16922 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080016923 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016924 }
16925
16926 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_strided_a) {
16927 TEST_REQUIRES_ARM_NEON;
16928 GemmMicrokernelTester()
16929 .mr(1)
16930 .nr(8)
16931 .kr(8)
16932 .sr(1)
16933 .m(1)
16934 .n(8)
16935 .k(16)
16936 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080016937 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016938 }
16939
16940 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile) {
16941 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080016942 for (uint32_t n = 1; n <= 8; n++) {
16943 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016944 GemmMicrokernelTester()
16945 .mr(1)
16946 .nr(8)
16947 .kr(8)
16948 .sr(1)
16949 .m(m)
16950 .n(n)
16951 .k(16)
16952 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016953 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016954 }
16955 }
16956 }
16957
16958 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile_m) {
16959 TEST_REQUIRES_ARM_NEON;
16960 for (uint32_t m = 1; m <= 1; m++) {
16961 GemmMicrokernelTester()
16962 .mr(1)
16963 .nr(8)
16964 .kr(8)
16965 .sr(1)
16966 .m(m)
16967 .n(8)
16968 .k(16)
16969 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016970 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016971 }
16972 }
16973
16974 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_eq_16_subtile_n) {
16975 TEST_REQUIRES_ARM_NEON;
16976 for (uint32_t n = 1; n <= 8; n++) {
16977 GemmMicrokernelTester()
16978 .mr(1)
16979 .nr(8)
16980 .kr(8)
16981 .sr(1)
16982 .m(1)
16983 .n(n)
16984 .k(16)
16985 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080016986 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080016987 }
16988 }
16989
16990 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16) {
16991 TEST_REQUIRES_ARM_NEON;
16992 for (size_t k = 1; k < 16; k++) {
16993 GemmMicrokernelTester()
16994 .mr(1)
16995 .nr(8)
16996 .kr(8)
16997 .sr(1)
16998 .m(1)
16999 .n(8)
17000 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017001 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017002 }
17003 }
17004
17005 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16_strided_a) {
17006 TEST_REQUIRES_ARM_NEON;
17007 for (size_t k = 1; k < 16; k++) {
17008 GemmMicrokernelTester()
17009 .mr(1)
17010 .nr(8)
17011 .kr(8)
17012 .sr(1)
17013 .m(1)
17014 .n(8)
17015 .k(k)
17016 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017017 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017018 }
17019 }
17020
17021 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_lt_16_subtile) {
17022 TEST_REQUIRES_ARM_NEON;
17023 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017024 for (uint32_t n = 1; n <= 8; n++) {
17025 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017026 GemmMicrokernelTester()
17027 .mr(1)
17028 .nr(8)
17029 .kr(8)
17030 .sr(1)
17031 .m(m)
17032 .n(n)
17033 .k(k)
17034 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017035 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017036 }
17037 }
17038 }
17039 }
17040
17041 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16) {
17042 TEST_REQUIRES_ARM_NEON;
17043 for (size_t k = 17; k < 32; k++) {
17044 GemmMicrokernelTester()
17045 .mr(1)
17046 .nr(8)
17047 .kr(8)
17048 .sr(1)
17049 .m(1)
17050 .n(8)
17051 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017053 }
17054 }
17055
17056 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16_strided_a) {
17057 TEST_REQUIRES_ARM_NEON;
17058 for (size_t k = 17; k < 32; k++) {
17059 GemmMicrokernelTester()
17060 .mr(1)
17061 .nr(8)
17062 .kr(8)
17063 .sr(1)
17064 .m(1)
17065 .n(8)
17066 .k(k)
17067 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080017068 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017069 }
17070 }
17071
17072 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_gt_16_subtile) {
17073 TEST_REQUIRES_ARM_NEON;
17074 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017075 for (uint32_t n = 1; n <= 8; n++) {
17076 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017077 GemmMicrokernelTester()
17078 .mr(1)
17079 .nr(8)
17080 .kr(8)
17081 .sr(1)
17082 .m(m)
17083 .n(n)
17084 .k(k)
17085 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017086 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017087 }
17088 }
17089 }
17090 }
17091
17092 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16) {
17093 TEST_REQUIRES_ARM_NEON;
17094 for (size_t k = 32; k <= 160; k += 16) {
17095 GemmMicrokernelTester()
17096 .mr(1)
17097 .nr(8)
17098 .kr(8)
17099 .sr(1)
17100 .m(1)
17101 .n(8)
17102 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017104 }
17105 }
17106
17107 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16_strided_a) {
17108 TEST_REQUIRES_ARM_NEON;
17109 for (size_t k = 32; k <= 160; k += 16) {
17110 GemmMicrokernelTester()
17111 .mr(1)
17112 .nr(8)
17113 .kr(8)
17114 .sr(1)
17115 .m(1)
17116 .n(8)
17117 .k(k)
17118 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080017119 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017120 }
17121 }
17122
17123 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, k_div_16_subtile) {
17124 TEST_REQUIRES_ARM_NEON;
17125 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017126 for (uint32_t n = 1; n <= 8; n++) {
17127 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017128 GemmMicrokernelTester()
17129 .mr(1)
17130 .nr(8)
17131 .kr(8)
17132 .sr(1)
17133 .m(m)
17134 .n(n)
17135 .k(k)
17136 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017137 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017138 }
17139 }
17140 }
17141 }
17142
17143 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8) {
17144 TEST_REQUIRES_ARM_NEON;
17145 for (uint32_t n = 9; n < 16; n++) {
17146 for (size_t k = 1; k <= 80; k += 17) {
17147 GemmMicrokernelTester()
17148 .mr(1)
17149 .nr(8)
17150 .kr(8)
17151 .sr(1)
17152 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017153 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017154 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017155 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017156 }
17157 }
17158 }
17159
17160 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_strided_cn) {
17161 TEST_REQUIRES_ARM_NEON;
17162 for (uint32_t n = 9; n < 16; n++) {
17163 for (size_t k = 1; k <= 80; k += 17) {
17164 GemmMicrokernelTester()
17165 .mr(1)
17166 .nr(8)
17167 .kr(8)
17168 .sr(1)
17169 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017170 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017171 .k(k)
17172 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017174 }
17175 }
17176 }
17177
17178 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_strided_a) {
17179 TEST_REQUIRES_ARM_NEON;
17180 for (uint32_t n = 9; n < 16; n++) {
17181 for (size_t k = 1; k <= 80; k += 17) {
17182 GemmMicrokernelTester()
17183 .mr(1)
17184 .nr(8)
17185 .kr(8)
17186 .sr(1)
17187 .m(1)
17188 .n(n)
17189 .k(k)
17190 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017191 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017192 }
17193 }
17194 }
17195
17196 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_gt_8_subtile) {
17197 TEST_REQUIRES_ARM_NEON;
17198 for (uint32_t n = 9; n < 16; n++) {
17199 for (size_t k = 1; k <= 80; k += 17) {
17200 for (uint32_t m = 1; m <= 1; m++) {
17201 GemmMicrokernelTester()
17202 .mr(1)
17203 .nr(8)
17204 .kr(8)
17205 .sr(1)
17206 .m(m)
17207 .n(n)
17208 .k(k)
17209 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017210 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017211 }
17212 }
17213 }
17214 }
17215
17216 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8) {
17217 TEST_REQUIRES_ARM_NEON;
17218 for (uint32_t n = 16; n <= 24; n += 8) {
17219 for (size_t k = 1; k <= 80; k += 17) {
17220 GemmMicrokernelTester()
17221 .mr(1)
17222 .nr(8)
17223 .kr(8)
17224 .sr(1)
17225 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017226 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017227 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017228 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017229 }
17230 }
17231 }
17232
17233 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_strided_cn) {
17234 TEST_REQUIRES_ARM_NEON;
17235 for (uint32_t n = 16; n <= 24; n += 8) {
17236 for (size_t k = 1; k <= 80; k += 17) {
17237 GemmMicrokernelTester()
17238 .mr(1)
17239 .nr(8)
17240 .kr(8)
17241 .sr(1)
17242 .m(1)
17243 .n(n)
17244 .k(k)
17245 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017246 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017247 }
17248 }
17249 }
17250
17251 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_strided_a) {
17252 TEST_REQUIRES_ARM_NEON;
17253 for (uint32_t n = 16; n <= 24; n += 8) {
17254 for (size_t k = 1; k <= 80; k += 17) {
17255 GemmMicrokernelTester()
17256 .mr(1)
17257 .nr(8)
17258 .kr(8)
17259 .sr(1)
17260 .m(1)
17261 .n(n)
17262 .k(k)
17263 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017264 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017265 }
17266 }
17267 }
17268
17269 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, n_div_8_subtile) {
17270 TEST_REQUIRES_ARM_NEON;
17271 for (uint32_t n = 16; n <= 24; n += 8) {
17272 for (size_t k = 1; k <= 80; k += 17) {
17273 for (uint32_t m = 1; m <= 1; m++) {
17274 GemmMicrokernelTester()
17275 .mr(1)
17276 .nr(8)
17277 .kr(8)
17278 .sr(1)
17279 .m(m)
17280 .n(n)
17281 .k(k)
17282 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017283 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017284 }
17285 }
17286 }
17287 }
17288
17289 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cm_subtile) {
17290 TEST_REQUIRES_ARM_NEON;
17291 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017292 for (uint32_t n = 1; n <= 8; n++) {
17293 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017294 GemmMicrokernelTester()
17295 .mr(1)
17296 .nr(8)
17297 .kr(8)
17298 .sr(1)
17299 .m(m)
17300 .n(n)
17301 .k(k)
17302 .cm_stride(11)
17303 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017304 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017305 }
17306 }
17307 }
17308 }
17309
17310 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, qmin) {
17311 TEST_REQUIRES_ARM_NEON;
17312 GemmMicrokernelTester()
17313 .mr(1)
17314 .nr(8)
17315 .kr(8)
17316 .sr(1)
17317 .m(1)
17318 .n(8)
17319 .k(16)
17320 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017322 }
17323
17324 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, qmax) {
17325 TEST_REQUIRES_ARM_NEON;
17326 GemmMicrokernelTester()
17327 .mr(1)
17328 .nr(8)
17329 .kr(8)
17330 .sr(1)
17331 .m(1)
17332 .n(8)
17333 .k(16)
17334 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017335 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017336 }
17337
17338 TEST(QS8_GEMM_MINMAX_RNDNU_1X8C8__NEON_MLAL, strided_cm) {
17339 TEST_REQUIRES_ARM_NEON;
17340 GemmMicrokernelTester()
17341 .mr(1)
17342 .nr(8)
17343 .kr(8)
17344 .sr(1)
17345 .m(1)
17346 .n(8)
17347 .k(16)
17348 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080017349 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x8c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017350 }
17351#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17352
17353
17354#if XNN_ARCH_ARM || XNN_ARCH_ARM64
17355 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16) {
17356 TEST_REQUIRES_ARM_NEON;
17357 GemmMicrokernelTester()
17358 .mr(4)
17359 .nr(16)
17360 .kr(8)
17361 .sr(1)
17362 .m(4)
17363 .n(16)
17364 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080017365 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017366 }
17367
17368 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cn) {
17369 TEST_REQUIRES_ARM_NEON;
17370 GemmMicrokernelTester()
17371 .mr(4)
17372 .nr(16)
17373 .kr(8)
17374 .sr(1)
17375 .m(4)
17376 .n(16)
17377 .k(16)
17378 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017379 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017380 }
17381
17382 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_strided_a) {
17383 TEST_REQUIRES_ARM_NEON;
17384 GemmMicrokernelTester()
17385 .mr(4)
17386 .nr(16)
17387 .kr(8)
17388 .sr(1)
17389 .m(4)
17390 .n(16)
17391 .k(16)
17392 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017393 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017394 }
17395
17396 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile) {
17397 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017398 for (uint32_t n = 1; n <= 16; n++) {
17399 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017400 GemmMicrokernelTester()
17401 .mr(4)
17402 .nr(16)
17403 .kr(8)
17404 .sr(1)
17405 .m(m)
17406 .n(n)
17407 .k(16)
17408 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017409 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017410 }
17411 }
17412 }
17413
17414 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile_m) {
17415 TEST_REQUIRES_ARM_NEON;
17416 for (uint32_t m = 1; m <= 4; m++) {
17417 GemmMicrokernelTester()
17418 .mr(4)
17419 .nr(16)
17420 .kr(8)
17421 .sr(1)
17422 .m(m)
17423 .n(16)
17424 .k(16)
17425 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017426 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017427 }
17428 }
17429
17430 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_eq_16_subtile_n) {
17431 TEST_REQUIRES_ARM_NEON;
17432 for (uint32_t n = 1; n <= 16; n++) {
17433 GemmMicrokernelTester()
17434 .mr(4)
17435 .nr(16)
17436 .kr(8)
17437 .sr(1)
17438 .m(4)
17439 .n(n)
17440 .k(16)
17441 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017442 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017443 }
17444 }
17445
17446 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16) {
17447 TEST_REQUIRES_ARM_NEON;
17448 for (size_t k = 1; k < 16; k++) {
17449 GemmMicrokernelTester()
17450 .mr(4)
17451 .nr(16)
17452 .kr(8)
17453 .sr(1)
17454 .m(4)
17455 .n(16)
17456 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017457 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017458 }
17459 }
17460
17461 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16_strided_a) {
17462 TEST_REQUIRES_ARM_NEON;
17463 for (size_t k = 1; k < 16; k++) {
17464 GemmMicrokernelTester()
17465 .mr(4)
17466 .nr(16)
17467 .kr(8)
17468 .sr(1)
17469 .m(4)
17470 .n(16)
17471 .k(k)
17472 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017473 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017474 }
17475 }
17476
17477 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_lt_16_subtile) {
17478 TEST_REQUIRES_ARM_NEON;
17479 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017480 for (uint32_t n = 1; n <= 16; n++) {
17481 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017482 GemmMicrokernelTester()
17483 .mr(4)
17484 .nr(16)
17485 .kr(8)
17486 .sr(1)
17487 .m(m)
17488 .n(n)
17489 .k(k)
17490 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017491 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017492 }
17493 }
17494 }
17495 }
17496
17497 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16) {
17498 TEST_REQUIRES_ARM_NEON;
17499 for (size_t k = 17; k < 32; k++) {
17500 GemmMicrokernelTester()
17501 .mr(4)
17502 .nr(16)
17503 .kr(8)
17504 .sr(1)
17505 .m(4)
17506 .n(16)
17507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017509 }
17510 }
17511
17512 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16_strided_a) {
17513 TEST_REQUIRES_ARM_NEON;
17514 for (size_t k = 17; k < 32; k++) {
17515 GemmMicrokernelTester()
17516 .mr(4)
17517 .nr(16)
17518 .kr(8)
17519 .sr(1)
17520 .m(4)
17521 .n(16)
17522 .k(k)
17523 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080017524 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017525 }
17526 }
17527
17528 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_gt_16_subtile) {
17529 TEST_REQUIRES_ARM_NEON;
17530 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017531 for (uint32_t n = 1; n <= 16; n++) {
17532 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017533 GemmMicrokernelTester()
17534 .mr(4)
17535 .nr(16)
17536 .kr(8)
17537 .sr(1)
17538 .m(m)
17539 .n(n)
17540 .k(k)
17541 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017542 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017543 }
17544 }
17545 }
17546 }
17547
17548 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16) {
17549 TEST_REQUIRES_ARM_NEON;
17550 for (size_t k = 32; k <= 160; k += 16) {
17551 GemmMicrokernelTester()
17552 .mr(4)
17553 .nr(16)
17554 .kr(8)
17555 .sr(1)
17556 .m(4)
17557 .n(16)
17558 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017560 }
17561 }
17562
17563 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16_strided_a) {
17564 TEST_REQUIRES_ARM_NEON;
17565 for (size_t k = 32; k <= 160; k += 16) {
17566 GemmMicrokernelTester()
17567 .mr(4)
17568 .nr(16)
17569 .kr(8)
17570 .sr(1)
17571 .m(4)
17572 .n(16)
17573 .k(k)
17574 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080017575 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017576 }
17577 }
17578
17579 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, k_div_16_subtile) {
17580 TEST_REQUIRES_ARM_NEON;
17581 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017582 for (uint32_t n = 1; n <= 16; n++) {
17583 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017584 GemmMicrokernelTester()
17585 .mr(4)
17586 .nr(16)
17587 .kr(8)
17588 .sr(1)
17589 .m(m)
17590 .n(n)
17591 .k(k)
17592 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017593 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017594 }
17595 }
17596 }
17597 }
17598
17599 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16) {
17600 TEST_REQUIRES_ARM_NEON;
17601 for (uint32_t n = 17; n < 32; n++) {
17602 for (size_t k = 1; k <= 80; k += 17) {
17603 GemmMicrokernelTester()
17604 .mr(4)
17605 .nr(16)
17606 .kr(8)
17607 .sr(1)
17608 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017609 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017610 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017611 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017612 }
17613 }
17614 }
17615
17616 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_strided_cn) {
17617 TEST_REQUIRES_ARM_NEON;
17618 for (uint32_t n = 17; n < 32; n++) {
17619 for (size_t k = 1; k <= 80; k += 17) {
17620 GemmMicrokernelTester()
17621 .mr(4)
17622 .nr(16)
17623 .kr(8)
17624 .sr(1)
17625 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017626 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017627 .k(k)
17628 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017630 }
17631 }
17632 }
17633
17634 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_strided_a) {
17635 TEST_REQUIRES_ARM_NEON;
17636 for (uint32_t n = 17; n < 32; n++) {
17637 for (size_t k = 1; k <= 80; k += 17) {
17638 GemmMicrokernelTester()
17639 .mr(4)
17640 .nr(16)
17641 .kr(8)
17642 .sr(1)
17643 .m(4)
17644 .n(n)
17645 .k(k)
17646 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017647 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017648 }
17649 }
17650 }
17651
17652 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_gt_16_subtile) {
17653 TEST_REQUIRES_ARM_NEON;
17654 for (uint32_t n = 17; n < 32; n++) {
17655 for (size_t k = 1; k <= 80; k += 17) {
17656 for (uint32_t m = 1; m <= 4; m++) {
17657 GemmMicrokernelTester()
17658 .mr(4)
17659 .nr(16)
17660 .kr(8)
17661 .sr(1)
17662 .m(m)
17663 .n(n)
17664 .k(k)
17665 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017666 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017667 }
17668 }
17669 }
17670 }
17671
17672 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16) {
17673 TEST_REQUIRES_ARM_NEON;
17674 for (uint32_t n = 32; n <= 48; n += 16) {
17675 for (size_t k = 1; k <= 80; k += 17) {
17676 GemmMicrokernelTester()
17677 .mr(4)
17678 .nr(16)
17679 .kr(8)
17680 .sr(1)
17681 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080017682 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017683 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017684 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017685 }
17686 }
17687 }
17688
17689 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_strided_cn) {
17690 TEST_REQUIRES_ARM_NEON;
17691 for (uint32_t n = 32; n <= 48; n += 16) {
17692 for (size_t k = 1; k <= 80; k += 17) {
17693 GemmMicrokernelTester()
17694 .mr(4)
17695 .nr(16)
17696 .kr(8)
17697 .sr(1)
17698 .m(4)
17699 .n(n)
17700 .k(k)
17701 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017702 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017703 }
17704 }
17705 }
17706
17707 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_strided_a) {
17708 TEST_REQUIRES_ARM_NEON;
17709 for (uint32_t n = 32; n <= 48; n += 16) {
17710 for (size_t k = 1; k <= 80; k += 17) {
17711 GemmMicrokernelTester()
17712 .mr(4)
17713 .nr(16)
17714 .kr(8)
17715 .sr(1)
17716 .m(4)
17717 .n(n)
17718 .k(k)
17719 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080017720 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017721 }
17722 }
17723 }
17724
17725 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, n_div_16_subtile) {
17726 TEST_REQUIRES_ARM_NEON;
17727 for (uint32_t n = 32; n <= 48; n += 16) {
17728 for (size_t k = 1; k <= 80; k += 17) {
17729 for (uint32_t m = 1; m <= 4; m++) {
17730 GemmMicrokernelTester()
17731 .mr(4)
17732 .nr(16)
17733 .kr(8)
17734 .sr(1)
17735 .m(m)
17736 .n(n)
17737 .k(k)
17738 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017739 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017740 }
17741 }
17742 }
17743 }
17744
17745 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cm_subtile) {
17746 TEST_REQUIRES_ARM_NEON;
17747 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017748 for (uint32_t n = 1; n <= 16; n++) {
17749 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017750 GemmMicrokernelTester()
17751 .mr(4)
17752 .nr(16)
17753 .kr(8)
17754 .sr(1)
17755 .m(m)
17756 .n(n)
17757 .k(k)
17758 .cm_stride(19)
17759 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017760 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017761 }
17762 }
17763 }
17764 }
17765
17766 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, qmin) {
17767 TEST_REQUIRES_ARM_NEON;
17768 GemmMicrokernelTester()
17769 .mr(4)
17770 .nr(16)
17771 .kr(8)
17772 .sr(1)
17773 .m(4)
17774 .n(16)
17775 .k(16)
17776 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017778 }
17779
17780 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, qmax) {
17781 TEST_REQUIRES_ARM_NEON;
17782 GemmMicrokernelTester()
17783 .mr(4)
17784 .nr(16)
17785 .kr(8)
17786 .sr(1)
17787 .m(4)
17788 .n(16)
17789 .k(16)
17790 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080017791 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017792 }
17793
17794 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C8__NEON_MLAL, strided_cm) {
17795 TEST_REQUIRES_ARM_NEON;
17796 GemmMicrokernelTester()
17797 .mr(4)
17798 .nr(16)
17799 .kr(8)
17800 .sr(1)
17801 .m(4)
17802 .n(16)
17803 .k(16)
17804 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017805 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c8__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017806 }
17807#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
17808
17809
17810#if XNN_ARCH_ARM || XNN_ARCH_ARM64
17811 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16) {
17812 TEST_REQUIRES_ARM_NEON;
17813 GemmMicrokernelTester()
17814 .mr(4)
17815 .nr(16)
17816 .kr(16)
17817 .sr(1)
17818 .m(4)
17819 .n(16)
17820 .k(16)
Marat Dukhan50323b82022-01-11 00:12:01 -080017821 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017822 }
17823
17824 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, strided_cn) {
17825 TEST_REQUIRES_ARM_NEON;
17826 GemmMicrokernelTester()
17827 .mr(4)
17828 .nr(16)
17829 .kr(16)
17830 .sr(1)
17831 .m(4)
17832 .n(16)
17833 .k(16)
17834 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017835 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017836 }
17837
17838 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_strided_a) {
17839 TEST_REQUIRES_ARM_NEON;
17840 GemmMicrokernelTester()
17841 .mr(4)
17842 .nr(16)
17843 .kr(16)
17844 .sr(1)
17845 .m(4)
17846 .n(16)
17847 .k(16)
17848 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017849 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017850 }
17851
17852 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_subtile) {
17853 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080017854 for (uint32_t n = 1; n <= 16; n++) {
17855 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017856 GemmMicrokernelTester()
17857 .mr(4)
17858 .nr(16)
17859 .kr(16)
17860 .sr(1)
17861 .m(m)
17862 .n(n)
17863 .k(16)
17864 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017865 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017866 }
17867 }
17868 }
17869
17870 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_subtile_m) {
17871 TEST_REQUIRES_ARM_NEON;
17872 for (uint32_t m = 1; m <= 4; m++) {
17873 GemmMicrokernelTester()
17874 .mr(4)
17875 .nr(16)
17876 .kr(16)
17877 .sr(1)
17878 .m(m)
17879 .n(16)
17880 .k(16)
17881 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017882 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017883 }
17884 }
17885
17886 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_eq_16_subtile_n) {
17887 TEST_REQUIRES_ARM_NEON;
17888 for (uint32_t n = 1; n <= 16; n++) {
17889 GemmMicrokernelTester()
17890 .mr(4)
17891 .nr(16)
17892 .kr(16)
17893 .sr(1)
17894 .m(4)
17895 .n(n)
17896 .k(16)
17897 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017898 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017899 }
17900 }
17901
17902 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_lt_16) {
17903 TEST_REQUIRES_ARM_NEON;
17904 for (size_t k = 1; k < 16; k++) {
17905 GemmMicrokernelTester()
17906 .mr(4)
17907 .nr(16)
17908 .kr(16)
17909 .sr(1)
17910 .m(4)
17911 .n(16)
17912 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017913 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017914 }
17915 }
17916
17917 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_lt_16_strided_a) {
17918 TEST_REQUIRES_ARM_NEON;
17919 for (size_t k = 1; k < 16; k++) {
17920 GemmMicrokernelTester()
17921 .mr(4)
17922 .nr(16)
17923 .kr(16)
17924 .sr(1)
17925 .m(4)
17926 .n(16)
17927 .k(k)
17928 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080017929 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017930 }
17931 }
17932
17933 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_lt_16_subtile) {
17934 TEST_REQUIRES_ARM_NEON;
17935 for (size_t k = 1; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017936 for (uint32_t n = 1; n <= 16; n++) {
17937 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017938 GemmMicrokernelTester()
17939 .mr(4)
17940 .nr(16)
17941 .kr(16)
17942 .sr(1)
17943 .m(m)
17944 .n(n)
17945 .k(k)
17946 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017947 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017948 }
17949 }
17950 }
17951 }
17952
17953 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_gt_16) {
17954 TEST_REQUIRES_ARM_NEON;
17955 for (size_t k = 17; k < 32; k++) {
17956 GemmMicrokernelTester()
17957 .mr(4)
17958 .nr(16)
17959 .kr(16)
17960 .sr(1)
17961 .m(4)
17962 .n(16)
17963 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080017964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017965 }
17966 }
17967
17968 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_gt_16_strided_a) {
17969 TEST_REQUIRES_ARM_NEON;
17970 for (size_t k = 17; k < 32; k++) {
17971 GemmMicrokernelTester()
17972 .mr(4)
17973 .nr(16)
17974 .kr(16)
17975 .sr(1)
17976 .m(4)
17977 .n(16)
17978 .k(k)
17979 .a_stride(37)
Marat Dukhan50323b82022-01-11 00:12:01 -080017980 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017981 }
17982 }
17983
17984 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_gt_16_subtile) {
17985 TEST_REQUIRES_ARM_NEON;
17986 for (size_t k = 17; k < 32; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080017987 for (uint32_t n = 1; n <= 16; n++) {
17988 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017989 GemmMicrokernelTester()
17990 .mr(4)
17991 .nr(16)
17992 .kr(16)
17993 .sr(1)
17994 .m(m)
17995 .n(n)
17996 .k(k)
17997 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080017998 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080017999 }
18000 }
18001 }
18002 }
18003
18004 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_div_16) {
18005 TEST_REQUIRES_ARM_NEON;
18006 for (size_t k = 32; k <= 160; k += 16) {
18007 GemmMicrokernelTester()
18008 .mr(4)
18009 .nr(16)
18010 .kr(16)
18011 .sr(1)
18012 .m(4)
18013 .n(16)
18014 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018016 }
18017 }
18018
18019 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_div_16_strided_a) {
18020 TEST_REQUIRES_ARM_NEON;
18021 for (size_t k = 32; k <= 160; k += 16) {
18022 GemmMicrokernelTester()
18023 .mr(4)
18024 .nr(16)
18025 .kr(16)
18026 .sr(1)
18027 .m(4)
18028 .n(16)
18029 .k(k)
18030 .a_stride(163)
Marat Dukhan50323b82022-01-11 00:12:01 -080018031 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018032 }
18033 }
18034
18035 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, k_div_16_subtile) {
18036 TEST_REQUIRES_ARM_NEON;
18037 for (size_t k = 32; k <= 160; k += 16) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018038 for (uint32_t n = 1; n <= 16; n++) {
18039 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018040 GemmMicrokernelTester()
18041 .mr(4)
18042 .nr(16)
18043 .kr(16)
18044 .sr(1)
18045 .m(m)
18046 .n(n)
18047 .k(k)
18048 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018049 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018050 }
18051 }
18052 }
18053 }
18054
18055 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16) {
18056 TEST_REQUIRES_ARM_NEON;
18057 for (uint32_t n = 17; n < 32; n++) {
18058 for (size_t k = 1; k <= 80; k += 17) {
18059 GemmMicrokernelTester()
18060 .mr(4)
18061 .nr(16)
18062 .kr(16)
18063 .sr(1)
18064 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018065 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018066 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018067 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018068 }
18069 }
18070 }
18071
18072 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16_strided_cn) {
18073 TEST_REQUIRES_ARM_NEON;
18074 for (uint32_t n = 17; n < 32; n++) {
18075 for (size_t k = 1; k <= 80; k += 17) {
18076 GemmMicrokernelTester()
18077 .mr(4)
18078 .nr(16)
18079 .kr(16)
18080 .sr(1)
18081 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018082 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018083 .k(k)
18084 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018086 }
18087 }
18088 }
18089
18090 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16_strided_a) {
18091 TEST_REQUIRES_ARM_NEON;
18092 for (uint32_t n = 17; n < 32; n++) {
18093 for (size_t k = 1; k <= 80; k += 17) {
18094 GemmMicrokernelTester()
18095 .mr(4)
18096 .nr(16)
18097 .kr(16)
18098 .sr(1)
18099 .m(4)
18100 .n(n)
18101 .k(k)
18102 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018103 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018104 }
18105 }
18106 }
18107
18108 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_gt_16_subtile) {
18109 TEST_REQUIRES_ARM_NEON;
18110 for (uint32_t n = 17; n < 32; n++) {
18111 for (size_t k = 1; k <= 80; k += 17) {
18112 for (uint32_t m = 1; m <= 4; m++) {
18113 GemmMicrokernelTester()
18114 .mr(4)
18115 .nr(16)
18116 .kr(16)
18117 .sr(1)
18118 .m(m)
18119 .n(n)
18120 .k(k)
18121 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018122 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018123 }
18124 }
18125 }
18126 }
18127
18128 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16) {
18129 TEST_REQUIRES_ARM_NEON;
18130 for (uint32_t n = 32; n <= 48; n += 16) {
18131 for (size_t k = 1; k <= 80; k += 17) {
18132 GemmMicrokernelTester()
18133 .mr(4)
18134 .nr(16)
18135 .kr(16)
18136 .sr(1)
18137 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018138 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018139 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018140 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018141 }
18142 }
18143 }
18144
18145 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16_strided_cn) {
18146 TEST_REQUIRES_ARM_NEON;
18147 for (uint32_t n = 32; n <= 48; n += 16) {
18148 for (size_t k = 1; k <= 80; k += 17) {
18149 GemmMicrokernelTester()
18150 .mr(4)
18151 .nr(16)
18152 .kr(16)
18153 .sr(1)
18154 .m(4)
18155 .n(n)
18156 .k(k)
18157 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018158 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018159 }
18160 }
18161 }
18162
18163 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16_strided_a) {
18164 TEST_REQUIRES_ARM_NEON;
18165 for (uint32_t n = 32; n <= 48; n += 16) {
18166 for (size_t k = 1; k <= 80; k += 17) {
18167 GemmMicrokernelTester()
18168 .mr(4)
18169 .nr(16)
18170 .kr(16)
18171 .sr(1)
18172 .m(4)
18173 .n(n)
18174 .k(k)
18175 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018176 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018177 }
18178 }
18179 }
18180
18181 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, n_div_16_subtile) {
18182 TEST_REQUIRES_ARM_NEON;
18183 for (uint32_t n = 32; n <= 48; n += 16) {
18184 for (size_t k = 1; k <= 80; k += 17) {
18185 for (uint32_t m = 1; m <= 4; m++) {
18186 GemmMicrokernelTester()
18187 .mr(4)
18188 .nr(16)
18189 .kr(16)
18190 .sr(1)
18191 .m(m)
18192 .n(n)
18193 .k(k)
18194 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018195 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018196 }
18197 }
18198 }
18199 }
18200
18201 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, strided_cm_subtile) {
18202 TEST_REQUIRES_ARM_NEON;
18203 for (size_t k = 1; k <= 80; k += 17) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018204 for (uint32_t n = 1; n <= 16; n++) {
18205 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018206 GemmMicrokernelTester()
18207 .mr(4)
18208 .nr(16)
18209 .kr(16)
18210 .sr(1)
18211 .m(m)
18212 .n(n)
18213 .k(k)
18214 .cm_stride(19)
18215 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018216 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018217 }
18218 }
18219 }
18220 }
18221
18222 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, qmin) {
18223 TEST_REQUIRES_ARM_NEON;
18224 GemmMicrokernelTester()
18225 .mr(4)
18226 .nr(16)
18227 .kr(16)
18228 .sr(1)
18229 .m(4)
18230 .n(16)
18231 .k(16)
18232 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018234 }
18235
18236 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, qmax) {
18237 TEST_REQUIRES_ARM_NEON;
18238 GemmMicrokernelTester()
18239 .mr(4)
18240 .nr(16)
18241 .kr(16)
18242 .sr(1)
18243 .m(4)
18244 .n(16)
18245 .k(16)
18246 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018247 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018248 }
18249
18250 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C16__NEON_MLAL, strided_cm) {
18251 TEST_REQUIRES_ARM_NEON;
18252 GemmMicrokernelTester()
18253 .mr(4)
18254 .nr(16)
18255 .kr(16)
18256 .sr(1)
18257 .m(4)
18258 .n(16)
18259 .k(16)
18260 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018261 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c16__neon_mlal, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018262 }
18263#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
18264
18265
18266#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
18267 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8) {
18268 TEST_REQUIRES_ARM_NEON_DOT;
18269 GemmMicrokernelTester()
18270 .mr(4)
18271 .nr(16)
18272 .kr(4)
18273 .sr(1)
18274 .m(4)
18275 .n(16)
18276 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018277 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018278 }
18279
18280 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cn) {
18281 TEST_REQUIRES_ARM_NEON_DOT;
18282 GemmMicrokernelTester()
18283 .mr(4)
18284 .nr(16)
18285 .kr(4)
18286 .sr(1)
18287 .m(4)
18288 .n(16)
18289 .k(8)
18290 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018291 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018292 }
18293
18294 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_strided_a) {
18295 TEST_REQUIRES_ARM_NEON_DOT;
18296 GemmMicrokernelTester()
18297 .mr(4)
18298 .nr(16)
18299 .kr(4)
18300 .sr(1)
18301 .m(4)
18302 .n(16)
18303 .k(8)
18304 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018305 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018306 }
18307
18308 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile) {
18309 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018310 for (uint32_t n = 1; n <= 16; n++) {
18311 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018312 GemmMicrokernelTester()
18313 .mr(4)
18314 .nr(16)
18315 .kr(4)
18316 .sr(1)
18317 .m(m)
18318 .n(n)
18319 .k(8)
18320 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018321 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018322 }
18323 }
18324 }
18325
18326 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_m) {
18327 TEST_REQUIRES_ARM_NEON_DOT;
18328 for (uint32_t m = 1; m <= 4; m++) {
18329 GemmMicrokernelTester()
18330 .mr(4)
18331 .nr(16)
18332 .kr(4)
18333 .sr(1)
18334 .m(m)
18335 .n(16)
18336 .k(8)
18337 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018338 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018339 }
18340 }
18341
18342 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_eq_8_subtile_n) {
18343 TEST_REQUIRES_ARM_NEON_DOT;
18344 for (uint32_t n = 1; n <= 16; n++) {
18345 GemmMicrokernelTester()
18346 .mr(4)
18347 .nr(16)
18348 .kr(4)
18349 .sr(1)
18350 .m(4)
18351 .n(n)
18352 .k(8)
18353 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018354 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018355 }
18356 }
18357
18358 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8) {
18359 TEST_REQUIRES_ARM_NEON_DOT;
18360 for (size_t k = 1; k < 8; k++) {
18361 GemmMicrokernelTester()
18362 .mr(4)
18363 .nr(16)
18364 .kr(4)
18365 .sr(1)
18366 .m(4)
18367 .n(16)
18368 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018369 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018370 }
18371 }
18372
18373 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_strided_a) {
18374 TEST_REQUIRES_ARM_NEON_DOT;
18375 for (size_t k = 1; k < 8; k++) {
18376 GemmMicrokernelTester()
18377 .mr(4)
18378 .nr(16)
18379 .kr(4)
18380 .sr(1)
18381 .m(4)
18382 .n(16)
18383 .k(k)
18384 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018385 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018386 }
18387 }
18388
18389 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_lt_8_subtile) {
18390 TEST_REQUIRES_ARM_NEON_DOT;
18391 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018392 for (uint32_t n = 1; n <= 16; n++) {
18393 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018394 GemmMicrokernelTester()
18395 .mr(4)
18396 .nr(16)
18397 .kr(4)
18398 .sr(1)
18399 .m(m)
18400 .n(n)
18401 .k(k)
18402 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018403 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018404 }
18405 }
18406 }
18407 }
18408
18409 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8) {
18410 TEST_REQUIRES_ARM_NEON_DOT;
18411 for (size_t k = 9; k < 16; k++) {
18412 GemmMicrokernelTester()
18413 .mr(4)
18414 .nr(16)
18415 .kr(4)
18416 .sr(1)
18417 .m(4)
18418 .n(16)
18419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018421 }
18422 }
18423
18424 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_strided_a) {
18425 TEST_REQUIRES_ARM_NEON_DOT;
18426 for (size_t k = 9; k < 16; k++) {
18427 GemmMicrokernelTester()
18428 .mr(4)
18429 .nr(16)
18430 .kr(4)
18431 .sr(1)
18432 .m(4)
18433 .n(16)
18434 .k(k)
18435 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018436 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018437 }
18438 }
18439
18440 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_gt_8_subtile) {
18441 TEST_REQUIRES_ARM_NEON_DOT;
18442 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018443 for (uint32_t n = 1; n <= 16; n++) {
18444 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018445 GemmMicrokernelTester()
18446 .mr(4)
18447 .nr(16)
18448 .kr(4)
18449 .sr(1)
18450 .m(m)
18451 .n(n)
18452 .k(k)
18453 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018454 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018455 }
18456 }
18457 }
18458 }
18459
18460 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8) {
18461 TEST_REQUIRES_ARM_NEON_DOT;
18462 for (size_t k = 16; k <= 80; k += 8) {
18463 GemmMicrokernelTester()
18464 .mr(4)
18465 .nr(16)
18466 .kr(4)
18467 .sr(1)
18468 .m(4)
18469 .n(16)
18470 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018472 }
18473 }
18474
18475 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_strided_a) {
18476 TEST_REQUIRES_ARM_NEON_DOT;
18477 for (size_t k = 16; k <= 80; k += 8) {
18478 GemmMicrokernelTester()
18479 .mr(4)
18480 .nr(16)
18481 .kr(4)
18482 .sr(1)
18483 .m(4)
18484 .n(16)
18485 .k(k)
18486 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018487 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018488 }
18489 }
18490
18491 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, k_div_8_subtile) {
18492 TEST_REQUIRES_ARM_NEON_DOT;
18493 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018494 for (uint32_t n = 1; n <= 16; n++) {
18495 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018496 GemmMicrokernelTester()
18497 .mr(4)
18498 .nr(16)
18499 .kr(4)
18500 .sr(1)
18501 .m(m)
18502 .n(n)
18503 .k(k)
18504 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018505 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018506 }
18507 }
18508 }
18509 }
18510
18511 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16) {
18512 TEST_REQUIRES_ARM_NEON_DOT;
18513 for (uint32_t n = 17; n < 32; n++) {
18514 for (size_t k = 1; k <= 40; k += 9) {
18515 GemmMicrokernelTester()
18516 .mr(4)
18517 .nr(16)
18518 .kr(4)
18519 .sr(1)
18520 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018521 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018522 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018523 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018524 }
18525 }
18526 }
18527
18528 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_cn) {
18529 TEST_REQUIRES_ARM_NEON_DOT;
18530 for (uint32_t n = 17; n < 32; n++) {
18531 for (size_t k = 1; k <= 40; k += 9) {
18532 GemmMicrokernelTester()
18533 .mr(4)
18534 .nr(16)
18535 .kr(4)
18536 .sr(1)
18537 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018538 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018539 .k(k)
18540 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018542 }
18543 }
18544 }
18545
18546 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_strided_a) {
18547 TEST_REQUIRES_ARM_NEON_DOT;
18548 for (uint32_t n = 17; n < 32; n++) {
18549 for (size_t k = 1; k <= 40; k += 9) {
18550 GemmMicrokernelTester()
18551 .mr(4)
18552 .nr(16)
18553 .kr(4)
18554 .sr(1)
18555 .m(4)
18556 .n(n)
18557 .k(k)
18558 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018559 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018560 }
18561 }
18562 }
18563
18564 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_gt_16_subtile) {
18565 TEST_REQUIRES_ARM_NEON_DOT;
18566 for (uint32_t n = 17; n < 32; n++) {
18567 for (size_t k = 1; k <= 40; k += 9) {
18568 for (uint32_t m = 1; m <= 4; m++) {
18569 GemmMicrokernelTester()
18570 .mr(4)
18571 .nr(16)
18572 .kr(4)
18573 .sr(1)
18574 .m(m)
18575 .n(n)
18576 .k(k)
18577 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018578 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018579 }
18580 }
18581 }
18582 }
18583
18584 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16) {
18585 TEST_REQUIRES_ARM_NEON_DOT;
18586 for (uint32_t n = 32; n <= 48; n += 16) {
18587 for (size_t k = 1; k <= 40; k += 9) {
18588 GemmMicrokernelTester()
18589 .mr(4)
18590 .nr(16)
18591 .kr(4)
18592 .sr(1)
18593 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018594 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018595 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018596 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018597 }
18598 }
18599 }
18600
18601 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_cn) {
18602 TEST_REQUIRES_ARM_NEON_DOT;
18603 for (uint32_t n = 32; n <= 48; n += 16) {
18604 for (size_t k = 1; k <= 40; k += 9) {
18605 GemmMicrokernelTester()
18606 .mr(4)
18607 .nr(16)
18608 .kr(4)
18609 .sr(1)
18610 .m(4)
18611 .n(n)
18612 .k(k)
18613 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018614 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018615 }
18616 }
18617 }
18618
18619 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_strided_a) {
18620 TEST_REQUIRES_ARM_NEON_DOT;
18621 for (uint32_t n = 32; n <= 48; n += 16) {
18622 for (size_t k = 1; k <= 40; k += 9) {
18623 GemmMicrokernelTester()
18624 .mr(4)
18625 .nr(16)
18626 .kr(4)
18627 .sr(1)
18628 .m(4)
18629 .n(n)
18630 .k(k)
18631 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080018632 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018633 }
18634 }
18635 }
18636
18637 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, n_div_16_subtile) {
18638 TEST_REQUIRES_ARM_NEON_DOT;
18639 for (uint32_t n = 32; n <= 48; n += 16) {
18640 for (size_t k = 1; k <= 40; k += 9) {
18641 for (uint32_t m = 1; m <= 4; m++) {
18642 GemmMicrokernelTester()
18643 .mr(4)
18644 .nr(16)
18645 .kr(4)
18646 .sr(1)
18647 .m(m)
18648 .n(n)
18649 .k(k)
18650 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018651 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018652 }
18653 }
18654 }
18655 }
18656
18657 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm_subtile) {
18658 TEST_REQUIRES_ARM_NEON_DOT;
18659 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018660 for (uint32_t n = 1; n <= 16; n++) {
18661 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018662 GemmMicrokernelTester()
18663 .mr(4)
18664 .nr(16)
18665 .kr(4)
18666 .sr(1)
18667 .m(m)
18668 .n(n)
18669 .k(k)
18670 .cm_stride(19)
18671 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018672 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018673 }
18674 }
18675 }
18676 }
18677
18678 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmin) {
18679 TEST_REQUIRES_ARM_NEON_DOT;
18680 GemmMicrokernelTester()
18681 .mr(4)
18682 .nr(16)
18683 .kr(4)
18684 .sr(1)
18685 .m(4)
18686 .n(16)
18687 .k(8)
18688 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018690 }
18691
18692 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, qmax) {
18693 TEST_REQUIRES_ARM_NEON_DOT;
18694 GemmMicrokernelTester()
18695 .mr(4)
18696 .nr(16)
18697 .kr(4)
18698 .sr(1)
18699 .m(4)
18700 .n(16)
18701 .k(8)
18702 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080018703 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018704 }
18705
18706 TEST(QS8_GEMM_MINMAX_RNDNU_4X16C4__NEONDOT, strided_cm) {
18707 TEST_REQUIRES_ARM_NEON_DOT;
18708 GemmMicrokernelTester()
18709 .mr(4)
18710 .nr(16)
18711 .kr(4)
18712 .sr(1)
18713 .m(4)
18714 .n(16)
18715 .k(8)
18716 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018717 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018718 }
18719#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
18720
18721
18722#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
18723 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8) {
18724 TEST_REQUIRES_ARM_NEON_DOT;
18725 GemmMicrokernelTester()
18726 .mr(6)
18727 .nr(16)
18728 .kr(4)
18729 .sr(1)
18730 .m(6)
18731 .n(16)
18732 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080018733 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018734 }
18735
18736 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cn) {
18737 TEST_REQUIRES_ARM_NEON_DOT;
18738 GemmMicrokernelTester()
18739 .mr(6)
18740 .nr(16)
18741 .kr(4)
18742 .sr(1)
18743 .m(6)
18744 .n(16)
18745 .k(8)
18746 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018747 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018748 }
18749
18750 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_strided_a) {
18751 TEST_REQUIRES_ARM_NEON_DOT;
18752 GemmMicrokernelTester()
18753 .mr(6)
18754 .nr(16)
18755 .kr(4)
18756 .sr(1)
18757 .m(6)
18758 .n(16)
18759 .k(8)
18760 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018761 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018762 }
18763
18764 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile) {
18765 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080018766 for (uint32_t n = 1; n <= 16; n++) {
18767 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018768 GemmMicrokernelTester()
18769 .mr(6)
18770 .nr(16)
18771 .kr(4)
18772 .sr(1)
18773 .m(m)
18774 .n(n)
18775 .k(8)
18776 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018777 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018778 }
18779 }
18780 }
18781
18782 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_m) {
18783 TEST_REQUIRES_ARM_NEON_DOT;
18784 for (uint32_t m = 1; m <= 6; m++) {
18785 GemmMicrokernelTester()
18786 .mr(6)
18787 .nr(16)
18788 .kr(4)
18789 .sr(1)
18790 .m(m)
18791 .n(16)
18792 .k(8)
18793 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018794 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018795 }
18796 }
18797
18798 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_eq_8_subtile_n) {
18799 TEST_REQUIRES_ARM_NEON_DOT;
18800 for (uint32_t n = 1; n <= 16; n++) {
18801 GemmMicrokernelTester()
18802 .mr(6)
18803 .nr(16)
18804 .kr(4)
18805 .sr(1)
18806 .m(6)
18807 .n(n)
18808 .k(8)
18809 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018810 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018811 }
18812 }
18813
18814 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8) {
18815 TEST_REQUIRES_ARM_NEON_DOT;
18816 for (size_t k = 1; k < 8; k++) {
18817 GemmMicrokernelTester()
18818 .mr(6)
18819 .nr(16)
18820 .kr(4)
18821 .sr(1)
18822 .m(6)
18823 .n(16)
18824 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018825 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018826 }
18827 }
18828
18829 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_strided_a) {
18830 TEST_REQUIRES_ARM_NEON_DOT;
18831 for (size_t k = 1; k < 8; k++) {
18832 GemmMicrokernelTester()
18833 .mr(6)
18834 .nr(16)
18835 .kr(4)
18836 .sr(1)
18837 .m(6)
18838 .n(16)
18839 .k(k)
18840 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080018841 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018842 }
18843 }
18844
18845 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_lt_8_subtile) {
18846 TEST_REQUIRES_ARM_NEON_DOT;
18847 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018848 for (uint32_t n = 1; n <= 16; n++) {
18849 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018850 GemmMicrokernelTester()
18851 .mr(6)
18852 .nr(16)
18853 .kr(4)
18854 .sr(1)
18855 .m(m)
18856 .n(n)
18857 .k(k)
18858 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018859 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018860 }
18861 }
18862 }
18863 }
18864
18865 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8) {
18866 TEST_REQUIRES_ARM_NEON_DOT;
18867 for (size_t k = 9; k < 16; k++) {
18868 GemmMicrokernelTester()
18869 .mr(6)
18870 .nr(16)
18871 .kr(4)
18872 .sr(1)
18873 .m(6)
18874 .n(16)
18875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018877 }
18878 }
18879
18880 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_strided_a) {
18881 TEST_REQUIRES_ARM_NEON_DOT;
18882 for (size_t k = 9; k < 16; k++) {
18883 GemmMicrokernelTester()
18884 .mr(6)
18885 .nr(16)
18886 .kr(4)
18887 .sr(1)
18888 .m(6)
18889 .n(16)
18890 .k(k)
18891 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018892 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018893 }
18894 }
18895
18896 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_gt_8_subtile) {
18897 TEST_REQUIRES_ARM_NEON_DOT;
18898 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018899 for (uint32_t n = 1; n <= 16; n++) {
18900 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018901 GemmMicrokernelTester()
18902 .mr(6)
18903 .nr(16)
18904 .kr(4)
18905 .sr(1)
18906 .m(m)
18907 .n(n)
18908 .k(k)
18909 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018910 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018911 }
18912 }
18913 }
18914 }
18915
18916 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8) {
18917 TEST_REQUIRES_ARM_NEON_DOT;
18918 for (size_t k = 16; k <= 80; k += 8) {
18919 GemmMicrokernelTester()
18920 .mr(6)
18921 .nr(16)
18922 .kr(4)
18923 .sr(1)
18924 .m(6)
18925 .n(16)
18926 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018928 }
18929 }
18930
18931 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_strided_a) {
18932 TEST_REQUIRES_ARM_NEON_DOT;
18933 for (size_t k = 16; k <= 80; k += 8) {
18934 GemmMicrokernelTester()
18935 .mr(6)
18936 .nr(16)
18937 .kr(4)
18938 .sr(1)
18939 .m(6)
18940 .n(16)
18941 .k(k)
18942 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080018943 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018944 }
18945 }
18946
18947 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, k_div_8_subtile) {
18948 TEST_REQUIRES_ARM_NEON_DOT;
18949 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080018950 for (uint32_t n = 1; n <= 16; n++) {
18951 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018952 GemmMicrokernelTester()
18953 .mr(6)
18954 .nr(16)
18955 .kr(4)
18956 .sr(1)
18957 .m(m)
18958 .n(n)
18959 .k(k)
18960 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080018961 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018962 }
18963 }
18964 }
18965 }
18966
18967 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16) {
18968 TEST_REQUIRES_ARM_NEON_DOT;
18969 for (uint32_t n = 17; n < 32; n++) {
18970 for (size_t k = 1; k <= 40; k += 9) {
18971 GemmMicrokernelTester()
18972 .mr(6)
18973 .nr(16)
18974 .kr(4)
18975 .sr(1)
18976 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018977 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018978 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080018979 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018980 }
18981 }
18982 }
18983
18984 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_cn) {
18985 TEST_REQUIRES_ARM_NEON_DOT;
18986 for (uint32_t n = 17; n < 32; n++) {
18987 for (size_t k = 1; k <= 40; k += 9) {
18988 GemmMicrokernelTester()
18989 .mr(6)
18990 .nr(16)
18991 .kr(4)
18992 .sr(1)
18993 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080018994 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018995 .k(k)
18996 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080018997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080018998 }
18999 }
19000 }
19001
19002 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_strided_a) {
19003 TEST_REQUIRES_ARM_NEON_DOT;
19004 for (uint32_t n = 17; n < 32; n++) {
19005 for (size_t k = 1; k <= 40; k += 9) {
19006 GemmMicrokernelTester()
19007 .mr(6)
19008 .nr(16)
19009 .kr(4)
19010 .sr(1)
19011 .m(6)
19012 .n(n)
19013 .k(k)
19014 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019015 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019016 }
19017 }
19018 }
19019
19020 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_gt_16_subtile) {
19021 TEST_REQUIRES_ARM_NEON_DOT;
19022 for (uint32_t n = 17; n < 32; n++) {
19023 for (size_t k = 1; k <= 40; k += 9) {
19024 for (uint32_t m = 1; m <= 6; m++) {
19025 GemmMicrokernelTester()
19026 .mr(6)
19027 .nr(16)
19028 .kr(4)
19029 .sr(1)
19030 .m(m)
19031 .n(n)
19032 .k(k)
19033 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019034 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019035 }
19036 }
19037 }
19038 }
19039
19040 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16) {
19041 TEST_REQUIRES_ARM_NEON_DOT;
19042 for (uint32_t n = 32; n <= 48; n += 16) {
19043 for (size_t k = 1; k <= 40; k += 9) {
19044 GemmMicrokernelTester()
19045 .mr(6)
19046 .nr(16)
19047 .kr(4)
19048 .sr(1)
19049 .m(6)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019050 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019051 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019052 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019053 }
19054 }
19055 }
19056
19057 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_cn) {
19058 TEST_REQUIRES_ARM_NEON_DOT;
19059 for (uint32_t n = 32; n <= 48; n += 16) {
19060 for (size_t k = 1; k <= 40; k += 9) {
19061 GemmMicrokernelTester()
19062 .mr(6)
19063 .nr(16)
19064 .kr(4)
19065 .sr(1)
19066 .m(6)
19067 .n(n)
19068 .k(k)
19069 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019070 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019071 }
19072 }
19073 }
19074
19075 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_strided_a) {
19076 TEST_REQUIRES_ARM_NEON_DOT;
19077 for (uint32_t n = 32; n <= 48; n += 16) {
19078 for (size_t k = 1; k <= 40; k += 9) {
19079 GemmMicrokernelTester()
19080 .mr(6)
19081 .nr(16)
19082 .kr(4)
19083 .sr(1)
19084 .m(6)
19085 .n(n)
19086 .k(k)
19087 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019088 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019089 }
19090 }
19091 }
19092
19093 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, n_div_16_subtile) {
19094 TEST_REQUIRES_ARM_NEON_DOT;
19095 for (uint32_t n = 32; n <= 48; n += 16) {
19096 for (size_t k = 1; k <= 40; k += 9) {
19097 for (uint32_t m = 1; m <= 6; m++) {
19098 GemmMicrokernelTester()
19099 .mr(6)
19100 .nr(16)
19101 .kr(4)
19102 .sr(1)
19103 .m(m)
19104 .n(n)
19105 .k(k)
19106 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019107 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019108 }
19109 }
19110 }
19111 }
19112
19113 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm_subtile) {
19114 TEST_REQUIRES_ARM_NEON_DOT;
19115 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019116 for (uint32_t n = 1; n <= 16; n++) {
19117 for (uint32_t m = 1; m <= 6; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019118 GemmMicrokernelTester()
19119 .mr(6)
19120 .nr(16)
19121 .kr(4)
19122 .sr(1)
19123 .m(m)
19124 .n(n)
19125 .k(k)
19126 .cm_stride(19)
19127 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019128 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019129 }
19130 }
19131 }
19132 }
19133
19134 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmin) {
19135 TEST_REQUIRES_ARM_NEON_DOT;
19136 GemmMicrokernelTester()
19137 .mr(6)
19138 .nr(16)
19139 .kr(4)
19140 .sr(1)
19141 .m(6)
19142 .n(16)
19143 .k(8)
19144 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019146 }
19147
19148 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, qmax) {
19149 TEST_REQUIRES_ARM_NEON_DOT;
19150 GemmMicrokernelTester()
19151 .mr(6)
19152 .nr(16)
19153 .kr(4)
19154 .sr(1)
19155 .m(6)
19156 .n(16)
19157 .k(8)
19158 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019159 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019160 }
19161
19162 TEST(QS8_GEMM_MINMAX_RNDNU_6X16C4__NEONDOT, strided_cm) {
19163 TEST_REQUIRES_ARM_NEON_DOT;
19164 GemmMicrokernelTester()
19165 .mr(6)
19166 .nr(16)
19167 .kr(4)
19168 .sr(1)
19169 .m(6)
19170 .n(16)
19171 .k(8)
19172 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019173 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_6x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019174 }
19175#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
19176
19177
19178#if XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
19179 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8) {
19180 TEST_REQUIRES_ARM_NEON_DOT;
19181 GemmMicrokernelTester()
19182 .mr(8)
19183 .nr(16)
19184 .kr(4)
19185 .sr(1)
19186 .m(8)
19187 .n(16)
19188 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019189 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019190 }
19191
19192 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cn) {
19193 TEST_REQUIRES_ARM_NEON_DOT;
19194 GemmMicrokernelTester()
19195 .mr(8)
19196 .nr(16)
19197 .kr(4)
19198 .sr(1)
19199 .m(8)
19200 .n(16)
19201 .k(8)
19202 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019203 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019204 }
19205
19206 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_strided_a) {
19207 TEST_REQUIRES_ARM_NEON_DOT;
19208 GemmMicrokernelTester()
19209 .mr(8)
19210 .nr(16)
19211 .kr(4)
19212 .sr(1)
19213 .m(8)
19214 .n(16)
19215 .k(8)
19216 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019217 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019218 }
19219
19220 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile) {
19221 TEST_REQUIRES_ARM_NEON_DOT;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019222 for (uint32_t n = 1; n <= 16; n++) {
19223 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019224 GemmMicrokernelTester()
19225 .mr(8)
19226 .nr(16)
19227 .kr(4)
19228 .sr(1)
19229 .m(m)
19230 .n(n)
19231 .k(8)
19232 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019233 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019234 }
19235 }
19236 }
19237
19238 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_m) {
19239 TEST_REQUIRES_ARM_NEON_DOT;
19240 for (uint32_t m = 1; m <= 8; m++) {
19241 GemmMicrokernelTester()
19242 .mr(8)
19243 .nr(16)
19244 .kr(4)
19245 .sr(1)
19246 .m(m)
19247 .n(16)
19248 .k(8)
19249 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019250 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019251 }
19252 }
19253
19254 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_eq_8_subtile_n) {
19255 TEST_REQUIRES_ARM_NEON_DOT;
19256 for (uint32_t n = 1; n <= 16; n++) {
19257 GemmMicrokernelTester()
19258 .mr(8)
19259 .nr(16)
19260 .kr(4)
19261 .sr(1)
19262 .m(8)
19263 .n(n)
19264 .k(8)
19265 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019266 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019267 }
19268 }
19269
19270 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8) {
19271 TEST_REQUIRES_ARM_NEON_DOT;
19272 for (size_t k = 1; k < 8; k++) {
19273 GemmMicrokernelTester()
19274 .mr(8)
19275 .nr(16)
19276 .kr(4)
19277 .sr(1)
19278 .m(8)
19279 .n(16)
19280 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019281 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019282 }
19283 }
19284
19285 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_strided_a) {
19286 TEST_REQUIRES_ARM_NEON_DOT;
19287 for (size_t k = 1; k < 8; k++) {
19288 GemmMicrokernelTester()
19289 .mr(8)
19290 .nr(16)
19291 .kr(4)
19292 .sr(1)
19293 .m(8)
19294 .n(16)
19295 .k(k)
19296 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019297 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019298 }
19299 }
19300
19301 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_lt_8_subtile) {
19302 TEST_REQUIRES_ARM_NEON_DOT;
19303 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019304 for (uint32_t n = 1; n <= 16; n++) {
19305 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019306 GemmMicrokernelTester()
19307 .mr(8)
19308 .nr(16)
19309 .kr(4)
19310 .sr(1)
19311 .m(m)
19312 .n(n)
19313 .k(k)
19314 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019315 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019316 }
19317 }
19318 }
19319 }
19320
19321 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8) {
19322 TEST_REQUIRES_ARM_NEON_DOT;
19323 for (size_t k = 9; k < 16; k++) {
19324 GemmMicrokernelTester()
19325 .mr(8)
19326 .nr(16)
19327 .kr(4)
19328 .sr(1)
19329 .m(8)
19330 .n(16)
19331 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019333 }
19334 }
19335
19336 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_strided_a) {
19337 TEST_REQUIRES_ARM_NEON_DOT;
19338 for (size_t k = 9; k < 16; k++) {
19339 GemmMicrokernelTester()
19340 .mr(8)
19341 .nr(16)
19342 .kr(4)
19343 .sr(1)
19344 .m(8)
19345 .n(16)
19346 .k(k)
19347 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019348 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019349 }
19350 }
19351
19352 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_gt_8_subtile) {
19353 TEST_REQUIRES_ARM_NEON_DOT;
19354 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019355 for (uint32_t n = 1; n <= 16; n++) {
19356 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019357 GemmMicrokernelTester()
19358 .mr(8)
19359 .nr(16)
19360 .kr(4)
19361 .sr(1)
19362 .m(m)
19363 .n(n)
19364 .k(k)
19365 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019366 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019367 }
19368 }
19369 }
19370 }
19371
19372 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8) {
19373 TEST_REQUIRES_ARM_NEON_DOT;
19374 for (size_t k = 16; k <= 80; k += 8) {
19375 GemmMicrokernelTester()
19376 .mr(8)
19377 .nr(16)
19378 .kr(4)
19379 .sr(1)
19380 .m(8)
19381 .n(16)
19382 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019384 }
19385 }
19386
19387 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_strided_a) {
19388 TEST_REQUIRES_ARM_NEON_DOT;
19389 for (size_t k = 16; k <= 80; k += 8) {
19390 GemmMicrokernelTester()
19391 .mr(8)
19392 .nr(16)
19393 .kr(4)
19394 .sr(1)
19395 .m(8)
19396 .n(16)
19397 .k(k)
19398 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019399 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019400 }
19401 }
19402
19403 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, k_div_8_subtile) {
19404 TEST_REQUIRES_ARM_NEON_DOT;
19405 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019406 for (uint32_t n = 1; n <= 16; n++) {
19407 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019408 GemmMicrokernelTester()
19409 .mr(8)
19410 .nr(16)
19411 .kr(4)
19412 .sr(1)
19413 .m(m)
19414 .n(n)
19415 .k(k)
19416 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019417 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019418 }
19419 }
19420 }
19421 }
19422
19423 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16) {
19424 TEST_REQUIRES_ARM_NEON_DOT;
19425 for (uint32_t n = 17; n < 32; n++) {
19426 for (size_t k = 1; k <= 40; k += 9) {
19427 GemmMicrokernelTester()
19428 .mr(8)
19429 .nr(16)
19430 .kr(4)
19431 .sr(1)
19432 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019433 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019434 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019435 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019436 }
19437 }
19438 }
19439
19440 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_cn) {
19441 TEST_REQUIRES_ARM_NEON_DOT;
19442 for (uint32_t n = 17; n < 32; n++) {
19443 for (size_t k = 1; k <= 40; k += 9) {
19444 GemmMicrokernelTester()
19445 .mr(8)
19446 .nr(16)
19447 .kr(4)
19448 .sr(1)
19449 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019450 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019451 .k(k)
19452 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019454 }
19455 }
19456 }
19457
19458 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_strided_a) {
19459 TEST_REQUIRES_ARM_NEON_DOT;
19460 for (uint32_t n = 17; n < 32; n++) {
19461 for (size_t k = 1; k <= 40; k += 9) {
19462 GemmMicrokernelTester()
19463 .mr(8)
19464 .nr(16)
19465 .kr(4)
19466 .sr(1)
19467 .m(8)
19468 .n(n)
19469 .k(k)
19470 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019471 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019472 }
19473 }
19474 }
19475
19476 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_gt_16_subtile) {
19477 TEST_REQUIRES_ARM_NEON_DOT;
19478 for (uint32_t n = 17; n < 32; n++) {
19479 for (size_t k = 1; k <= 40; k += 9) {
19480 for (uint32_t m = 1; m <= 8; m++) {
19481 GemmMicrokernelTester()
19482 .mr(8)
19483 .nr(16)
19484 .kr(4)
19485 .sr(1)
19486 .m(m)
19487 .n(n)
19488 .k(k)
19489 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019490 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019491 }
19492 }
19493 }
19494 }
19495
19496 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16) {
19497 TEST_REQUIRES_ARM_NEON_DOT;
19498 for (uint32_t n = 32; n <= 48; n += 16) {
19499 for (size_t k = 1; k <= 40; k += 9) {
19500 GemmMicrokernelTester()
19501 .mr(8)
19502 .nr(16)
19503 .kr(4)
19504 .sr(1)
19505 .m(8)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019506 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019507 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019508 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019509 }
19510 }
19511 }
19512
19513 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_cn) {
19514 TEST_REQUIRES_ARM_NEON_DOT;
19515 for (uint32_t n = 32; n <= 48; n += 16) {
19516 for (size_t k = 1; k <= 40; k += 9) {
19517 GemmMicrokernelTester()
19518 .mr(8)
19519 .nr(16)
19520 .kr(4)
19521 .sr(1)
19522 .m(8)
19523 .n(n)
19524 .k(k)
19525 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019526 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019527 }
19528 }
19529 }
19530
19531 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_strided_a) {
19532 TEST_REQUIRES_ARM_NEON_DOT;
19533 for (uint32_t n = 32; n <= 48; n += 16) {
19534 for (size_t k = 1; k <= 40; k += 9) {
19535 GemmMicrokernelTester()
19536 .mr(8)
19537 .nr(16)
19538 .kr(4)
19539 .sr(1)
19540 .m(8)
19541 .n(n)
19542 .k(k)
19543 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019544 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019545 }
19546 }
19547 }
19548
19549 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, n_div_16_subtile) {
19550 TEST_REQUIRES_ARM_NEON_DOT;
19551 for (uint32_t n = 32; n <= 48; n += 16) {
19552 for (size_t k = 1; k <= 40; k += 9) {
19553 for (uint32_t m = 1; m <= 8; m++) {
19554 GemmMicrokernelTester()
19555 .mr(8)
19556 .nr(16)
19557 .kr(4)
19558 .sr(1)
19559 .m(m)
19560 .n(n)
19561 .k(k)
19562 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019563 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019564 }
19565 }
19566 }
19567 }
19568
19569 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm_subtile) {
19570 TEST_REQUIRES_ARM_NEON_DOT;
19571 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019572 for (uint32_t n = 1; n <= 16; n++) {
19573 for (uint32_t m = 1; m <= 8; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019574 GemmMicrokernelTester()
19575 .mr(8)
19576 .nr(16)
19577 .kr(4)
19578 .sr(1)
19579 .m(m)
19580 .n(n)
19581 .k(k)
19582 .cm_stride(19)
19583 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019584 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019585 }
19586 }
19587 }
19588 }
19589
19590 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmin) {
19591 TEST_REQUIRES_ARM_NEON_DOT;
19592 GemmMicrokernelTester()
19593 .mr(8)
19594 .nr(16)
19595 .kr(4)
19596 .sr(1)
19597 .m(8)
19598 .n(16)
19599 .k(8)
19600 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019602 }
19603
19604 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, qmax) {
19605 TEST_REQUIRES_ARM_NEON_DOT;
19606 GemmMicrokernelTester()
19607 .mr(8)
19608 .nr(16)
19609 .kr(4)
19610 .sr(1)
19611 .m(8)
19612 .n(16)
19613 .k(8)
19614 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080019615 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019616 }
19617
19618 TEST(QS8_GEMM_MINMAX_RNDNU_8X16C4__NEONDOT, strided_cm) {
19619 TEST_REQUIRES_ARM_NEON_DOT;
19620 GemmMicrokernelTester()
19621 .mr(8)
19622 .nr(16)
19623 .kr(4)
19624 .sr(1)
19625 .m(8)
19626 .n(16)
19627 .k(8)
19628 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019629 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_8x16c4__neondot, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019630 }
19631#endif // XNN_ARCH_ARM && !XNN_PLATFORM_IOS || XNN_ARCH_ARM64
19632
19633
19634#if XNN_ARCH_ARM || XNN_ARCH_ARM64
19635 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8) {
19636 TEST_REQUIRES_ARM_NEON;
19637 GemmMicrokernelTester()
19638 .mr(1)
19639 .nr(16)
19640 .kr(1)
19641 .sr(1)
19642 .m(1)
19643 .n(16)
19644 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080019645 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019646 }
19647
19648 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cn) {
19649 TEST_REQUIRES_ARM_NEON;
19650 GemmMicrokernelTester()
19651 .mr(1)
19652 .nr(16)
19653 .kr(1)
19654 .sr(1)
19655 .m(1)
19656 .n(16)
19657 .k(8)
19658 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019659 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019660 }
19661
19662 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_strided_a) {
19663 TEST_REQUIRES_ARM_NEON;
19664 GemmMicrokernelTester()
19665 .mr(1)
19666 .nr(16)
19667 .kr(1)
19668 .sr(1)
19669 .m(1)
19670 .n(16)
19671 .k(8)
19672 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019673 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019674 }
19675
19676 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile) {
19677 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080019678 for (uint32_t n = 1; n <= 16; n++) {
19679 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019680 GemmMicrokernelTester()
19681 .mr(1)
19682 .nr(16)
19683 .kr(1)
19684 .sr(1)
19685 .m(m)
19686 .n(n)
19687 .k(8)
19688 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019689 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019690 }
19691 }
19692 }
19693
19694 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_m) {
19695 TEST_REQUIRES_ARM_NEON;
19696 for (uint32_t m = 1; m <= 1; m++) {
19697 GemmMicrokernelTester()
19698 .mr(1)
19699 .nr(16)
19700 .kr(1)
19701 .sr(1)
19702 .m(m)
19703 .n(16)
19704 .k(8)
19705 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019706 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019707 }
19708 }
19709
19710 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_eq_8_subtile_n) {
19711 TEST_REQUIRES_ARM_NEON;
19712 for (uint32_t n = 1; n <= 16; n++) {
19713 GemmMicrokernelTester()
19714 .mr(1)
19715 .nr(16)
19716 .kr(1)
19717 .sr(1)
19718 .m(1)
19719 .n(n)
19720 .k(8)
19721 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019722 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019723 }
19724 }
19725
19726 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8) {
19727 TEST_REQUIRES_ARM_NEON;
19728 for (size_t k = 1; k < 8; k++) {
19729 GemmMicrokernelTester()
19730 .mr(1)
19731 .nr(16)
19732 .kr(1)
19733 .sr(1)
19734 .m(1)
19735 .n(16)
19736 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019737 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019738 }
19739 }
19740
19741 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_strided_a) {
19742 TEST_REQUIRES_ARM_NEON;
19743 for (size_t k = 1; k < 8; k++) {
19744 GemmMicrokernelTester()
19745 .mr(1)
19746 .nr(16)
19747 .kr(1)
19748 .sr(1)
19749 .m(1)
19750 .n(16)
19751 .k(k)
19752 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080019753 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019754 }
19755 }
19756
19757 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_lt_8_subtile) {
19758 TEST_REQUIRES_ARM_NEON;
19759 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019760 for (uint32_t n = 1; n <= 16; n++) {
19761 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019762 GemmMicrokernelTester()
19763 .mr(1)
19764 .nr(16)
19765 .kr(1)
19766 .sr(1)
19767 .m(m)
19768 .n(n)
19769 .k(k)
19770 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019771 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019772 }
19773 }
19774 }
19775 }
19776
19777 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8) {
19778 TEST_REQUIRES_ARM_NEON;
19779 for (size_t k = 9; k < 16; k++) {
19780 GemmMicrokernelTester()
19781 .mr(1)
19782 .nr(16)
19783 .kr(1)
19784 .sr(1)
19785 .m(1)
19786 .n(16)
19787 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019788 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019789 }
19790 }
19791
19792 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_strided_a) {
19793 TEST_REQUIRES_ARM_NEON;
19794 for (size_t k = 9; k < 16; k++) {
19795 GemmMicrokernelTester()
19796 .mr(1)
19797 .nr(16)
19798 .kr(1)
19799 .sr(1)
19800 .m(1)
19801 .n(16)
19802 .k(k)
19803 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019804 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019805 }
19806 }
19807
19808 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_gt_8_subtile) {
19809 TEST_REQUIRES_ARM_NEON;
19810 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019811 for (uint32_t n = 1; n <= 16; n++) {
19812 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019813 GemmMicrokernelTester()
19814 .mr(1)
19815 .nr(16)
19816 .kr(1)
19817 .sr(1)
19818 .m(m)
19819 .n(n)
19820 .k(k)
19821 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019822 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019823 }
19824 }
19825 }
19826 }
19827
19828 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8) {
19829 TEST_REQUIRES_ARM_NEON;
19830 for (size_t k = 16; k <= 80; k += 8) {
19831 GemmMicrokernelTester()
19832 .mr(1)
19833 .nr(16)
19834 .kr(1)
19835 .sr(1)
19836 .m(1)
19837 .n(16)
19838 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019839 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019840 }
19841 }
19842
19843 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_strided_a) {
19844 TEST_REQUIRES_ARM_NEON;
19845 for (size_t k = 16; k <= 80; k += 8) {
19846 GemmMicrokernelTester()
19847 .mr(1)
19848 .nr(16)
19849 .kr(1)
19850 .sr(1)
19851 .m(1)
19852 .n(16)
19853 .k(k)
19854 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080019855 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019856 }
19857 }
19858
19859 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, k_div_8_subtile) {
19860 TEST_REQUIRES_ARM_NEON;
19861 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080019862 for (uint32_t n = 1; n <= 16; n++) {
19863 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019864 GemmMicrokernelTester()
19865 .mr(1)
19866 .nr(16)
19867 .kr(1)
19868 .sr(1)
19869 .m(m)
19870 .n(n)
19871 .k(k)
19872 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019873 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019874 }
19875 }
19876 }
19877 }
19878
19879 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16) {
19880 TEST_REQUIRES_ARM_NEON;
19881 for (uint32_t n = 17; n < 32; n++) {
19882 for (size_t k = 1; k <= 40; k += 9) {
19883 GemmMicrokernelTester()
19884 .mr(1)
19885 .nr(16)
19886 .kr(1)
19887 .sr(1)
19888 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019889 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019890 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019891 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019892 }
19893 }
19894 }
19895
19896 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_cn) {
19897 TEST_REQUIRES_ARM_NEON;
19898 for (uint32_t n = 17; n < 32; n++) {
19899 for (size_t k = 1; k <= 40; k += 9) {
19900 GemmMicrokernelTester()
19901 .mr(1)
19902 .nr(16)
19903 .kr(1)
19904 .sr(1)
19905 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019906 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019907 .k(k)
19908 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019909 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019910 }
19911 }
19912 }
19913
19914 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_strided_a) {
19915 TEST_REQUIRES_ARM_NEON;
19916 for (uint32_t n = 17; n < 32; n++) {
19917 for (size_t k = 1; k <= 40; k += 9) {
19918 GemmMicrokernelTester()
19919 .mr(1)
19920 .nr(16)
19921 .kr(1)
19922 .sr(1)
19923 .m(1)
19924 .n(n)
19925 .k(k)
19926 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080019927 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019928 }
19929 }
19930 }
19931
19932 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_gt_16_subtile) {
19933 TEST_REQUIRES_ARM_NEON;
19934 for (uint32_t n = 17; n < 32; n++) {
19935 for (size_t k = 1; k <= 40; k += 9) {
19936 for (uint32_t m = 1; m <= 1; m++) {
19937 GemmMicrokernelTester()
19938 .mr(1)
19939 .nr(16)
19940 .kr(1)
19941 .sr(1)
19942 .m(m)
19943 .n(n)
19944 .k(k)
19945 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080019946 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019947 }
19948 }
19949 }
19950 }
19951
19952 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16) {
19953 TEST_REQUIRES_ARM_NEON;
19954 for (uint32_t n = 32; n <= 48; n += 16) {
19955 for (size_t k = 1; k <= 40; k += 9) {
19956 GemmMicrokernelTester()
19957 .mr(1)
19958 .nr(16)
19959 .kr(1)
19960 .sr(1)
19961 .m(1)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080019962 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019963 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080019964 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019965 }
19966 }
19967 }
19968
19969 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_cn) {
19970 TEST_REQUIRES_ARM_NEON;
19971 for (uint32_t n = 32; n <= 48; n += 16) {
19972 for (size_t k = 1; k <= 40; k += 9) {
19973 GemmMicrokernelTester()
19974 .mr(1)
19975 .nr(16)
19976 .kr(1)
19977 .sr(1)
19978 .m(1)
19979 .n(n)
19980 .k(k)
19981 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080019982 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080019983 }
19984 }
19985 }
19986
19987 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_strided_a) {
19988 TEST_REQUIRES_ARM_NEON;
19989 for (uint32_t n = 32; n <= 48; n += 16) {
19990 for (size_t k = 1; k <= 40; k += 9) {
19991 GemmMicrokernelTester()
19992 .mr(1)
19993 .nr(16)
19994 .kr(1)
19995 .sr(1)
19996 .m(1)
19997 .n(n)
19998 .k(k)
19999 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020000 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020001 }
20002 }
20003 }
20004
20005 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, n_div_16_subtile) {
20006 TEST_REQUIRES_ARM_NEON;
20007 for (uint32_t n = 32; n <= 48; n += 16) {
20008 for (size_t k = 1; k <= 40; k += 9) {
20009 for (uint32_t m = 1; m <= 1; m++) {
20010 GemmMicrokernelTester()
20011 .mr(1)
20012 .nr(16)
20013 .kr(1)
20014 .sr(1)
20015 .m(m)
20016 .n(n)
20017 .k(k)
20018 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020019 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020020 }
20021 }
20022 }
20023 }
20024
20025 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm_subtile) {
20026 TEST_REQUIRES_ARM_NEON;
20027 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020028 for (uint32_t n = 1; n <= 16; n++) {
20029 for (uint32_t m = 1; m <= 1; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020030 GemmMicrokernelTester()
20031 .mr(1)
20032 .nr(16)
20033 .kr(1)
20034 .sr(1)
20035 .m(m)
20036 .n(n)
20037 .k(k)
20038 .cm_stride(19)
20039 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020040 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020041 }
20042 }
20043 }
20044 }
20045
20046 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmin) {
20047 TEST_REQUIRES_ARM_NEON;
20048 GemmMicrokernelTester()
20049 .mr(1)
20050 .nr(16)
20051 .kr(1)
20052 .sr(1)
20053 .m(1)
20054 .n(16)
20055 .k(8)
20056 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020058 }
20059
20060 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, qmax) {
20061 TEST_REQUIRES_ARM_NEON;
20062 GemmMicrokernelTester()
20063 .mr(1)
20064 .nr(16)
20065 .kr(1)
20066 .sr(1)
20067 .m(1)
20068 .n(16)
20069 .k(8)
20070 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020071 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020072 }
20073
20074 TEST(QS8_GEMM_MINMAX_RNDNU_1X16__NEON_MLAL_LANE, strided_cm) {
20075 TEST_REQUIRES_ARM_NEON;
20076 GemmMicrokernelTester()
20077 .mr(1)
20078 .nr(16)
20079 .kr(1)
20080 .sr(1)
20081 .m(1)
20082 .n(16)
20083 .k(8)
20084 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020085 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_1x16__neon_mlal_lane, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020086 }
20087#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20088
20089
20090#if XNN_ARCH_ARM || XNN_ARCH_ARM64
20091 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8) {
20092 TEST_REQUIRES_ARM_NEON;
20093 GemmMicrokernelTester()
20094 .mr(2)
20095 .nr(8)
20096 .kr(1)
20097 .sr(1)
20098 .m(2)
20099 .n(8)
20100 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020101 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020102 }
20103
20104 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cn) {
20105 TEST_REQUIRES_ARM_NEON;
20106 GemmMicrokernelTester()
20107 .mr(2)
20108 .nr(8)
20109 .kr(1)
20110 .sr(1)
20111 .m(2)
20112 .n(8)
20113 .k(8)
20114 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020115 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020116 }
20117
20118 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
20119 TEST_REQUIRES_ARM_NEON;
20120 GemmMicrokernelTester()
20121 .mr(2)
20122 .nr(8)
20123 .kr(1)
20124 .sr(1)
20125 .m(2)
20126 .n(8)
20127 .k(8)
20128 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020129 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020130 }
20131
20132 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
20133 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020134 for (uint32_t n = 1; n <= 8; n++) {
20135 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020136 GemmMicrokernelTester()
20137 .mr(2)
20138 .nr(8)
20139 .kr(1)
20140 .sr(1)
20141 .m(m)
20142 .n(n)
20143 .k(8)
20144 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020145 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020146 }
20147 }
20148 }
20149
20150 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
20151 TEST_REQUIRES_ARM_NEON;
20152 for (uint32_t m = 1; m <= 2; m++) {
20153 GemmMicrokernelTester()
20154 .mr(2)
20155 .nr(8)
20156 .kr(1)
20157 .sr(1)
20158 .m(m)
20159 .n(8)
20160 .k(8)
20161 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020162 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020163 }
20164 }
20165
20166 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
20167 TEST_REQUIRES_ARM_NEON;
20168 for (uint32_t n = 1; n <= 8; n++) {
20169 GemmMicrokernelTester()
20170 .mr(2)
20171 .nr(8)
20172 .kr(1)
20173 .sr(1)
20174 .m(2)
20175 .n(n)
20176 .k(8)
20177 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020178 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020179 }
20180 }
20181
20182 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8) {
20183 TEST_REQUIRES_ARM_NEON;
20184 for (size_t k = 1; k < 8; k++) {
20185 GemmMicrokernelTester()
20186 .mr(2)
20187 .nr(8)
20188 .kr(1)
20189 .sr(1)
20190 .m(2)
20191 .n(8)
20192 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020193 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020194 }
20195 }
20196
20197 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
20198 TEST_REQUIRES_ARM_NEON;
20199 for (size_t k = 1; k < 8; k++) {
20200 GemmMicrokernelTester()
20201 .mr(2)
20202 .nr(8)
20203 .kr(1)
20204 .sr(1)
20205 .m(2)
20206 .n(8)
20207 .k(k)
20208 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020209 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020210 }
20211 }
20212
20213 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
20214 TEST_REQUIRES_ARM_NEON;
20215 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020216 for (uint32_t n = 1; n <= 8; n++) {
20217 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020218 GemmMicrokernelTester()
20219 .mr(2)
20220 .nr(8)
20221 .kr(1)
20222 .sr(1)
20223 .m(m)
20224 .n(n)
20225 .k(k)
20226 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020227 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020228 }
20229 }
20230 }
20231 }
20232
20233 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8) {
20234 TEST_REQUIRES_ARM_NEON;
20235 for (size_t k = 9; k < 16; k++) {
20236 GemmMicrokernelTester()
20237 .mr(2)
20238 .nr(8)
20239 .kr(1)
20240 .sr(1)
20241 .m(2)
20242 .n(8)
20243 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020244 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020245 }
20246 }
20247
20248 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
20249 TEST_REQUIRES_ARM_NEON;
20250 for (size_t k = 9; k < 16; k++) {
20251 GemmMicrokernelTester()
20252 .mr(2)
20253 .nr(8)
20254 .kr(1)
20255 .sr(1)
20256 .m(2)
20257 .n(8)
20258 .k(k)
20259 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020260 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020261 }
20262 }
20263
20264 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
20265 TEST_REQUIRES_ARM_NEON;
20266 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020267 for (uint32_t n = 1; n <= 8; n++) {
20268 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020269 GemmMicrokernelTester()
20270 .mr(2)
20271 .nr(8)
20272 .kr(1)
20273 .sr(1)
20274 .m(m)
20275 .n(n)
20276 .k(k)
20277 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020278 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020279 }
20280 }
20281 }
20282 }
20283
20284 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8) {
20285 TEST_REQUIRES_ARM_NEON;
20286 for (size_t k = 16; k <= 80; k += 8) {
20287 GemmMicrokernelTester()
20288 .mr(2)
20289 .nr(8)
20290 .kr(1)
20291 .sr(1)
20292 .m(2)
20293 .n(8)
20294 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020295 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020296 }
20297 }
20298
20299 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
20300 TEST_REQUIRES_ARM_NEON;
20301 for (size_t k = 16; k <= 80; k += 8) {
20302 GemmMicrokernelTester()
20303 .mr(2)
20304 .nr(8)
20305 .kr(1)
20306 .sr(1)
20307 .m(2)
20308 .n(8)
20309 .k(k)
20310 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020311 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020312 }
20313 }
20314
20315 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
20316 TEST_REQUIRES_ARM_NEON;
20317 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020318 for (uint32_t n = 1; n <= 8; n++) {
20319 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020320 GemmMicrokernelTester()
20321 .mr(2)
20322 .nr(8)
20323 .kr(1)
20324 .sr(1)
20325 .m(m)
20326 .n(n)
20327 .k(k)
20328 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020329 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020330 }
20331 }
20332 }
20333 }
20334
20335 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8) {
20336 TEST_REQUIRES_ARM_NEON;
20337 for (uint32_t n = 9; n < 16; n++) {
20338 for (size_t k = 1; k <= 40; k += 9) {
20339 GemmMicrokernelTester()
20340 .mr(2)
20341 .nr(8)
20342 .kr(1)
20343 .sr(1)
20344 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020345 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020346 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020347 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020348 }
20349 }
20350 }
20351
20352 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
20353 TEST_REQUIRES_ARM_NEON;
20354 for (uint32_t n = 9; n < 16; n++) {
20355 for (size_t k = 1; k <= 40; k += 9) {
20356 GemmMicrokernelTester()
20357 .mr(2)
20358 .nr(8)
20359 .kr(1)
20360 .sr(1)
20361 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020362 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020363 .k(k)
20364 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020365 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020366 }
20367 }
20368 }
20369
20370 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
20371 TEST_REQUIRES_ARM_NEON;
20372 for (uint32_t n = 9; n < 16; n++) {
20373 for (size_t k = 1; k <= 40; k += 9) {
20374 GemmMicrokernelTester()
20375 .mr(2)
20376 .nr(8)
20377 .kr(1)
20378 .sr(1)
20379 .m(2)
20380 .n(n)
20381 .k(k)
20382 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020383 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020384 }
20385 }
20386 }
20387
20388 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
20389 TEST_REQUIRES_ARM_NEON;
20390 for (uint32_t n = 9; n < 16; n++) {
20391 for (size_t k = 1; k <= 40; k += 9) {
20392 for (uint32_t m = 1; m <= 2; m++) {
20393 GemmMicrokernelTester()
20394 .mr(2)
20395 .nr(8)
20396 .kr(1)
20397 .sr(1)
20398 .m(m)
20399 .n(n)
20400 .k(k)
20401 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020402 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020403 }
20404 }
20405 }
20406 }
20407
20408 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8) {
20409 TEST_REQUIRES_ARM_NEON;
20410 for (uint32_t n = 16; n <= 24; n += 8) {
20411 for (size_t k = 1; k <= 40; k += 9) {
20412 GemmMicrokernelTester()
20413 .mr(2)
20414 .nr(8)
20415 .kr(1)
20416 .sr(1)
20417 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020418 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020419 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020420 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020421 }
20422 }
20423 }
20424
20425 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
20426 TEST_REQUIRES_ARM_NEON;
20427 for (uint32_t n = 16; n <= 24; n += 8) {
20428 for (size_t k = 1; k <= 40; k += 9) {
20429 GemmMicrokernelTester()
20430 .mr(2)
20431 .nr(8)
20432 .kr(1)
20433 .sr(1)
20434 .m(2)
20435 .n(n)
20436 .k(k)
20437 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020438 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020439 }
20440 }
20441 }
20442
20443 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
20444 TEST_REQUIRES_ARM_NEON;
20445 for (uint32_t n = 16; n <= 24; n += 8) {
20446 for (size_t k = 1; k <= 40; k += 9) {
20447 GemmMicrokernelTester()
20448 .mr(2)
20449 .nr(8)
20450 .kr(1)
20451 .sr(1)
20452 .m(2)
20453 .n(n)
20454 .k(k)
20455 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020456 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020457 }
20458 }
20459 }
20460
20461 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
20462 TEST_REQUIRES_ARM_NEON;
20463 for (uint32_t n = 16; n <= 24; n += 8) {
20464 for (size_t k = 1; k <= 40; k += 9) {
20465 for (uint32_t m = 1; m <= 2; m++) {
20466 GemmMicrokernelTester()
20467 .mr(2)
20468 .nr(8)
20469 .kr(1)
20470 .sr(1)
20471 .m(m)
20472 .n(n)
20473 .k(k)
20474 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020475 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020476 }
20477 }
20478 }
20479 }
20480
20481 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
20482 TEST_REQUIRES_ARM_NEON;
20483 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020484 for (uint32_t n = 1; n <= 8; n++) {
20485 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020486 GemmMicrokernelTester()
20487 .mr(2)
20488 .nr(8)
20489 .kr(1)
20490 .sr(1)
20491 .m(m)
20492 .n(n)
20493 .k(k)
20494 .cm_stride(11)
20495 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020496 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020497 }
20498 }
20499 }
20500 }
20501
20502 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmin) {
20503 TEST_REQUIRES_ARM_NEON;
20504 GemmMicrokernelTester()
20505 .mr(2)
20506 .nr(8)
20507 .kr(1)
20508 .sr(1)
20509 .m(2)
20510 .n(8)
20511 .k(8)
20512 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020513 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020514 }
20515
20516 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, qmax) {
20517 TEST_REQUIRES_ARM_NEON;
20518 GemmMicrokernelTester()
20519 .mr(2)
20520 .nr(8)
20521 .kr(1)
20522 .sr(1)
20523 .m(2)
20524 .n(8)
20525 .k(8)
20526 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020527 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020528 }
20529
20530 TEST(QS8_GEMM_MINMAX_RNDNU_2X8__NEON_MULL_ADDW_DUP, strided_cm) {
20531 TEST_REQUIRES_ARM_NEON;
20532 GemmMicrokernelTester()
20533 .mr(2)
20534 .nr(8)
20535 .kr(1)
20536 .sr(1)
20537 .m(2)
20538 .n(8)
20539 .k(8)
20540 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020541 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020542 }
20543#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
20544
20545
20546#if XNN_ARCH_ARM || XNN_ARCH_ARM64
20547 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8) {
20548 TEST_REQUIRES_ARM_NEON;
20549 GemmMicrokernelTester()
20550 .mr(4)
20551 .nr(8)
20552 .kr(1)
20553 .sr(1)
20554 .m(4)
20555 .n(8)
20556 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080020557 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020558 }
20559
20560 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cn) {
20561 TEST_REQUIRES_ARM_NEON;
20562 GemmMicrokernelTester()
20563 .mr(4)
20564 .nr(8)
20565 .kr(1)
20566 .sr(1)
20567 .m(4)
20568 .n(8)
20569 .k(8)
20570 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020571 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020572 }
20573
20574 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
20575 TEST_REQUIRES_ARM_NEON;
20576 GemmMicrokernelTester()
20577 .mr(4)
20578 .nr(8)
20579 .kr(1)
20580 .sr(1)
20581 .m(4)
20582 .n(8)
20583 .k(8)
20584 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020585 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020586 }
20587
20588 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
20589 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080020590 for (uint32_t n = 1; n <= 8; n++) {
20591 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020592 GemmMicrokernelTester()
20593 .mr(4)
20594 .nr(8)
20595 .kr(1)
20596 .sr(1)
20597 .m(m)
20598 .n(n)
20599 .k(8)
20600 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020601 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020602 }
20603 }
20604 }
20605
20606 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
20607 TEST_REQUIRES_ARM_NEON;
20608 for (uint32_t m = 1; m <= 4; m++) {
20609 GemmMicrokernelTester()
20610 .mr(4)
20611 .nr(8)
20612 .kr(1)
20613 .sr(1)
20614 .m(m)
20615 .n(8)
20616 .k(8)
20617 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020618 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020619 }
20620 }
20621
20622 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
20623 TEST_REQUIRES_ARM_NEON;
20624 for (uint32_t n = 1; n <= 8; n++) {
20625 GemmMicrokernelTester()
20626 .mr(4)
20627 .nr(8)
20628 .kr(1)
20629 .sr(1)
20630 .m(4)
20631 .n(n)
20632 .k(8)
20633 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020634 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020635 }
20636 }
20637
20638 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8) {
20639 TEST_REQUIRES_ARM_NEON;
20640 for (size_t k = 1; k < 8; k++) {
20641 GemmMicrokernelTester()
20642 .mr(4)
20643 .nr(8)
20644 .kr(1)
20645 .sr(1)
20646 .m(4)
20647 .n(8)
20648 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020649 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020650 }
20651 }
20652
20653 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
20654 TEST_REQUIRES_ARM_NEON;
20655 for (size_t k = 1; k < 8; k++) {
20656 GemmMicrokernelTester()
20657 .mr(4)
20658 .nr(8)
20659 .kr(1)
20660 .sr(1)
20661 .m(4)
20662 .n(8)
20663 .k(k)
20664 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020665 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020666 }
20667 }
20668
20669 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
20670 TEST_REQUIRES_ARM_NEON;
20671 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020672 for (uint32_t n = 1; n <= 8; n++) {
20673 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020674 GemmMicrokernelTester()
20675 .mr(4)
20676 .nr(8)
20677 .kr(1)
20678 .sr(1)
20679 .m(m)
20680 .n(n)
20681 .k(k)
20682 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020683 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020684 }
20685 }
20686 }
20687 }
20688
20689 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8) {
20690 TEST_REQUIRES_ARM_NEON;
20691 for (size_t k = 9; k < 16; k++) {
20692 GemmMicrokernelTester()
20693 .mr(4)
20694 .nr(8)
20695 .kr(1)
20696 .sr(1)
20697 .m(4)
20698 .n(8)
20699 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020700 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020701 }
20702 }
20703
20704 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
20705 TEST_REQUIRES_ARM_NEON;
20706 for (size_t k = 9; k < 16; k++) {
20707 GemmMicrokernelTester()
20708 .mr(4)
20709 .nr(8)
20710 .kr(1)
20711 .sr(1)
20712 .m(4)
20713 .n(8)
20714 .k(k)
20715 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080020716 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020717 }
20718 }
20719
20720 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
20721 TEST_REQUIRES_ARM_NEON;
20722 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020723 for (uint32_t n = 1; n <= 8; n++) {
20724 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020725 GemmMicrokernelTester()
20726 .mr(4)
20727 .nr(8)
20728 .kr(1)
20729 .sr(1)
20730 .m(m)
20731 .n(n)
20732 .k(k)
20733 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020734 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020735 }
20736 }
20737 }
20738 }
20739
20740 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8) {
20741 TEST_REQUIRES_ARM_NEON;
20742 for (size_t k = 16; k <= 80; k += 8) {
20743 GemmMicrokernelTester()
20744 .mr(4)
20745 .nr(8)
20746 .kr(1)
20747 .sr(1)
20748 .m(4)
20749 .n(8)
20750 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020751 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020752 }
20753 }
20754
20755 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
20756 TEST_REQUIRES_ARM_NEON;
20757 for (size_t k = 16; k <= 80; k += 8) {
20758 GemmMicrokernelTester()
20759 .mr(4)
20760 .nr(8)
20761 .kr(1)
20762 .sr(1)
20763 .m(4)
20764 .n(8)
20765 .k(k)
20766 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080020767 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020768 }
20769 }
20770
20771 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
20772 TEST_REQUIRES_ARM_NEON;
20773 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020774 for (uint32_t n = 1; n <= 8; n++) {
20775 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020776 GemmMicrokernelTester()
20777 .mr(4)
20778 .nr(8)
20779 .kr(1)
20780 .sr(1)
20781 .m(m)
20782 .n(n)
20783 .k(k)
20784 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020785 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020786 }
20787 }
20788 }
20789 }
20790
20791 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8) {
20792 TEST_REQUIRES_ARM_NEON;
20793 for (uint32_t n = 9; n < 16; n++) {
20794 for (size_t k = 1; k <= 40; k += 9) {
20795 GemmMicrokernelTester()
20796 .mr(4)
20797 .nr(8)
20798 .kr(1)
20799 .sr(1)
20800 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020801 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020802 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020803 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020804 }
20805 }
20806 }
20807
20808 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_cn) {
20809 TEST_REQUIRES_ARM_NEON;
20810 for (uint32_t n = 9; n < 16; n++) {
20811 for (size_t k = 1; k <= 40; k += 9) {
20812 GemmMicrokernelTester()
20813 .mr(4)
20814 .nr(8)
20815 .kr(1)
20816 .sr(1)
20817 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020818 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020819 .k(k)
20820 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020821 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020822 }
20823 }
20824 }
20825
20826 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_strided_a) {
20827 TEST_REQUIRES_ARM_NEON;
20828 for (uint32_t n = 9; n < 16; n++) {
20829 for (size_t k = 1; k <= 40; k += 9) {
20830 GemmMicrokernelTester()
20831 .mr(4)
20832 .nr(8)
20833 .kr(1)
20834 .sr(1)
20835 .m(4)
20836 .n(n)
20837 .k(k)
20838 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020839 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020840 }
20841 }
20842 }
20843
20844 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_gt_8_subtile) {
20845 TEST_REQUIRES_ARM_NEON;
20846 for (uint32_t n = 9; n < 16; n++) {
20847 for (size_t k = 1; k <= 40; k += 9) {
20848 for (uint32_t m = 1; m <= 4; m++) {
20849 GemmMicrokernelTester()
20850 .mr(4)
20851 .nr(8)
20852 .kr(1)
20853 .sr(1)
20854 .m(m)
20855 .n(n)
20856 .k(k)
20857 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020858 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020859 }
20860 }
20861 }
20862 }
20863
20864 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8) {
20865 TEST_REQUIRES_ARM_NEON;
20866 for (uint32_t n = 16; n <= 24; n += 8) {
20867 for (size_t k = 1; k <= 40; k += 9) {
20868 GemmMicrokernelTester()
20869 .mr(4)
20870 .nr(8)
20871 .kr(1)
20872 .sr(1)
20873 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080020874 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020875 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080020876 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020877 }
20878 }
20879 }
20880
20881 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_cn) {
20882 TEST_REQUIRES_ARM_NEON;
20883 for (uint32_t n = 16; n <= 24; n += 8) {
20884 for (size_t k = 1; k <= 40; k += 9) {
20885 GemmMicrokernelTester()
20886 .mr(4)
20887 .nr(8)
20888 .kr(1)
20889 .sr(1)
20890 .m(4)
20891 .n(n)
20892 .k(k)
20893 .cn_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020894 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020895 }
20896 }
20897 }
20898
20899 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_strided_a) {
20900 TEST_REQUIRES_ARM_NEON;
20901 for (uint32_t n = 16; n <= 24; n += 8) {
20902 for (size_t k = 1; k <= 40; k += 9) {
20903 GemmMicrokernelTester()
20904 .mr(4)
20905 .nr(8)
20906 .kr(1)
20907 .sr(1)
20908 .m(4)
20909 .n(n)
20910 .k(k)
20911 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080020912 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020913 }
20914 }
20915 }
20916
20917 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, n_div_8_subtile) {
20918 TEST_REQUIRES_ARM_NEON;
20919 for (uint32_t n = 16; n <= 24; n += 8) {
20920 for (size_t k = 1; k <= 40; k += 9) {
20921 for (uint32_t m = 1; m <= 4; m++) {
20922 GemmMicrokernelTester()
20923 .mr(4)
20924 .nr(8)
20925 .kr(1)
20926 .sr(1)
20927 .m(m)
20928 .n(n)
20929 .k(k)
20930 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020931 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020932 }
20933 }
20934 }
20935 }
20936
20937 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
20938 TEST_REQUIRES_ARM_NEON;
20939 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080020940 for (uint32_t n = 1; n <= 8; n++) {
20941 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020942 GemmMicrokernelTester()
20943 .mr(4)
20944 .nr(8)
20945 .kr(1)
20946 .sr(1)
20947 .m(m)
20948 .n(n)
20949 .k(k)
20950 .cm_stride(11)
20951 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080020952 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020953 }
20954 }
20955 }
20956 }
20957
20958 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, qmin) {
20959 TEST_REQUIRES_ARM_NEON;
20960 GemmMicrokernelTester()
20961 .mr(4)
20962 .nr(8)
20963 .kr(1)
20964 .sr(1)
20965 .m(4)
20966 .n(8)
20967 .k(8)
20968 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020969 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020970 }
20971
20972 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, qmax) {
20973 TEST_REQUIRES_ARM_NEON;
20974 GemmMicrokernelTester()
20975 .mr(4)
20976 .nr(8)
20977 .kr(1)
20978 .sr(1)
20979 .m(4)
20980 .n(8)
20981 .k(8)
20982 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080020983 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020984 }
20985
20986 TEST(QS8_GEMM_MINMAX_RNDNU_4X8__NEON_MULL_ADDW_DUP, strided_cm) {
20987 TEST_REQUIRES_ARM_NEON;
20988 GemmMicrokernelTester()
20989 .mr(4)
20990 .nr(8)
20991 .kr(1)
20992 .sr(1)
20993 .m(4)
20994 .n(8)
20995 .k(8)
20996 .cm_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080020997 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_4x8__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080020998 }
20999#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21000
21001
21002#if XNN_ARCH_ARM || XNN_ARCH_ARM64
21003 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_eq_8) {
21004 TEST_REQUIRES_ARM_NEON;
21005 GemmMicrokernelTester()
21006 .mr(2)
21007 .nr(16)
21008 .kr(1)
21009 .sr(1)
21010 .m(2)
21011 .n(16)
21012 .k(8)
Marat Dukhan50323b82022-01-11 00:12:01 -080021013 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021014 }
21015
21016 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, strided_cn) {
21017 TEST_REQUIRES_ARM_NEON;
21018 GemmMicrokernelTester()
21019 .mr(2)
21020 .nr(16)
21021 .kr(1)
21022 .sr(1)
21023 .m(2)
21024 .n(16)
21025 .k(8)
21026 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021027 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021028 }
21029
21030 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_eq_8_strided_a) {
21031 TEST_REQUIRES_ARM_NEON;
21032 GemmMicrokernelTester()
21033 .mr(2)
21034 .nr(16)
21035 .kr(1)
21036 .sr(1)
21037 .m(2)
21038 .n(16)
21039 .k(8)
21040 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021041 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021042 }
21043
21044 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile) {
21045 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021046 for (uint32_t n = 1; n <= 16; n++) {
21047 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021048 GemmMicrokernelTester()
21049 .mr(2)
21050 .nr(16)
21051 .kr(1)
21052 .sr(1)
21053 .m(m)
21054 .n(n)
21055 .k(8)
21056 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021057 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021058 }
21059 }
21060 }
21061
21062 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_m) {
21063 TEST_REQUIRES_ARM_NEON;
21064 for (uint32_t m = 1; m <= 2; m++) {
21065 GemmMicrokernelTester()
21066 .mr(2)
21067 .nr(16)
21068 .kr(1)
21069 .sr(1)
21070 .m(m)
21071 .n(16)
21072 .k(8)
21073 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021074 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021075 }
21076 }
21077
21078 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_eq_8_subtile_n) {
21079 TEST_REQUIRES_ARM_NEON;
21080 for (uint32_t n = 1; n <= 16; n++) {
21081 GemmMicrokernelTester()
21082 .mr(2)
21083 .nr(16)
21084 .kr(1)
21085 .sr(1)
21086 .m(2)
21087 .n(n)
21088 .k(8)
21089 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021090 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021091 }
21092 }
21093
21094 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_lt_8) {
21095 TEST_REQUIRES_ARM_NEON;
21096 for (size_t k = 1; k < 8; k++) {
21097 GemmMicrokernelTester()
21098 .mr(2)
21099 .nr(16)
21100 .kr(1)
21101 .sr(1)
21102 .m(2)
21103 .n(16)
21104 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021105 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021106 }
21107 }
21108
21109 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_lt_8_strided_a) {
21110 TEST_REQUIRES_ARM_NEON;
21111 for (size_t k = 1; k < 8; k++) {
21112 GemmMicrokernelTester()
21113 .mr(2)
21114 .nr(16)
21115 .kr(1)
21116 .sr(1)
21117 .m(2)
21118 .n(16)
21119 .k(k)
21120 .a_stride(11)
Marat Dukhan50323b82022-01-11 00:12:01 -080021121 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021122 }
21123 }
21124
21125 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_lt_8_subtile) {
21126 TEST_REQUIRES_ARM_NEON;
21127 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021128 for (uint32_t n = 1; n <= 16; n++) {
21129 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021130 GemmMicrokernelTester()
21131 .mr(2)
21132 .nr(16)
21133 .kr(1)
21134 .sr(1)
21135 .m(m)
21136 .n(n)
21137 .k(k)
21138 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021139 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021140 }
21141 }
21142 }
21143 }
21144
21145 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_gt_8) {
21146 TEST_REQUIRES_ARM_NEON;
21147 for (size_t k = 9; k < 16; k++) {
21148 GemmMicrokernelTester()
21149 .mr(2)
21150 .nr(16)
21151 .kr(1)
21152 .sr(1)
21153 .m(2)
21154 .n(16)
21155 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021156 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021157 }
21158 }
21159
21160 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_gt_8_strided_a) {
21161 TEST_REQUIRES_ARM_NEON;
21162 for (size_t k = 9; k < 16; k++) {
21163 GemmMicrokernelTester()
21164 .mr(2)
21165 .nr(16)
21166 .kr(1)
21167 .sr(1)
21168 .m(2)
21169 .n(16)
21170 .k(k)
21171 .a_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021172 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021173 }
21174 }
21175
21176 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_gt_8_subtile) {
21177 TEST_REQUIRES_ARM_NEON;
21178 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021179 for (uint32_t n = 1; n <= 16; n++) {
21180 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021181 GemmMicrokernelTester()
21182 .mr(2)
21183 .nr(16)
21184 .kr(1)
21185 .sr(1)
21186 .m(m)
21187 .n(n)
21188 .k(k)
21189 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021190 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021191 }
21192 }
21193 }
21194 }
21195
21196 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_div_8) {
21197 TEST_REQUIRES_ARM_NEON;
21198 for (size_t k = 16; k <= 80; k += 8) {
21199 GemmMicrokernelTester()
21200 .mr(2)
21201 .nr(16)
21202 .kr(1)
21203 .sr(1)
21204 .m(2)
21205 .n(16)
21206 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021207 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021208 }
21209 }
21210
21211 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_div_8_strided_a) {
21212 TEST_REQUIRES_ARM_NEON;
21213 for (size_t k = 16; k <= 80; k += 8) {
21214 GemmMicrokernelTester()
21215 .mr(2)
21216 .nr(16)
21217 .kr(1)
21218 .sr(1)
21219 .m(2)
21220 .n(16)
21221 .k(k)
21222 .a_stride(83)
Marat Dukhan50323b82022-01-11 00:12:01 -080021223 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021224 }
21225 }
21226
21227 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, k_div_8_subtile) {
21228 TEST_REQUIRES_ARM_NEON;
21229 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021230 for (uint32_t n = 1; n <= 16; n++) {
21231 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021232 GemmMicrokernelTester()
21233 .mr(2)
21234 .nr(16)
21235 .kr(1)
21236 .sr(1)
21237 .m(m)
21238 .n(n)
21239 .k(k)
21240 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021241 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021242 }
21243 }
21244 }
21245 }
21246
21247 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_gt_16) {
21248 TEST_REQUIRES_ARM_NEON;
21249 for (uint32_t n = 17; n < 32; n++) {
21250 for (size_t k = 1; k <= 40; k += 9) {
21251 GemmMicrokernelTester()
21252 .mr(2)
21253 .nr(16)
21254 .kr(1)
21255 .sr(1)
21256 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021257 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021258 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021259 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021260 }
21261 }
21262 }
21263
21264 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_cn) {
21265 TEST_REQUIRES_ARM_NEON;
21266 for (uint32_t n = 17; n < 32; n++) {
21267 for (size_t k = 1; k <= 40; k += 9) {
21268 GemmMicrokernelTester()
21269 .mr(2)
21270 .nr(16)
21271 .kr(1)
21272 .sr(1)
21273 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021274 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021275 .k(k)
21276 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021277 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021278 }
21279 }
21280 }
21281
21282 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_gt_16_strided_a) {
21283 TEST_REQUIRES_ARM_NEON;
21284 for (uint32_t n = 17; n < 32; n++) {
21285 for (size_t k = 1; k <= 40; k += 9) {
21286 GemmMicrokernelTester()
21287 .mr(2)
21288 .nr(16)
21289 .kr(1)
21290 .sr(1)
21291 .m(2)
21292 .n(n)
21293 .k(k)
21294 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021295 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021296 }
21297 }
21298 }
21299
21300 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_gt_16_subtile) {
21301 TEST_REQUIRES_ARM_NEON;
21302 for (uint32_t n = 17; n < 32; n++) {
21303 for (size_t k = 1; k <= 40; k += 9) {
21304 for (uint32_t m = 1; m <= 2; m++) {
21305 GemmMicrokernelTester()
21306 .mr(2)
21307 .nr(16)
21308 .kr(1)
21309 .sr(1)
21310 .m(m)
21311 .n(n)
21312 .k(k)
21313 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021314 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021315 }
21316 }
21317 }
21318 }
21319
21320 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_div_16) {
21321 TEST_REQUIRES_ARM_NEON;
21322 for (uint32_t n = 32; n <= 48; n += 16) {
21323 for (size_t k = 1; k <= 40; k += 9) {
21324 GemmMicrokernelTester()
21325 .mr(2)
21326 .nr(16)
21327 .kr(1)
21328 .sr(1)
21329 .m(2)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021330 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021331 .k(k)
Marat Dukhan50323b82022-01-11 00:12:01 -080021332 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021333 }
21334 }
21335 }
21336
21337 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_cn) {
21338 TEST_REQUIRES_ARM_NEON;
21339 for (uint32_t n = 32; n <= 48; n += 16) {
21340 for (size_t k = 1; k <= 40; k += 9) {
21341 GemmMicrokernelTester()
21342 .mr(2)
21343 .nr(16)
21344 .kr(1)
21345 .sr(1)
21346 .m(2)
21347 .n(n)
21348 .k(k)
21349 .cn_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021350 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021351 }
21352 }
21353 }
21354
21355 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_div_16_strided_a) {
21356 TEST_REQUIRES_ARM_NEON;
21357 for (uint32_t n = 32; n <= 48; n += 16) {
21358 for (size_t k = 1; k <= 40; k += 9) {
21359 GemmMicrokernelTester()
21360 .mr(2)
21361 .nr(16)
21362 .kr(1)
21363 .sr(1)
21364 .m(2)
21365 .n(n)
21366 .k(k)
21367 .a_stride(43)
Marat Dukhan50323b82022-01-11 00:12:01 -080021368 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021369 }
21370 }
21371 }
21372
21373 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, n_div_16_subtile) {
21374 TEST_REQUIRES_ARM_NEON;
21375 for (uint32_t n = 32; n <= 48; n += 16) {
21376 for (size_t k = 1; k <= 40; k += 9) {
21377 for (uint32_t m = 1; m <= 2; m++) {
21378 GemmMicrokernelTester()
21379 .mr(2)
21380 .nr(16)
21381 .kr(1)
21382 .sr(1)
21383 .m(m)
21384 .n(n)
21385 .k(k)
21386 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021387 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021388 }
21389 }
21390 }
21391 }
21392
21393 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, strided_cm_subtile) {
21394 TEST_REQUIRES_ARM_NEON;
21395 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021396 for (uint32_t n = 1; n <= 16; n++) {
21397 for (uint32_t m = 1; m <= 2; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021398 GemmMicrokernelTester()
21399 .mr(2)
21400 .nr(16)
21401 .kr(1)
21402 .sr(1)
21403 .m(m)
21404 .n(n)
21405 .k(k)
21406 .cm_stride(19)
21407 .iterations(1)
Marat Dukhan50323b82022-01-11 00:12:01 -080021408 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021409 }
21410 }
21411 }
21412 }
21413
21414 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, qmin) {
21415 TEST_REQUIRES_ARM_NEON;
21416 GemmMicrokernelTester()
21417 .mr(2)
21418 .nr(16)
21419 .kr(1)
21420 .sr(1)
21421 .m(2)
21422 .n(16)
21423 .k(8)
21424 .qmin(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021425 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021426 }
21427
21428 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, qmax) {
21429 TEST_REQUIRES_ARM_NEON;
21430 GemmMicrokernelTester()
21431 .mr(2)
21432 .nr(16)
21433 .kr(1)
21434 .sr(1)
21435 .m(2)
21436 .n(16)
21437 .k(8)
21438 .qmax(128)
Marat Dukhan50323b82022-01-11 00:12:01 -080021439 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021440 }
21441
21442 TEST(QS8_GEMM_MINMAX_RNDNU_2X16__NEON_MULL_ADDW_DUP, strided_cm) {
21443 TEST_REQUIRES_ARM_NEON;
21444 GemmMicrokernelTester()
21445 .mr(2)
21446 .nr(16)
21447 .kr(1)
21448 .sr(1)
21449 .m(2)
21450 .n(16)
21451 .k(8)
21452 .cm_stride(19)
Marat Dukhan50323b82022-01-11 00:12:01 -080021453 .Test(xnn_qs8_gemm_minmax_rndnu_ukernel_2x16__neon_mull_addw_dup, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021454 }
21455#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
21456
21457
21458#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT
21459 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8) {
21460 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021461 GemmMicrokernelTester()
21462 .mr(4)
21463 .nr(8)
21464 .kr(1)
21465 .sr(1)
21466 .m(4)
21467 .n(8)
21468 .k(8)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021469 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021470 }
21471
21472 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cn) {
21473 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021474 GemmMicrokernelTester()
21475 .mr(4)
21476 .nr(8)
21477 .kr(1)
21478 .sr(1)
21479 .m(4)
21480 .n(8)
21481 .k(8)
21482 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021483 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021484 }
21485
21486 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_strided_a) {
21487 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021488 GemmMicrokernelTester()
21489 .mr(4)
21490 .nr(8)
21491 .kr(1)
21492 .sr(1)
21493 .m(4)
21494 .n(8)
21495 .k(8)
21496 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021497 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021498 }
21499
21500 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile) {
21501 TEST_REQUIRES_ARM_NEON;
Zhi An Ng83844ae2022-01-14 09:52:25 -080021502 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021503 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021504 GemmMicrokernelTester()
21505 .mr(4)
21506 .nr(8)
21507 .kr(1)
21508 .sr(1)
21509 .m(m)
21510 .n(n)
21511 .k(8)
21512 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021513 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021514 }
21515 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021516 }
21517
21518 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_m) {
21519 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021520 for (uint32_t m = 1; m <= 4; m++) {
21521 GemmMicrokernelTester()
21522 .mr(4)
21523 .nr(8)
21524 .kr(1)
21525 .sr(1)
21526 .m(m)
21527 .n(8)
21528 .k(8)
21529 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021530 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021531 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021532 }
21533
21534 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_eq_8_subtile_n) {
21535 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021536 for (uint32_t n = 1; n <= 8; n++) {
21537 GemmMicrokernelTester()
21538 .mr(4)
21539 .nr(8)
21540 .kr(1)
21541 .sr(1)
21542 .m(4)
21543 .n(n)
21544 .k(8)
21545 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021546 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021547 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021548 }
21549
21550 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8) {
21551 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021552 for (size_t k = 1; k < 8; k++) {
21553 GemmMicrokernelTester()
21554 .mr(4)
21555 .nr(8)
21556 .kr(1)
21557 .sr(1)
21558 .m(4)
21559 .n(8)
21560 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021561 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021562 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021563 }
21564
21565 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_strided_a) {
21566 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021567 for (size_t k = 1; k < 8; k++) {
21568 GemmMicrokernelTester()
21569 .mr(4)
21570 .nr(8)
21571 .kr(1)
21572 .sr(1)
21573 .m(4)
21574 .n(8)
21575 .k(k)
21576 .a_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021577 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021578 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021579 }
21580
21581 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_lt_8_subtile) {
21582 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021583 for (size_t k = 1; k < 8; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021584 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021585 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021586 GemmMicrokernelTester()
21587 .mr(4)
21588 .nr(8)
21589 .kr(1)
21590 .sr(1)
21591 .m(m)
21592 .n(n)
21593 .k(k)
21594 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021595 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021596 }
21597 }
21598 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021599 }
21600
21601 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8) {
21602 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021603 for (size_t k = 9; k < 16; k++) {
21604 GemmMicrokernelTester()
21605 .mr(4)
21606 .nr(8)
21607 .kr(1)
21608 .sr(1)
21609 .m(4)
21610 .n(8)
21611 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021612 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021613 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021614 }
21615
21616 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_strided_a) {
21617 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021618 for (size_t k = 9; k < 16; k++) {
21619 GemmMicrokernelTester()
21620 .mr(4)
21621 .nr(8)
21622 .kr(1)
21623 .sr(1)
21624 .m(4)
21625 .n(8)
21626 .k(k)
21627 .a_stride(19)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021628 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021629 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021630 }
21631
21632 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_gt_8_subtile) {
21633 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021634 for (size_t k = 9; k < 16; k++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021635 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021636 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021637 GemmMicrokernelTester()
21638 .mr(4)
21639 .nr(8)
21640 .kr(1)
21641 .sr(1)
21642 .m(m)
21643 .n(n)
21644 .k(k)
21645 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021646 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021647 }
21648 }
21649 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021650 }
21651
21652 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8) {
21653 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021654 for (size_t k = 16; k <= 80; k += 8) {
21655 GemmMicrokernelTester()
21656 .mr(4)
21657 .nr(8)
21658 .kr(1)
21659 .sr(1)
21660 .m(4)
21661 .n(8)
21662 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021663 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021664 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021665 }
21666
21667 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_strided_a) {
21668 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021669 for (size_t k = 16; k <= 80; k += 8) {
21670 GemmMicrokernelTester()
21671 .mr(4)
21672 .nr(8)
21673 .kr(1)
21674 .sr(1)
21675 .m(4)
21676 .n(8)
21677 .k(k)
21678 .a_stride(83)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021679 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021680 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021681 }
21682
21683 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, k_div_8_subtile) {
21684 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021685 for (size_t k = 16; k <= 80; k += 8) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021686 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021687 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021688 GemmMicrokernelTester()
21689 .mr(4)
21690 .nr(8)
21691 .kr(1)
21692 .sr(1)
21693 .m(m)
21694 .n(n)
21695 .k(k)
21696 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021697 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021698 }
21699 }
21700 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021701 }
21702
21703 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8) {
21704 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021705 for (uint32_t n = 9; n < 16; n++) {
21706 for (size_t k = 1; k <= 40; k += 9) {
21707 GemmMicrokernelTester()
21708 .mr(4)
21709 .nr(8)
21710 .kr(1)
21711 .sr(1)
21712 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021713 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021714 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021715 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021716 }
21717 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021718 }
21719
21720 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_cn) {
21721 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021722 for (uint32_t n = 9; n < 16; n++) {
21723 for (size_t k = 1; k <= 40; k += 9) {
21724 GemmMicrokernelTester()
21725 .mr(4)
21726 .nr(8)
21727 .kr(1)
21728 .sr(1)
21729 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021730 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021731 .k(k)
21732 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021733 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021734 }
21735 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021736 }
21737
21738 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_strided_a) {
21739 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021740 for (uint32_t n = 9; n < 16; n++) {
21741 for (size_t k = 1; k <= 40; k += 9) {
21742 GemmMicrokernelTester()
21743 .mr(4)
21744 .nr(8)
21745 .kr(1)
21746 .sr(1)
21747 .m(4)
21748 .n(n)
21749 .k(k)
21750 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021751 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021752 }
21753 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021754 }
21755
21756 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_gt_8_subtile) {
21757 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021758 for (uint32_t n = 9; n < 16; n++) {
21759 for (size_t k = 1; k <= 40; k += 9) {
21760 for (uint32_t m = 1; m <= 4; m++) {
21761 GemmMicrokernelTester()
21762 .mr(4)
21763 .nr(8)
21764 .kr(1)
21765 .sr(1)
21766 .m(m)
21767 .n(n)
21768 .k(k)
21769 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021770 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021771 }
21772 }
21773 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021774 }
21775
21776 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8) {
21777 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021778 for (uint32_t n = 16; n <= 24; n += 8) {
21779 for (size_t k = 1; k <= 40; k += 9) {
21780 GemmMicrokernelTester()
21781 .mr(4)
21782 .nr(8)
21783 .kr(1)
21784 .sr(1)
21785 .m(4)
Zhi An Ngaf9ff852022-01-13 10:48:37 -080021786 .n(n)
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021787 .k(k)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021788 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021789 }
21790 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021791 }
21792
21793 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_cn) {
21794 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021795 for (uint32_t n = 16; n <= 24; n += 8) {
21796 for (size_t k = 1; k <= 40; k += 9) {
21797 GemmMicrokernelTester()
21798 .mr(4)
21799 .nr(8)
21800 .kr(1)
21801 .sr(1)
21802 .m(4)
21803 .n(n)
21804 .k(k)
21805 .cn_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021806 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021807 }
21808 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021809 }
21810
21811 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_strided_a) {
21812 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021813 for (uint32_t n = 16; n <= 24; n += 8) {
21814 for (size_t k = 1; k <= 40; k += 9) {
21815 GemmMicrokernelTester()
21816 .mr(4)
21817 .nr(8)
21818 .kr(1)
21819 .sr(1)
21820 .m(4)
21821 .n(n)
21822 .k(k)
21823 .a_stride(43)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021824 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021825 }
21826 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021827 }
21828
21829 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, n_div_8_subtile) {
21830 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021831 for (uint32_t n = 16; n <= 24; n += 8) {
21832 for (size_t k = 1; k <= 40; k += 9) {
21833 for (uint32_t m = 1; m <= 4; m++) {
21834 GemmMicrokernelTester()
21835 .mr(4)
21836 .nr(8)
21837 .kr(1)
21838 .sr(1)
21839 .m(m)
21840 .n(n)
21841 .k(k)
21842 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021843 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021844 }
21845 }
21846 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021847 }
21848
21849 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm_subtile) {
21850 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021851 for (size_t k = 1; k <= 40; k += 9) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021852 for (uint32_t n = 1; n <= 8; n++) {
Zhi An Ng83844ae2022-01-14 09:52:25 -080021853 for (uint32_t m = 1; m <= 4; m++) {
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021854 GemmMicrokernelTester()
21855 .mr(4)
21856 .nr(8)
21857 .kr(1)
21858 .sr(1)
21859 .m(m)
21860 .n(n)
21861 .k(k)
21862 .cm_stride(11)
21863 .iterations(1)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021864 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021865 }
21866 }
21867 }
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021868 }
21869
21870 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmin) {
21871 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021872 GemmMicrokernelTester()
21873 .mr(4)
21874 .nr(8)
21875 .kr(1)
21876 .sr(1)
21877 .m(4)
21878 .n(8)
21879 .k(8)
21880 .qmin(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021881 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021882 }
21883
21884 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, qmax) {
21885 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021886 GemmMicrokernelTester()
21887 .mr(4)
21888 .nr(8)
21889 .kr(1)
21890 .sr(1)
21891 .m(4)
21892 .n(8)
21893 .k(8)
21894 .qmax(128)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021895 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021896 }
21897
21898 TEST(GENERATE_QS8_GEMM_RNDNU_4X8__AARCH32_NEON_MLAL_LANE_PRFM_LD64, strided_cm) {
21899 TEST_REQUIRES_ARM_NEON;
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021900 GemmMicrokernelTester()
21901 .mr(4)
21902 .nr(8)
21903 .kr(1)
21904 .sr(1)
21905 .m(4)
21906 .n(8)
21907 .k(8)
21908 .cm_stride(11)
Zhi An Ng0ec25cf2022-01-19 11:38:55 -080021909 .Test(xnn_generate_qs8_gemm_rndnu_ukernel_4x8__aarch32_neon_mlal_lane_prfm_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
Zhi An Ng4c1fd6f2022-01-10 19:35:06 -080021910 }
21911#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY && XNN_PLATFORM_JIT